Merge remote-tracking branch 'origin/kvx-work' into kvx-work-velus

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2021-04-19 18:17:38 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2021-04-19 18:17:38 +0200
commit: 54c05043c20a1d028d905d1a08d15a811542781d (patch)
tree: c7cf94daf31c08e850e2ff620ae7b64753c8d5d6
parent: e6714c8db021117c7bfbaf8fd102a07fc7b42692 (diff)
parent: a05f92785ffa93e4001d2a2e9a630351593fabc2 (diff)
download: compcert-kvx-54c05043c20a1d028d905d1a08d15a811542781d.tar.gz
compcert-kvx-54c05043c20a1d028d905d1a08d15a811542781d.zip
386 files changed, 41602 insertions, 11069 deletions
diff --git a/.gitignore b/.gitignore
index 1eb13a29..fa9b1c67 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@
 # Emacs saves
 *~
 # Executables and configuration
+/tools/compiler_expand
 /ccomp
 /ccomp.byte
 /ccomp.prof
@@ -31,10 +32,12 @@
 /Makefile.config
 /.merlin
 /_CoqProject
+/compile.pl
 # Generated files
 /.depend
 /.depend.extr
 /compcert.ini
+/compcert.config
 /x86/ConstpropOp.v
 /x86/SelectOp.v
 /x86/SelectLong.v
@@ -73,6 +76,7 @@
 /lib/Responsefile.ml
 /driver/Version.ml
 /driver/Compiler.v
+/extraction/extraction.v
 # Documentation
 /doc/coq2html
 /doc/coq2html.ml
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3b1a86fd..7f992502 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,7 +3,7 @@ stages:
 
 check-admitted:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - eval `opam config env`
     - opam update
@@ -22,7 +22,7 @@ check-admitted:
     
 build_x86_64:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - eval `opam config env`
     - opam update
@@ -43,7 +43,7 @@ build_x86_64:
 
 build_ia32:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-multilib
@@ -66,7 +66,7 @@ build_ia32:
 
 build_aarch64:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-aarch64-linux-gnu qemu-user
@@ -89,7 +89,7 @@ build_aarch64:
 
 build_arm:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-arm-linux-gnueabi qemu-user
@@ -113,7 +113,7 @@ build_arm:
 
 build_armhf:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-arm-linux-gnueabihf qemu-user
@@ -136,7 +136,7 @@ build_armhf:
 
 build_ppc:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user
@@ -157,7 +157,7 @@ build_ppc:
 
 build_ppc64:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-powerpc64-linux-gnu
@@ -178,7 +178,7 @@ build_ppc64:
 
 build_rv64:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
@@ -201,7 +201,7 @@ build_rv64:
 
 build_rv32:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
@@ -222,12 +222,12 @@ build_rv32:
 
 build_kvx:
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install sshpass openssh-client libzip4 lttng-tools liblttng-ctl-dev liblttng-ust-dev babeltrace
     - ./.download_from_Kalray.sh
-    - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl*
+    - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl* download/*flash-util* download/*barebox*
     - sudo dpkg -i download/*.deb
     - rm -rf download
     - eval `opam config env`
@@ -237,7 +237,7 @@ build_kvx:
     - source /opt/kalray/accesscore/kalray.sh && ./config_kvx.sh
     - source /opt/kalray/accesscore/kalray.sh && make -j "$NJOBS"
     - source /opt/kalray/accesscore/kalray.sh && make -C test CCOMPOPTS=-static SIMU='kvx-cluster -- ' EXECUTE='kvx-cluster -- ' all test
-    - source /opt/kalray/accesscore/kalray.sh && make -C test/monniaux/yarpgen TARGET_CC='kvx-cos-gcc' EXECUTE='kvx-cluster -- ' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+    - source /opt/kalray/accesscore/kalray.sh && ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='kvx-cos-gcc' EXECUTE='kvx-cluster -- ' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
   rules:
     - if: '$CI_COMMIT_BRANCH == "kvx-work"'
       when: always
@@ -249,12 +249,12 @@ build_kvx:
 
 pages: # TODO: change to "deploy" when "build" succeeds (or integrate with "build_kvx" above ?)
   stage: build
-  image: coqorg/coq:8.11.2-ocaml-4.09.1-flambda
+  image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
   before_script:
     - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
     - sudo apt-get -y install sshpass openssh-client libzip4 lttng-tools liblttng-ctl-dev liblttng-ust-dev babeltrace
     - ./.download_from_Kalray.sh
-    - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl*
+    - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl* download/*flash-util* download/*barebox*
     - sudo dpkg -i download/*.deb
     - rm -rf download
     - eval `opam config env`
diff --git a/Changelog b/Changelog
index 8cf4e548..f86691a6 100644
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,54 @@
+Release 3.8, 2020-11-16
+=======================
+
+New features:
+- Support `_Static_assert` from ISO C11.
+- Support `__builtin_constant_p` from GCC and Clang.
+- New port: x86 64 bits Windows with the Cygwin 64 environment.
+  (configure with target `x86_64-cygwin`).
+- The following built-in functions are now available for all ports:
+  `__builtin_sqrt`, `__builtin_fabsf`, and all variants of
+  `__builtin_clz` and `__builtin_ctz`.
+- Added `__builtin_fmin` and `__builtin_fmax` for AArch64.
+
+Removed features:
+- The x86 32 bits port is no longer supported under macOS.
+
+Compiler internals:
+- Simpler translation of CompCert C casts used for their effects but
+  not for their values.
+- Known builtins whose results are unused are eliminated earlier.
+- Improved error reporting for `++` and `--` applied to pointers to
+  incomplete types.
+- Improved error reporting for redefinitions and implicit definitions
+  of built-in functions.
+- Added formal semantics for some PowerPC built-ins.
+
+The clightgen tool:
+- New `-canonical-idents` mode, selected by default, to change the way
+  C identifiers are encoded as CompCert idents (positive numbers).
+  In `-canonical-idents` mode, a fixed one-to-one encoding is used
+  so that the same identifier occurring in different compilation units
+  encodes to the same number.
+- The `-short-idents` flag restores the previous encoding where
+  C identifiers are consecutively numbered in order of appearance,
+  causing the same identifier to have different numbers in different
+  compilation units.
+- Removed the automatic translation of annotation builtins to Coq
+  logical assertions, which was never used and possibly confusing.
+
+Coq and OCaml development:
+- Compatibility with Coq 8.12.1, 8.12.0, 8.11.2, 8.11.1.
+- Can use already-installed Flocq and MenhirLib libraries instead of their
+  local copies (options `-use-external-Flocq` and `-use-external-MenhirLib`
+  to the `configure` script).
+- Automatically build to OCaml bytecode on platforms where OCaml
+  native-code compilation is not available.
+- Install the `compcert.config` summary of configuration choices
+  in the same directory as the Coq development.
+- Updated the list of dual-licensed source files.
+
+
 Release 3.7, 2020-03-31
 =======================
 
diff --git a/LICENSE b/LICENSE
index 5a7ae79f..61b84219 100644
--- a/LICENSE
+++ b/LICENSE
@@ -46,8 +46,8 @@ option) any later version:
 
   all files in the exportclight/ directory
 
-  the Archi.v, CBuiltins.ml, and extractionMachdep.v files
-  in directories arm, powerpc, riscV, x86, x86_32, x86_64
+  the Archi.v, Builtins1.v, CBuiltins.ml, and extractionMachdep.v files
+  in directories aarch64, arm, powerpc, riscV, x86, x86_32, x86_64
 
   extraction/extraction.v
 
diff --git a/Makefile b/Makefile
index ba8add27..62635d70 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,12 @@
 #######################################################################
 
 include Makefile.config
+include VERSION
+
+BUILDVERSION ?= $(version)
+BUILDNR ?= $(buildnr)
+TAG ?= $(tag)
+BRANCH ?= $(branch)
 
 ifeq ($(wildcard $(ARCH)_$(BITSIZE)),)
 ARCHDIRS?=$(ARCH)
@@ -23,16 +29,27 @@ endif
 
 BACKENDLIB?=Asmgenproof0.v Asmgenproof1.v
 
-DIRS=lib common $(ARCHDIRS) backend cfrontend driver \
-  flocq/Core flocq/Prop flocq/Calc flocq/IEEE754 \
-  exportclight MenhirLib cparser
+DIRS := lib lib/Impure common $(ARCHDIRS) scheduling backend cfrontend driver \
+	exportclight cparser
+
+RECDIRS:=lib common $(ARCHDIRS) scheduling backend cfrontend driver flocq exportclight \
+  cparser
 
-RECDIRS=lib common $(ARCHDIRS) backend cfrontend driver flocq exportclight \
-  MenhirLib cparser
+COQINCLUDES := $(foreach d, $(DIRS), -R $(d) compcert.$(subst /,.,$d))
 
-COQINCLUDES=$(foreach d, $(RECDIRS), -R $(d) $(subst /,.,compcert.$(d)))
+ifeq ($(LIBRARY_FLOCQ),local)
+DIRS += flocq/Core flocq/Prop flocq/Calc flocq/IEEE754
+RECDIRS += flocq
+COQINCLUDES += -R flocq Flocq
+endif
 
-COQCOPTS ?= -w -undeclared-scope
+ifeq ($(LIBRARY_MENHIRLIB),local)
+DIRS += MenhirLib
+RECDIRS += MenhirLib
+COQINCLUDES += -R MenhirLib MenhirLib
+endif
+
+COQCOPTS ?= -w -undeclared-scope -w -omega-is-deprecated
 COQC="$(COQBIN)coqc" -q $(COQINCLUDES) $(COQCOPTS)
 COQDEP="$(COQBIN)coqdep" $(COQINCLUDES)
 COQDOC="$(COQBIN)coqdoc"
@@ -46,6 +63,7 @@ GPATH=$(DIRS)
 
 # Flocq
 
+ifeq ($(LIBRARY_FLOCQ),local)
 FLOCQ=\
   Raux.v Zaux.v Defs.v Digits.v Float_prop.v FIX.v FLT.v FLX.v FTZ.v \
   Generic_fmt.v Round_pred.v Round_NE.v Ulp.v Core.v \
@@ -53,6 +71,9 @@ FLOCQ=\
   Div_sqrt_error.v Mult_error.v Plus_error.v \
   Relative.v Sterbenz.v Round_odd.v Double_rounding.v \
   Binary.v Bits.v
+else
+FLOCQ=
+endif
 
 # General-purpose libraries (in lib/)
 
@@ -60,7 +81,9 @@ VLIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
   HashedSet.v \
   Iteration.v Zbits.v Integers.v Archi.v IEEE754_extra.v Floats.v \
   Parmov.v UnionFind.v Wfsimpl.v \
-  Postorder.v FSetAVLplus.v IntvSets.v Decidableplus.v BoolEqual.v
+  Postorder.v FSetAVLplus.v IntvSets.v Decidableplus.v BoolEqual.v \
+  ImpConfig.v  ImpExtern.v  ImpIO.v     ImpMonads.v \
+  ImpCore.v    ImpHCons.v   ImpLoops.v  ImpPrelude.v
 
 # Parts common to the front-ends and the back-end (in common/)
 
@@ -83,7 +106,7 @@ BACKEND=\
   Profiling.v Profilingproof.v \
   ProfilingExploit.v ProfilingExploitproof.v \
   Renumber.v Renumberproof.v \
-  Duplicate.v Duplicateproof.v \
+  Duplicate.v Duplicateproof.v Duplicatepasses.v \
   RTLtyping.v \
   Kildall.v Liveness.v \
   ValueDomain.v ValueAOp.v ValueAnalysis.v \
@@ -93,6 +116,7 @@ BACKEND=\
   CSE2deps.v CSE2depsproof.v \
   CSE2.v CSE2proof.v \
   CSE3analysis.v CSE3analysisproof.v CSE3.v CSE3proof.v \
+  KillUselessMoves.v KillUselessMovesproof.v \
   LICM.v LICMproof.v \
   NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \
   Unusedglob.v Unusedglobproof.v \
@@ -109,8 +133,16 @@ BACKEND=\
   Mach.v \
   Bounds.v Stacklayout.v Stacking.v Stackingproof.v \
   Asm.v Asmgen.v Asmgenproof.v Asmaux.v \
+	RTLpathSE_simplify.v \
   $(BACKENDLIB)
 
+SCHEDULING= \
+  RTLpathLivegenproof.v    RTLpathSE_simu_specs.v \
+  RTLpathLivegen.v         RTLpathSE_impl.v \
+  RTLpathproof.v           RTLpathSE_theory.v \
+  RTLpathSchedulerproof.v  RTLpath.v \
+  RTLpathScheduler.v       RTLpathWFcheck.v
+
 # C front-end modules (in cfrontend/)
 
 CFRONTEND=Ctypes.v Cop.v Csyntax.v Csem.v Ctyping.v Cstrategy.v Cexec.v \
@@ -126,9 +158,13 @@ PARSER=Cabs.v Parser.v
 
 # MenhirLib
 
+ifeq ($(LIBRARY_MENHIRLIB),local)
 MENHIRLIB=Alphabet.v Automaton.v Grammar.v Interpreter_complete.v \
   Interpreter_correct.v Interpreter.v Main.v Validator_complete.v \
   Validator_safe.v Validator_classes.v
+else
+MENHIRLIB=
+endif
 
 # Putting everything together (in driver/)
 
@@ -136,7 +172,7 @@ DRIVER=Compopts.v Compiler.v Complements.v
 
 # All source files
 
-FILES=$(VLIB) $(COMMON) $(BACKEND) $(CFRONTEND) $(DRIVER) $(FLOCQ) \
+FILES=$(VLIB) $(COMMON) $(BACKEND) $(SCHEDULING) $(CFRONTEND) $(DRIVER) $(FLOCQ) \
   $(MENHIRLIB) $(PARSER)
 
 # Generated source files
@@ -158,6 +194,9 @@ endif
 ifeq ($(CLIGHTGEN),true)
 	$(MAKE) clightgen
 endif
+ifeq ($(INSTALL_COQDEV),true)
+	$(MAKE) compcert.config
+endif
 
 proof: $(FILES:.v=.vo)
 
@@ -179,6 +218,10 @@ ccomp: .depend.extr compcert.ini driver/Version.ml FORCE
 ccomp.byte: .depend.extr compcert.ini driver/Version.ml FORCE
 	$(MAKE) -f Makefile.extr ccomp.byte
 
+# DM force compilation without checking dependencies
+ccomp.force: .depend.extr compcert.ini driver/Version.ml FORCE
+	$(MAKE) -f Makefile.extr ccomp.force
+
 clightgen: .depend.extr compcert.ini exportclight/Clightdefs.vo driver/Version.ml FORCE
 	$(MAKE) -f Makefile.extr clightgen
 clightgen.byte: .depend.extr compcert.ini exportclight/Clightdefs.vo driver/Version.ml FORCE
@@ -198,11 +241,25 @@ documentation: $(FILES)
           $(filter-out doc/coq2html cparser/Parser.v, $^)
 
 tools/ndfun: tools/ndfun.ml
+ifeq ($(OCAML_NATIVE_COMP),true)
 	ocamlopt -o tools/ndfun str.cmxa tools/ndfun.ml
-tools/modorder: tools/modorder.ml
-	ocamlopt -o tools/modorder str.cmxa tools/modorder.ml
+else
+	ocamlc -o tools/ndfun str.cma tools/ndfun.ml
+endif
+
 tools/compiler_expand: tools/compiler_expand.ml
+ifeq ($(OCAML_NATIVE_COMP),true)
 	ocamlopt -o $@ $+
+else
+	ocamlc -o $@ $+
+endif
+
+tools/modorder: tools/modorder.ml
+ifeq ($(OCAML_NATIVE_COMP),true)
+	ocamlopt -o tools/modorder str.cmxa tools/modorder.ml
+else
+	ocamlc -o tools/modorder str.cma tools/modorder.ml
+endif
 
 latexdoc:
 	cd doc; $(COQDOC) --latex -o doc/doc.tex -g $(FILES)
@@ -218,6 +275,15 @@ latexdoc:
 	@tools/ndfun $*.vp > $*.v || { rm -f $*.v; exit 2; }
 	@chmod a-w $*.v
 
+# this trick aims to allow extraction to depend on the target processor
+# (currently: export extra Coq-functions for OCaml code, depending on the target)
+extraction/extraction.v: Makefile extraction/extraction.vexpand
+	(echo "(* WARNING: this file is generated from extraction.vexpand *)"; \
+	 echo "(* by the Makefile -- target \"extraction/extraction.v\"   *)"; \
+	 cat extraction/extraction.vexpand; \
+	 echo "$(EXTRA_EXTRACTION)"; \
+	 echo ".") > extraction/extraction.v
+
 driver/Compiler.v: driver/Compiler.vexpand tools/compiler_expand
 	tools/compiler_expand driver/Compiler.vexpand $@
 
@@ -231,6 +297,7 @@ compcert.ini: Makefile.config
 	 echo "linker_options=$(CLINKER_OPTIONS)";\
          echo "arch=$(ARCH)"; \
          echo "model=$(MODEL)"; \
+	       echo "os=$(OS)"; \
          echo "abi=$(ABI)"; \
          echo "endianness=$(ENDIANNESS)"; \
          echo "system=$(SYSTEM)"; \
@@ -240,14 +307,29 @@ compcert.ini: Makefile.config
 	 echo "response_file_style=$(RESPONSEFILE)";) \
         > compcert.ini
 
+compcert.config: Makefile.config
+	(echo "# CompCert configuration parameters"; \
+        echo "COMPCERT_ARCH=$(ARCH)"; \
+        echo "COMPCERT_MODEL=$(MODEL)"; \
+        echo "COMPCERT_ABI=$(ABI)"; \
+        echo "COMPCERT_ENDIANNESS=$(ENDIANNESS)"; \
+        echo "COMPCERT_BITSIZE=$(BITSIZE)"; \
+        echo "COMPCERT_SYSTEM=$(SYSTEM)"; \
+        echo "COMPCERT_VERSION=$(BUILDVERSION)"; \
+        echo "COMPCERT_BUILDNR=$(BUILDNR)"; \
+        echo "COMPCERT_TAG=$(TAG)"; \
+        echo "COMPCERT_BRANCH=$(BRANCH)" \
+        ) > compcert.config
+
 driver/Version.ml: VERSION
-	cat VERSION \
-	| sed -e 's|\(.*\)=\(.*\)|let \1 = \"\2\"|g' \
-	>driver/Version.ml
+	(echo 'let version = "$(BUILDVERSION)"'; \
+         echo 'let buildnr = "$(BUILDNR)"'; \
+         echo 'let tag = "$(TAG)"'; \
+         echo 'let branch = "$(BRANCH)"') > driver/Version.ml
 
 cparser/Parser.v: cparser/Parser.vy
 	@rm -f $@
-	$(MENHIR) --coq --coq-lib-path compcert.MenhirLib --coq-no-version-check cparser/Parser.vy
+	$(MENHIR) --coq --coq-no-version-check cparser/Parser.vy
 	@chmod a-w $@
 
 depend: $(GENERATED) depend1
@@ -274,6 +356,7 @@ ifeq ($(INSTALL_COQDEV),true)
           install -m 0644 $$d/*.vo $(DESTDIR)$(COQDEVDIR)/$$d/; \
 	done
 	install -m 0644 ./VERSION $(DESTDIR)$(COQDEVDIR)
+	install -m 0644 ./compcert.config $(DESTDIR)$(COQDEVDIR)
 	@(echo "To use, pass the following to coq_makefile or add the following to _CoqProject:"; echo "-R $(COQDEVDIR) compcert") > $(DESTDIR)$(COQDEVDIR)/README
 endif
 
@@ -282,10 +365,10 @@ clean:
 	rm -f $(patsubst %, %/*.vo*, $(DIRS))
 	rm -f $(patsubst %, %/.*.aux, $(DIRS))
 	rm -rf doc/html doc/*.glob
-	rm -f driver/Version.ml
-	rm -f compcert.ini
-	rm -f extraction/STAMP extraction/*.ml extraction/*.mli .depend.extr
-	rm -f tools/ndfun tools/modorder tools/*.cm? tools/*.o
+	rm -f driver/Version.ml driver/Compiler.v
+	rm -f compcert.ini compcert.config
+	rm -f extraction/STAMP extraction/*.ml extraction/*.mli .depend.extr extraction/extraction.v
+	rm -f tools/ndfun tools/modorder tools/compiler_expand tools/*.cm? tools/*.o
 	rm -f $(GENERATED) .depend
 	rm -f .lia.cache
 	$(MAKE) -f Makefile.extr clean
@@ -305,6 +388,9 @@ check-proof: $(FILES)
 print-includes:
 	@echo $(COQINCLUDES)
 
+CoqProject:
+	@echo $(COQINCLUDES) > _CoqProject
+
 -include .depend
 
 FORCE:
diff --git a/Makefile.extr b/Makefile.extr
index 1f5e6aeb..6d8611a9 100644
--- a/Makefile.extr
+++ b/Makefile.extr
@@ -19,7 +19,8 @@ include Makefile.config
 
 #
 # Variables from Makefile.config:
-# -OCAML_OPT_COMP: can we use the native version
+# -OCAML_NATIVE_COMP: native-code compilation is supported
+# -OCAML_OPT_COMP: can we use the natively-compiled compilers
 # -COMPFLAGS: compile options
 # -LINK_OPT: additional linker flags for the native binary
 #
@@ -42,20 +43,29 @@ cparser/pre_parser_messages.ml:
 # Directories containing plain Caml code
 
 DIRS=extraction \
-  lib common $(ARCH) backend cfrontend cparser driver \
-  exportclight debug kvx/unittest kvx/abstractbb/Impure/ocaml
+  lib common $(ARCH) scheduling backend cfrontend cparser driver \
+  exportclight debug kvx/unittest lib/Impure/ocaml
 
 INCLUDES=$(patsubst %,-I %, $(DIRS))
 
 # Control of warnings:
 
-WARNINGS=-w +a-4-9-27-42 -strict-sequence -safe-string -warn-error +a #Deprication returns with ocaml 4.03
+# WARNINGS=-w +a-4-9-27-42 -strict-sequence -safe-string -warn-error +a #Deprication returns with ocaml 4.03
+WARNINGS=-w +a-4-9-27-42
+
 extraction/%.cmx: WARNINGS +=-w -20-27-32..34-39-41-44..45-60-67
 extraction/%.cmo: WARNINGS +=-w -20-27-32..34-39-41-44..45-60-67
 cparser/pre_parser.cmx: WARNINGS += -w -41
 cparser/pre_parser.cmo: WARNINGS += -w -41
 
-COMPFLAGS+=-g $(INCLUDES) -I "$(MENHIR_DIR)" $(WARNINGS)
+# Turn warnings into errors, but not for released tarballs
+ifeq ($(wildcard .git),.git)
+WARN_ERRORS=-warn-error +a
+else
+WARN_ERRORS=
+endif
+
+COMPFLAGS+=-g -strict-sequence -safe-string $(INCLUDES) -I "$(MENHIR_DIR)" $(WARNINGS) $(WARN_ERRORS)
 
 # Using .opt compilers if available
 
@@ -71,6 +81,7 @@ OCAMLDEP=ocamldep$(DOTOPT) -slash $(INCLUDES)
 
 OCAMLLEX=ocamllex -q
 MODORDER=tools/modorder .depend.extr
+COPY=cp
 
 PARSERS=cparser/pre_parser.mly
 LEXERS=cparser/Lexer.mll lib/Tokenize.mll \
@@ -88,19 +99,35 @@ ifeq ($(wildcard .depend.extr),.depend.extr)
 
 CCOMP_OBJS:=$(shell $(MODORDER) driver/Driver.cmx)
 
+ifeq ($(OCAML_NATIVE_COMP),true)
 ccomp: $(CCOMP_OBJS)
 	@echo "Linking $@"
 	@$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+
+else
+ccomp: ccomp.byte
+	@echo "Copying to $@"
+	@$(COPY) $+ $@
+endif
 
+# DM force compilation without checking dependencies
+ccomp.force:
+	$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $(CCOMP_OBJS)
+	
 ccomp.byte: $(CCOMP_OBJS:.cmx=.cmo)
 	@echo "Linking $@"
 	@$(OCAMLC) -o $@ $(LIBS_BYTE) $+
 
 CLIGHTGEN_OBJS:=$(shell $(MODORDER) exportclight/Clightgen.cmx)
 
+ifeq ($(OCAML_NATIVE_COMP),true)
 clightgen: $(CLIGHTGEN_OBJS)
 	@echo "Linking $@"
 	@$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+
+else
+clightgen: clightgen.byte
+	@echo "Copying to $@"
+	@$(COPY) $+ $@
+endif
 
 clightgen.byte: $(CLIGHTGEN_OBJS:.cmx=.cmo)
 	@echo "Linking $@"
diff --git a/MenhirLib/Alphabet.v b/MenhirLib/Alphabet.v
index 29070e3d..530e3b4a 100644
--- a/MenhirLib/Alphabet.v
+++ b/MenhirLib/Alphabet.v
@@ -11,7 +11,8 @@
 (*                                                                          *)
 (****************************************************************************)
 
-From Coq Require Import Omega List Syntax Relations RelationClasses.
+From Coq Require Import Omega List Relations RelationClasses.
+Import ListNotations.
 
 Local Obligation Tactic := intros.
 
diff --git a/MenhirLib/Grammar.v b/MenhirLib/Grammar.v
index a371318b..9be374e8 100644
--- a/MenhirLib/Grammar.v
+++ b/MenhirLib/Grammar.v
@@ -11,7 +11,8 @@
 (*                                                                          *)
 (****************************************************************************)
 
-From Coq Require Import List Syntax Orders.
+From Coq Require Import List Orders.
+Import ListNotations.
 Require Import Alphabet.
 
 (** The terminal non-terminal alphabets of the grammar. **)
diff --git a/MenhirLib/Interpreter.v b/MenhirLib/Interpreter.v
index 568597ba..07aeae5a 100644
--- a/MenhirLib/Interpreter.v
+++ b/MenhirLib/Interpreter.v
@@ -12,6 +12,7 @@
 (****************************************************************************)
 
 From Coq Require Import List Syntax.
+Import ListNotations.
 From Coq.ssr Require Import ssreflect.
 Require Automaton.
 Require Import Alphabet Grammar Validator_safe.
diff --git a/MenhirLib/Interpreter_complete.v b/MenhirLib/Interpreter_complete.v
index ec69592b..51f2524b 100644
--- a/MenhirLib/Interpreter_complete.v
+++ b/MenhirLib/Interpreter_complete.v
@@ -11,7 +11,8 @@
 (*                                                                          *)
 (****************************************************************************)
 
-From Coq Require Import List Syntax Arith.
+From Coq Require Import List Arith.
+Import ListNotations.
 From Coq.ssr Require Import ssreflect.
 Require Import Alphabet Grammar.
 Require Automaton Interpreter Validator_complete.
diff --git a/MenhirLib/Interpreter_correct.v b/MenhirLib/Interpreter_correct.v
index 1325f610..083be5b7 100644
--- a/MenhirLib/Interpreter_correct.v
+++ b/MenhirLib/Interpreter_correct.v
@@ -11,7 +11,8 @@
 (*                                                                          *)
 (****************************************************************************)
 
-From Coq Require Import List Syntax.
+From Coq Require Import List.
+Import ListNotations.
 Require Import Alphabet.
 Require Grammar Automaton Interpreter.
 From Coq.ssr Require Import ssreflect.
diff --git a/MenhirLib/Validator_complete.v b/MenhirLib/Validator_complete.v
index ebb74500..9ba3e53c 100644
--- a/MenhirLib/Validator_complete.v
+++ b/MenhirLib/Validator_complete.v
@@ -13,6 +13,7 @@
 
 From Coq Require Import List Syntax Derive.
 From Coq.ssr Require Import ssreflect.
+Import ListNotations.
 Require Automaton.
 Require Import Alphabet Validator_classes.
 
diff --git a/MenhirLib/Validator_safe.v b/MenhirLib/Validator_safe.v
index 628d2009..e7a54b47 100644
--- a/MenhirLib/Validator_safe.v
+++ b/MenhirLib/Validator_safe.v
@@ -12,6 +12,7 @@
 (****************************************************************************)
 
 From Coq Require Import List Syntax Derive.
+Import ListNotations.
 From Coq.ssr Require Import ssreflect.
 Require Automaton.
 Require Import Alphabet Validator_classes.
diff --git a/README.md b/README.md
index 59ff7447..3990048e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # CompCert
-The verified C compiler.
+The formally-verified C compiler.
 
 ## Overview
 The CompCert C verified compiler is a compiler for a large subset of the
@@ -13,28 +13,34 @@ source C code.
 
 For more information on CompCert (supported platforms, supported C
 features, installation instructions, using the compiler, etc), please
-refer to the [Web site](http://compcert.inria.fr/) and especially
-the [user's manual](http://compcert.inria.fr/man/).
+refer to the [Web site](https://compcert.org/) and especially
+the [user's manual](https://compcert.org/man/).
 
-## VERIMAG version
+## Verimag-Kalray version
 This is a special version with additions from Verimag and Kalray :
 
-	* Some general-purpose optimization phases (e.g. profiling).
-	* A backend for the KVX processor.
+* A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details.
+* Some general-purpose optimization phases (e.g. profiling).
+  * see [`PROFILING.md`](PROFILING.md) for details on the profiling system
 	
 The people responsible for this version are
 
-	* Sylvain Boulmé (Grenoble-INP, Verimag)
-	* David Monniaux (CNRS, Verimag)
-	* Cyril Six (Kalray)
-	
-See also `README_Kalray.md` and `PROFILING.md` and [the online documentation](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx).
+* Sylvain Boulmé (Grenoble-INP, Verimag)
+* David Monniaux (CNRS, Verimag)
+* Cyril Six (Kalray)
+
+## Papers, docs, etc on this CompCert version
+
+* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend
+(also on [YouTube if you need subtitles](https://www.youtube.com/watch?v=RAzMDS9OVSw)).
+* [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux.
+* [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx)
 
 ## License
 CompCert is not free software.  This non-commercial release can only
 be used for evaluation, research, educational and personal purposes.
 A commercial version of CompCert, without this restriction and with
-professional support, can be purchased from
+professional support and extra features, can be purchased from
 [AbsInt](https://www.absint.com).  See the file `LICENSE` for more
 information.
 
diff --git a/VERSION b/VERSION
index b60e8d9b..d5a86723 100644
--- a/VERSION
+++ b/VERSION
@@ -1,3 +1,4 @@
-version=3.7
+version=3.8
 buildnr=
 tag=
+branch=
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
index 7d7b6887..7f39d1fa 100644
--- a/aarch64/Archi.v
+++ b/aarch64/Archi.v
@@ -6,15 +6,17 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
 
 (** Architecture-dependent parameters for AArch64 *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := true.
 
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 47cd3051..067d32fb 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -10,13 +10,20 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Abstract syntax and semantics for AArch64 assembly language *)
+(** Abstract syntax and semantics for AArch64 assembly language 
+
+W.r.t Asmblock, this is a "flat" syntax and semantics of "aarch64" assembly:
+  - without the basic block structure
+  - without the hierarchy of instructions.
+
+*)
 
 Require Import Coqlib Zbits Maps.
 Require Import AST Integers Floats.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Locations Conventions.
 Require Stacklayout.
+Require Import OptionMonad.
 
 (** * Abstract syntax *)
 
@@ -43,6 +50,9 @@ Coercion RR1: ireg >-> iregsp.
 Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
 
+Lemma iregsp_eq: forall (x y: iregsp), {x=y} + {x<>y}.
+Proof. decide equality; apply ireg_eq. Defined.
+
 (** In assembly files, [Dn] denotes the low 64-bit of a vector register,
     and [Sn] the low 32 bits. *)
 
@@ -66,21 +76,29 @@ Inductive crbit: Type :=
 Lemma crbit_eq: forall (x y: crbit), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
 
+Inductive dreg : Type :=
+  | IR: iregsp -> dreg  (**r 64- or 32-bit integer registers *)
+  | FR: freg -> dreg.   (**r double- or single-precision float registers *)
+
 (** We model the following registers of the ARM architecture. *)
 
 Inductive preg: Type :=
-  | IR: ireg -> preg   (**r 64- or 32-bit integer registers *)
-  | FR: freg -> preg   (**r double- or single-precision float registers *)
-  | CR: crbit -> preg  (**r bits in the condition register *)
-  | SP: preg           (**r register X31 used as stack pointer *)
-  | PC: preg.          (**r program counter *)
-
-Coercion IR: ireg >-> preg.
-Coercion FR: freg >-> preg.
+  | DR: dreg -> preg     (** see dreg **)
+  | CR: crbit -> preg    (**r bits in the condition register *)
+  | PC: preg.            (**r program counter *)
+
+(* XXX: ireg no longer coerces to preg *)
+Coercion IR: iregsp >-> dreg.
+Coercion FR: freg >-> dreg.
+Coercion DR: dreg >-> preg.
+Definition SP:preg := XSP.
 Coercion CR: crbit >-> preg.
 
+Lemma dreg_eq: forall (x y: dreg), {x=y} + {x<>y}.
+Proof. decide equality. apply iregsp_eq. apply freg_eq. Defined.
+
 Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
-Proof. decide equality. apply ireg_eq. apply freg_eq. apply crbit_eq. Defined.
+Proof. decide equality. apply dreg_eq. apply crbit_eq. Defined.
 
 Module PregEq.
   Definition t := preg.
@@ -92,8 +110,6 @@ Module Pregmap := EMap(PregEq).
 Definition preg_of_iregsp (r: iregsp) : preg :=
   match r with RR1 r => IR r | XSP => SP end.
 
-Coercion preg_of_iregsp: iregsp >-> preg.
-
 (** Conventional name for return address ([RA]) *)
 
 Notation "'RA'" := X30 (only parsing) : asm.
@@ -177,14 +193,16 @@ Inductive instruction: Type :=
   | Pldrsh (sz: isize) (rd: ireg) (a: addressing)                     (**r load int16, sign-extend *)
   | Pldrzw (rd: ireg) (a: addressing)                                 (**r load int32, zero-extend to int64 *)
   | Pldrsw (rd: ireg) (a: addressing)                                 (**r load int32, sign-extend to int64 *)
-  | Pldp (rd1 rd2: ireg) (a: addressing)                               (**r load two int64 *)
+  | Pldpw (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing)   (**r load two int32 *)
+  | Pldpx (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing)   (**r load two int64 *)
   | Pstrw (rs: ireg) (a: addressing)                                  (**r store int32 *)
   | Pstrw_a (rs: ireg) (a: addressing)                                (**r store int32 as any32 *)
   | Pstrx (rs: ireg) (a: addressing)                                  (**r store int64 *)
   | Pstrx_a (rs: ireg) (a: addressing)                                (**r store int64 as any64 *)
   | Pstrb (rs: ireg) (a: addressing)                                  (**r store int8 *)
   | Pstrh (rs: ireg) (a: addressing)                                  (**r store int16 *)
-  | Pstp (rs1 rs2: ireg) (a: addressing)                              (**r store two int64 *)
+  | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing)   (**r store two int32 *)
+  | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing)   (**r store two int64 *)
   (** Integer arithmetic, immediate *)
   | Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z)              (**r addition *)
   | Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z)              (**r subtraction *)
@@ -237,6 +255,7 @@ Inductive instruction: Type :=
   | Pclz (sz: isize) (rd r1: ireg)                                    (**r count leading zero bits *)
   | Prev (sz: isize) (rd r1: ireg)                                    (**r reverse bytes *)
   | Prev16 (sz: isize) (rd r1: ireg)                                  (**r reverse bytes in each 16-bit word *)
+  | Prbit (sz: isize) (rd r1: ireg)                                   (**r reverse bits *)
   (** Conditional data processing *)
   | Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond)                      (**r int conditional move *)
   | Pcset (rd: ireg) (c: testcond)                                    (**r set to 1/0 if cond is true/false *)
@@ -254,9 +273,13 @@ Inductive instruction: Type :=
   | Pldrs (rd: freg) (a: addressing)                                  (**r load float32 (single precision) *)
   | Pldrd (rd: freg) (a: addressing)                                  (**r load float64 (double precision) *)
   | Pldrd_a (rd: freg) (a: addressing)                                (**r load float64 as any64 *)
+  | Pldps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing)   (**r load two float32 *)
+  | Pldpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing)   (**r load two float64 *)
   | Pstrs (rs: freg) (a: addressing)                                  (**r store float32 *)
   | Pstrd (rs: freg) (a: addressing)                                  (**r store float64 *)
   | Pstrd_a (rs: freg) (a: addressing)                                (**r store float64 as any64 *)
+  | Pstps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing)   (**r store two float32 *)
+  | Pstpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing)   (**r store two float64 *)
   (** Floating-point move *)
   | Pfmov (rd r1: freg)
   | Pfmovimms (rd: freg) (f: float32)                                 (**r load float32 constant *)
@@ -282,6 +305,8 @@ Inductive instruction: Type :=
   | Pfmsub (sz: fsize) (rd r1 r2 r3: freg)                            (**r [rd = r3 - r1 * r2] *)
   | Pfnmadd (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 - r1 * r2] *)
   | Pfnmsub (sz: fsize) (rd r1 r2 r3: freg)                           (**r [rd = - r3 + r1 * r2] *)
+  | Pfmax (sz: fsize) (rd r1 r2: freg)                                (**r maximum *)
+  | Pfmin (sz: fsize) (rd r1 r2: freg)                                (**r minimum *)
   (** Floating-point comparison *)
   | Pfcmp (sz: fsize) (r1 r2: freg)                                   (**r compare [r1] and [r2] *)
   | Pfcmp0 (sz: fsize) (r1: freg)                                     (**r compare [r1] and [+0.0] *)
@@ -307,9 +332,30 @@ Definition code := list instruction.
 Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
 Definition fundef := AST.fundef function.
 Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+(** The two functions below axiomatize how the linker processes
+  symbolic references [symbol + offset].  It computes the
+  difference between the address and the PC, and splits it into:
+  - 12 low bits usable as an offset in an addressing mode;
+  - 21 high bits usable as argument to the ADRP instruction.
+
+  In CompCert's model, we cannot really describe PC-relative addressing,
+  but we can claim that the address of [symbol + offset] decomposes
+  as the sum of
+  - a low part, usable as an offset in an addressing mode;
+  - a high part, usable as argument to the ADRP instruction. *)
+
+Parameter symbol_low: genv -> ident -> ptrofs -> val.
+Parameter symbol_high: genv -> ident -> ptrofs -> val.
+
+Axiom symbol_high_low:
+  forall (ge: genv) (id: ident) (ofs: ptrofs),
+  Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs.
 
 (** * Operational semantics *)
 
+
 (** The semantics operates over a single mapping from registers
   (type [preg]) to values.  We maintain (but do not enforce)
   the convention that integer registers are mapped to values of
@@ -317,22 +363,19 @@ Definition program := AST.program fundef unit.
   and condition bits to either [Vzero] or [Vone]. *)
 
 Definition regset := Pregmap.t val.
-Definition genv := Genv.t fundef unit.
 
 (** The value of an [ireg0] is either the value of the integer register,
     or 0. *)
 
-Definition ir0w (rs: regset) (r: ireg0) : val :=
-  match r with RR0 r => rs (IR r) | XZR => Vint Int.zero end.
-Definition ir0x (rs: regset) (r: ireg0) : val :=
-  match r with RR0 r => rs (IR r) | XZR => Vlong Int64.zero end.
+Definition ir0 (is:isize) (rs: regset) (r: ireg0) : val :=
+  match r with RR0 r => rs (IR r) | XZR => if is (* is W *) then Vint Int.zero else Vlong Int64.zero end.
 
 (** Concise notations for accessing and updating the values of registers. *)
 
 Notation "a # b" := (a b) (at level 1, only parsing) : asm.
 Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm.
-Notation "a ## b" := (ir0w a b) (at level 1, only parsing) : asm.
-Notation "a ### b" := (ir0x a b) (at level 1, only parsing) : asm.
+Notation "a ## b" := (ir0 W a b) (at level 1, only parsing) : asm.
+Notation "a ### b" := (ir0 X a b) (at level 1, only parsing) : asm.
 
 Open Scope asm.
 
@@ -366,84 +409,16 @@ Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset :=
   | BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs)
   end.
 
-(** The two functions below axiomatize how the linker processes
-  symbolic references [symbol + offset].  It computes the
-  difference between the address and the PC, and splits it into:
-  - 12 low bits usable as an offset in an addressing mode;
-  - 21 high bits usable as argument to the ADRP instruction.
-
-  In CompCert's model, we cannot really describe PC-relative addressing,
-  but we can claim that the address of [symbol + offset] decomposes
-  as the sum of
-  - a low part, usable as an offset in an addressing mode;
-  - a high part, usable as argument to the ADRP instruction. *)
-
-Parameter symbol_low: genv -> ident -> ptrofs -> val.
-Parameter symbol_high: genv -> ident -> ptrofs -> val.
-
-Axiom symbol_high_low:
-  forall (ge: genv) (id: ident) (ofs: ptrofs),
-  Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs.
-
-Section RELSEM.
-
-Variable ge: genv.
-
-(** Looking up instructions in a code sequence by position. *)
-
-Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction :=
-  match c with
-  | nil => None
-  | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il
-  end.
-
-(** Position corresponding to a label *)
-
-Definition is_label (lbl: label) (instr: instruction) : bool :=
-  match instr with
-  | Plabel lbl' => if peq lbl lbl' then true else false
-  | _ => false
-  end.
-
-Lemma is_label_correct:
-  forall lbl instr,
-  if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
-Proof.
-  intros.  destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence.
-Qed.
-
-Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
-  match c with
-  | nil => None
-  | instr :: c' =>
-      if is_label lbl instr then Some (pos + 1) else label_pos lbl (pos + 1) c'
-  end.
-
 (** The semantics is purely small-step and defined as a function
   from the current state (a register set + a memory state)
   to either [Next rs' m'] where [rs'] and [m'] are the updated register
   set and memory state after execution of the instruction at [rs#PC],
   or [Stuck] if the processor is stuck. *)
 
-Inductive outcome: Type :=
-  | Next: regset -> mem -> outcome
-  | Stuck: outcome.
-
-(** Manipulations over the [PC] register: continuing with the next
-  instruction ([nextinstr]) or branching to a label ([goto_label]). *)
-
-Definition nextinstr (rs: regset) :=
-  rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
-
-Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
-  match label_pos lbl 0 (fn_code f) with
-  | None => Stuck
-  | Some pos =>
-      match rs#PC with
-      | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
-      | _ => Stuck
-    end
-  end.
+Record state: Type := State { _rs: regset; _m: mem }.
+Definition outcome := option state.
+Definition Next rs m: outcome := Some (State rs m).
+Definition Stuck: outcome := None.
 
 (** Testing a condition *)
 
@@ -513,10 +488,10 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
 
 (** Integer "is zero?" test *)
 
-Definition eval_testzero (sz: isize) (v: val) (m: mem): option bool :=
+Definition eval_testzero (sz: isize) (v: val): option bool :=
   match sz with
-  | W => Val.cmpu_bool (Mem.valid_pointer m) Ceq v (Vint Int.zero)
-  | X => Val.cmplu_bool (Mem.valid_pointer m) Ceq v (Vlong Int64.zero)
+  | W => Val.mxcmpu_bool Ceq v (Vint Int.zero)
+  | X => Val.mxcmplu_bool Ceq v (Vlong Int64.zero)
   end.
 
 (** Integer "bit is set?" test *)
@@ -527,48 +502,19 @@ Definition eval_testbit (sz: isize) (v: val) (n: int): option bool :=
   | X => Val.cmpl_bool Cne (Val.andl v (Vlong (Int64.shl' Int64.one n))) (Vlong Int64.zero)
   end.
 
-(** Evaluating an addressing mode *)
-
-Definition eval_addressing (a: addressing) (rs: regset): val :=
-  match a with
-  | ADimm base n => Val.addl rs#base (Vlong n)
-  | ADreg base r => Val.addl rs#base rs#r
-  | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
-  | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
-  | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
-  | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
-  | ADpostincr base n => Vundef (* not modeled yet *)
-  end.
-
-(** Auxiliaries for memory accesses *)
-
-Definition exec_load (chunk: memory_chunk) (transf: val -> val)
-                     (a: addressing) (r: preg) (rs: regset) (m: mem) :=
-  match Mem.loadv chunk m (eval_addressing a rs) with
-  | None => Stuck
-  | Some v => Next (nextinstr (rs#r <- (transf v))) m
-  end.
-
-Definition exec_store (chunk: memory_chunk)
-                      (a: addressing) (v: val)
-                      (rs: regset) (m: mem) :=
-  match Mem.storev chunk m (eval_addressing a rs) v with
-  | None => Stuck
-  | Some m' => Next (nextinstr rs) m'
-  end.
 
 (** Comparisons *)
 
-Definition compare_int (rs: regset) (v1 v2: val) (m: mem) :=
+Definition compare_int (rs: regset) (v1 v2: val) :=
   rs#CN <- (Val.negative (Val.sub v1 v2))
-    #CZ <- (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
-    #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
+    #CZ <- (Val.mxcmpu Ceq v1 v2)
+    #CC <- (Val.mxcmpu Cge v1 v2)
     #CV <- (Val.sub_overflow v1 v2).
 
-Definition compare_long (rs: regset) (v1 v2: val) (m: mem) :=
+Definition compare_long (rs: regset) (v1 v2: val) :=
   rs#CN <- (Val.negativel (Val.subl v1 v2))
-    #CZ <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
-    #CC <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
+    #CZ <- (Val.mxcmplu Ceq v1 v2)
+    #CC <- (Val.mxcmplu Cge v1 v2)
     #CV <- (Val.subl_overflow v1 v2).
 
 (** Semantics of [fcmp] instructions:
@@ -669,19 +615,230 @@ Definition float64_of_bits (v: val): val :=
   | _ => Vundef
   end.
 
-(** Execution of a single instruction [i] in initial state
-    [rs] and [m].  Return updated state.  For instructions
-    that correspond to actual AArch64 instructions, the cases are
-    straightforward transliterations of the informal descriptions
-    given in the ARMv8 reference manuals.  For pseudo-instructions,
-    refer to the informal descriptions given above.
-
-    Note that we set to [Vundef] the registers used as temporaries by
-    the expansions of the pseudo-instructions, so that the code we
-    generate cannot use those registers to hold values that must
-    survive the execution of the pseudo-instruction.
+(* Recognition of FP numbers that are supported by the fmov #imm instructions:
+   "a normalized binary floating point encoding with 1 sign bit,
+    4 bits of fraction and a 3-bit exponent"
 *)
 
+Definition is_immediate_float64 (f: float): bool :=
+  let bits := Float.to_bits f in
+  let exp :=
+    Int64.sub
+      (Int64.and (Int64.shr' bits (Int.repr 52))
+        (Int64.repr 2047)) (Int64.repr 1023) in
+  let mant :=
+    Int64.and bits (Int64.repr 4503599627370495) in
+    andb (Int64.cmp Cge exp (Int64.repr (-3)))
+    (andb (Int64.cmp Cle exp (Int64.repr 4))
+      (Int64.eq
+        (Int64.and mant
+          (Int64.repr 4222124650659840)) mant)).
+
+Definition is_immediate_float32 (f: float32): bool :=
+  let bits := Float32.to_bits f in
+  let exp :=
+    Int.sub
+      (Int.and (Int.shr bits (Int.repr 23))
+        (Int.repr 255)) (Int.repr 127) in
+  let mant :=
+    Int.and bits (Int.repr 8388607) in
+    andb (Int.cmp Cge exp (Int.repr (-3)))
+    (andb (Int.cmp Cle exp (Int.repr 4))
+      (Int.eq
+        (Int.and mant
+          (Int.repr 7864320)) mant)).
+
+(** Translation of the LTL/Linear/Mach view of machine registers
+  to the AArch64 view.  Note that no LTL register maps to [X16],
+  [X18], nor [X30].
+  [X18] is reserved as the platform register and never used by the
+  code generated by CompCert.
+  [X30] is used for the return address, and can also be used as temporary.
+  [X16] can be used as temporary. *)
+
+Definition dreg_of (r: mreg) : dreg :=
+  match r with
+  | R0 => X0   | R1 => X1   | R2 => X2   | R3 => X3
+  | R4 => X4   | R5 => X5   | R6 => X6   | R7 => X7
+  | R8 => X8   | R9 => X9   | R10 => X10 | R11 => X11
+  | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
+  | R17 => X17 | R19 => X19
+  | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
+  | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
+  | R28 => X28 | R29 => X29
+  | F0 => D0   | F1 => D1   | F2 => D2   | F3 => D3
+  | F4 => D4   | F5 => D5   | F6 => D6   | F7 => D7
+  | F8 => D8   | F9 => D9   | F10 => D10 | F11 => D11
+  | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15
+  | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19
+  | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23
+  | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27
+  | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31
+  end.
+
+Definition preg_of (r: mreg) : preg :=
+  dreg_of r.
+
+(** Undefine all registers except SP and callee-save registers *)
+
+Definition undef_caller_save_regs (rs: regset) : regset :=
+  fun r =>
+    if preg_eq r SP
+    || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
+    then rs r
+    else Vundef.
+
+(** Extract the values of the arguments of an external call.
+    We exploit the calling conventions from module [Conventions], except that
+    we use AArch64 registers instead of locations. *)
+
+Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
+  | extcall_arg_reg: forall r,
+      extcall_arg rs m (R r) (rs (preg_of r))
+  | extcall_arg_stack: forall ofs ty bofs v,
+      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
+      Mem.loadv (chunk_of_type ty) m
+                (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
+      extcall_arg rs m (Locations.S Outgoing ofs ty) v.
+
+Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
+  | extcall_arg_one: forall l v,
+      extcall_arg rs m l v ->
+      extcall_arg_pair rs m (One l) v
+  | extcall_arg_twolong: forall hi lo vhi vlo,
+      extcall_arg rs m hi vhi ->
+      extcall_arg rs m lo vlo ->
+      extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
+
+Definition extcall_arguments
+    (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
+  list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
+
+Definition loc_external_result (sg: signature) : rpair preg :=
+  map_rpair preg_of (loc_result sg).
+
+(** Looking up instructions in a code sequence by position. *)
+
+Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction :=
+  match c with
+  | nil => None
+  | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il
+  end.
+
+(** Position corresponding to a label *)
+
+Definition is_label (lbl: label) (instr: instruction) : bool :=
+  match instr with
+  | Plabel lbl' => if peq lbl lbl' then true else false
+  | _ => false
+  end.
+
+Lemma is_label_correct:
+  forall lbl instr,
+  if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
+Proof.
+  intros.  destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence.
+Qed.
+
+Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
+  match c with
+  | nil => None
+  | instr :: c' =>
+      if is_label lbl instr then Some pos else label_pos lbl (pos + 1) c'
+  end.
+
+Definition nextinstr (rs: regset) :=
+  rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
+  match label_pos lbl 0 (fn_code f) with
+  | None => Stuck
+  | Some pos =>
+      match rs#PC with
+      | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
+      | _ => Stuck
+    end
+  end.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+(** Evaluating an addressing mode *)
+
+Definition eval_addressing (a: addressing) (rs: regset): val :=
+  match a with
+  | ADimm base n => Val.addl rs#base (Vlong n)
+  | ADreg base r => Val.addl rs#base rs#r
+  | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
+  | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
+  | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
+  | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
+  | ADpostincr base n => Vundef
+  end.
+
+Definition is_pair_addressing_mode_correct (a: addressing): bool :=
+  match a with
+  | ADimm _ _ => true
+  | _ => false
+  end.
+
+Definition get_offset_addr (a: addressing) (ofs: Z) : addressing :=
+  match a with
+  | ADimm base n => (ADimm base (Int64.add n (Int64.repr ofs)))
+  | _ => a
+  end.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load (chunk: memory_chunk) (transf: val -> val)
+                     (a: addressing) (r: preg) (rs: regset) (m: mem) :=
+  match Mem.loadv chunk m (eval_addressing a rs) with
+  | None => Stuck
+  | Some v => Next (nextinstr (rs#r <- (transf v))) m
+  end.
+
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
+                     (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
+  if is_pair_addressing_mode_correct a then
+    let addr := (eval_addressing a rs) in
+    let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+    let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+        match Mem.loadv chk1 m addr with
+        | None => Stuck
+        | Some v1 =>
+            match Mem.loadv chk2 m addr' with
+            | None => Stuck
+            | Some v2 =>
+                Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m
+            end
+        end
+  else Stuck.
+
+Definition exec_store (chunk: memory_chunk)
+                      (a: addressing) (v: val)
+                      (rs: regset) (m: mem) :=
+  match Mem.storev chunk m (eval_addressing a rs) v with
+  | None => Stuck
+  | Some m' => Next (nextinstr rs) m'
+  end.
+
+Definition exec_store_double (chk1 chk2: memory_chunk)
+                      (a: addressing) (v1 v2: val)
+                      (rs: regset) (m: mem) :=
+  if is_pair_addressing_mode_correct a then
+    let addr := (eval_addressing a rs) in
+    let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+    let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+      match Mem.storev chk1 m addr v1 with
+      | None => Stuck
+      | Some m' => match Mem.storev chk2 m' addr' v2 with
+                   | None => Stuck
+                   | Some m'' => Next (nextinstr rs) m''
+                   end
+      end
+  else Stuck.
+
 Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome :=
   match i with
   (** Branches *)
@@ -704,13 +861,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pret r =>
       Next (rs#PC <- (rs#r)) m
   | Pcbnz sz r lbl =>
-      match eval_testzero sz rs#r m with
+      match eval_testzero sz rs#r with
       | Some true => Next (nextinstr rs) m
       | Some false => goto_label f lbl rs m
       | None => Stuck
       end
   | Pcbz sz r lbl =>
-      match eval_testzero sz rs#r m with
+      match eval_testzero sz rs#r with
       | Some true => goto_label f lbl rs m
       | Some false => Next (nextinstr rs) m
       | None => Stuck
@@ -778,13 +935,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Psubimm X rd r1 n =>
       Next (nextinstr (rs#rd <- (Val.subl rs#r1 (Vlong (Int64.repr n))))) m
   | Pcmpimm W r1 n =>
-      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)) m)) m
+      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)))) m
   | Pcmpimm X r1 n =>
-      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)) m)) m
+      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)))) m
   | Pcmnimm W r1 n =>
-      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))) m)) m
+      Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))))) m
   | Pcmnimm X r1 n =>
-      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))) m)) m
+      Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))))) m
   (** Move integer register *)
   | Pmov rd r1 =>
       Next (nextinstr (rs#rd <- (rs#r1))) m
@@ -802,9 +959,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Porrimm X rd r1 n =>
       Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Vlong (Int64.repr n))))) m
   | Ptstimm W r1 n =>
-      Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero) m)) m
+      Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero))) m
   | Ptstimm X r1 n =>
-      Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero) m)) m
+      Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero))) m
   (** Move wide immediate *)
   | Pmovz W rd n pos =>
       Next (nextinstr (rs#rd <- (Vint (Int.repr (Z.shiftl n pos))))) m
@@ -850,22 +1007,22 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Psub X rd r1 r2 s =>
       Next (nextinstr (rs#rd <- (Val.subl rs###r1 (eval_shift_op_long rs#r2 s)))) m
   | Pcmp W r1 r2 s =>
-      Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s) m)) m
+      Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s))) m
   | Pcmp X r1 r2 s =>
-      Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s) m)) m
+      Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s))) m
   | Pcmn W r1 r2 s =>
-      Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)) m)) m
+      Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)))) m
   | Pcmn X r1 r2 s =>
-      Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)) m)) m
+      Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)))) m
   (** Integer arithmetic, extending register *)
   | Paddext rd r1 r2 x =>
       Next (nextinstr (rs#rd <- (Val.addl rs#r1 (eval_extend rs#r2 x)))) m
   | Psubext rd r1 r2 x =>
       Next (nextinstr (rs#rd <- (Val.subl rs#r1 (eval_extend rs#r2 x)))) m
   | Pcmpext r1 r2 x =>
-      Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x) m)) m
+      Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x))) m
   | Pcmnext r1 r2 x =>
-      Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)) m)) m
+      Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)))) m
   (** Logical, shifted register *)
   | Pand W rd r1 r2 s =>
       Next (nextinstr (rs#rd <- (Val.and rs##r1 (eval_shift_op_int rs#r2 s)))) m
@@ -892,9 +1049,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Porn X rd r1 r2 s =>
       Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m
   | Ptst W r1 r2 s =>
-      Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero) m)) m
+      Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero))) m
   | Ptst X r1 r2 s =>
-      Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero) m)) m
+      Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero))) m
   (** Variable shifts *)
   | Pasrv W rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2))) m
@@ -966,10 +1123,16 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   (** Floating-point move *)
   | Pfmov rd r1 =>
       Next (nextinstr (rs#rd <- (rs#r1))) m
-  | Pfmovimms rd f =>
-      Next (nextinstr (rs#rd <- (Vsingle f))) m
+  | Pfmovimms rd f => 
+      if is_immediate_float32 f then
+        Next (nextinstr (rs#rd <- (Vsingle f))) m
+      else
+        Next (nextinstr ((rs#rd <- (Vsingle f))#X16 <- Vundef)) m
   | Pfmovimmd rd f =>
-      Next (nextinstr (rs#rd <- (Vfloat f))) m
+      if is_immediate_float64 f then
+        Next (nextinstr (rs#rd <- (Vfloat f))) m
+      else
+        Next (nextinstr ((rs#rd <- (Vfloat f))#X16 <- Vundef)) m
   | Pfmovi S rd r1 =>
       Next (nextinstr (rs#rd <- (float32_of_bits rs##r1))) m
   | Pfmovi D rd r1 =>
@@ -1094,104 +1257,52 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       | Vint n =>
           match list_nth_z tbl (Int.unsigned n) with
           | None => Stuck
-          | Some lbl => goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+          | Some lbl => goto_label f lbl (rs#X16 <- Vundef) m
           end
       | _ => Stuck
       end
   | Pbuiltin ef args res => Stuck    (**r treated specially below *)
+  (** loads and stores pairs int/int64 *)
+  | Pldpw rd1 rd2 chk1 chk2 a =>
+      exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+  | Pldpx rd1 rd2 chk1 chk2 a =>
+      exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+  | Pstpw rs1 rs2 chk1 chk2 a =>
+      exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+  | Pstpx rs1 rs2 chk1 chk2 a =>
+      exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+  (** loads and stores pairs floating-point *)
+  | Pldps rd1 rd2 chk1 chk2 a =>
+      exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+  | Pldpd rd1 rd2 chk1 chk2 a =>
+      exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+  | Pstps rs1 rs2 chk1 chk2 a =>
+      exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+  | Pstpd rs1 rs2 chk1 chk2 a =>
+      exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+  | Pnop => Next (nextinstr rs) m
   (** The following instructions and directives are not generated directly
       by Asmgen, so we do not model them. *)
-  | Pldp _ _ _
-  | Pstp _ _ _
   | Pcls _ _ _
   | Pclz _ _ _
   | Prev _ _ _
   | Prev16 _ _ _
+  | Prbit _ _ _
   | Pfsqrt _ _ _
   | Pfmadd _ _ _ _ _
   | Pfmsub _ _ _ _ _
   | Pfnmadd _ _ _ _ _
   | Pfnmsub _ _ _ _ _
-  | Pnop
+  | Pfmax _ _ _ _
+  | Pfmin _ _ _ _
   | Pcfi_adjust _ 
   | Pcfi_rel_offset _ =>
       Stuck
   end.
 
-(** Translation of the LTL/Linear/Mach view of machine registers
-  to the AArch64 view.  Note that no LTL register maps to [X16],
-  [X18], nor [X30].
-  [X18] is reserved as the platform register and never used by the
-  code generated by CompCert.
-  [X30] is used for the return address, and can also be used as temporary.
-  [X16] can be used as temporary. *)
-
-Definition preg_of (r: mreg) : preg :=
-  match r with
-  | R0 => X0   | R1 => X1   | R2 => X2   | R3 => X3
-  | R4 => X4   | R5 => X5   | R6 => X6   | R7 => X7
-  | R8 => X8   | R9 => X9   | R10 => X10 | R11 => X11
-  | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
-  | R17 => X17 | R19 => X19
-  | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
-  | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
-  | R28 => X28 | R29 => X29
-  | F0 => D0   | F1 => D1   | F2 => D2   | F3 => D3
-  | F4 => D4   | F5 => D5   | F6 => D6   | F7 => D7
-  | F8 => D8   | F9 => D9   | F10 => D10 | F11 => D11
-  | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15
-  | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19
-  | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23
-  | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27
-  | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31
-  end.
-
-(** Undefine all registers except SP and callee-save registers *)
-
-Definition undef_caller_save_regs (rs: regset) : regset :=
-  fun r =>
-    if preg_eq r SP
-    || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
-    then rs r
-    else Vundef.
-
-(** Extract the values of the arguments of an external call.
-    We exploit the calling conventions from module [Conventions], except that
-    we use AArch64 registers instead of locations. *)
-
-Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
-  | extcall_arg_reg: forall r,
-      extcall_arg rs m (R r) (rs (preg_of r))
-  | extcall_arg_stack: forall ofs ty bofs v,
-      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv (chunk_of_type ty) m
-                (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
-      extcall_arg rs m (Locations.S Outgoing ofs ty) v.
-
-Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
-  | extcall_arg_one: forall l v,
-      extcall_arg rs m l v ->
-      extcall_arg_pair rs m (One l) v
-  | extcall_arg_twolong: forall hi lo vhi vlo,
-      extcall_arg rs m hi vhi ->
-      extcall_arg rs m lo vlo ->
-      extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
-
-Definition extcall_arguments
-    (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
-  list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
-
-Definition loc_external_result (sg: signature) : rpair preg :=
-  map_rpair preg_of (loc_result sg).
-
-(** Execution of the instruction at [rs#PC]. *)
-
-Inductive state: Type :=
-  | State: regset -> mem -> state.
-
 Inductive step: state -> trace -> state -> Prop :=
   | exec_step_internal:
-      forall b ofs f i rs m rs' m',
+      forall b ofs (f:function) i rs m rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal f) ->
       find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some i ->
@@ -1206,7 +1317,7 @@ Inductive step: state -> trace -> state -> Prop :=
       external_call ef ge vargs m t vres m' ->
       rs' = nextinstr
               (set_res res vres
-                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+                (undef_regs (DR X16 :: DR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
       step (State rs m) t (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
@@ -1302,11 +1413,11 @@ Qed.
 
 Definition data_preg (r: preg) : bool :=
   match r with
-  | IR X16 => false
-  | IR X30 => false
-  | IR _ => true
-  | FR _ => true
+  | DR (IR X16) => false
+  | DR (IR X30) => false
+  | DR (IR _) => true
+  | DR (FR _) => true
   | CR _ => false
-  | SP => true
+  (* | SP => true; subsumed by IR (iregsp) *)
   | PC => false
   end.
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
new file mode 100644
index 00000000..c606002a
--- /dev/null
+++ b/aarch64/Asmblock.v
@@ -0,0 +1,1048 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Justus Fasse       UGA, VERIMAG                   *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+
+(* Asmblock language for aarch64
+
+WORK IN PROGRESS: we want to define an Asmblock syntax, with an Asmblock semantics
+(e.g. we don't need the parallel semantics of Asmvliw)
+
+
+NOTE: this file is inspired from
+   - aarch64/Asm.v
+   - kvx/Asmvliw.v (only the Asmblock syntax)
+   - kvx/Asmblock.v
+*)
+
+
+(** Abstract syntax and semantics for AArch64 assembly language *)
+
+Require Import Coqlib Zbits Maps.
+Require Import AST Integers Floats.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Locations Conventions.
+Require Stacklayout.
+Require Import OptionMonad Asm.
+Require Export Asm.
+
+Local Open Scope option_monad_scope.
+
+Notation regset := Asm.regset.
+
+(** * Abstract syntax *)
+
+(* First task: splitting the big [instruction] type below into CFI and basic instructions.
+   Actually a finer splitting in order to regroup "similar" instructions could be much better for automation of the scheduler proof!
+   e.g. "similar" means identical "interface" w.r.t. pseudo-registers when translated to AbstractBB,
+   or with a "similar" semantics.
+
+   see example of loads below.
+*)
+
+(** Control Flow instructions
+
+*)
+Inductive cf_instruction : Type :=
+  | Pb (lbl: label)                                                   (**r branch *)
+  | Pbc (c: testcond) (lbl: label)                                    (**r conditional branch *)
+  | Pbl (id: ident) (sg: signature)                                   (**r jump to function and link *)
+  | Pbs (id: ident) (sg: signature)                                   (**r jump to function *)
+  | Pblr (r: ireg) (sg: signature)                                    (**r indirect jump and link *)
+  | Pbr (r: ireg) (sg: signature)                                     (**r indirect jump *)
+  | Pret (r: ireg)                                                    (**r return *)
+  | Pcbnz (sz: isize) (r: ireg) (lbl: label)                          (**r branch if not zero *)
+  | Pcbz (sz: isize) (r: ireg) (lbl: label)                           (**r branch if zero *)
+  | Ptbnz (sz: isize) (r: ireg) (n: int) (lbl: label)                 (**r branch if bit n is not zero *)
+  | Ptbz (sz: isize) (r: ireg) (n: int) (lbl: label)                  (**r branch if bit n is zero *)
+  (** Pseudo-instructions *)
+  | Pbtbl (r1: ireg) (tbl: list label)                                (**r N-way branch through a jump table *)
+  .
+
+(*
+A builtin is considered as a control-flow instruction, because it could emit a trace (cf. Machblock semantics).
+Here, we do not need to have builtins alone in basic-blocks (on the contrary to KVX bundles).
+*)
+
+Inductive control: Type :=
+  | PCtlFlow  (i: cf_instruction)
+  (** Pseudo-instructions *)
+  | Pbuiltin (ef: external_function)
+             (args: list (builtin_arg dreg)) (res: builtin_res dreg)  (**r built-in function (pseudo) *)
+  .
+
+(** Basic instructions *)
+
+(* Loads waiting for (rd: dreg) (a: addressing)
+ * XXX Use dreg because exec_load is defined in terms of it, thus allowing us to
+ * treat integer and floating point loads the same. *)
+Inductive load_rd_a: Type :=
+  (* Integer load *)
+  | Pldrw                                   (**r load int32 *)
+  | Pldrw_a                                 (**r load int32 as any32 *)
+  | Pldrx                                   (**r load int64 *)
+  | Pldrx_a                                 (**r load int64 as any64 *)
+  | Pldrb (sz: isize)                       (**r load int8, zero-extend *)
+  | Pldrsb (sz: isize)                      (**r load int8, sign-extend *)
+  | Pldrh (sz: isize)                       (**r load int16, zero-extend *)
+  | Pldrsh (sz: isize)                      (**r load int16, sign-extend *)
+  | Pldrzw                                  (**r load int32, zero-extend to int64 *)
+  | Pldrsw                                  (**r load int32, sign-extend to int64 *)
+  (* Floating-point load *)
+  | Pldrs                                   (**r load float32 (single precision) *)
+  | Pldrd                                   (**r load float64 (double precision) *)
+  | Pldrd_a                                 (**r load float64 as any64 *)
+  .
+
+Inductive load_rd1_rd2_a: Type :=
+  | Pldpw
+  | Pldpx
+  | Pldps
+  | Pldpd
+  .
+
+Inductive ld_instruction: Type :=
+  | PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
+  | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: dreg) (chk1 chk2: memory_chunk) (a: addressing)   (**r load two int64 *)
+  .
+
+Inductive store_rs_a : Type :=
+  (* Integer store *)
+  | Pstrw                                   (**r store int32 *)
+  | Pstrw_a                                 (**r store int32 as any32 *)
+  | Pstrx                                   (**r store int64 *)
+  | Pstrx_a                                 (**r store int64 as any64 *)
+  | Pstrb                                   (**r store int8 *)
+  | Pstrh                                   (**r store int16 *)
+  (* Floating-point store *)
+  | Pstrs                                   (**r store float32 *)
+  | Pstrd                                   (**r store float64 *)
+  | Pstrd_a                                 (**r store float64 as any64 *)
+  .
+
+Inductive store_rs1_rs2_a : Type :=
+  | Pstpw
+  | Pstpx
+  | Pstps
+  | Pstpd
+  .
+
+Inductive st_instruction : Type :=
+  | PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing)
+  | Pstp (st: store_rs1_rs2_a) (rs1 rs2: dreg) (chk1 chk2: memory_chunk) (a: addressing)  (**r store two int64 *)
+  .
+
+Inductive arith_p : Type :=
+  (** PC-relative addressing *)
+  | Padrp (id: ident) (ofs: ptrofs)                        (**r set [rd] to high address of [id + ofs] *)
+  (** Move wide immediate *)
+  | Pmovz (sz: isize) (n: Z) (pos: Z)                      (**r move [n << pos] to [rd] *)
+  | Pmovn (sz: isize) (n: Z) (pos: Z)                      (**r move [NOT(n << pos)] to [rd] *)
+  (** Floating-point move *)
+  | Pfmovimms (f: float32)                                 (**r load float32 constant *)
+  | Pfmovimmd (f: float)                                   (**r load float64 constant *)
+.
+
+Inductive arith_comparison_p : Type :=
+  (** Floating-point comparison *)
+  | Pfcmp0 (sz: fsize)                                     (**r compare [r1] and [+0.0] *)
+  (** Integer arithmetic, immediate *)
+  | Pcmpimm (sz: isize) (n: Z)                        (**r compare *)
+  | Pcmnimm (sz: isize) (n: Z)                        (**r compare negative *)
+  (** Logical, immediate *)
+  | Ptstimm (sz: isize) (n: Z)                        (**r and, then set flags *)
+.
+
+Inductive arith_pp : Type :=
+  (** Move integer register *)
+  | Pmov
+  (** Move wide immediate *)
+  (* XXX: has to have the same register supplied both times *)
+  | Pmovk (sz: isize) (n: Z) (pos: Z)           (**r insert 16 bits of [n] at [pos] in rd *)
+  (** PC-relative addressing *)
+  | Paddadr (id: ident) (ofs: ptrofs)           (**r add the low address of [id + ofs] *)
+  (** Bit-field operations *)
+  | Psbfiz (sz: isize) (r: int) (s: Z)          (**r sign extend and shift left *)
+  | Psbfx (sz: isize) (r: int) (s: Z)           (**r shift right and sign extend *)
+  | Pubfiz (sz: isize) (r: int) (s: Z)          (**r zero extend and shift left *)
+  | Pubfx (sz: isize) (r: int) (s: Z)           (**r shift right and zero extend *)
+(* Bit operations are not used in the aarch64/asm semantics
+  (** Bit operations *)
+  | Pcls (sz: isize)                            (**r count leading sign bits *)
+  | Pclz (sz: isize)                            (**r count leading zero bits *)
+  | Prev (sz: isize)                            (**r reverse bytes *)
+  | Prev16 (sz: isize)                          (**r reverse bytes in each 16-bit word *)
+*)
+  (** Floating-point move *)
+  | Pfmov
+  (** Floating-point conversions *)
+  | Pfcvtds                                           (**r convert float32 to float64 *)
+  | Pfcvtsd                                           (**r convert float64 to float32 *)
+  (** Floating-point arithmetic *)
+  | Pfabs (sz: fsize)                                 (**r absolute value *)
+  | Pfneg (sz: fsize)                                 (**r negation *)
+  (* Pfsqrt is not used in the semantics of aarch64/asm
+  | Pfsqrt (sz: fsize)                                (**r square root *) *)
+  (** Floating-point conversions *)
+  | Pscvtf (fsz: fsize) (isz: isize)                  (**r convert signed int to float *)
+  | Pucvtf (fsz: fsize) (isz: isize)                  (**r convert unsigned int to float *)
+  | Pfcvtzs (isz: isize) (fsz: fsize)                 (**r convert float to signed int *)
+  | Pfcvtzu (isz: isize) (fsz: fsize)                 (**r convert float to unsigned int *)
+  (** Integer arithmetic, immediate *)
+  | Paddimm (sz: isize) (n: Z)                        (**r addition *)
+  | Psubimm (sz: isize) (n: Z)                        (**r subtraction *)
+.
+
+Inductive arith_comparison_r0r : Type :=
+  (** Integer arithmetic, shifted register *)
+  | Pcmp (is:isize) (s: shift_op)             (**r compare *)
+  | Pcmn (is:isize) (s: shift_op)             (**r compare negative *)
+  (** Logical, shifted register *)
+  | Ptst (is:isize) (s: shift_op)             (**r and, then set flags *)
+.
+
+Inductive arith_comparison_pp : Type :=
+  (** Integer arithmetic, extending register *)
+  | Pcmpext (x: extend_op)                      (**r int64-int32 cmp *)
+  | Pcmnext (x: extend_op)                      (**r int64-int32 cmn *)
+  (** Floating-point comparison *)
+  | Pfcmp (sz: fsize)                                 (**r compare [r1] and [r2] *)
+.
+
+Inductive arith_ppp : Type :=
+  (** Variable shifts *)
+  | Pasrv (sz: isize)                                 (**r arithmetic right shift *)
+  | Plslv (sz: isize)                                 (**r left shift *)
+  | Plsrv (sz: isize)                                 (**r logical right shift *)
+  | Prorv (sz: isize)                                 (**r rotate right *)
+  (** Integer multiply/divide *)
+  | Psmulh                                            (**r signed multiply high *)
+  | Pumulh                                            (**r unsigned multiply high *)
+  | Psdiv (sz: isize)                                 (**r signed division *)
+  | Pudiv (sz: isize)                                 (**r unsigned division *)
+  (** Integer arithmetic, extending register *)
+  | Paddext (x: extend_op)                            (**r int64-int32 add *)
+  | Psubext (x: extend_op)                            (**r int64-int32 sub *)
+  (** Floating-point arithmetic *)
+  | Pfadd (sz: fsize)                                (**r addition *)
+  | Pfdiv (sz: fsize)                                (**r division *)
+  | Pfmul (sz: fsize)                                (**r multiplication *)
+  | Pfsub (sz: fsize)                                (**r subtraction *)
+.
+
+Inductive arith_rr0r : Type :=
+  (** Integer arithmetic, shifted register *)
+  | Padd (sz:isize) (s: shift_op)                               (**r addition *)
+  | Psub (sz:isize) (s: shift_op)                               (**r subtraction *)
+  (** Logical, shifted register *)
+  | Pand (sz:isize) (s: shift_op)                               (**r and *)
+  | Pbic (sz:isize) (s: shift_op)                               (**r and-not *)
+  | Peon (sz:isize) (s: shift_op)                               (**r xor-not *)
+  | Peor (sz:isize) (s: shift_op)                               (**r xor *)
+  | Porr (sz:isize) (s: shift_op)                               (**r or *)
+  | Porn (sz:isize) (s: shift_op)                               (**r or-not *)
+.
+
+
+Inductive arith_rr0 : Type :=
+  (** Logical, immediate *)
+  | Pandimm (sz: isize) (n: Z)                     (**r and *)
+  | Peorimm (sz: isize) (n: Z)                     (**r xor *)
+  | Porrimm (sz: isize) (n: Z)                     (**r or *)
+.
+
+Inductive arith_arrrr0 : Type :=
+  (** Integer multiply/divide *)
+  | Pmadd (sz: isize)                               (**r multiply-add *)
+  | Pmsub (sz: isize)                               (**r multiply-sub *)
+.
+
+(* Currently not used by the semantics of aarch64/Asm
+ * Inductive arith_apppp : Type :=
+ *   (** Floating-point arithmetic *)
+ *   | Pfmadd (sz: fsize)                              (**r [rd = r3 + r1 * r2] *)
+ *   | Pfmsub (sz: fsize)                              (**r [rd = r3 - r1 * r2] *)
+ * .
+
+ * Inductive arith_aapppp : Type :=
+ *   (** Floating-point arithmetic *)
+ *   | Pfnmadd (sz: fsize)                             (**r [rd = - r3 - r1 * r2] *)
+ *   | Pfnmsub (sz: fsize)                             (**r [rd = - r3 + r1 * r2] *)
+ * . *)
+
+(* Notes on the naming scheme used here:
+ * R: ireg
+ * R0: ireg0
+ * Rsp: iregsp
+ * F: freg
+ * W/X: Occur in conjunction with R0, says whether an ireg0 should be evaluated
+ *      as W regsiter (32 bit) or X register (64 bit)
+ * S/D: Used for completeness sake. Only used for copying an integer register
+ *      to a floating point register. Could be removed.
+ * A: These instructions perform an additional arithmetic operation
+      XXX Does this interpretation match the use in kvx/Asmvliw?
+ * Comparison: For these instructions the first register is not the target.
+ *             Instead, the condition register is mutated.
+ *)
+Inductive ar_instruction : Type :=
+  | PArithP (i : arith_p) (rd : dreg)
+  | PArithPP (i : arith_pp) (rd rs : dreg)
+  | PArithPPP (i : arith_ppp) (rd r1 r2 : dreg)
+  | PArithRR0R (i : arith_rr0r) (rd : ireg) (r1 : ireg0) (r2 : ireg)
+  | PArithRR0 (i : arith_rr0) (rd : ireg) (r1 : ireg0)
+  | PArithARRRR0 (i : arith_arrrr0) (rd r1 r2 : ireg) (r3 : ireg0)
+  (* Pfmadd and Pfmsub are currently not used by the semantics of aarch64/Asm
+  | PArithAPPPP (i : arith_apppp) (rd r1 r2 r3 : preg) *)
+  (* Pfnmadd and Pfnmsub are currently not used by the semantics of aarch64/Asm
+  | PArithAAPPPP (i : arith_aapppp) (rd r1 r2 r3 : preg) *)
+  | PArithComparisonPP (i : arith_comparison_pp) (r1 r2 : dreg)
+  | PArithComparisonR0R (i : arith_comparison_r0r) (r1 : ireg0) (r2 : ireg)
+  | PArithComparisonP (i : arith_comparison_p) (r1 : dreg)
+  (* Instructions without indirection sine they would be the only ones *)
+  (* PArithCP: Pcsetm is commented out by aarch64/Asm, so Pcset is alone *)
+  | Pcset (rd : ireg) (c : testcond)                                  (**r set to 1/0 if cond is true/false *)
+  (* PArithFR0 *)
+  | Pfmovi (fsz : fsize) (rd : freg) (r1 : ireg0)                     (**r copy int reg to FP reg *)
+  (* PArithCPPP *)
+  | Pcsel (rd r1 r2 : dreg) (c : testcond)                            (**r int/float conditional move *)
+  (* PArithAFFF *)
+  | Pfnmul (fsz : fsize) (rd r1 r2 : freg)                            (**r multiply-negate *)
+.
+
+Inductive basic : Type :=
+  | PArith (i: ar_instruction)
+  | PLoad (ld: ld_instruction)
+  | PStore (st: st_instruction)
+  | Pallocframe (sz: Z) (linkofs: ptrofs)                             (**r allocate new stack frame *)
+  | Pfreeframe (sz: Z) (linkofs: ptrofs)                              (**r deallocate stack frame and restore previous frame *)
+  | Ploadsymbol (rd: ireg) (id: ident)                                (**r load the address of [id] *)
+  | Pcvtsw2x (rd: ireg) (r1: ireg)                                    (**r sign-extend 32-bit int to 64-bit *)
+  | Pcvtuw2x (rd: ireg) (r1: ireg)                                    (**r zero-extend 32-bit int to 64-bit *)
+  | Pcvtx2w (rd: ireg)                                                (**r retype a 64-bit int as a 32-bit int *)
+  | Pnop                                                              (**r no operation *)
+(* NOT USED IN THE SEMANTICS !
+  | Pcfi_adjust (ofs: int)                                            (**r .cfi_adjust debug directive *)
+  | Pcfi_rel_offset (ofs: int)                                        (**r .cfi_rel_offset debug directive *)
+*)
+.
+
+Coercion PCtlFlow: cf_instruction >-> control.
+Coercion PLoad: ld_instruction >-> basic.
+Coercion PStore : st_instruction >-> basic.
+Coercion PArithP: arith_p >-> Funclass.
+Coercion PArithPP: arith_pp >-> Funclass.
+Coercion PArithPPP: arith_ppp >-> Funclass.
+Coercion PArithRR0: arith_rr0 >-> Funclass.
+Coercion PArithRR0R: arith_rr0r >-> Funclass.
+Coercion PArithARRRR0: arith_arrrr0 >-> Funclass.
+Coercion PArithComparisonP: arith_comparison_p >-> Funclass.
+Coercion PArithComparisonPP: arith_comparison_pp >-> Funclass.
+Coercion PArithComparisonR0R: arith_comparison_r0r >-> Funclass.
+Coercion PArith: ar_instruction >-> basic.
+
+
+(* Not used in Coq, declared in ocaml directly in PostpassSchedulingOracle.ml
+Inductive instruction : Type :=
+  | PBasic    (i: basic)
+  | PControl  (i: control).
+
+Coercion PBasic:    basic >-> instruction.
+Coercion PControl:  control >-> instruction. *)
+
+(** * Definition of a bblock
+
+A bblock must contain at least one instruction.
+
+This choice simplifies the definition of [find_bblock] below:
+indeed, each address of a code block identifies at most one bblock.
+*)
+
+Definition non_empty_body (body: list basic): bool :=
+  match body with
+  | nil => false
+  | _ => true
+  end.
+
+Definition non_empty_exit (exit: option control): bool :=
+  match exit with
+  | None => false
+  | _ => true
+  end.
+
+Definition non_empty_bblockb (body: list basic) (exit: option control): bool := non_empty_body body || non_empty_exit exit.
+
+(** A bblock is well-formed if he contains at least one instruction. *)
+
+Record bblock := mk_bblock {
+  header: list label;
+  body: list basic;
+  exit: option control;
+  correct: Is_true (non_empty_bblockb body exit)
+}.
+
+(* FIXME? redundant with definition in Machblock *)
+Definition length_opt {A} (o: option A) : nat :=
+  match o with
+  | Some o => 1
+  | None => 0
+  end.
+
+Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}.
+Next Obligation.
+  destruct bb; cbn. assumption.
+Defined.
+
+Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}.
+Next Obligation.
+  destruct bb; cbn. assumption.
+Defined.
+
+(* This notion of size induces the notion of "valid" code address given by [find_bblock]
+
+   The result is in Z to be compatible with operations on PC.
+*)
+Definition size (b:bblock): Z := Z.of_nat (length (header b) + length (body b) + length_opt (exit b)).
+
+Definition bblocks := list bblock.
+
+Fixpoint size_blocks (l: bblocks): Z :=
+  match l with
+  | nil => 0
+  | b :: l =>
+     (size b) + (size_blocks l)
+  end
+  .
+
+Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0.
+Proof.
+  intros. destruct z; auto.
+  - contradict H. cbn. apply gt_irrefl.
+  - apply Zgt_pos_0.
+  - contradict H. cbn. apply gt_irrefl.
+Qed.
+
+Lemma size_positive (b:bblock): size b > 0.
+Proof.
+  unfold size. destruct b as [hd bdy ex cor]. cbn.
+  destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; omega);
+  unfold non_empty_bblockb in cor; simpl in cor.
+  inversion cor.
+Qed.
+
+Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks }.
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+
+(** * Operational semantics *)
+
+(** See "Parameters" of the same names in Asm.v *)
+Record aarch64_linker: Type := {
+  symbol_low: ident -> ptrofs -> val;
+  symbol_high: ident -> ptrofs -> val
+}.
+
+Definition genv := Genv.t fundef unit.
+
+Section RELSEM.
+
+Variable lk: aarch64_linker.
+Variable ge: genv.
+
+(** Evaluating an addressing mode *)
+
+Definition eval_addressing (a: addressing) (rs: regset): val :=
+  match a with
+  | ADimm base n => Val.addl rs#base (Vlong n)
+  | ADreg base r => Val.addl rs#base rs#r
+  | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
+  | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
+  | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
+  | ADadr base id ofs => Val.addl rs#base (symbol_low lk id ofs)
+  | ADpostincr base n => Vundef
+  end.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val)
+                     (a: addressing) (r: dreg) (rs: regset) (m: mem) :=
+  SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN
+  Next (rs#r <- (transf v)) m.
+
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
+                     (a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) :=
+  if is_pair_addressing_mode_correct a then
+    let addr := (eval_addressing a rs) in
+    let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+    let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+        match Mem.loadv chk1 m addr with
+        | None => Stuck
+        | Some v1 =>
+            match Mem.loadv chk2 m addr' with
+            | None => Stuck
+            | Some v2 =>
+                Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m
+            end
+        end
+  else Stuck.
+
+Definition exec_store_rs_a (chunk: memory_chunk)
+                      (a: addressing) (v: val)
+                      (rs: regset) (m: mem) :=
+  SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN
+  Next rs m'.
+
+Definition exec_store_double (chk1 chk2: memory_chunk)
+                      (a: addressing) (v1 v2: val)
+                      (rs: regset) (m: mem) :=
+  if is_pair_addressing_mode_correct a then
+    let addr := (eval_addressing a rs) in
+    let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+    let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+      match Mem.storev chk1 m addr v1 with
+      | None => Stuck
+      | Some m' => match Mem.storev chk2 m' addr' v2 with
+                   | None => Stuck
+                   | Some m'' => Next rs m''
+                   end
+      end
+  else Stuck.
+
+(** execution of loads
+*)
+
+Definition chunk_load (ld: load_rd_a): memory_chunk :=
+  match ld with
+  | Pldrw => Mint32
+  | Pldrw_a => Many32
+  | Pldrx => Mint64
+  | Pldrx_a => Many64
+  | Pldrb _ => Mint8unsigned
+  | Pldrsb _ => Mint8signed
+  | Pldrh _ => Mint16unsigned
+  | Pldrsh _ => Mint16signed
+  | Pldrzw => Mint32
+  | Pldrsw => Mint32
+  | Pldrs => Mfloat32
+  | Pldrd => Mfloat64
+  | Pldrd_a =>  Many64
+  end.
+
+Definition chunk_store (st: store_rs_a) : memory_chunk :=
+  match st with
+  | Pstrw => Mint32
+  | Pstrw_a => Many32
+  | Pstrx => Mint64
+  | Pstrx_a => Many64
+  | Pstrb => Mint8unsigned
+  | Pstrh => Mint16unsigned
+  | Pstrs => Mfloat32
+  | Pstrd => Mfloat64
+  | Pstrd_a => Many64
+  end.
+
+Definition interp_load (ld: load_rd_a): val -> val :=
+  match ld with
+  | Pldrb X => Val.longofintu
+  | Pldrsb X => Val.longofint
+  | Pldrh X => Val.longofintu
+  | Pldrsh X => Val.longofint
+  | Pldrzw => Val.longofintu
+  | Pldrsw => Val.longofint
+  (* Changed to exhaustive list because I tend to forgot all the places I need
+   * to check when changing things. *)
+  | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W 
+  | Pldrw   | Pldrw_a | Pldrx 
+  | Pldrx_a | Pldrs   | Pldrd 
+  | Pldrd_a => fun v => v
+  end.
+
+Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) :=
+  match ldi with
+  | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ld) (interp_load ld) a rd rs m
+  | Pldp ld rd1 rd2 chk1 chk2 a => exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+  end.
+
+Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) :=
+  match sti with
+  | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store st) a rs#rsr rs m
+  | Pstp st rs1 rs2 chk1 chk2 a => exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+  end.
+
+(** TODO: redundant w.r.t Machblock ?? *)
+Lemma in_dec (lbl: label) (l: list label):  { List.In lbl l } + { ~(List.In lbl l) }.
+Proof.
+  apply List.in_dec.
+  apply Pos.eq_dec.
+Qed.
+
+(** Note: copy-paste from Machblock *)
+Definition is_label (lbl: label) (bb: bblock) : bool :=
+  if in_dec lbl (header bb) then true else false.
+
+Lemma is_label_correct_true lbl bb:
+  List.In lbl (header bb) <-> is_label lbl bb = true.
+Proof.
+  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+Qed.
+
+Lemma is_label_correct_false lbl bb:
+  ~(List.In lbl (header bb)) <-> is_label lbl bb = false.
+Proof.
+  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+Qed.
+
+(** convert a label into a position in the code *)
+Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z :=
+  match lb with
+  | nil => None
+  | b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb'
+  end.
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
+  SOME pos <- label_pos lbl 0 (fn_blocks f) IN
+  match rs#PC with
+  | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
+  | _ => Stuck
+  end.
+
+(** Evaluating a branch
+
+Warning: PC is assumed to be already pointing on the next instruction !
+
+*)
+
+Definition eval_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) :=
+  SOME res <- ores IN
+  if res then goto_label f lbl rs m else Next rs m.
+
+Definition  eval_neg_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) :=
+  SOME res <- ores IN
+  if res then Next rs m else goto_label f lbl rs m.
+
+Definition exec_cfi (f: function) (cfi: cf_instruction) (rs: regset) (m: mem) : outcome :=
+  match cfi with
+  (** Branches *)
+  | Pb lbl =>
+      goto_label f lbl rs m
+  | Pbc cond lbl =>
+      eval_branch f lbl rs m (eval_testcond cond rs)
+  | Pbl id sg =>
+      Next (rs#RA <- (rs#PC) #PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+  | Pbs id sg =>
+      Next (rs#PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+  | Pblr r sg =>
+      Next (rs#RA <- (rs#PC) #PC <- (rs#r)) m
+  | Pbr r sg =>
+      Next (rs#PC <- (rs#r)) m
+  | Pret r =>
+      Next (rs#PC <- (rs#r)) m
+  | Pcbnz sz r lbl =>
+      eval_neg_branch f lbl rs m (eval_testzero sz rs#r)
+  | Pcbz sz r lbl =>
+      eval_branch f lbl rs m (eval_testzero sz rs#r)
+  | Ptbnz sz r n lbl =>
+      eval_branch f lbl rs m (eval_testbit sz rs#r n)
+  | Ptbz sz r n lbl =>
+      eval_neg_branch f lbl rs m (eval_testbit sz rs#r n)
+  (** Pseudo-instructions *)
+  | Pbtbl r tbl =>
+      match (rs#X16 <- Vundef)#r with
+      | Vint n =>
+          SOME lbl <- list_nth_z tbl (Int.unsigned n) IN
+          goto_label f lbl (rs#X16 <- Vundef) m
+      | _ => Stuck
+      end
+  end.
+
+Definition arith_eval_p (i : arith_p) : val :=
+  match i with
+  | Padrp id ofs => symbol_high lk id ofs
+  (** Move wide immediate *)
+  | Pmovz W n pos => Vint (Int.repr (Z.shiftl n pos))
+  | Pmovz X n pos => Vlong (Int64.repr (Z.shiftl n pos))
+  | Pmovn W n pos => Vint (Int.repr (Z.lnot (Z.shiftl n pos)))
+  | Pmovn X n pos => Vlong (Int64.repr (Z.lnot (Z.shiftl n pos)))
+  (** Floating-point move *)
+  | Pfmovimms f => Vsingle f
+  | Pfmovimmd f => Vfloat f
+  end.
+
+Definition destroy_X16 (i : arith_p) : bool :=
+  match i with
+  | Pfmovimms d => negb (is_immediate_float32 d)
+  | Pfmovimmd d => negb (is_immediate_float64 d)
+  | _ => false
+  end.
+
+Definition if_opt_bool_val (c: option bool) v1 v2: val :=
+   match c with
+   | Some true => v1
+   | Some false =>  v2
+   | None => Vundef
+   end.
+
+Definition arith_eval_pp i v :=
+  match i with
+  | Pmov => v
+  | Pmovk W n pos => insert_in_int v n pos 16
+  | Pmovk X n pos => insert_in_long v n pos 16
+  | Paddadr id ofs => Val.addl v (symbol_low lk id ofs)
+  | Psbfiz W r s => Val.shl (Val.sign_ext s v) (Vint r)
+  | Psbfiz X r s => Val.shll (Val.sign_ext_l s v) (Vint r)
+  | Psbfx W r s => Val.sign_ext s (Val.shr v (Vint r))
+  | Psbfx X r s => Val.sign_ext_l s (Val.shrl v (Vint r))
+  | Pubfiz W r s => Val.shl (Val.zero_ext s v) (Vint r)
+  | Pubfiz X r s => Val.shll (Val.zero_ext_l s v) (Vint r)
+  | Pubfx W r s => Val.zero_ext s (Val.shru v (Vint r))
+  | Pubfx X r s => Val.zero_ext_l s (Val.shrlu v (Vint r))
+  | Pfmov => v
+  | Pfcvtds => Val.floatofsingle v
+  | Pfcvtsd => Val.singleoffloat v
+  | Pfabs S => Val.absfs v
+  | Pfabs D => Val.absf v
+  | Pfneg S => Val.negfs v
+  | Pfneg D => Val.negf v
+  | Pfcvtzs W S => Val.maketotal (Val.intofsingle v)
+  | Pfcvtzs W D => Val.maketotal (Val.intoffloat v)
+  | Pfcvtzs X S => Val.maketotal (Val.longofsingle v)
+  | Pfcvtzs X D => Val.maketotal (Val.longoffloat v)
+  | Pfcvtzu W S => Val.maketotal (Val.intuofsingle v)
+  | Pfcvtzu W D => Val.maketotal (Val.intuoffloat v)
+  | Pfcvtzu X S => Val.maketotal (Val.longuofsingle v)
+  | Pfcvtzu X D => Val.maketotal (Val.longuoffloat v)
+  | Paddimm W n => Val.add v (Vint (Int.repr n))
+  | Paddimm X n => Val.addl v (Vlong (Int64.repr n))
+  | Psubimm W n => Val.sub v (Vint (Int.repr n))
+  | Psubimm X n => Val.subl v (Vlong (Int64.repr n))
+  | Pscvtf S W => Val.maketotal (Val.singleofint v)
+  | Pscvtf D W => Val.maketotal (Val.floatofint v)
+  | Pscvtf S X => Val.maketotal (Val.singleoflong v)
+  | Pscvtf D X => Val.maketotal (Val.floatoflong v)
+  | Pucvtf S W => Val.maketotal (Val.singleofintu v)
+  | Pucvtf D W => Val.maketotal (Val.floatofintu v)
+  | Pucvtf S X => Val.maketotal (Val.singleoflongu v)
+  | Pucvtf D X => Val.maketotal (Val.floatoflongu v)
+  end.
+
+Definition arith_eval_ppp i v1 v2 :=
+  match i with
+  | Pasrv W => Val.shr v1 v2
+  | Pasrv X => Val.shrl v1 v2
+  | Plslv W => Val.shl v1 v2
+  | Plslv X => Val.shll v1 v2
+  | Plsrv W => Val.shru v1 v2
+  | Plsrv X => Val.shrlu v1 v2
+  | Prorv W => Val.ror v1 v2
+  | Prorv X => Val.rorl v1 v2
+  | Psmulh => Val.mullhs v1 v2
+  | Pumulh => Val.mullhu v1 v2
+  | Psdiv W => Val.maketotal (Val.divs v1 v2)
+  | Psdiv X => Val.maketotal (Val.divls v1 v2)
+  | Pudiv W => Val.maketotal (Val.divu v1 v2)
+  | Pudiv X => Val.maketotal (Val.divlu v1 v2)
+  | Paddext x => Val.addl v1 (eval_extend v2 x)
+  | Psubext x => Val.subl v1 (eval_extend v2 x)
+  | Pfadd S => Val.addfs v1 v2
+  | Pfadd D => Val.addf v1 v2
+  | Pfdiv S => Val.divfs v1 v2
+  | Pfdiv D => Val.divf v1 v2
+  | Pfmul S => Val.mulfs v1 v2
+  | Pfmul D => Val.mulf v1 v2
+  | Pfsub S => Val.subfs v1 v2
+  | Pfsub D => Val.subf v1 v2
+  end.
+
+Definition arith_rr0r_isize (i: arith_rr0r) :=
+  match i with
+  | Padd is _ => is
+  | Psub is _ => is
+  | Pand is _ => is
+  | Pbic is _ => is
+  | Peon is _ => is
+  | Peor is _ => is
+  | Porr is _ => is
+  | Porn is _ => is
+  end.
+
+(* obtain v1 by [ir0 (arith_rr0r_isize i) rs s1] *)
+Definition arith_eval_rr0r i v1 v2 :=
+  match i with
+  | Padd W s => Val.add v1 (eval_shift_op_int v2 s)
+  | Padd X s => Val.addl v1 (eval_shift_op_long v2 s)
+  | Psub W s => Val.sub v1 (eval_shift_op_int v2 s)
+  | Psub X s => Val.subl v1 (eval_shift_op_long v2 s)
+  | Pand W s => Val.and v1 (eval_shift_op_int v2 s)
+  | Pand X s => Val.andl v1 (eval_shift_op_long v2 s)
+  | Pbic W s => Val.and v1 (Val.notint (eval_shift_op_int v2 s))
+  | Pbic X s => Val.andl v1 (Val.notl (eval_shift_op_long v2 s))
+  | Peon W s => Val.xor v1 (Val.notint (eval_shift_op_int v2 s))
+  | Peon X s => Val.xorl v1 (Val.notl (eval_shift_op_long v2 s))
+  | Peor W s => Val.xor v1 (eval_shift_op_int v2 s)
+  | Peor X s => Val.xorl v1 (eval_shift_op_long v2 s)
+  | Porr W s => Val.or v1 (eval_shift_op_int v2 s)
+  | Porr X s => Val.orl v1 (eval_shift_op_long v2 s)
+  | Porn W s => Val.or v1 (Val.notint (eval_shift_op_int v2 s))
+  | Porn X s => Val.orl v1 (Val.notl (eval_shift_op_long v2 s))
+  end.
+
+Definition arith_rr0_isize (i : arith_rr0) :=
+  match i with
+  | Pandimm is _ | Peorimm is _ | Porrimm is _ => is
+  end.
+
+(* obtain v by [ir0 (arith_rr0_isize i) rs s] *)
+Definition arith_eval_rr0 i v :=
+  match i with
+  | Pandimm W n => Val.and v (Vint (Int.repr n))
+  | Pandimm X n => Val.andl v (Vlong (Int64.repr n))
+  | Peorimm W n => Val.xor v (Vint (Int.repr n))
+  | Peorimm X n => Val.xorl v (Vlong (Int64.repr n))
+  | Porrimm W n => Val.or v (Vint (Int.repr n))
+  | Porrimm X n => Val.orl v (Vlong (Int64.repr n))
+  end.
+
+Definition arith_arrrr0_isize (i : arith_arrrr0) :=
+  match i with
+  | Pmadd is | Pmsub is => is
+  end.
+
+(* obtain v3 by [ir0 (arith_arrrr0_isize i) rs s3] *)
+Definition arith_eval_arrrr0 i v1 v2 v3 :=
+  match i with
+  | Pmadd W => Val.add v3 (Val.mul v1 v2)
+  | Pmadd X => Val.addl v3 (Val.mull v1 v2)
+  | Pmsub W => Val.sub v3 (Val.mul v1 v2)
+  | Pmsub X => Val.subl v3 (Val.mull v1 v2)
+  end.
+
+Definition arith_prepare_comparison_pp i (v1 v2 : val) :=
+  match i with
+  | Pcmpext x => (v1, (eval_extend v2 x))
+  | Pcmnext x => (v1, (Val.negl (eval_extend v2 x)))
+  | Pfcmp _ => (v1, v2)
+  end.
+
+Definition arith_comparison_r0r_isize i :=
+  match i with
+  | Pcmp is _ => is
+  | Pcmn is _ => is
+  | Ptst is _ => is
+  end.
+
+Definition arith_prepare_comparison_r0r i v1 v2 :=
+  match i with
+  | Pcmp W s => (v1, (eval_shift_op_int v2 s))
+  | Pcmp X s => (v1, (eval_shift_op_long v2 s))
+  | Pcmn W s => (v1, (Val.neg (eval_shift_op_int v2 s)))
+  | Pcmn X s => (v1, (Val.negl (eval_shift_op_long v2 s)))
+  | Ptst W s => ((Val.and v1 (eval_shift_op_int v2 s)), (Vint Int.zero))
+  | Ptst X s => ((Val.andl v1 (eval_shift_op_long v2 s)), (Vlong Int64.zero))
+  end.
+
+Definition arith_prepare_comparison_p i v :=
+  match i with
+  | Pcmpimm W n => (v, (Vint (Int.repr n)))
+  | Pcmpimm X n => (v, (Vlong (Int64.repr n)))
+  | Pcmnimm W n => (v, (Vint (Int.neg (Int.repr n))))
+  | Pcmnimm X n => (v, (Vlong (Int64.neg (Int64.repr n))))
+  | Ptstimm W n => ((Val.and v (Vint (Int.repr n))), (Vint Int.zero))
+  | Ptstimm X n => ((Val.andl v (Vlong (Int64.repr n))), (Vlong Int64.zero))
+  | Pfcmp0 S => (v, (Vsingle Float32.zero))
+  | Pfcmp0 D => (v, (Vfloat Float.zero))
+  end.
+
+Definition arith_comparison_pp_compare i :=
+  match i with
+  | Pcmpext _ | Pcmnext _ => compare_long
+  | Pfcmp S => compare_single
+  | Pfcmp D => compare_float
+  end.
+
+Definition arith_comparison_p_compare i :=
+   match i with
+   | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => compare_int
+   | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => compare_long
+   | Pfcmp0 S => compare_single
+   | Pfcmp0 D => compare_float
+   end.
+
+Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset :=
+  match ai with
+  | PArithP i d =>
+     let rs' := rs#d <- (arith_eval_p i) in
+     if destroy_X16 i then rs'#X16 <- Vundef else rs'
+  | PArithPP i d s => rs#d <- (arith_eval_pp i rs#s)
+  | PArithPPP i d s1 s2 => rs#d <- (arith_eval_ppp i rs#s1 rs#s2)
+
+  | PArithRR0R i d s1 s2 => rs#d <- (arith_eval_rr0r i (ir0 (arith_rr0r_isize i) rs s1) rs#s2)
+
+  | PArithRR0 i d s => rs#d <- (arith_eval_rr0 i (ir0 (arith_rr0_isize i) rs s))
+
+  | PArithARRRR0 i d s1 s2 s3 =>
+    rs#d <- (arith_eval_arrrr0 i rs#s1 rs#s2 (ir0 (arith_arrrr0_isize i) rs s3))
+
+  | PArithComparisonPP i s1 s2 =>
+    let (v1, v2) := arith_prepare_comparison_pp i rs#s1 rs#s2 in
+    arith_comparison_pp_compare i rs v1 v2
+  | PArithComparisonR0R i s1 s2 =>
+    let is := arith_comparison_r0r_isize i in
+    let (v1, v2) := arith_prepare_comparison_r0r i (ir0 is rs s1) rs#s2 in
+    (if is (* is W *) then compare_int else compare_long) rs v1 v2
+  | PArithComparisonP i s =>
+    let (v1, v2) := arith_prepare_comparison_p i rs#s in
+    arith_comparison_p_compare i rs v1 v2
+  | Pcset d c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (Vint Int.one) (Vint Int.zero))
+  | Pfmovi S d s => rs#d <- (float32_of_bits rs##s)
+  | Pfmovi D d s => rs#d <- (float64_of_bits rs###s)
+  | Pcsel d s1 s2 c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (rs#s1) (rs#s2))
+  | Pfnmul S d s1 s2 => rs#d <- (Val.negfs (Val.mulfs rs#s1 rs#s2))
+  | Pfnmul D d s1 s2 => rs#d <- (Val.negf (Val.mulf rs#s1 rs#s2))
+  end.
+
+(* basic exec *)
+Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome :=
+  match b with
+  | PArith ai => Next (exec_arith_instr ai rs) m
+  | PLoad ldi => exec_load ldi rs m
+  | PStore sti => exec_store sti rs m
+  | Pallocframe sz pos =>
+      let (m1, stk) := Mem.alloc m 0 sz in
+      let sp := (Vptr stk Ptrofs.zero) in
+      SOME m2 <- Mem.storev Mint64 m1 (Val.offset_ptr sp pos) rs#SP IN
+      Next (rs #X29 <- (rs#SP) #SP <- sp #X16 <- Vundef) m2
+  | Pfreeframe sz pos =>
+      SOME v <- Mem.loadv Mint64 m (Val.offset_ptr rs#SP pos) IN
+      match rs#SP with
+      | Vptr stk ofs =>
+        SOME m' <- Mem.free m stk 0 sz IN
+        Next (rs#SP <- v #X16 <- Vundef) m'
+      | _ => Stuck
+      end
+  | Ploadsymbol rd id =>
+      Next (rs#rd <- (Genv.symbol_address ge id Ptrofs.zero)) m
+  | Pcvtsw2x rd r1 =>
+      Next (rs#rd <- (Val.longofint rs#r1)) m
+  | Pcvtuw2x rd r1 =>
+      Next (rs#rd <- (Val.longofintu rs#r1)) m
+  | Pcvtx2w rd =>
+      Next (rs#rd <- (Val.loword rs#rd)) m
+  | Pnop => Next rs m
+  end.
+
+(**  execution of the body of a bblock *)
+Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
+  match body with
+  | nil => Next rs m
+  | bi::body' =>
+     SOME o <- exec_basic bi rs m IN
+     exec_body body' (_rs o) (_m o)
+  end.
+
+Definition incrPC size_b (rs: regset) :=
+  rs#PC <- (Val.offset_ptr rs#PC size_b).
+
+Definition estep (f: function) oc size_b (rs: regset) (m: mem) :=
+  match oc with
+  | Some (PCtlFlow cfi) => exec_cfi f cfi (incrPC size_b rs) m
+  | Some (Pbuiltin ef args res) => Next (incrPC size_b rs) m
+  | None => Next (incrPC size_b rs) m
+  end.
+
+(**  execution of the exit instruction of a bblock *)
+Inductive exec_exit (f: function) size_b (rs: regset) (m: mem): (option control) -> trace -> regset -> mem -> Prop :=
+  | none_step:
+      exec_exit f size_b rs m None E0 (incrPC size_b rs) m
+  | cfi_step (cfi: cf_instruction) rs' m':
+      exec_cfi f cfi (incrPC size_b rs) m = Next rs' m' ->
+      exec_exit f size_b rs m (Some (PCtlFlow cfi)) E0 rs' m'
+  | builtin_step ef args res vargs t vres rs' m':
+      eval_builtin_args ge (fun (r: dreg) => rs r) rs#SP m args vargs ->
+      external_call ef ge vargs m t vres m' ->
+      rs' = incrPC size_b
+              (set_res (map_builtin_res DR res) vres
+                (undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of (destroyed_by_builtin ef)) rs)) ->
+      exec_exit f size_b rs m (Some (Pbuiltin ef args res)) t rs' m'
+      .
+
+(*Definition bbstep f cfi size_b bdy rs m :=*)
+  (*match exec_body bdy rs m with*)
+  (*| Some (State rs' m') => estep f cfi size_b rs' m'*)
+  (*| Stuck => Stuck*)
+  (*end.*)
+
+Definition bbstep f bb rs m :=
+  match exec_body (body bb) rs m with
+  | Some (State rs' m') => estep f (exit bb) (Ptrofs.repr (size bb)) rs' m'
+  | Stuck => Stuck
+  end.
+
+Definition exec_bblock (f: function) (b: bblock) (rs: regset) (m: mem) (t:trace) (rs':regset) (m':mem): Prop
+  := exists rs1 m1, exec_body (body b) rs m = Next rs1 m1 /\  exec_exit f (Ptrofs.repr (size b)) rs1 m1 (exit b) t rs' m'.
+
+Fixpoint find_bblock (pos: Z) (lb: bblocks) {struct lb} : option bblock :=
+  match lb with
+  | nil => None
+  | b :: il =>
+    if zlt pos 0 then None  (* NOTE: It is impossible to branch inside a block *)
+    else if zeq pos 0 then Some b
+    else find_bblock (pos - (size b)) il
+  end.
+
+(** Execution of the instruction at [rs PC]. *)
+
+Inductive step: state -> trace -> state -> Prop :=
+  | exec_step_internal:
+      forall b ofs f bi rs m t rs' m',
+      rs PC = Vptr b ofs ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi ->
+      exec_bblock f bi rs m t rs' m' ->
+      step (State rs m) t (State rs' m')
+  | exec_step_external:
+      forall b ef args res rs m t rs' m',
+      rs PC = Vptr b Ptrofs.zero ->
+      Genv.find_funct_ptr ge b = Some (External ef) ->
+      external_call ef ge args m t res m' ->
+      extcall_arguments rs m (ef_sig ef) args ->
+      rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
+      step (State rs m) t (State rs' m')
+  .
+
+
+End RELSEM.
+
+
+(** Execution of whole programs. *)
+
+Inductive initial_state (p: program): state -> Prop :=
+  | initial_state_intro: forall m0,
+      Genv.init_mem p = Some m0 ->
+      let ge := Genv.globalenv p in
+      let rs0 :=
+        (Pregmap.init Vundef)
+        # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero)
+        # RA <- Vnullptr
+        # SP <- Vnullptr in
+      initial_state p (State rs0 m0).
+
+Inductive final_state: state -> int -> Prop :=
+  | final_state_intro: forall rs m r,
+      rs#PC = Vnullptr ->
+      rs#X0 = Vint r ->
+      final_state (State rs m) r.
+
+Definition semantics (lk: aarch64_linker) (p: program) :=
+  Semantics (step lk) (initial_state p) final_state (Genv.globalenv p).
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
new file mode 100644
index 00000000..5cd049c5
--- /dev/null
+++ b/aarch64/Asmblockdeps.v
@@ -0,0 +1,2688 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** * Translation from [Asmblock] to [AbstractBB] *)
+
+(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmblock] into [L].
+    [AbstractBB] will then define a sequential semantics for [L].
+    We prove a bisimulation between the sequential semantics of [L] and [Asmblock].
+    Then, the checker on [Asmblock] is deduced from those of [L].
+ *)
+
+Require Import AST.
+Require Import Asm Asmblock.
+Require Import Asmblockprops.
+Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import ZArith.
+Require Import Coqlib.
+Require Import ImpSimuTest.
+Require Import Axioms.
+Require Import Permutation.
+Require Import Events.
+
+Require Import Lia.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+Open Scope impure.
+(** auxiliary treatments of builtins *)
+
+Definition is_builtin(ex: option control): bool :=
+ match ex with
+ | Some (Pbuiltin _ _ _) => true
+ | _ => false
+ end.
+
+Definition has_builtin(bb: bblock): bool :=
+ is_builtin (exit bb).
+
+Remark builtin_arg_eq_dreg: forall (a b: builtin_arg dreg), {a=b} + {a<>b}.
+Proof.
+  intros.
+  apply (builtin_arg_eq dreg_eq).
+Qed.
+
+Remark builtin_res_eq_dreg: forall (a b: builtin_res dreg), {a=b} + {a<>b}.
+Proof. 
+  intros. 
+  apply (builtin_res_eq dreg_eq).
+Qed.
+
+Definition assert_same_builtin (bb1 bb2: bblock): ?? unit := 
+  match exit bb1 with
+  | Some (Pbuiltin ef1 lbar1 brr1) =>
+     match exit bb2 with
+     | Some (Pbuiltin ef2 lbar2 brr2) => 
+        if (external_function_eq ef1 ef2) then
+           if (list_eq_dec builtin_arg_eq_dreg lbar1 lbar2) then
+              if (builtin_res_eq_dreg brr1 brr2) then RET tt
+              else FAILWITH "Different brr in Pbuiltin"
+           else FAILWITH "Different lbar in Pbuiltin"
+        else FAILWITH "Different ef in Pbuiltin"
+     | _ =>  FAILWITH "Expected a builtin: found something else" (* XXX: on peut raffiner le message d'erreur si nécessaire *)
+     end
+  | _ => match exit bb2 with
+         | Some (Pbuiltin ef2 lbar2 brr2) => FAILWITH "Expected a something else: found a builtin"
+         | _ => RET tt (* ok *)
+         end
+  end.
+
+Lemma assert_same_builtin_correct (bb1 bb2: bblock):
+  WHEN assert_same_builtin bb1 bb2 ~> _ THEN
+    has_builtin bb1 = true \/ has_builtin bb2 = true -> exit bb1 = exit bb2.
+Proof.
+  unfold assert_same_builtin, has_builtin.
+  destruct (exit bb1) as [[]|]; simpl;
+  destruct (exit bb2) as [[]|]; wlp_simplify; try congruence.
+Qed.
+Global Opaque assert_same_builtin.
+Local Hint Resolve assert_same_builtin_correct: wlp.
+
+(** Definition of [L] *)
+
+Module P<: ImpParam.
+Module R := Pos.
+
+Section IMPPARAM.
+
+Definition env := Genv.t fundef unit.
+
+Record genv_wrap := { _genv: env; _fn: function; _lk: aarch64_linker }.
+Definition genv := genv_wrap.
+
+Variable Ge: genv.
+
+Inductive value_wrap :=
+  | Val (v: val)
+  | Memstate (m: mem)
+  | Bool (b: bool)
+.
+
+Definition value := value_wrap.
+
+Record CRflags := { _CN: val; _CZ:val; _CC: val; _CV: val }.
+
+Inductive control_op :=
+  | Ob (l: label)
+  | Obc (c: testcond) (l: label)
+  | Obl (id: ident)
+  | Obs (id: ident)
+  | Ocbnz (sz: isize) (l: label)
+  | Ocbz (sz: isize) (l: label)
+  | Otbnz (sz: isize) (n: int) (l: label)
+  | Otbz (sz: isize) (n: int) (l: label)
+  | Obtbl (l: list label)
+  | OError
+  | OIncremPC (sz: Z)
+.
+
+Inductive arith_op :=
+  | OArithP (n: arith_p)
+  | OArithPP (n: arith_pp)
+  | OArithPPP (n: arith_ppp)
+  | OArithRR0R (n: arith_rr0r)
+  | OArithRR0R_XZR (n: arith_rr0r) (vz: val)
+  | OArithRR0 (n: arith_rr0)
+  | OArithRR0_XZR (n: arith_rr0) (vz: val)
+  | OArithARRRR0 (n: arith_arrrr0)
+  | OArithARRRR0_XZR (n: arith_arrrr0) (vz: val)
+  | OArithComparisonPP_CN (n: arith_comparison_pp)
+  | OArithComparisonPP_CZ (n: arith_comparison_pp)
+  | OArithComparisonPP_CC (n: arith_comparison_pp)
+  | OArithComparisonPP_CV (n: arith_comparison_pp)
+  | OArithComparisonR0R_CN (n: arith_comparison_r0r) (is: isize)
+  | OArithComparisonR0R_CZ (n: arith_comparison_r0r) (is: isize)
+  | OArithComparisonR0R_CC (n: arith_comparison_r0r) (is: isize)
+  | OArithComparisonR0R_CV (n: arith_comparison_r0r) (is: isize)
+  | OArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+  | OArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+  | OArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+  | OArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+  | OArithComparisonP_CN (n: arith_comparison_p)
+  | OArithComparisonP_CZ (n: arith_comparison_p)
+  | OArithComparisonP_CC (n: arith_comparison_p)
+  | OArithComparisonP_CV (n: arith_comparison_p)
+  | Ocset (c: testcond)
+  | Ofmovi (fsz: fsize)
+  | Ofmovi_XZR (fsz: fsize)
+  | Ocsel (c: testcond)
+  | Ofnmul (fsz: fsize)
+.
+
+Inductive store_op :=
+  | Ostore1 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+  | Ostore2 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+  | OstoreU (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+.
+
+Inductive load_op :=
+  | Oload1 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+  | Oload2 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+  | OloadU (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+.
+
+Inductive allocf_op :=
+  | OAllocf_SP (sz: Z) (linkofs: ptrofs)
+  | OAllocf_Mem (sz: Z) (linkofs: ptrofs)
+.
+
+Inductive freef_op :=
+  | OFreef_SP (sz: Z) (linkofs: ptrofs)
+  | OFreef_Mem (sz: Z) (linkofs: ptrofs)
+.
+
+Inductive op_wrap :=
+  (* arithmetic operation *)
+  | Arith (op: arith_op)
+  | Load (ld: load_op)
+  | Store (st: store_op)
+  | Allocframe (al: allocf_op)
+  | Freeframe (fr: freef_op)
+  | Loadsymbol (id: ident)
+  | Cvtsw2x
+  | Cvtuw2x
+  | Cvtx2w
+  | Control (co: control_op)
+  | Constant (v: val)
+.
+
+Definition op:=op_wrap.
+
+Coercion Arith: arith_op >-> op_wrap.
+Coercion Control: control_op >-> op_wrap.
+
+Definition v_compare_int (v1 v2: val) : CRflags :=
+  {| _CN := (Val.negative (Val.sub v1 v2));
+     _CZ := (Val.mxcmpu Ceq v1 v2);
+     _CC := (Val.mxcmpu Cge v1 v2);
+     _CV := (Val.sub_overflow v1 v2) |}.
+
+Definition v_compare_long (v1 v2: val) : CRflags :=
+  {| _CN := (Val.negativel (Val.subl v1 v2));
+     _CZ := (Val.mxcmplu Ceq v1 v2);
+     _CC := (Val.mxcmplu Cge v1 v2);
+     _CV := (Val.subl_overflow v1 v2) |}.
+
+Definition v_compare_float (v1 v2: val) : CRflags :=
+  match v1, v2 with
+  | Vfloat f1, Vfloat f2 =>
+      {| _CN := (Val.of_bool (Float.cmp Clt f1 f2));
+         _CZ := (Val.of_bool (Float.cmp Ceq f1 f2));
+         _CC := (Val.of_bool (negb (Float.cmp Clt f1 f2)));
+         _CV := (Val.of_bool (negb (Float.ordered f1 f2))) |}
+  | _, _ =>
+      {| _CN := Vundef;
+         _CZ := Vundef;
+         _CC := Vundef;
+         _CV := Vundef |}
+  end.
+
+Definition v_compare_single (v1 v2: val) : CRflags :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 =>
+      {| _CN := (Val.of_bool (Float32.cmp Clt f1 f2));
+         _CZ := (Val.of_bool (Float32.cmp Ceq f1 f2));
+         _CC := (Val.of_bool (negb (Float32.cmp Clt f1 f2)));
+         _CV := (Val.of_bool (negb (Float32.ordered f1 f2))) |}
+  | _, _ =>
+      {| _CN := Vundef;
+         _CZ := Vundef;
+         _CC := Vundef;
+         _CV := Vundef |}
+  end.
+
+Definition arith_eval_comparison_pp (n: arith_comparison_pp) (v1 v2: val) :=
+  let (v1',v2') := arith_prepare_comparison_pp n v1 v2 in
+  match n with 
+  | Pcmpext _ | Pcmnext _ => v_compare_long v1' v2'
+  | Pfcmp S => v_compare_single v1' v2'
+  | Pfcmp D => v_compare_float v1' v2'
+  end.
+
+Definition arith_eval_comparison_p (n: arith_comparison_p) (v: val) :=
+  let (v1',v2') := arith_prepare_comparison_p n v in
+  match n with
+  | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => v_compare_int v1' v2'
+  | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => v_compare_long v1' v2'
+  | Pfcmp0 S => v_compare_single v1' v2'
+  | Pfcmp0 D => v_compare_float v1' v2'
+  end.
+
+Definition arith_eval_comparison_r0r (n: arith_comparison_r0r) (v1 v2: val) (is: isize) :=
+  let (v1',v2') := arith_prepare_comparison_r0r n v1 v2 in
+  if is then v_compare_int v1' v2' else v_compare_long v1' v2'.
+
+Definition flags_testcond_value (c: testcond) (vCN vCZ vCC vCV: val) :=
+  match c with
+  | TCeq =>                             (**r equal *)
+      match vCZ with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  | TCne =>                             (**r not equal *)
+      match vCZ with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TClo =>                             (**r unsigned less than  *)
+      match vCC with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TCls =>                             (**r unsigned less or equal *)
+      match vCC, vCZ with
+      | Vint c, Vint z => Some (Int.eq c Int.zero || Int.eq z Int.one)
+      | _, _ => None
+      end
+  | TChs =>                             (**r unsigned greater or equal *)
+      match vCC with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  | TChi =>                             (**r unsigned greater *)
+      match vCC, vCZ with
+      | Vint c, Vint z => Some (Int.eq c Int.one && Int.eq z Int.zero)
+      | _, _ => None
+      end
+  | TClt =>                             (**r signed less than *)
+      match vCV, vCN with
+      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.one)
+      | _, _ => None
+      end
+  | TCle =>                             (**r signed less or equal *)
+      match vCV, vCN, vCZ with
+      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.one || Int.eq z Int.one)
+      | _, _, _ => None
+      end
+  | TCge =>                             (**r signed greater or equal *)
+      match vCV, vCN with
+      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.zero)
+      | _, _ => None
+      end
+  | TCgt =>                             (**r signed greater *)
+      match vCV, vCN, vCZ with
+      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.zero && Int.eq z Int.zero)
+      | _, _, _ => None
+      end
+  | TCpl =>                             (**r positive *)
+      match vCN with
+      | Vint n => Some (Int.eq n Int.zero)
+      | _ => None
+      end
+  | TCmi =>                             (**r negative *)
+      match vCN with
+      | Vint n => Some (Int.eq n Int.one)
+      | _ => None
+      end
+  end.
+
+(* The is argument is used to identify the source inst and avoid rewriting some code
+  0 -> Ocset
+  1 -> Ocsel
+  2 -> Obc *)
+Definition cond_eval_is (c: testcond) (v1 v2 vCN vCZ vCC vCV: val) (is: Z) :=
+  let res := flags_testcond_value c vCN vCZ vCC vCV in
+  match is, res with
+  | 0, res => Some (Val (if_opt_bool_val res (Vint Int.one) (Vint Int.zero)))
+  | 1, res => Some (Val (if_opt_bool_val res v1 v2))
+  | 2, Some b => Some (Bool (b))
+  | _, _ => None
+  end.
+
+Definition fmovi_eval (fsz: fsize) (v: val) :=
+  match fsz with
+  | S => float32_of_bits v
+  | D => float64_of_bits v
+  end.
+
+Definition fmovi_eval_xzr (fsz: fsize) :=
+  match fsz with
+  | S => float32_of_bits (Vint Int.zero)
+  | D => float64_of_bits (Vlong Int64.zero)
+  end.
+
+Definition fnmul_eval (fsz: fsize) (v1 v2: val) :=
+  match fsz with
+  | S => Val.negfs (Val.mulfs v1 v2)
+  | D => Val.negf (Val.mulf v1 v2)
+  end.
+
+Definition cflags_eval (c: testcond) (l: list value) (v1 v2: val) (is: Z) :=
+  match c, l with
+  | TCeq, [Val vCZ] => cond_eval_is TCeq v1 v2 Vundef vCZ Vundef Vundef is
+  | TCne, [Val vCZ] => cond_eval_is TCne v1 v2 Vundef vCZ Vundef Vundef is
+  | TChs, [Val vCC] => cond_eval_is TChs v1 v2 Vundef Vundef vCC Vundef is
+  | TClo, [Val vCC] => cond_eval_is TClo v1 v2 Vundef Vundef vCC Vundef is
+  | TCmi, [Val vCN] => cond_eval_is TCmi v1 v2 vCN Vundef Vundef Vundef is
+  | TCpl, [Val vCN] => cond_eval_is TCpl v1 v2 vCN Vundef Vundef Vundef is
+  | TChi, [Val vCZ; Val vCC] => cond_eval_is TChi v1 v2 Vundef vCZ vCC Vundef is
+  | TCls, [Val vCZ; Val vCC] => cond_eval_is TCls v1 v2 Vundef vCZ vCC Vundef is
+  | TCge, [Val vCN; Val vCV] => cond_eval_is TCge v1 v2 vCN Vundef Vundef vCV is
+  | TClt, [Val vCN; Val vCV] => cond_eval_is TClt v1 v2 vCN Vundef Vundef vCV is
+  | TCgt, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCgt v1 v2 vCN vCZ Vundef vCV is
+  | TCle, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCle v1 v2 vCN vCZ Vundef vCV is
+  | _, _ => None
+  end.
+
+Definition arith_op_eval (op: arith_op) (l: list value) :=
+  match op, l with
+  | OArithP n, [] => Some (Val (arith_eval_p Ge.(_lk) n))
+  | OArithPP n, [Val v] => Some (Val (arith_eval_pp Ge.(_lk) n v))
+  | OArithPPP n, [Val v1; Val v2] => Some (Val (arith_eval_ppp n v1 v2))
+  | OArithRR0R n, [Val v1; Val v2] => Some (Val (arith_eval_rr0r n v1 v2))
+  | OArithRR0R_XZR n vz, [Val v] => Some (Val (arith_eval_rr0r n vz v))
+  | OArithRR0 n, [Val v] => Some (Val (arith_eval_rr0 n v))
+  | OArithRR0_XZR n vz, [] => Some (Val (arith_eval_rr0 n vz))
+  | OArithARRRR0 n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrrr0 n v1 v2 v3))
+  | OArithARRRR0_XZR n vz, [Val v1; Val v2] => Some (Val (arith_eval_arrrr0 n v1 v2 vz))
+  | OArithComparisonPP_CN n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CN)))
+  | OArithComparisonPP_CZ n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CZ)))
+  | OArithComparisonPP_CC n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CC)))
+  | OArithComparisonPP_CV n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CV)))
+  | OArithComparisonR0R_CN n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CN)))
+  | OArithComparisonR0R_CZ n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CZ)))
+  | OArithComparisonR0R_CC n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CC)))
+  | OArithComparisonR0R_CV n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CV)))
+  | OArithComparisonR0R_CN_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CN)))
+  | OArithComparisonR0R_CZ_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CZ)))
+  | OArithComparisonR0R_CC_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CC)))
+  | OArithComparisonR0R_CV_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CV)))
+  | OArithComparisonP_CN n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CN)))
+  | OArithComparisonP_CZ n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CZ)))
+  | OArithComparisonP_CC n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CC)))
+  | OArithComparisonP_CV n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CV)))
+  | Ocset c, l => cflags_eval c l Vundef Vundef 0
+  | Ofmovi fsz, [Val v] => Some (Val (fmovi_eval fsz v))
+  | Ofmovi_XZR fsz, [] => Some (Val (fmovi_eval_xzr fsz))
+  | Ocsel c, Val v1 :: Val v2 :: l' => cflags_eval c l' v1 v2 1
+  | Ofnmul fsz, [Val v1; Val v2] => Some (Val (fnmul_eval fsz v1 v2))
+  | _, _ => None
+  end.
+
+Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) :=
+  match v with
+  | Some va => match Mem.storev c m va vs with
+               | Some m' => Some (Memstate m')
+               | None => None
+               end
+  | None => None (* should never occurs *)
+  end.
+
+Definition exec_store1 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs: val) :=
+  let v :=
+    match a with
+    | ADimm _ n => Some (Val.addl vs (Vlong n))
+    | ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs))
+    | _ => None
+    end in
+  call_ll_storev chunk m v vr.
+
+Definition exec_store2 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs1 vs2: val) :=
+  let v :=
+    match a with
+    | ADreg _ _ => Some (Val.addl vs1 vs2)
+    | ADlsl _ _ n => Some (Val.addl vs1 (Val.shll vs2 (Vint n)))
+    | ADsxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofint vs2) (Vint n)))
+    | ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n)))
+    | _ => None
+    end in
+  call_ll_storev chunk m v vr.
+  
+Definition exec_storeU (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr: val) :=
+  call_ll_storev chunk m None vr.
+
+Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
+  match label_pos lbl 0 (fn_blocks f) with
+  | None => None
+  | Some pos =>
+      match vpc with
+      | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos)))
+      | _          => None
+      end
+  end.
+
+Definition control_eval (o: control_op) (l: list value) :=
+  let (ge, fn, lk) := Ge in
+  match o, l with
+  | Ob lbl, [Val vpc] => goto_label_deps fn lbl vpc
+  | Obc c lbl, Val vpc :: l' => match cflags_eval c l' Vundef Vundef 2 with
+                                | Some (Bool true) => goto_label_deps fn lbl vpc
+                                | Some (Bool false) => Some (Val vpc)
+                                | _ => None
+                                end
+  | Obl id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+  | Obs id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+  | Ocbnz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with
+                                      | Some (true) => Some (Val vpc)
+                                      | Some (false) => goto_label_deps fn lbl vpc
+                                      | None => None
+                                      end
+  | Ocbz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with
+                                      | Some (true) => goto_label_deps fn lbl vpc
+                                      | Some (false) => Some (Val vpc)
+                                      | None => None
+                                     end
+  | Otbnz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with
+                                        | Some (true) => goto_label_deps fn lbl vpc
+                                        | Some (false) => Some (Val vpc)
+                                        | None => None
+                                       end
+  | Otbz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with
+                                        | Some (true) => Some (Val vpc)
+                                        | Some (false) => goto_label_deps fn lbl vpc
+                                        | None => None
+                                       end
+  | Obtbl tbl, [Val index; Val vpc] => match index with
+                                       | Vint n => 
+                                         match list_nth_z tbl (Int.unsigned n) with
+                                         | None => None
+                                         | Some lbl => goto_label_deps fn lbl vpc
+                                         end
+                                       | _ => None
+                                       end
+  | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz)))
+  | OError, _ => None
+  | _, _ => None
+  end.
+
+Definition store_eval (o: store_op) (l: list value) :=
+  match o, l with
+  | Ostore1 st chunk a, [Val vr; Val vs; Memstate m] => exec_store1 st m chunk a vr vs
+  | Ostore2 st chunk a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m chunk a vr vs1 vs2
+  | OstoreU st chunk a, [Val vr; Memstate m] => exec_storeU st m chunk a vr
+  | _, _ => None
+  end.
+
+Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: option val) :=
+  match v with
+  | Some va => match Mem.loadv c m va with
+               | Some v' => Some (Val (transf v'))
+               | None => None
+               end
+  | None => None (* should never occurs *)
+  end.
+
+Definition exec_load1 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl: val) :=
+  let v :=
+    match a with
+    | ADimm _ n => Some (Val.addl vl (Vlong n))
+    | ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs))
+    | _ => None
+    end in
+  call_ll_loadv chunk (interp_load ld) m v.
+
+Definition exec_load2 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl1 vl2: val) :=
+  let v :=
+    match a with
+    | ADreg _ _ => Some (Val.addl vl1 vl2)
+    | ADlsl _ _ n => Some (Val.addl vl1 (Val.shll vl2 (Vint n)))
+    | ADsxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofint vl2) (Vint n)))
+    | ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n)))
+    | _ => None
+    end in
+  call_ll_loadv chunk (interp_load ld) m v.
+  
+Definition exec_loadU (n: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) :=
+  call_ll_loadv chunk (interp_load n) m None.
+
+Definition load_eval (o: load_op) (l: list value) :=
+  match o, l with
+  | Oload1 ld chunk a, [Val vs; Memstate m] => exec_load1 ld m chunk a vs
+  | Oload2 ld chunk a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m chunk a vs1 vs2
+  | OloadU st chunk a, [Memstate m] => exec_loadU st m chunk a
+  | _, _ => None
+  end.
+
+Definition eval_allocf (o: allocf_op) (l: list value) :=
+  match o, l with
+  | OAllocf_Mem sz linkofs, [Val spv; Memstate m] =>
+      let (m1, stk) := Mem.alloc m 0 sz in
+      let sp := (Vptr stk Ptrofs.zero) in
+      call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv
+  | OAllocf_SP sz linkofs, [Val spv; Memstate m] =>
+      let (m1, stk) := Mem.alloc m 0 sz in
+      let sp := (Vptr stk Ptrofs.zero) in
+      match call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv with
+      | None => None
+      | Some ms => Some (Val sp)
+      end
+  | _, _ => None
+  end.
+
+Definition eval_freef (o: freef_op) (l: list value) :=
+  match o, l with
+  | OFreef_Mem sz linkofs, [Val spv; Memstate m] =>
+      match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with
+      | None => None
+      | Some v =>
+          match spv with
+          | Vptr stk ofs =>
+              match Mem.free m stk 0 sz with
+              | None => None
+              | Some m' => Some (Memstate m')
+              end
+          | _ => None
+          end
+      end
+  | OFreef_SP sz linkofs, [Val spv; Memstate m] =>
+      match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with
+      | None => None
+      | Some v =>
+          match spv with
+          | Vptr stk ofs =>
+              match Mem.free m stk 0 sz with
+              | None => None
+              | Some m' => Some (v)
+              end
+          | _ => None
+          end
+      end
+  | _, _ => None
+  end.
+
+Definition op_eval (op: op) (l:list value) :=
+  match op, l with
+  | Arith op, l => arith_op_eval op l
+  | Load o, l => load_eval o l
+  | Store o, l => store_eval o l
+  | Allocframe o, l => eval_allocf o l
+  | Freeframe o, l => eval_freef o l
+  | Loadsymbol id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+  | Cvtsw2x, [Val v] => Some (Val (Val.longofint v))
+  | Cvtuw2x, [Val v] => Some (Val (Val.longofintu v))
+  | Cvtx2w, [Val v] => Some (Val (Val.loword v))
+  | Control o, l => control_eval o l
+  | Constant v, [] => Some (Val v)
+  | _, _ => None
+  end.
+
+Definition vz_eq (vz1 vz2: val) : ?? bool :=
+  RET (match vz1 with
+       | Vint i1 => match vz2 with
+                    | Vint i2 => Int.eq i1 i2
+                    | _ => false
+                    end
+       | Vlong l1 => match vz2 with
+                     | Vlong l2 => Int64.eq l1 l2
+                     | _ => false
+                     end
+       | _ => false
+       end).
+
+Lemma vz_eq_correct vz1 vz2:
+  WHEN vz_eq vz1 vz2 ~> b THEN b = true -> vz1 = vz2.
+Proof.
+  wlp_simplify.
+  destruct vz1; destruct vz2; trivial; try discriminate.
+  - eapply f_equal; apply Int.same_if_eq; auto.
+  - eapply f_equal. apply Int64.same_if_eq; auto.
+Qed.
+Hint Resolve vz_eq_correct: wlp.
+
+Definition is_eq (is1 is2: isize) : ?? bool :=
+  RET (match is1 with
+       | W => match is2 with
+              | W => true
+              | _ => false
+              end
+       | X => match is2 with
+              | X => true
+              | _ => false
+              end
+       end).
+
+Lemma is_eq_correct is1 is2:
+  WHEN is_eq is1 is2 ~> b THEN b = true -> is1 = is2.
+Proof.
+  wlp_simplify; destruct is1; destruct is2; trivial; try discriminate.
+Qed.
+Hint Resolve is_eq_correct: wlp.
+
+Definition arith_op_eq (o1 o2: arith_op): ?? bool :=
+  match o1 with
+  | OArithP n1 =>
+      match o2 with OArithP n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithPP n1 =>
+      match o2 with OArithPP n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithPPP n1 =>
+      match o2 with OArithPPP n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithRR0R n1 =>
+      match o2 with OArithRR0R n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithRR0R_XZR n1 vz1 =>
+      match o2 with OArithRR0R_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+  | OArithRR0 n1 =>
+      match o2 with OArithRR0 n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithRR0_XZR n1 vz1 =>
+      match o2 with OArithRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+  | OArithARRRR0 n1 =>
+      match o2 with OArithARRRR0 n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithARRRR0_XZR n1 vz1 =>
+      match o2 with OArithARRRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+  | OArithComparisonPP_CN n1 =>
+      match o2 with OArithComparisonPP_CN n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonPP_CZ n1 =>
+      match o2 with OArithComparisonPP_CZ n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonPP_CC n1 =>
+      match o2 with OArithComparisonPP_CC n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonPP_CV n1 =>
+      match o2 with OArithComparisonPP_CV n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonR0R_CN n1 is1 =>
+      match o2 with OArithComparisonR0R_CN n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+  | OArithComparisonR0R_CZ n1 is1 =>
+      match o2 with OArithComparisonR0R_CZ n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+  | OArithComparisonR0R_CC n1 is1 =>
+      match o2 with OArithComparisonR0R_CC n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+  | OArithComparisonR0R_CV n1 is1 =>
+      match o2 with OArithComparisonR0R_CV n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+  | OArithComparisonR0R_CN_XZR n1 is1 vz1 =>
+      match o2 with OArithComparisonR0R_CN_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+  | OArithComparisonR0R_CZ_XZR n1 is1 vz1 =>
+      match o2 with OArithComparisonR0R_CZ_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+  | OArithComparisonR0R_CC_XZR n1 is1 vz1 =>
+      match o2 with OArithComparisonR0R_CC_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+  | OArithComparisonR0R_CV_XZR n1 is1 vz1 =>
+      match o2 with OArithComparisonR0R_CV_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+  | OArithComparisonP_CN n1 =>
+      match o2 with OArithComparisonP_CN n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonP_CZ n1 =>
+      match o2 with OArithComparisonP_CZ n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonP_CC n1 =>
+      match o2 with OArithComparisonP_CC n2 => phys_eq n1 n2 | _ => RET false end
+  | OArithComparisonP_CV n1 =>
+      match o2 with OArithComparisonP_CV n2 => phys_eq n1 n2 | _ => RET false end
+  | Ocset c1 =>
+      match o2 with Ocset c2 => struct_eq c1 c2 | _ => RET false end
+  | Ofmovi fsz1 =>
+      match o2 with Ofmovi fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+  | Ofmovi_XZR fsz1 =>
+      match o2 with Ofmovi_XZR fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+  | Ocsel c1 =>
+      match o2 with Ocsel c2 => struct_eq c1 c2 | _ => RET false end
+  | Ofnmul fsz1 =>
+      match o2 with Ofnmul fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+  end.
+
+Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp).
+
+Lemma arith_op_eq_correct o1 o2:
+  WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+  destruct o1, o2; my_wlp_simplify; try congruence;
+  try (destruct vz; destruct vz0); try (destruct is; destruct is0);
+  repeat apply f_equal; try congruence;
+  try apply Int.same_if_eq; try apply Int64.same_if_eq; try auto.
+Qed.
+Hint Resolve arith_op_eq_correct: wlp.
+Opaque arith_op_eq_correct.
+
+Definition control_op_eq (c1 c2: control_op): ?? bool :=
+  match c1 with
+  | Ob lbl1 =>
+     match c2 with Ob lbl2 => phys_eq lbl1 lbl2 | _ => RET false end
+  | Obc c1 lbl1 =>
+     match c2 with Obc c2 lbl2 => iandb (struct_eq c1 c2) (phys_eq lbl1 lbl2) | _ => RET false end
+  | Obl id1 =>
+     match c2 with Obl id2 => phys_eq id1 id2 | _ => RET false end
+  | Obs id1 =>
+     match c2 with Obs id2 => phys_eq id1 id2 | _ => RET false end
+  | Ocbnz sz1 lbl1 =>
+     match c2 with Ocbnz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end
+  | Ocbz sz1 lbl1 =>
+     match c2 with Ocbz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end
+  | Otbnz sz1 n1 lbl1 =>
+     match c2 with Otbnz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end
+  | Otbz sz1 n1 lbl1 =>
+     match c2 with Otbz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end
+  | Obtbl tbl1 =>
+     match c2 with Obtbl tbl2 => (phys_eq tbl1 tbl2) | _ => RET false end
+  | OIncremPC sz1 =>
+      match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end
+  | OError =>
+     match c2 with OError => RET true | _ => RET false end
+  end.
+
+Lemma control_op_eq_correct c1 c2:
+  WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2.
+Proof.
+  destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence;
+  try apply Int.same_if_eq in H; try congruence.
+Qed.
+Hint Resolve control_op_eq_correct: wlp.
+Opaque control_op_eq_correct.
+
+Definition store_op_eq (s1 s2: store_op): ?? bool :=
+  match s1 with
+  | Ostore1 st1 chk1 a1 =>
+      match s2 with Ostore1 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+  | Ostore2 st1 chk1 a1 =>
+      match s2 with Ostore2 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+  | OstoreU st1 chk1 a1 =>
+      match s2 with OstoreU st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+  end.
+
+Lemma store_op_eq_correct s1 s2:
+  WHEN store_op_eq s1 s2 ~> b THEN b = true -> s1 = s2.
+Proof.
+  destruct s1, s2; wlp_simplify; try congruence.
+  all: rewrite H1 in H0; rewrite H0, H; reflexivity.
+Qed.
+Hint Resolve store_op_eq_correct: wlp.
+Opaque store_op_eq_correct.
+
+Definition load_op_eq (l1 l2: load_op): ?? bool :=
+  match l1 with
+  | Oload1 ld1 chk1 a1 =>
+      match l2 with Oload1 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+  | Oload2 ld1 chk1 a1 =>
+      match l2 with Oload2 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+  | OloadU ld1 chk1 a1 =>
+      match l2 with OloadU ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+  end.
+
+Lemma load_op_eq_correct l1 l2:
+  WHEN load_op_eq l1 l2 ~> b THEN b = true -> l1 = l2.
+Proof.
+  destruct l1, l2; wlp_simplify; try congruence.
+  all: rewrite H1 in H0; rewrite H, H0; reflexivity.
+Qed.
+Hint Resolve load_op_eq_correct: wlp.
+Opaque load_op_eq_correct.
+
+Definition allocf_op_eq (al1 al2: allocf_op): ?? bool :=
+  match al1 with
+  | OAllocf_SP sz1 linkofs1 =>
+      match al2 with OAllocf_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+  | OAllocf_Mem sz1 linkofs1 =>
+      match al2 with OAllocf_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+  end.
+
+Lemma allocf_op_eq_correct al1 al2:
+  WHEN allocf_op_eq al1 al2 ~> b THEN b = true -> al1 = al2.
+Proof.
+  destruct al1, al2; wlp_simplify; try congruence.
+  all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity.
+Qed.
+Hint Resolve allocf_op_eq_correct: wlp.
+Opaque allocf_op_eq_correct.
+
+Definition freef_op_eq (fr1 fr2: freef_op): ?? bool :=
+  match fr1 with
+  | OFreef_SP sz1 linkofs1 =>
+      match fr2 with OFreef_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+  | OFreef_Mem sz1 linkofs1 =>
+      match fr2 with OFreef_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+  end.
+
+Lemma freef_op_eq_correct fr1 fr2:
+  WHEN freef_op_eq fr1 fr2 ~> b THEN b = true -> fr1 = fr2.
+Proof.
+  destruct fr1, fr2; wlp_simplify; try congruence.
+  all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity.
+Qed.
+Hint Resolve freef_op_eq_correct: wlp.
+Opaque freef_op_eq_correct.
+
+Definition op_eq (o1 o2: op): ?? bool :=
+  match o1 with
+  | Arith i1 =>
+    match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end
+  | Control i1 =>
+    match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end
+  | Load i1 =>
+      match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end
+  | Store i1 =>
+      match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end
+  | Allocframe i1 =>
+      match o2 with Allocframe i2 => allocf_op_eq i1 i2 | _ => RET false end
+  | Freeframe i1 =>
+      match o2 with Freeframe i2 => freef_op_eq i1 i2 | _ => RET false end
+  | Loadsymbol id1 =>
+      match o2 with Loadsymbol id2 => phys_eq id1 id2 | _ => RET false end
+  | Cvtsw2x =>
+    match o2 with Cvtsw2x => RET true | _ => RET false end
+  | Cvtuw2x =>
+    match o2 with Cvtuw2x => RET true | _ => RET false end
+  | Cvtx2w =>
+    match o2 with Cvtx2w => RET true | _ => RET false end
+  | Constant c1 =>
+    match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end
+  end.
+
+Lemma op_eq_correct o1 o2: 
+ WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
+Proof.
+  destruct o1, o2; wlp_simplify; congruence.
+Qed.
+
+End IMPPARAM.
+
+End P.
+
+Module L <: ISeqLanguage with Module LP:=P.
+
+Module LP:=P.
+
+Include MkSeqLanguage P.
+
+End L.
+
+Module IST := ImpSimu L ImpPosDict.
+
+Import L.
+Import P.
+
+(** Compilation from [Asmblock] to [L] *)
+
+Local Open Scope positive_scope.
+
+Definition pmem : R.t := 1.
+
+Definition ireg_to_pos (ir: ireg) : R.t :=
+  match ir with
+  | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11 | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15
+  | X8 => 16 | X9 => 17 | X10 => 18 | X11 => 19 | X12 => 20 | X13 => 21 | X14 => 22 | X15 => 23
+  | X16 => 24 | X17 => 25 | X18 => 26 | X19 => 27 | X20 => 28 | X21 => 29 | X22 => 30 | X23 => 31
+  | X24 => 32 | X25 => 33 | X26 => 34 | X27 => 35 | X28 => 36 | X29 => 37 | X30 => 38
+  end
+.
+
+Definition freg_to_pos (fr: freg) : R.t :=
+  match fr with
+  | D0 => 39 | D1 => 40 | D2 => 41 | D3 => 42 | D4 => 43 | D5 => 44 | D6 => 45 | D7 => 46
+  | D8 => 47 | D9 => 48 | D10 => 49 | D11 => 50 | D12 => 51 | D13 => 52 | D14 => 53 | D15 => 54
+  | D16 => 55 | D17 => 56 | D18 => 57 | D19 => 58 | D20 => 59 | D21 => 60 | D22 => 61 | D23 => 62
+  | D24 => 63 | D25 => 64 | D26 => 65 | D27 => 66 | D28 => 67 | D29 => 68 | D30 => 69 | D31 => 70
+  end
+.
+
+Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'.
+Proof.
+  destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Lemma freg_to_pos_discr: forall r r', r <> r' -> freg_to_pos r <> freg_to_pos r'.
+Proof.
+  destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Definition ppos (r: preg) : R.t :=
+  match r with
+  | CR c => match c with
+            | CN => 2
+            | CZ => 3
+            | CC => 4
+            | CV => 5
+            end
+  | PC => 6
+  | DR d => match d with
+            | IR i => match i with
+                      | XSP => 7
+                      | RR1 ir => ireg_to_pos ir
+                      end
+            | FR fr => freg_to_pos fr
+            end
+  end
+.
+
+Notation "# r" := (ppos r) (at level 100, right associativity). 
+
+Lemma not_eq_add:
+  forall k n n', n <> n' -> k + n <> k + n'.
+Proof.
+  intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto.
+Qed.
+
+Lemma ppos_equal: forall r r', r = r' <-> ppos r = ppos r'.
+Proof.
+  destruct r as [dr|cr|]; destruct r' as [dr'|cr'|];
+  try destruct dr as [ir|fr]; try destruct dr' as [ir'|fr'];
+  try destruct ir as [irr|]; try destruct ir' as [irr'|].
+  all: split; intros; try rewrite H; try discriminate; try contradiction; simpl; eauto;
+  try destruct irr; try destruct irr';
+  try destruct fr; try destruct fr';
+  try destruct cr; try destruct cr';
+  simpl; try discriminate; try reflexivity.
+Qed.
+
+Lemma ppos_discr: forall r r', r <> r' <-> ppos r <> ppos r'.
+Proof.
+  split; unfold not; try intros; try apply ppos_equal in H0; try discriminate; try contradiction.
+Qed.
+
+Lemma ppos_pmem_discr: forall r, pmem <> ppos r.
+Proof.
+  intros. destruct r as [dr|cr|].
+  - destruct dr as [ir|fr]; try destruct ir as [irr|]; try destruct irr; try destruct fr;
+    unfold ppos; unfold pmem; discriminate.
+  - unfold ppos; unfold pmem; destruct cr; discriminate.
+  - unfold ppos; unfold pmem; discriminate.
+Qed.
+
+(** Inversion functions, used for debug traces *)
+
+Definition pos_to_ireg (p: R.t) : option ireg :=
+  match p with
+  | 8 => Some (X0) | 9 => Some (X1) | 10 => Some (X2) | 11 => Some (X3) | 12 => Some (X4) | 13 => Some (X5) | 14 => Some (X6) | 15 => Some (X7)
+  | 16 => Some (X8) | 17 => Some (X9) | 18 => Some (X10) | 19 => Some (X11) | 20 => Some (X12) | 21 => Some (X13) | 22 => Some (X14) | 23 => Some (X15)
+  | 24 => Some (X16) | 25 => Some (X17) | 26 => Some (X18) | 27 => Some (X19) | 28 => Some (X20) | 29 => Some (X21) | 30 => Some (X22) | 31 => Some (X23)
+  | 32 => Some (X24) | 33 => Some (X25) | 34 => Some (X26) | 35 => Some (X27) | 36 => Some (X28) | 37 => Some (X29) | 38 => Some (X30) | _ => None
+  end.
+
+Definition pos_to_freg (p: R.t) : option freg :=
+  match p with
+  | 39 => Some(D0) | 40 => Some(D1) | 41 => Some(D2) | 42 => Some(D3) | 43 => Some(D4) | 44 => Some(D5) | 45 => Some(D6) | 46 => Some(D7)
+  | 47 => Some(D8) | 48 => Some(D9) | 49 => Some(D10) | 50 => Some(D11) | 51 => Some(D12) | 52 => Some(D13) | 53 => Some(D14) | 54 => Some(D15)
+  | 55 => Some(D16) | 56 => Some(D17) | 57 => Some(D18) | 58 => Some(D19) | 59 => Some(D20) | 60 => Some(D21) | 61 => Some(D22) | 62 => Some(D23)
+  | 63 => Some(D24) | 64 => Some(D25) | 65 => Some(D26) | 66 => Some(D27) | 67 => Some(D28) | 68 => Some(D29) | 69 => Some(D30) | 70 => Some(D31) | _ => None
+  end.
+
+Definition inv_ppos (p: R.t) : option preg :=
+  match p with
+  | 1 => None
+  | 2 => Some (CR CN)
+  | 3 => Some (CR CZ)
+  | 4 => Some (CR CC)
+  | 5 => Some (CR CV)
+  | 6 => Some (PC)
+  | 7 => Some (DR (IR XSP))
+  | n => match pos_to_ireg n with
+         | None => match pos_to_freg n with
+                   | None => None
+                   | Some fr => Some (DR (FR fr))
+                   end
+         | Some ir => Some (DR (IR ir))
+         end
+  end.
+
+Notation "a @ b" := (Econs a b) (at level 102, right associativity).
+
+(** Translations of instructions *)
+
+Definition get_testcond_rlocs (c: testcond) :=
+  match c with
+  | TCeq => (PReg(#CZ) @ Enil)
+  | TCne => (PReg(#CZ) @ Enil)
+  | TChs => (PReg(#CC) @ Enil)
+  | TClo => (PReg(#CC) @ Enil)
+  | TCmi => (PReg(#CN) @ Enil)
+  | TCpl => (PReg(#CN) @ Enil)
+  | TChi => (PReg(#CZ) @ PReg(#CC) @ Enil)
+  | TCls => (PReg(#CZ) @ PReg(#CC) @ Enil)
+  | TCge => (PReg(#CN) @ PReg(#CV) @ Enil)
+  | TClt => (PReg(#CN) @ PReg(#CV) @ Enil)
+  | TCgt => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil)
+  | TCle => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil)
+  end.
+
+Definition trans_control (ctl: control) : inst :=
+  match ctl with
+  | Pb lbl => [(#PC, Op (Control (Ob lbl)) (PReg(#PC) @ Enil))]
+  | Pbc c lbl =>
+      let lr := get_testcond_rlocs c in
+      [(#PC, Op (Control (Obc c lbl)) (PReg(#PC) @ lr))]
+  | Pbl id sg => [(#RA, PReg(#PC));
+                  (#PC, Op (Control (Obl id)) Enil)]
+  | Pbs id sg => [(#PC, Op (Control (Obs id)) Enil)]
+  | Pblr r sg => [(#RA, PReg(#PC));
+                  (#PC, Old (PReg(#r)))]
+  | Pbr r sg => [(#PC, PReg(#r))]
+  | Pret r => [(#PC, PReg(#r))]
+  | Pcbnz sz r lbl => [(#PC, Op (Control (Ocbnz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+  | Pcbz sz r lbl => [(#PC, Op (Control (Ocbz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+  | Ptbnz sz r n lbl => [(#PC, Op (Control (Otbnz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+  | Ptbz sz r n lbl => [(#PC, Op (Control (Otbz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+  | Pbtbl r tbl => [(#X16, Op (Constant Vundef) Enil);
+                    (#PC, Op (Control (Obtbl tbl)) (PReg(#r) @ PReg(#PC) @ Enil));
+                    (#X16, Op (Constant Vundef) Enil)]
+  | Pbuiltin ef args res => []
+  end.
+
+Definition trans_exit (ex: option control) : L.inst :=
+  match ex with
+  | None => []
+  | Some ctl => trans_control ctl
+  end
+.
+
+Definition trans_arith (ai: ar_instruction) : inst :=
+  match ai with
+  | PArithP n rd => 
+      if destroy_X16 n then [(#rd, Op(Arith (OArithP n)) Enil); (#X16, Op (Constant Vundef) Enil)]
+      else [(#rd, Op(Arith (OArithP n)) Enil)]
+  | PArithPP n rd r1 => [(#rd, Op(Arith (OArithPP n)) (PReg(#r1) @ Enil))]
+  | PArithPPP n rd r1 r2 => [(#rd, Op(Arith (OArithPPP n)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+  | PArithRR0R n rd r1 r2 =>
+      let lr := match r1 with
+                | RR0 r1' => Op(Arith (OArithRR0R n)) (PReg(#r1') @ PReg(#r2) @ Enil)
+                | XZR => let vz := if arith_rr0r_isize n then Vint Int.zero else Vlong Int64.zero in
+                         Op(Arith (OArithRR0R_XZR n vz)) (PReg(#r2) @ Enil)
+                end in
+      [(#rd,  lr)]
+  | PArithRR0 n rd r1 =>
+      let lr := match r1 with
+                | RR0 r1' => Op(Arith (OArithRR0 n)) (PReg(#r1') @ Enil)
+                | XZR => let vz := if arith_rr0_isize n then Vint Int.zero else Vlong Int64.zero in
+                         Op(Arith (OArithRR0_XZR n vz)) (Enil)
+                end in
+      [(#rd, lr)]
+  | PArithARRRR0 n rd r1 r2 r3 =>
+      let lr := match r3 with
+                | RR0 r3' => Op(Arith (OArithARRRR0 n)) (PReg(#r1) @ PReg (#r2) @ PReg(#r3') @ Enil)
+                | XZR => let vz := if arith_arrrr0_isize n then Vint Int.zero else Vlong Int64.zero in
+                         Op(Arith (OArithARRRR0_XZR n vz)) (PReg(#r1) @ PReg(#r2) @ Enil)
+                end in
+      [(#rd, lr)]
+  | PArithComparisonPP n r1 r2 =>
+      [(#CN, Op(Arith (OArithComparisonPP_CN n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+       (#CZ, Op(Arith (OArithComparisonPP_CZ n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+       (#CC, Op(Arith (OArithComparisonPP_CC n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+       (#CV, Op(Arith (OArithComparisonPP_CV n)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+  | PArithComparisonR0R n r1 r2 =>
+      let is := arith_comparison_r0r_isize n in
+      match r1 with
+      | RR0 r1' => [(#CN, Op(Arith (OArithComparisonR0R_CN n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+                    (#CZ, Op(Arith (OArithComparisonR0R_CZ n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+                    (#CC, Op(Arith (OArithComparisonR0R_CC n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+                    (#CV, Op(Arith (OArithComparisonR0R_CV n is)) (PReg(#r1') @ PReg(#r2) @ Enil))]
+      | XZR => let vz := if is then Vint Int.zero else Vlong Int64.zero in
+          [(#CN, Op(Arith (OArithComparisonR0R_CN_XZR n is vz)) (PReg(#r2) @ Enil));
+                (#CZ, Op(Arith (OArithComparisonR0R_CZ_XZR n is vz)) (PReg(#r2) @ Enil));
+                (#CC, Op(Arith (OArithComparisonR0R_CC_XZR n is vz)) (PReg(#r2) @ Enil));
+                (#CV, Op(Arith (OArithComparisonR0R_CV_XZR n is vz)) (PReg(#r2) @ Enil))]
+      end
+  | PArithComparisonP n r1 =>
+      [(#CN, Op(Arith (OArithComparisonP_CN n)) (PReg(#r1) @ Enil));
+       (#CZ, Op(Arith (OArithComparisonP_CZ n)) (PReg(#r1) @ Enil));
+       (#CC, Op(Arith (OArithComparisonP_CC n)) (PReg(#r1) @ Enil));
+       (#CV, Op(Arith (OArithComparisonP_CV n)) (PReg(#r1) @ Enil))]
+  | Pcset rd c =>
+      let lr := get_testcond_rlocs c in
+      [(#rd, Op(Arith (Ocset c)) lr)]
+  | Pfmovi fsz rd r1 =>
+      let lr := match r1 with
+                | RR0 r1' => Op(Arith (Ofmovi fsz)) (PReg(#r1') @ Enil)
+                | XZR => Op(Arith (Ofmovi_XZR fsz)) Enil
+                end in
+      [(#rd, lr)]
+  | Pcsel rd r1 r2 c =>
+      let lr := get_testcond_rlocs c in
+      [(#rd, Op(Arith (Ocsel c)) (PReg(#r1) @ PReg (#r2) @ lr))]
+  | Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+  end.
+
+Definition eval_addressing_rlocs_st (st: store_rs_a) (chunk: memory_chunk) (rs: dreg) (a: addressing) :=
+  match a with
+  | ADimm base n => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+  | ADreg base r => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADlsl base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADsxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADuxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADadr base id ofs => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+  | ADpostincr base n => Op (Store (OstoreU st chunk a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *)
+  end.
+
+Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) :=
+  match a with
+  | ADimm base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+  | ADreg base r => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADlsl base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADsxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADuxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+  | ADadr base id ofs => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+  | ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+  end.
+
+Definition trans_ldp_chunk (chunk: memory_chunk) (r: dreg): load_rd_a :=
+  match chunk with
+  | Mint32 => Pldrw
+  | Mint64 => Pldrx
+  | Mfloat32 => Pldrs
+  | Mfloat64 => Pldrd
+  | Many32 => Pldrw_a
+  | _ => (* This case should always correspond to Many64 *)
+    match r with
+    | IR _ => Pldrx_a
+    | FR _ => Pldrd_a
+    end
+  end.
+
+Definition trans_stp_chunk (chunk: memory_chunk) (r: dreg): store_rs_a :=
+  match chunk with
+  | Mint32 => Pstrw
+  | Mint64 => Pstrx
+  | Mfloat32 => Pstrs
+  | Mfloat64 => Pstrd
+  | Many32 => Pstrw_a
+  | _ => (* This case should always correspond to Many64 *)
+    match r with
+    | IR _ => Pstrx_a
+    | FR _ => Pstrd_a
+    end
+  end.
+
+Definition trans_load (ldi: ld_instruction) :=
+  match ldi with
+  | PLd_rd_a ld r a =>
+      let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)]
+  | Pldp ld r1 r2 chk1 chk2 a =>
+      let ldi1 := trans_ldp_chunk chk1 r1 in
+      let ldi2 := trans_ldp_chunk chk2 r1 in
+      let lr := eval_addressing_rlocs_ld ldi1 chk1 a in
+      let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4%Z | _ => 8%Z end in
+      match a with
+      | ADimm base n =>
+        let a' := (get_offset_addr a ofs) in
+          [(#r1, lr);
+           (#r2, Op (Load (Oload1 ldi2 chk2 a'))
+            (Old(PReg (#base)) @ PReg (pmem) @ Enil))]
+      | _ => [(#PC, (Op (OError)) Enil)]
+      end
+  end.
+
+Definition trans_store (sti: st_instruction) :=
+  match sti with
+  | PSt_rs_a st r a =>
+      let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)]
+  | Pstp st r1 r2 chk1 chk2 a =>
+      let sti1 := trans_stp_chunk chk1 r1 in
+      let sti2 := trans_stp_chunk chk2 r1 in
+      let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in
+      let ofs := match chk1 with | Mint32 | Mfloat32| Many32 => 4%Z | _ => 8%Z end in
+      match a with
+      | ADimm base n =>
+        let a' := (get_offset_addr a ofs) in
+          [(pmem, lr);
+           (pmem, Op (Store (Ostore1 sti2 chk2 a'))
+            (PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))]
+      | _ => [(#PC, (Op (OError)) Enil)]
+      end
+  end.
+
+Definition trans_basic (b: basic) : inst :=
+  match b with
+  | PArith ai => trans_arith ai
+  | PLoad ld => trans_load ld
+  | PStore st => trans_store st
+  | Pallocframe sz linkofs =>
+      [(#X29, PReg(#SP));
+       (#SP, Op (Allocframe (OAllocf_SP sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil));
+       (#X16, Op (Constant Vundef) Enil);
+       (pmem, Op (Allocframe (OAllocf_Mem sz linkofs)) (Old(PReg(#SP)) @ PReg pmem @ Enil))]
+  | Pfreeframe sz linkofs =>
+      [(pmem, Op (Freeframe (OFreef_Mem sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil));
+       (#SP, Op (Freeframe (OFreef_SP sz linkofs)) (PReg (#SP) @ Old (PReg pmem) @ Enil));
+       (#X16, Op (Constant Vundef) Enil)]
+  | Ploadsymbol rd id => [(#rd, Op (Loadsymbol id) Enil)]
+  | Pcvtsw2x rd r1 => [(#rd, Op (Cvtsw2x) (PReg (#r1) @ Enil))]
+  | Pcvtuw2x rd r1 => [(#rd, Op (Cvtuw2x) (PReg (#r1) @ Enil))]
+  | Pcvtx2w rd => [(#rd, Op (Cvtx2w) (PReg (#rd) @ Enil))]
+  | Pnop => []
+  end.
+
+Fixpoint trans_body (b: list basic) : list L.inst :=
+  match b with
+  | nil => nil
+  | b :: lb => (trans_basic b) :: (trans_body lb)
+  end.
+
+Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k.
+
+Definition trans_block (b: Asmblock.bblock) : L.bblock :=
+  trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil).
+
+(*Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.*)
+(*Proof.*)
+  (*intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*)
+(*Qed.*)
+
+(*Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb.*)
+(*Proof.*)
+  (*intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*)
+(*Qed.*)
+
+(** Lemmas on the translation *)
+
+Definition state := L.mem.
+Definition exec := L.run.
+
+Definition match_states (s: Asm.state) (s': state) :=
+  let (rs, m) := s in
+     s' pmem = Memstate m
+  /\ forall r, s' (#r) = Val (rs r).
+
+Definition match_outcome (o:outcome) (s: option state) :=
+  match o with
+  | Some n => exists s', s=Some s' /\ match_states n s'
+  | None => s=None
+  end.
+ 
+Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity).
+
+Definition trans_state (s: Asm.state) : state :=
+  let (rs, m) := s in
+  fun x => if (Pos.eq_dec x pmem) then Memstate m
+           else match (inv_ppos x) with
+           | Some r => Val (rs r)
+           | None => Val Vundef
+           end.
+
+Lemma not_eq_IR:
+  forall r r', r <> r' -> IR r <> IR r'.
+Proof.
+  intros. congruence.
+Qed.
+
+Lemma ireg_pos_ppos: forall r,
+  ireg_to_pos r = # r.
+Proof.
+  intros. simpl. reflexivity.
+Qed.
+
+Lemma freg_pos_ppos: forall r,
+  freg_to_pos r = # r.
+Proof.
+  intros. simpl. reflexivity.
+Qed.
+
+Lemma ireg_not_pc: forall r,
+  (#PC) <> ireg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_pmem: forall r,
+  ireg_to_pos r <> pmem.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CN: forall r,
+  2 <> ireg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CZ: forall r,
+  3 <> ireg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CC: forall r,
+  4 <> ireg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CV: forall r,
+  5 <> ireg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_pmem: forall r,
+  freg_to_pos r <> pmem.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CN: forall r,
+  2 <> freg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CZ: forall r,
+  3 <> freg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CC: forall r,
+  4 <> freg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CV: forall r,
+  5 <> freg_to_pos r.
+Proof.
+  intros; destruct r; discriminate.
+Qed.
+
+Lemma dreg_not_pmem: forall (r: dreg),
+  (# r) <> pmem.
+Proof.
+  intros; destruct r as [i|f].
+  - destruct i. apply ireg_not_pmem. discriminate.
+  - apply freg_not_pmem.
+Qed.
+
+Ltac DPRM pr :=
+  destruct pr as [drDPRF|crDPRF|]; 
+      [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF |]
+      | destruct crDPRF|].
+
+Ltac DPRF pr :=
+  destruct pr as [drDPRF|crDPRF|]; 
+      [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF as [irrDPRF|]; [destruct irrDPRF|]
+      | destruct frDPRF]
+      | destruct crDPRF|].
+
+Lemma preg_not_pmem: forall r,
+  pmem <> # r.
+Proof.
+  intros. DPRF r; simpl; discriminate.
+Qed.
+
+Ltac DIRN1 ir := destruct ir as [irrDIRN1|]; subst; try destruct irrDIRN1; simpl.
+
+Lemma dreg_not_CN: forall (r: dreg),
+  2 <> (#r).
+Proof.
+  intros; DIRN1 r; [ apply ireg_not_CN | discriminate | apply freg_not_CN].
+Qed.
+
+Lemma dreg_not_CZ: forall (r: dreg),
+  3 <> (#r).
+Proof.
+  intros; DIRN1 r; [ apply ireg_not_CZ | discriminate | apply freg_not_CZ].
+Qed.
+
+Lemma dreg_not_CC: forall (r: dreg),
+  4 <> (#r).
+Proof.
+  intros; DIRN1 r; [ apply ireg_not_CC | discriminate | apply freg_not_CC].
+Qed.
+
+Lemma dreg_not_CV: forall (r: dreg),
+  5 <> (#r).
+Proof.
+  intros; DIRN1 r; [ apply ireg_not_CV | discriminate | apply freg_not_CV].
+Qed.
+
+Lemma sr_update_both: forall sr rsr r1 rr v
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  (sr <[ (#r1) <- Val (v) ]>) (#rr) =
+  Val (rsr # r1 <- v rr).
+Proof.
+  intros. unfold assign.
+  destruct (R.eq_dec (#r1) (#rr)); subst.
+  - apply ppos_equal in e; subst; rewrite Pregmap.gss; reflexivity.
+  - rewrite Pregmap.gso; eapply ppos_discr in n; auto.
+Qed.
+
+Lemma sr_gss: forall sr pos v,
+  (sr <[ pos <- v ]>) pos = v.
+Proof.
+  intros. unfold assign.
+  destruct (R.eq_dec pos pos) eqn:REQ; try reflexivity; try congruence.
+Qed.
+
+Lemma sr_update_overwrite: forall sr pos v1 v2,
+  (sr <[ pos <- v1 ]>) <[ pos <- v2 ]> = (sr <[ pos <- v2 ]>).
+Proof.
+  intros.
+  unfold assign. apply functional_extensionality; intros x.
+  destruct (R.eq_dec pos x); reflexivity.
+Qed.
+
+Ltac sr_val_rwrt :=
+  repeat match goal with
+  | [H: forall r: preg, ?sr (# r) = Val (?rsr r) |- _ ]
+      => rewrite H
+  end.
+
+Ltac sr_memstate_rwrt :=
+  repeat match goal with
+  | [H: ?sr pmem = Memstate ?mr |- _ ]
+      => rewrite <- H
+  end.
+
+Ltac replace_ppos :=
+  try erewrite !ireg_pos_ppos;
+  try erewrite !freg_pos_ppos;
+  try replace (7) with (#XSP) by eauto;
+  try replace (24) with (#X16) by auto.
+
+Ltac DDRM dr :=
+  destruct dr as [irsDDRF|frDDRF]; 
+      [destruct irsDDRF
+      | idtac ].
+
+(* Ltac DDRF dr :=
+  destruct dr as [irsDDRF|frDDRF]; 
+      [destruct irsDDRF as [irsDDRF|]; [destruct irsDDRF|]
+      | destruct frDDRF]. *)
+
+(* Ltac DPRI pr :=
+  destruct pr as [drDPRI|crDPRI|]; 
+      [destruct drDPRI as [irDPRI|frDPRI]; [destruct irDPRI as [irrDPRI|]; [destruct irrDPRI|]|]
+      | idtac
+      | idtac ]. *)
+
+Ltac discriminate_ppos :=
+  try apply ireg_not_pmem;
+  try apply ireg_not_pc;
+  try apply freg_not_pmem;
+  try apply dreg_not_pmem;
+  try apply ireg_not_CN;
+  try apply ireg_not_CZ;
+  try apply ireg_not_CC;
+  try apply ireg_not_CV;
+  try apply freg_not_CN;
+  try apply freg_not_CZ;
+  try apply freg_not_CC;
+  try apply freg_not_CV;
+  try apply dreg_not_CN;
+  try apply dreg_not_CZ;
+  try apply dreg_not_CC;
+  try apply dreg_not_CV;
+  try(simpl; discriminate).
+
+Ltac replace_pc := try replace (6) with (#PC) by eauto.
+
+Ltac replace_regs_pos sr :=
+  try replace (sr 7) with (sr (ppos XSP)) by eauto;
+  try replace (sr 6) with (sr (ppos PC)) by eauto;
+  try replace (sr 2) with (sr (ppos CN)) by eauto;
+  try replace (sr 3) with (sr (ppos CZ)) by eauto;
+  try replace (sr 4) with (sr (ppos CC)) by eauto;
+  try replace (sr 5) with (sr (ppos CV)) by eauto.
+
+Ltac Simpl_exists sr :=
+  replace_ppos;
+  replace_regs_pos sr;
+  try sr_val_rwrt;
+  try (eexists; split; [| split]); eauto;
+  try (sr_memstate_rwrt; rewrite assign_diff;
+      try reflexivity;
+      discriminate_ppos
+  ).
+
+Ltac Simpl_rep sr :=
+  replace_ppos;
+  replace_regs_pos sr;
+  try sr_val_rwrt;
+  try (sr_memstate_rwrt; rewrite assign_diff;
+      try reflexivity;
+      discriminate_ppos
+  ).
+
+Ltac Simpl_update :=
+  try eapply sr_update_both; eauto.
+
+Ltac Simpl sr := Simpl_exists sr; try (intros rr(* ; try rewrite sr_update_overwrite; replace_regs_pos sr; DPRM rr *)); Simpl_update.
+
+Ltac destruct_res_flag rsr := try (rewrite Pregmap.gso; discriminate_ppos); destruct (rsr _); simpl; try reflexivity.
+
+(* Ltac discriminate_preg_flags := rewrite !assign_diff; try rewrite !Pregmap.gso; discriminate_ppos; sr_val_rwrt; reflexivity. *)
+
+(* Ltac destruct_reg_neq r1 r2 :=
+  destruct (PregEq.eq r1 r2); subst;
+   [ rewrite sr_gss; rewrite Pregmap.gss; reflexivity |
+     rewrite assign_diff; try rewrite Pregmap.gso; fold (ppos r1); try apply ppos_discr; auto]. *)
+
+Lemma reg_update_overwrite: forall rsr sr r rd v1 v2
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  ((sr <[ # rd <- Val (v1) ]>) <[ # rd <- Val (v2) ]>) (# r) =
+  Val ((rsr # rd <- v1) # rd <- v2 r).
+Proof.
+  intros.
+  unfold Pregmap.set; destruct (PregEq.eq r rd).
+  - rewrite e; apply sr_gss; reflexivity.
+  - rewrite sr_update_overwrite. rewrite assign_diff; eauto.
+    unfold not; intros. apply ppos_equal in H. congruence.
+Qed.
+
+Ltac replace_regs_cond_force :=
+  try replace (5) with (#CV) in * by auto;
+  try replace (4) with (#CC) in * by auto;
+  try replace (3) with (#CZ) in * by auto;
+  try replace (2) with (#CN) in * by auto.
+  
+Ltac validate_crbit_flags rr v1 v2 :=
+  destruct (R.eq_dec 5 (#rr)) as [e0|n0];
+  destruct (R.eq_dec 4 (#rr)) as [e1|n1];
+  destruct (R.eq_dec 3 (#rr)) as [e2|n2];
+  destruct (R.eq_dec 2 (#rr)) as [e3|n3];
+  replace_regs_cond_force;
+  try apply ppos_equal in e0;
+  try apply ppos_equal in e1;
+  try apply ppos_equal in e2;
+  try apply ppos_equal in e3;
+  try apply ppos_discr in n0;
+  try apply ppos_discr in n1;
+  try apply ppos_discr in n2;
+  try apply ppos_discr in n3;
+  subst.
+
+Ltac Simpl_flags :=
+  try (rewrite Pregmap.gss; reflexivity);
+  try (rewrite Pregmap.gso, Pregmap.gss; [reflexivity|]; try auto);
+  try (rewrite Pregmap.gso; try auto); 
+  try (rewrite !Pregmap.gso; try auto).
+
+Lemma compare_single_res: forall sr mr rsr rr v1 v2
+  (HMEM: sr pmem = Memstate mr)
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  ((((sr <[ 2 <- Val (_CN (v_compare_single v1 v2)) ]>) <[ 3 <-
+   Val (_CZ (v_compare_single v1 v2)) ]>) <[ 4 <-
+  Val (_CC (v_compare_single v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_single v1 v2)) ]>) (# rr) =
+Val
+  ((compare_single rsr v1 v2) rr).
+Proof.
+  intros. unfold v_compare_single, compare_single, assign.
+  validate_crbit_flags rr v1 v2.
+  all: destruct v1; destruct v2; Simpl_flags.
+Qed.
+
+Lemma compare_float_res: forall sr mr rsr rr v1 v2
+  (HMEM: sr pmem = Memstate mr)
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  ((((sr <[ 2 <- Val (_CN (v_compare_float v1 v2)) ]>) <[ 3 <-
+   Val (_CZ (v_compare_float v1 v2)) ]>) <[ 4 <-
+  Val (_CC (v_compare_float v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_float v1 v2)) ]>) (# rr) =
+Val
+  ((compare_float rsr v1 v2) rr).
+Proof.
+  intros. unfold v_compare_float, compare_float, assign.
+  validate_crbit_flags rr v1 v2.
+  all: destruct v1; destruct v2; Simpl_flags.
+Qed.
+
+Lemma compare_long_res: forall sr mr rsr rr v1 v2
+  (HMEM: sr pmem = Memstate mr)
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  ((((sr <[ 2 <- Val (_CN (v_compare_long v1 v2)) ]>) <[ 3 <-
+   Val (_CZ (v_compare_long v1 v2)) ]>) <[ 4 <-
+  Val (_CC (v_compare_long v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_long v1 v2)) ]>) (# rr) =
+Val
+  ((compare_long rsr v1 v2) rr).
+Proof.
+  intros. unfold v_compare_long, compare_long, assign.
+  validate_crbit_flags rr v1 v2.
+  all: Simpl_flags.
+Qed.
+
+Lemma compare_int_res: forall sr mr rsr rr v1 v2
+  (HMEM: sr pmem = Memstate mr)
+  (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+  ((((sr <[ 2 <- Val (_CN (v_compare_int v1 v2)) ]>) <[ 3 <-
+   Val (_CZ (v_compare_int v1 v2)) ]>) <[ 4 <-
+  Val (_CC (v_compare_int v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_int v1 v2)) ]>) (# rr) =
+Val
+  ((compare_int rsr v1 v2) rr).
+Proof.
+  intros. unfold v_compare_int, compare_int, assign.
+  validate_crbit_flags rr v1 v2.
+  all: Simpl_flags.
+Qed.
+
+Section SECT_SEQ.
+
+Variable Ge: genv.
+
+Lemma trans_arith_correct rsr mr sr rsw' old i:
+  match_states (State rsr mr) sr ->
+  exec_arith_instr Ge.(_lk) i rsr = rsw' ->
+  exists sw,
+     inst_run Ge (trans_arith i) sr old = Some sw
+  /\ match_states (State rsw' mr) sw.
+Proof.
+  induction i.
+  all: intros MS EARITH; subst; inv MS; unfold exec_arith_instr.
+  - (* PArithP *)
+    destruct i.
+    1,2,3: DIRN1 rd; Simpl sr.
+    (* Special case for Pmovimms/Pmovimmd *)
+    all: simpl;
+      try(destruct (negb (is_immediate_float32 _)));
+      try(destruct (negb (is_immediate_float64 _)));
+      DIRN1 rd; Simpl sr;
+      try (rewrite assign_diff; discriminate_ppos; reflexivity);
+      try (intros rr'; Simpl_update).
+  - (* PArithPP *)
+    DIRN1 rs; DIRN1 rd; Simpl sr.
+  - (* PArithPPP *)
+    DIRN1 r1; DIRN1 r2; DIRN1 rd; Simpl sr.
+  - (* PArithRR0R *) 
+    simpl. destruct r1.
+    + (* OArithRR0R *) simpl; Simpl sr.
+    + (* OArithRR0R_XZR *) simpl; destruct (arith_rr0r_isize _); Simpl sr.
+  - (* PArithRR0 *)
+    simpl. destruct r1.
+    + (* OArithRR0 *) simpl; Simpl sr.
+    + (* OArithRR0_XZR *) simpl; destruct (arith_rr0_isize _); Simpl sr.
+  - (* PArithARRRR0 *)
+    simpl. destruct r3.
+    + (* OArithARRRR0 *) simpl; Simpl sr.
+    + (* OArithARRRR0_XZR *) simpl; destruct (arith_arrrr0_isize _); Simpl sr.
+  - (* PArithComparisonPP *)
+    simpl; destruct i;
+    unfold arith_eval_comparison_pp, arith_prepare_comparison_pp; simpl;
+    fold (ppos r2); fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr;
+    Simpl sr;
+    try destruct sz;
+    try (eapply compare_single_res; eauto);
+    try (eapply compare_long_res; eauto);
+    try (eapply compare_float_res; eauto).
+  - (* PArithComparisonR0R *)
+    simpl; destruct r1; simpl; destruct i;
+    repeat (replace_regs_cond_force; try fold (ppos r); try fold(ppos r2);
+    try rewrite !assign_diff; discriminate_ppos; try rewrite !H0);
+    Simpl sr; unfold arith_eval_comparison_r0r, arith_comparison_r0r_isize;
+    destruct arith_prepare_comparison_r0r; destruct is;
+    try (eapply compare_long_res; eauto);
+    try (eapply compare_int_res; eauto).
+  - (* PArithComparisonP *)
+    simpl; destruct i;
+    unfold arith_eval_comparison_p, arith_prepare_comparison_p; simpl;
+    fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr;
+    Simpl sr;
+    try destruct sz;
+    try (eapply compare_int_res; eauto);
+    try (eapply compare_single_res; eauto);
+    try (eapply compare_long_res; eauto);
+    try (eapply compare_float_res; eauto).
+  - (* Pcset *)
+    simpl; unfold eval_testcond, get_testcond_rlocs, cflags_eval;
+    unfold cond_eval_is; unfold flags_testcond_value, list_exp_eval; destruct c; simpl;
+    repeat Simpl_rep sr; Simpl_exists sr;
+    destruct_res_flag rsr;
+    Simpl sr.
+  - (* Pfmovi *)
+    simpl; destruct r1; simpl; destruct fsz; Simpl sr.
+  - (* Pcsel *)
+    destruct c; simpl; DIRN1 rd; fold (ppos r2); fold (ppos r1); Simpl sr.
+  - (* Pfnmul *)
+    simpl; destruct fsz; Simpl sr.
+Qed.
+
+Lemma sp_xsp:
+  SP = XSP.
+Proof.
+  econstructor.
+Qed.
+
+Lemma load_chunk_neutral: forall chk v r,
+  interp_load (trans_ldp_chunk chk r) v = v.
+Proof.
+  intros; destruct chk; destruct r; simpl; reflexivity.
+Qed.
+
+Theorem bisimu_basic rsr mr sr bi:
+  match_states (State rsr mr) sr ->
+  match_outcome (exec_basic Ge.(_lk) Ge.(_genv) bi rsr mr) (inst_run Ge (trans_basic bi) sr sr).
+Proof.
+(* a little tactic to automate reasoning on preg_eq *)
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+Local Ltac preg_eq_discr r rd :=
+  destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
+  rewrite (assign_diff _ (#rd) (#r) _); auto;
+  rewrite Pregmap.gso; auto.
+
+  intros MS; inversion MS as (H & H0).
+  destruct bi; simpl.
+  (* Loadsymbol / Cvtsw2x / Cvtuw2x / Cvtx2w *) 
+  6,7,8,9: Simpl sr.
+  - (* Arith *)
+    exploit trans_arith_correct; eauto.
+  - (* Load *)
+    destruct ld.
+    + unfold exec_load, exec_load_rd_a, eval_addressing_rlocs_ld, exp_eval;
+      destruct ld; destruct a; simpl;
+      try fold (ppos base); try fold (ppos r);
+      erewrite !H0, H; simpl;
+      unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv;
+      try destruct (Mem.loadv _ _ _); simpl; auto.
+      all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update).
+    + 
+      unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval;
+      destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto;
+      try fold (ppos base);
+      try erewrite !H0, H; simpl;
+      unfold exec_load1, exec_load2; unfold call_ll_loadv;
+      destruct (Mem.loadv _ _ _); simpl; auto;
+      fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H;
+      try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr;
+      rewrite !load_chunk_neutral;
+      try (rewrite !assign_diff; discriminate_ppos; reflexivity);
+      try (destruct rd1 as [ir1|fr1]; try destruct ir1; destruct rd2 as [ir2|fr2]; try destruct ir2;
+      destruct base; discriminate_ppos);
+      repeat (try fold (ppos r); try fold (ppos r0);
+      try fold (ppos fr1); try fold (ppos fr2); intros; Simpl_update).
+  - (* Store *)
+    destruct st.
+    + unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval;
+      destruct st; destruct a; simpl;
+      try fold (ppos base); try fold (ppos rs); try fold (ppos r);
+      erewrite !H0, H; simpl;
+      unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev;
+      try destruct (Mem.storev _ _ _ _); simpl; auto.
+      all: eexists; split; [| split]; eauto;
+      intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos.
+    + unfold exec_store, exec_store_double, eval_addressing_rlocs_st, exp_eval;
+      destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto;
+      try fold (ppos base); try fold (ppos rs1);
+      erewrite !H0, H; simpl;
+      unfold exec_store1, exec_store2; unfold call_ll_storev;
+      try destruct (Mem.storev _ _ _ _); simpl; auto;
+      fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl;
+      try destruct (Mem.storev _ _ _ _); simpl; auto.
+      all: eexists; split; [| split]; eauto;
+      repeat (try intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos).
+  - (* Alloc *)
+    destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
+    + eexists; repeat split. 
+      * rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr;
+        rewrite H; destruct (Mem.alloc _ _ _) eqn:MEMAL2;
+        injection MEMAL; intros Hm Hb; try rewrite Hm, Hb;
+        rewrite sp_xsp in MEMS; rewrite MEMS.
+        rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr; rewrite H;
+        destruct (Mem.alloc _ _ _) eqn:MEMAL3;
+        injection MEMAL2; intros Hm2 Hb2; try rewrite Hm2, Hb2;
+        rewrite Hm, Hb; rewrite MEMS; reflexivity.
+      * eauto.
+      * intros rr. rewrite assign_diff; try apply preg_not_pmem; try rewrite sp_xsp.
+        replace 37 with (#X29) by auto; repeat (Simpl_update; intros).
+    + simpl; repeat Simpl_exists sr. erewrite H. destruct (Mem.alloc _ _ _) eqn:HMEMAL2.
+        injection MEMAL; intros Hm Hb. 
+        try rewrite Hm, Hb; clear Hm Hb.
+        try rewrite sp_xsp in MEMS; rewrite MEMS. reflexivity.
+  - (* Free *)
+    destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto;
+    repeat Simpl_exists sr; rewrite H; simpl.
+    + destruct (rsr SP) eqn:EQSP; simpl; rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD; try reflexivity.
+      destruct (Mem.free _ _ _) eqn:EQFREE; try reflexivity. rewrite assign_diff; discriminate_ppos.
+      replace_regs_pos sr; sr_val_rwrt. rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD. rewrite EQFREE.
+      replace 24 with (#X16) by auto; rewrite sp_xsp; Simpl sr.
+      intros rr'; destruct (PregEq.eq XSP rr').
+      * rewrite e; rewrite Pregmap.gss, sr_gss; auto.
+      * rewrite Pregmap.gso, !assign_diff; auto; apply ppos_discr in n; auto.
+    + rewrite <- sp_xsp; rewrite MLOAD; reflexivity.
+  - (* Nop *)
+    Simpl sr.
+Qed.
+
+Theorem bisimu_body:
+  forall bdy rsr mr sr,
+  match_states (State rsr mr) sr ->
+  match_outcome (exec_body Ge.(_lk) Ge.(_genv) bdy rsr mr) (exec Ge (trans_body bdy) sr).
+Proof.
+  induction bdy as [|i bdy]; simpl; eauto. 
+  intros.
+  exploit (bisimu_basic rsr mr sr i); eauto.
+  destruct (exec_basic _ _ _ _ _); simpl.
+  - intros (s' & X1 & X2).
+    rewrite X1; simpl; eauto. eapply IHbdy; eauto; simpl.
+    unfold match_states in *. destruct s. unfold Asm._m. eauto.
+  - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_control ex sz rsr mr sr:
+  match_states (State rsr mr) sr ->
+  match_outcome (exec_cfi Ge.(_genv) Ge.(_fn) ex (incrPC (Ptrofs.repr sz) rsr) mr) (inst_run Ge (trans_pcincr sz (trans_exit (Some (PCtlFlow ex)))) sr sr).
+Proof.
+  intros MS.
+  simpl in *. inv MS.
+  destruct ex.
+  (* Obr / Oret *)
+  6,7: unfold control_eval, incrPC; simpl; destruct Ge;
+       replace_pc; rewrite (H0 PC);
+       repeat Simpl_rep sr; Simpl_exists sr;
+       intros rr; destruct (preg_eq rr PC); [
+       rewrite e; rewrite sr_gss; rewrite Pregmap.gss;
+       try rewrite Pregmap.gso; discriminate_ppos; fold (ppos r); auto |
+       repeat Simpl_rep sr; try rewrite !Pregmap.gso; auto; apply ppos_discr in n; auto ].
+  (* Ocbnz / Ocbz *)
+  6,7,8,9: unfold control_eval; destruct Ge; simpl;
+           replace_pc; rewrite (H0 PC);
+           unfold eval_branch, eval_neg_branch, eval_testzero, eval_testbit,
+                  incrPC, goto_label_deps, goto_label;
+           destruct (PregEq.eq r PC);
+           [ rewrite e; destruct sz0; simpl; Simpl sr |
+             destruct sz0; simpl; replace_pc; rewrite Pregmap.gso; auto; repeat Simpl_rep sr; try rewrite H0;
+             try (destruct (Val.mxcmpu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+             try (destruct (Val.mxcmplu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+             try (destruct (Val.cmp_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+             try (destruct (Val.cmpl_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+             try (simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr);
+             destruct (label_pos _ _ _); try reflexivity; rewrite Pregmap.gss;
+             destruct Val.offset_ptr; try reflexivity; rewrite sr_update_overwrite; 
+             simpl; Simpl sr; (destruct (PregEq.eq rr PC); subst;
+                  [ rewrite sr_gss, Pregmap.gss; reflexivity |
+                  rewrite !assign_diff, !Pregmap.gso; replace_pc; auto; apply ppos_discr; auto]) ].
+  - (* Ob *)
+    replace_pc. rewrite (H0 PC). simpl.
+    unfold goto_label, control_eval. destruct Ge.
+    unfold goto_label_deps. destruct (label_pos _ _ _); auto.
+    + unfold incrPC. rewrite Pregmap.gss; eauto. destruct (Val.offset_ptr _ _); auto;
+      try (rewrite sr_gss; unfold Stuck; reflexivity).
+      simpl. eexists; split; split.
+      * rewrite sr_update_overwrite. unfold pmem, assign in *. simpl. rewrite H; reflexivity.
+      * intros. rewrite sr_update_overwrite. unfold Pregmap.set, assign.
+        destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct ir as [irr|];
+        try destruct irr; try destruct fr; try destruct cr; simpl; try rewrite <- H0; eauto.
+    + rewrite sr_gss; reflexivity.
+  - (* Obc *)
+    replace_pc. rewrite (H0 PC). simpl.
+    unfold eval_branch, goto_label, control_eval. destruct Ge.
+    unfold goto_label_deps, cflags_eval, eval_testcond, list_exp_eval.
+    destruct c; simpl; unfold incrPC;
+    repeat (replace_ppos; replace_pc; replace_regs_pos sr; sr_val_rwrt; try rewrite !assign_diff; discriminate_ppos).
+    1,2,3,4,5,6: destruct_res_flag rsr.
+    7,8,9,10: do 2 (destruct_res_flag rsr).
+    11,12 : do 3 (destruct_res_flag rsr).
+    1,2,3,4,5,6,9,10: destruct (Int.eq _ _); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+                      destruct (label_pos _ _ _); [| reflexivity]; replace_pc; 
+                      rewrite !Pregmap.gss; destruct Val.offset_ptr;
+                      try (unfold Stuck; reflexivity); Simpl_exists sr; intros rr;
+                      apply reg_update_overwrite; eauto.
+    1,3: destruct (andb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+         destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss;
+         destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr;
+         intros rr; apply reg_update_overwrite; eauto.
+    1,2: destruct (orb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+         destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss;
+         destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr;
+         intros rr; apply reg_update_overwrite; eauto.
+  - (* Obl *)
+    replace_pc. rewrite (H0 PC). simpl.
+    unfold control_eval. destruct Ge.
+    rewrite sr_gss. Simpl sr.
+    replace_pc; try replace (38) with (#X30) by eauto; unfold incrPC. Simpl_update.
+    repeat (intros; Simpl_update).
+  - (* Obs *)
+    unfold control_eval. destruct Ge. replace_pc. rewrite (H0 PC). simpl; unfold incrPC.
+    replace_pc; Simpl_exists sr; intros rr; apply reg_update_overwrite; eauto.
+  - (* Oblr *)
+    replace_pc. rewrite (H0 PC).
+    unfold control_eval. destruct Ge. simpl. unfold incrPC.
+    try (eexists; split; [  | split ]); eauto.
+    intros rr; destruct (PregEq.eq PC rr).
+    + replace_pc; rewrite e; rewrite !Pregmap.gss, !sr_gss;
+      rewrite Pregmap.gso; fold (ppos r); try rewrite H0; auto.
+      rewrite <- e; destruct r; discriminate.
+    + replace_pc; rewrite Pregmap.gso, assign_diff; auto; apply ppos_discr in n; auto;
+      rewrite Pregmap.gss, sr_gss. destruct (PregEq.eq X30 rr); replace 38 with (#X30) by auto.
+      * rewrite e; rewrite Pregmap.gss, sr_gss; auto.
+      * replace_pc; rewrite !Pregmap.gso, !assign_diff; auto; apply ppos_discr in n0; auto;
+        apply ppos_discr; eauto.
+  - (* Obtbl *)
+    replace_pc; rewrite (H0 PC);
+    unfold control_eval; destruct Ge; simpl; unfold incrPC.
+    destruct (PregEq.eq X16 r1).
+    + fold (ppos r1); rewrite <- e; rewrite Pregmap.gss, sr_gss; reflexivity.
+    + rewrite !Pregmap.gso; auto; rewrite ppos_discr in n;
+      fold (ppos r1); replace 24 with (#X16) by auto; try rewrite !assign_diff; auto;
+      discriminate_ppos; rewrite H0; destruct (rsr r1) eqn:EQR;
+      try (try rewrite EQR; reflexivity).
+      try destruct (list_nth_z _ _); try reflexivity;
+      unfold goto_label, goto_label_deps; destruct (label_pos _ _ _);
+      try rewrite 2Pregmap.gso, Pregmap.gss; destruct (Val.offset_ptr (rsr PC) (Ptrofs.repr sz));
+      try reflexivity; discriminate_ppos. Simpl sr.
+      destruct (PregEq.eq X16 rr); [ subst; Simpl_update |];
+      destruct (PregEq.eq PC rr); [ subst; Simpl_update |].
+      rewrite !Pregmap.gso; auto;
+      apply ppos_discr in n0; apply ppos_discr in n1;
+      rewrite !assign_diff; auto.
+Qed.
+
+Theorem bisimu_exit ex sz rsr mr sr:
+  match_states (State rsr mr) sr ->
+  (*is_builtin ex = false ->*)
+  match_outcome (estep Ge.(_genv) Ge.(_fn) ex (Ptrofs.repr sz) rsr mr) (inst_run Ge (trans_pcincr sz (trans_exit ex)) sr sr).
+Proof.
+  intros MS; unfold estep.
+  destruct ex.
+  - destruct c.
+    + exploit (bisimu_control i sz rsr mr sr); eauto.
+    + simpl. inv MS. eexists; split; [| split].
+      unfold control_eval; destruct Ge.
+      replace_pc; rewrite (H0 PC); eauto.
+      rewrite assign_diff; auto.
+      intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst.
+      * rewrite sr_gss, Pregmap.gss. reflexivity.
+      * rewrite assign_diff, Pregmap.gso; try rewrite H0;
+        auto; try rewrite ppos_discr in n; auto.
+  - simpl. inv MS. eexists; split; [| split].
+    + unfold control_eval; destruct Ge.
+      replace_pc; rewrite (H0 PC); eauto.
+    + rewrite assign_diff; auto.
+    + intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst.
+      * rewrite sr_gss, Pregmap.gss. reflexivity.
+      * rewrite assign_diff, Pregmap.gso; try rewrite H0; auto; try rewrite ppos_discr in n; auto.
+Qed.
+
+(* Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil). *)
+
+Theorem bisimu rsr mr sr bb:
+  match_states (State rsr mr) sr ->
+  match_outcome (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) bb rsr mr) (exec Ge (trans_block bb) sr).
+Proof.
+  intros MS. unfold bbstep, trans_block.
+  exploit (bisimu_body (body bb) rsr mr sr); eauto.
+  destruct (exec_body _ _ _ _ _); simpl.
+  - unfold match_states in *. intros (s' & X1 & X2). destruct s.
+    erewrite run_app_Some; eauto.
+    exploit (bisimu_exit (exit bb) (size bb) _rs _m s'); eauto.
+    destruct Ge; simpl. destruct MS as (Y1 & Y2). destruct X2 as (X2 & X3).
+    replace_pc. erewrite !X3; simpl.
+    destruct (inst_run _ _ _ _); simpl; auto.
+  - intros X; erewrite run_app_None; eauto.
+Qed.
+
+Theorem trans_state_match: forall S, match_states S (trans_state S).
+Proof.
+  intros. destruct S as (rs & m). simpl.
+  split. reflexivity.
+  intro. destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct cr;
+  try destruct ir as [irr|]; try destruct irr; try destruct fr; try reflexivity.
+Qed.
+
+Lemma state_eq_decomp:
+  forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2.
+Proof.
+  intros. congruence.
+Qed.
+
+Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2.
+Proof.
+  unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1.
+  apply state_eq_decomp.
+  - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence.
+  - congruence.
+Qed.
+
+End SECT_SEQ.
+
+Section SECT_BBLOCK_EQUIV.
+
+Variable Ge: genv.
+
+Local Hint Resolve trans_state_match: core.
+
+Lemma bblock_simu_reduce_aux:
+  forall p1 p2,
+  L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+  Asmblockprops.bblock_simu_aux Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2.
+Proof.
+  intros p1 p2 H0 rs m EBB.
+  generalize (H0 (trans_state (State rs m))); clear H0.
+  intro H0.
+  exploit (bisimu Ge rs m (trans_state (State rs m)) p1); eauto.
+  exploit (bisimu Ge rs m (trans_state (State rs m)) p2); eauto.
+  destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 rs m); try (unfold Stuck in EBB; congruence).
+  intros H1 (s2' & exp2 & MS'). unfold exec in exp2, H1. rewrite exp2 in H0.
+  destruct H0 as (m2' & H0 & H2). discriminate. rewrite H0 in H1.
+  destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p2 rs m); simpl in H1.
+  * unfold match_states in H1, MS'. destruct s, s0.
+    destruct H1 as (s' & H1 & H3 & H4). inv H1. inv MS'.
+    replace (_rs0) with (_rs).
+    - replace (_m0) with (_m); auto. congruence.
+    - apply functional_extensionality. intros r.
+      generalize (H1 r). intros Hr. congruence.
+  * discriminate.
+Qed.
+
+Lemma incrPC_set_res_commut res: forall d vres rs,  
+  incrPC d (set_res (map_builtin_res DR res) vres rs) =
+  set_res (map_builtin_res DR res) vres (incrPC d rs).
+Proof.
+  induction res; simpl; auto.
+  - intros; apply functional_extensionality.
+    unfold incrPC; intros x0.
+    destruct (PregEq.eq x0 PC).
+    + subst; rewrite! Pregmap.gss; auto.
+    + rewrite Pregmap.gso; auto.
+      destruct (PregEq.eq x x0).
+      * subst; rewrite! Pregmap.gss; auto.
+      * rewrite !Pregmap.gso; auto.
+  - intros; rewrite IHres2. f_equal. auto.
+Qed.
+
+Lemma incrPC_undef_regs_commut l : forall d rs,
+  incrPC d (undef_regs l rs) = undef_regs l (incrPC d rs).
+Proof.
+  induction l; simpl; auto.
+  intros. rewrite IHl. unfold incrPC.
+  destruct (PregEq.eq a PC).
+  - rewrite e. rewrite Pregmap.gss.
+    simpl. apply f_equal. unfold Pregmap.set.
+    apply functional_extensionality. intros x.
+    destruct (PregEq.eq x PC); auto.
+  - rewrite Pregmap.gso; auto.
+    apply f_equal. unfold Pregmap.set.
+    apply functional_extensionality. intros x.
+    destruct (PregEq.eq x PC).
+    + subst. destruct a; simpl; auto. congruence.
+    + auto.
+Qed.
+
+Lemma bblock_simu_reduce:
+  forall p1 p2,
+  L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+  (has_builtin p1 = true \/ has_builtin p2 = true -> exit p1 = exit p2) ->
+  Asmblockprops.bblock_simu Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2.
+Proof.
+  unfold bblock_simu. intros p1 p2 H0 BLT rs m EBB.
+  unfold exec_bblock.
+  generalize (bblock_simu_reduce_aux p1 p2 H0).
+  unfold bblock_simu_aux. clear H0.
+  unfold exec_bblock, bbstep. intros H0 m' t0 (rs1 & m1 & H1 & H2).
+  assert ((has_builtin p1 = false /\ has_builtin p2 = false) \/ (has_builtin p1 = true \/ has_builtin p2 = true)). { repeat destruct (has_builtin _); simpl; intuition. }
+  destruct H as [[X1 X2]|H].
+  - (* Not a builtin *)
+    exploit (H0); eauto; erewrite H1; simpl; (*gen*)
+    unfold estep; unfold has_builtin in *.
+    { destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *.
+    inversion H2; subst. rewrite H3; discriminate. }
+    destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *.
+    { inversion H2; subst. rewrite H3.
+    destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+    intros H4. eexists; eexists; split; try reflexivity.
+    destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto.
+    inversion H4. econstructor. }
+    { inversion H2; subst.
+      destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+intros H4. eexists; eexists; split; try reflexivity.
+    destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto.
+    inversion H4. rewrite H3. econstructor. }
+  - (* Builtin *)
+    exploit (BLT); eauto.
+    intros EXIT.
+    unfold has_builtin in H.
+    assert (is_builtin (exit p1) = true). { rewrite <- EXIT in H; intuition. }
+    clear H.
+    generalize (H0 rs m); eauto; erewrite H1; simpl;
+    unfold estep. destruct (exit p1) as [[]|] eqn:EQEX1; try discriminate.
+    rewrite <- EXIT.
+    intros CONTRA.
+    exploit (CONTRA); try discriminate.
+    destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+    intros H4. eexists; eexists; split; try reflexivity.
+    inversion H2; subst. inversion H4; subst.
+    econstructor; eauto.
+    + unfold incrPC in H5.
+      replace (rs2 SP) with (rs1 SP).
+      replace (fun r : dreg => rs2 r) with (fun r : dreg => rs1 r); auto.
+      * apply functional_extensionality.
+        intros r; destruct (PregEq.eq r PC); try discriminate.
+        replace (rs1 r) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) r) by auto.
+        rewrite H5; rewrite Pregmap.gso; auto.
+      * replace (rs1 SP) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) SP) by auto.
+        rewrite H5; rewrite Pregmap.gso; auto; try discriminate.
+    + rewrite !incrPC_set_res_commut.
+      rewrite !incrPC_undef_regs_commut.
+      rewrite H5.
+      reflexivity.
+Qed.
+
+(** Used for debug traces *)
+
+Definition ireg_name (ir: ireg) : pstring :=
+  match ir with
+  | X0 => Str ("X0") | X1 => Str ("X1") | X2 => Str ("X2") | X3 => Str ("X3") | X4 => Str ("X4") | X5 => Str ("X5") | X6 => Str ("X6") | X7 => Str ("X7")
+  | X8 => Str ("X8") | X9 => Str ("X9") | X10 => Str ("X10") | X11 => Str ("X11") | X12 => Str ("X12") | X13 => Str ("X13") | X14 => Str ("X14") | X15 => Str ("X15")
+  | X16 => Str ("X16") | X17 => Str ("X17") | X18 => Str ("X18") | X19 => Str ("X19") | X20 => Str ("X20") | X21 => Str ("X21") | X22 => Str ("X22") | X23 => Str ("X23")
+  | X24 => Str ("X24") | X25 => Str ("X25") | X26 => Str ("X26") | X27 => Str ("X27") | X28 => Str ("X28") | X29 => Str ("X29") | X30 => Str ("X30")
+  end
+.
+
+Definition freg_name (fr: freg) : pstring :=
+  match fr with
+  | D0 => Str ("D0") | D1 => Str ("D1") | D2 => Str ("D2") | D3 => Str ("D3") | D4 => Str ("D4") | D5 => Str ("D5") | D6 => Str ("D6") | D7 => Str ("D7")
+  | D8 => Str ("D8") | D9 => Str ("D9") | D10 => Str ("D10") | D11 => Str ("D11") | D12 => Str ("D12") | D13 => Str ("D13") | D14 => Str ("D14") | D15 => Str ("D15")
+  | D16 => Str ("D16") | D17 => Str ("D17") | D18 => Str ("D18") | D19 => Str ("D19") | D20 => Str ("D20") | D21 => Str ("D21") | D22 => Str ("D22") | D23 => Str ("D23")
+  | D24 => Str ("D24") | D25 => Str ("D25") | D26 => Str ("D26") | D27 => Str ("D27") | D28 => Str ("D28") | D29 => Str ("D29") | D30 => Str ("D30") | D31 => Str ("D31")
+  end
+.
+
+Definition iregsp_name (irsp: iregsp) : pstring :=
+  match irsp with
+  | RR1 ir => ireg_name ir
+  | XSP => Str ("XSP")
+  end.
+
+Definition dreg_name (dr: dreg) : ?? pstring :=
+  match dr with
+  | IR ir => match ir with
+             | XSP => RET (Str ("XSP"))
+             | RR1 irr => RET (ireg_name irr)
+             end
+  | FR fr => RET (freg_name fr)
+  end.
+
+Definition string_of_name (x: P.R.t): ?? pstring := 
+  if (Pos.eqb x pmem) then 
+    RET (Str "MEM")
+  else
+    match inv_ppos x with
+    | Some (CR cr) => match cr with
+                      | CN => RET (Str ("CN"))
+                      | CZ => RET (Str ("CZ"))
+                      | CC => RET (Str ("CC"))
+                      | CV => RET (Str ("CV"))
+                      end
+    | Some (PC) => RET (Str ("PC"))
+    | Some (DR dr) => dreg_name dr
+    | _ => RET (Str ("UNDEFINED"))
+    end.
+
+Definition string_of_name_ArithP (n: arith_p) : pstring :=
+  match n with
+  | Padrp _ _ => "Padrp"
+  | Pmovz _ _ _ => "Pmov"
+  | Pmovn _ _ _ => "Pmov"
+  | Pfmovimms _ => "Pfmovimm"
+  | Pfmovimmd _ => "Pfmovimm"
+  end.
+
+Definition string_of_name_ArithPP (n: arith_pp) : pstring :=
+  match n with
+  | Pmov => "Pmov"
+  | Pmovk _ _ _ => "Pmovk"
+  | Paddadr _ _ => "Paddadr"
+  | Psbfiz _ _ _ => "Psbfiz"
+  | Psbfx _ _ _ => "Psbfx"
+  | Pubfiz _ _ _ => "Pubfiz"
+  | Pubfx _ _ _ => "Pubfx"
+  | Pfmov => "Pfmov"
+  | Pfcvtds => "Pfcvtds"
+  | Pfcvtsd => "Pfcvtsd"
+  | Pfabs _ => "Pfabs"
+  | Pfneg _ => "Pfneg"
+  | Pscvtf _ _ => "Pscvtf"
+  | Pucvtf _ _ => "Pucvtf"
+  | Pfcvtzs _ _ => "Pfcvtzs"
+  | Pfcvtzu _ _ => "Pfcvtzu"
+  | Paddimm _ _ => "Paddimm"
+  | Psubimm _ _ => "Psubimm"
+  end.
+
+Definition string_of_name_ArithPPP (n: arith_ppp) : pstring :=
+  match n with
+  | Pasrv _ => "Pasrv"
+  | Plslv _ => "Plslv"
+  | Plsrv _ => "Plsrv"
+  | Prorv _ => "Prorv"
+  | Psmulh => "Psmulh"
+  | Pumulh => "Pumulh"
+  | Psdiv _ => "Psdiv"
+  | Pudiv _ => "Pudiv"
+  | Paddext _ => "Paddext"
+  | Psubext _ => "Psubext"
+  | Pfadd _ => "Pfadd"
+  | Pfdiv _ => "Pfdiv"
+  | Pfmul _ => "Pfmul"
+  | Pfsub _ => "Pfsub"
+  end.
+
+Definition string_of_name_ArithRR0R (n: arith_rr0r) : pstring :=
+  match n with
+  | Padd _ _ => "ArithRR0R=>Padd"
+  | Psub _ _ => "ArithRR0R=>Psub"
+  | Pand _ _ => "ArithRR0R=>Pand"
+  | Pbic _ _ => "ArithRR0R=>Pbic"
+  | Peon _ _ => "ArithRR0R=>Peon"
+  | Peor _ _ => "ArithRR0R=>Peor"
+  | Porr _ _ => "ArithRR0R=>Porr"
+  | Porn _ _ => "ArithRR0R=>Porn"
+  end.
+
+Definition string_of_name_ArithRR0R_XZR (n: arith_rr0r) : pstring :=
+  match n with
+  | Padd _ _ => "ArithRR0R_XZR=>Padd"
+  | Psub _ _ => "ArithRR0R_XZR=>Psub"
+  | Pand _ _ => "ArithRR0R_XZR=>Pand"
+  | Pbic _ _ => "ArithRR0R_XZR=>Pbic"
+  | Peon _ _ => "ArithRR0R_XZR=>Peon"
+  | Peor _ _ => "ArithRR0R_XZR=>Peor"
+  | Porr _ _ => "ArithRR0R_XZR=>Porr"
+  | Porn _ _ => "ArithRR0R_XZR=>Porn"
+  end.
+
+Definition string_of_name_ArithRR0 (n: arith_rr0) : pstring :=
+  match n with
+  | Pandimm _ _ => "ArithRR0=>Pandimm"
+  | Peorimm _ _ => "ArithRR0=>Peorimm"
+  | Porrimm _ _ => "ArithRR0=>Porrimm"
+  end.
+
+Definition string_of_name_ArithRR0_XZR (n: arith_rr0) : pstring :=
+match n with
+  | Pandimm _ _ => "ArithRR0_XZR=>Pandimm"
+  | Peorimm _ _ => "ArithRR0_XZR=>Peorimm"
+  | Porrimm _ _ => "ArithRR0_XZR=>Porrimm"
+  end.
+
+Definition string_of_name_ArithARRRR0 (n: arith_arrrr0) : pstring :=
+  match n with
+  | Pmadd _ => "ArithARRRR0=>Pmadd"
+  | Pmsub _ => "ArithARRRR0=>Pmsub"
+  end.
+
+Definition string_of_name_ArithARRRR0_XZR (n: arith_arrrr0) : pstring :=
+  match n with
+  | Pmadd _ => "ArithARRRR0_XZR=>Pmadd"
+  | Pmsub _ => "ArithARRRR0_XZR=>Pmsub"
+  end.
+
+Definition string_of_name_ArithComparisonPP_CN (n: arith_comparison_pp) : pstring :=
+  match n with
+  | Pcmpext _ => "ArithComparisonPP_CN=>Pcmpext"
+  | Pcmnext _ => "ArithComparisonPP_CN=>Pcmnext"
+  | Pfcmp _ => "ArithComparisonPP_CN=>Pfcmp"
+  end.
+
+Definition string_of_name_ArithComparisonPP_CZ (n: arith_comparison_pp) : pstring :=
+  match n with
+  | Pcmpext _ => "ArithComparisonPP_CZ=>Pcmpext"
+  | Pcmnext _ => "ArithComparisonPP_CZ=>Pcmnext"
+  | Pfcmp _ => "ArithComparisonPP_CZ=>Pfcmp"
+  end.
+
+Definition string_of_name_ArithComparisonPP_CC (n: arith_comparison_pp) : pstring :=
+match n with
+  | Pcmpext _ => "ArithComparisonPP_CC=>Pcmpext"
+  | Pcmnext _ => "ArithComparisonPP_CC=>Pcmnext"
+  | Pfcmp _ => "ArithComparisonPP_CC=>Pfcmp"
+  end.
+
+Definition string_of_name_ArithComparisonPP_CV (n: arith_comparison_pp) : pstring :=
+match n with
+  | Pcmpext _ => "ArithComparisonPP_CV=>Pcmpext"
+  | Pcmnext _ => "ArithComparisonPP_CV=>Pcmnext"
+  | Pfcmp _ => "ArithComparisonPP_CV=>Pfcmp"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CN (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CN=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CN=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CN=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CZ (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CZ=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CZ=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CZ=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CC (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CC=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CC=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CC=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CV (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CV=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CV=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CV=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CN_XZR=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CN_XZR=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CN_XZR=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) : pstring :=
+  match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CZ_XZR=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) : pstring :=
+match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CC_XZR=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CC_XZR=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CC_XZR=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) : pstring :=
+match n with
+  | Pcmp _ _ => "ArithComparisonR0R_CV_XZR=>Pcmp"
+  | Pcmn _ _ => "ArithComparisonR0R_CV_XZR=>Pcmn"
+  | Ptst _ _ => "ArithComparisonR0R_CV_XZR=>Ptst"
+  end.
+
+Definition string_of_name_ArithComparisonP_CN (n: arith_comparison_p) : pstring :=
+  match n with
+  | Pfcmp0 _ => "ArithComparisonP_CN=>Pfcmp0"
+  | Pcmpimm _ _ => "ArithComparisonP_CN=>Pcmpimm"
+  | Pcmnimm _ _ => "ArithComparisonP_CN=>Pcmnimm"
+  | Ptstimm _ _ => "ArithComparisonP_CN=>Ptstimm"
+  end.
+
+Definition string_of_name_ArithComparisonP_CZ (n: arith_comparison_p) : pstring :=
+  match n with
+  | Pfcmp0 _ => "ArithComparisonP_CZ=>Pfcmp0"
+  | Pcmpimm _ _ => "ArithComparisonP_CZ=>Pcmpimm"
+  | Pcmnimm _ _ => "ArithComparisonP_CZ=>Pcmnimm"
+  | Ptstimm _ _ => "ArithComparisonP_CZ=>Ptstimm"
+  end.
+
+Definition string_of_name_ArithComparisonP_CC (n: arith_comparison_p) : pstring :=
+  match n with
+  | Pfcmp0 _ => "ArithComparisonP_CC=>Pfcmp0"
+  | Pcmpimm _ _ => "ArithComparisonP_CC=>Pcmpimm"
+  | Pcmnimm _ _ => "ArithComparisonP_CC=>Pcmnimm"
+  | Ptstimm _ _ => "ArithComparisonP_CC=>Ptstimm"
+  end.
+
+Definition string_of_name_ArithComparisonP_CV (n: arith_comparison_p) : pstring :=
+  match n with
+  | Pfcmp0 _ => "ArithComparisonP_CV=>Pfcmp0"
+  | Pcmpimm _ _ => "ArithComparisonP_CV=>Pcmpimm"
+  | Pcmnimm _ _ => "ArithComparisonP_CV=>Pcmnimm"
+  | Ptstimm _ _ => "ArithComparisonP_CV=>Ptstimm"
+  end.
+
+Definition string_of_name_cset (c: testcond) : pstring :=
+  match c with
+  | TCeq => "Cset=>TCeq"
+  | TCne => "Cset=>TCne"
+  | TChs => "Cset=>TChs"
+  | TClo => "Cset=>TClo"
+  | TCmi => "Cset=>TCmi"
+  | TCpl => "Cset=>TCpl"
+  | TChi => "Cset=>TChi"
+  | TCls => "Cset=>TCls"
+  | TCge => "Cset=>TCge"
+  | TClt => "Cset=>TClt"
+  | TCgt => "Cset=>TCgt"
+  | TCle => "Cset=>TCle"
+  end.
+
+Definition string_of_arith (op: arith_op): pstring :=
+  match op with
+  | OArithP n => string_of_name_ArithP n
+  | OArithPP n => string_of_name_ArithPP n
+  | OArithPPP n => string_of_name_ArithPPP n
+  | OArithRR0R n => string_of_name_ArithRR0R n
+  | OArithRR0R_XZR n _ => string_of_name_ArithRR0R_XZR n
+  | OArithRR0 n => string_of_name_ArithRR0 n
+  | OArithRR0_XZR n _ => string_of_name_ArithRR0_XZR n
+  | OArithARRRR0 n => string_of_name_ArithARRRR0 n
+  | OArithARRRR0_XZR n _ => string_of_name_ArithARRRR0_XZR n
+  | OArithComparisonPP_CN n => string_of_name_ArithComparisonPP_CN n
+  | OArithComparisonPP_CZ n => string_of_name_ArithComparisonPP_CZ n
+  | OArithComparisonPP_CC n => string_of_name_ArithComparisonPP_CC n
+  | OArithComparisonPP_CV n => string_of_name_ArithComparisonPP_CV n
+  | OArithComparisonR0R_CN n _ => string_of_name_ArithComparisonR0R_CN n
+  | OArithComparisonR0R_CZ n _ => string_of_name_ArithComparisonR0R_CZ n
+  | OArithComparisonR0R_CC n _ => string_of_name_ArithComparisonR0R_CC n
+  | OArithComparisonR0R_CV n _ => string_of_name_ArithComparisonR0R_CV n
+  | OArithComparisonR0R_CN_XZR n _ _ => string_of_name_ArithComparisonR0R_CN_XZR n
+  | OArithComparisonR0R_CZ_XZR n _ _ => string_of_name_ArithComparisonR0R_CZ_XZR n
+  | OArithComparisonR0R_CC_XZR n _ _ => string_of_name_ArithComparisonR0R_CC_XZR n
+  | OArithComparisonR0R_CV_XZR n _ _ => string_of_name_ArithComparisonR0R_CV_XZR n
+  | OArithComparisonP_CN n => string_of_name_ArithComparisonP_CN n
+  | OArithComparisonP_CZ n => string_of_name_ArithComparisonP_CZ n
+  | OArithComparisonP_CC n => string_of_name_ArithComparisonP_CC n
+  | OArithComparisonP_CV n => string_of_name_ArithComparisonP_CV n
+  | Ocset c => string_of_name_cset c
+  | Ofmovi _ => "Ofmovi"
+  | Ofmovi_XZR _ => "Ofmovi_XZR"
+  | Ocsel _ => "Ocsel"
+  | Ofnmul _ => "Ofnmul"
+  end.
+
+Definition string_of_ofs (ofs: ptrofs) : ?? pstring :=
+  (string_of_Z (Ptrofs.signed ofs)).
+
+Definition string_of_int (n: int) : ?? pstring :=
+  (string_of_Z (Int.signed n)).
+
+Definition string_of_int64 (n: int64) : ?? pstring :=
+  (string_of_Z (Int64.signed n)).
+
+Notation "x +; y" := (Concat x y).
+
+Definition string_of_addressing (a: addressing) : ?? pstring :=
+  match a with
+  | ADimm base n =>
+      DO n' <~ string_of_int64 n;;
+      RET ((Str "[ADimm ") +; (iregsp_name base) +; " " +; n' +; "]")
+  | ADreg base r =>
+      RET ((Str "[ADreg ") +; (iregsp_name base) +; " " +; (ireg_name r) +; "]")
+  | ADlsl base r n =>
+      DO n' <~ string_of_int n;;
+      RET ((Str "[ADlsl ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+  | ADsxt base r n =>
+      DO n' <~ string_of_int n;;
+      RET ((Str "[ADsxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+  | ADuxt base r n =>
+      DO n' <~ string_of_int n;;
+      RET ((Str "[ADuxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+  | ADadr base id ofs =>
+      DO id' <~ string_of_Z (Zpos id);;
+      DO ofs' <~ string_of_ofs ofs;;
+      RET ((Str "[ADadr ") +; (iregsp_name base) +; " " +; id' +; " " +; ofs' +; "]")
+  | ADpostincr base n =>
+      DO n' <~ string_of_int64 n;;
+      RET ((Str "[ADpostincr ") +; (iregsp_name base) +; " " +; n' +; "]")
+  end.
+
+Definition string_of_ld_rd_a (ld: load_rd_a) : pstring :=
+  match ld with
+  | Pldrw => Str "Pldrw"
+  | Pldrw_a => Str "Pldrw_a"
+  | Pldrx => Str "Pldrx"
+  | Pldrx_a => Str "Pldrx_a"
+  | Pldrb _ => Str "Pldrb"
+  | Pldrsb _ => Str "Pldrsb"
+  | Pldrh _ => Str "Pldrh"
+  | Pldrsh _ => Str "Pldrsh"
+  | Pldrzw => Str "Pldrzw"
+  | Pldrsw => Str "Pldrsw"
+  | Pldrs => Str "Pldrs"
+  | Pldrd => Str "Pldrd"
+  | Pldrd_a => Str "Pldrd_a"
+  end.
+
+Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:=
+  match ldi with
+  | PLd_rd_a ld rd a =>
+      DO a' <~ string_of_addressing a;;
+      DO rd' <~ dreg_name rd;;
+      RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")")
+  | Pldp _ _ _ _ _ a =>
+      DO a' <~ string_of_addressing a;;
+      RET (Str "Pldp (" +; a' +; ")")
+  end.
+
+Definition string_of_load (op: load_op) : ?? pstring :=
+  match op with
+  | Oload1 ld _ a =>
+      DO a' <~ string_of_addressing a;;
+      (*DO ld' <~ string_of_ldi ld;;*)
+      RET((Str "Oload1 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+  | Oload2 ld _ a =>
+      DO a' <~ string_of_addressing a;;
+      (*DO ld' <~ string_of_ldi ld;;*)
+      RET((Str "Oload2 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+  | OloadU _ _ _ => RET (Str "OloadU")
+  end.
+
+Definition string_of_st_rs_a (st: store_rs_a) : pstring :=
+  match st with
+  | Pstrw => Str "Pstrw"
+  | Pstrw_a => Str "Pstrw_a"
+  | Pstrx => Str "Pstrx"
+  | Pstrx_a => Str "Pstrx_a"
+  | Pstrb => Str "Pstrb"
+  | Pstrh => Str "Pstrh"
+  | Pstrs => Str "Pstrs"
+  | Pstrd => Str "Pstrd"
+  | Pstrd_a => Str "Pstrd_a"
+  end.
+
+Definition string_of_sti (sti: st_instruction) : ?? pstring:=
+  match sti with
+  | PSt_rs_a st rs a =>
+      DO a' <~ string_of_addressing a;;
+      DO rs' <~ dreg_name rs;;
+      RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")")
+  | Pstp _ _ _ _ _ a =>
+      DO a' <~ string_of_addressing a;;
+      RET (Str "Pstp (" +; a' +; ")")
+  end.
+
+Definition string_of_store (op: store_op) : ?? pstring :=
+  match op with
+  | Ostore1 st _ a =>
+      DO a' <~ string_of_addressing a;;
+      (*DO st' <~ string_of_sti st;;*)
+      RET((Str "Ostore1 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+  | Ostore2 st _ a =>
+      DO a' <~ string_of_addressing a;;
+      (*DO st' <~ string_of_sti st;;*)
+      RET((Str "Ostore2 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+  | OstoreU _ _ _ => RET (Str "OstoreU")
+  end.
+
+Definition string_of_control (op: control_op) : pstring :=
+  match op with
+  | Ob _ => "Ob"
+  | Obc _ _ => "Obc"
+  | Obl _ => "Obl"
+  | Obs _ => "Obs"
+  | Ocbnz _ _ => "Ocbnz"
+  | Ocbz _ _ => "Ocbz"
+  | Otbnz _ _ _ => "Otbnz"
+  | Otbz _ _ _ => "Otbz"
+  | Obtbl _ => "Obtbl"
+  | OIncremPC _ => "OIncremPC"
+  | OError => "OError"
+  end.
+
+Definition string_of_allocf (op: allocf_op) : pstring :=
+  match op with
+  | OAllocf_SP _ _ => "OAllocf_SP"
+  | OAllocf_Mem _ _ => "OAllocf_Mem"
+  end.
+
+Definition string_of_freef (op: freef_op) : pstring :=
+  match op with
+  | OFreef_SP _ _ => "OFreef_SP"
+  | OFreef_Mem _ _ => "OFreef_Mem"
+  end.
+
+Definition string_of_op (op: P.op): ?? pstring := 
+  match op with
+  | Arith op => RET (string_of_arith op)
+  | Load op => string_of_load op
+  | Store op => string_of_store op
+  | Control op => RET (string_of_control op)
+  | Allocframe op => RET (string_of_allocf op)
+  | Freeframe op => RET (string_of_freef op)
+  | Loadsymbol _ => RET (Str "Loadsymbol")
+  | Constant _ => RET (Str "Constant")
+  | Cvtsw2x => RET (Str "Cvtsw2x")
+  | Cvtuw2x => RET (Str "Cvtuw2x")
+  | Cvtx2w => RET (Str "Cvtx2w")
+  (*| Fail => RET (Str "Fail")*)
+  end.
+End SECT_BBLOCK_EQUIV.
+
+(** REWRITE RULES *)
+
+Definition is_constant (o: op): bool :=
+  match o with
+  | OArithP _ | OArithRR0_XZR _ _ | Ofmovi_XZR _ | Loadsymbol _ | Constant _ | Obl _ | Obs _ => true
+  | _ => false
+  end.
+
+Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None.
+Proof.
+  destruct o; simpl in * |- *; try congruence.
+  destruct op0; simpl in * |- *; try congruence.
+  destruct co; simpl in * |- *; try congruence;
+  unfold control_eval; destruct ge; simpl in * |- *; try congruence.
+Qed.
+
+Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t).
+
+Local Hint Resolve is_constant_correct: wlp.
+
+Lemma main_reduce_correct t:
+ WHEN main_reduce t ~> pt THEN Terms.match_pt t pt.
+Proof.
+  wlp_simplify.
+Qed.
+
+Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}.
+
+Definition bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock) : ?? bool :=
+  assert_same_builtin p1 p2;;
+  if verb then
+    IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2)
+  else
+    IST.bblock_simu_test reduce (trans_block p1) (trans_block p2).
+
+Local Hint Resolve IST.bblock_simu_test_correct IST.verb_bblock_simu_test_correct: wlp.
+
+(** Main simulation (Impure) theorem *) 
+Theorem bblock_simu_test_correct verb p1 p2 :
+  WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn lk, Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+  wlp_simplify; eapply bblock_simu_reduce with (Ge:={| _genv := ge; _fn := fn; _lk := lk |}); eauto;
+  intros; destruct H; auto.
+Qed.
+
+Hint Resolve bblock_simu_test_correct: wlp.
+
+(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock): bool := 
+  match unsafe_coerce (bblock_simu_test verb p1 p2) with
+  | Some b => b
+  | None => false
+  end.
+
+Theorem pure_bblock_simu_test_correct verb p1 p2 lk ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+   unfold pure_bblock_simu_test. 
+   destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate.
+   intros; subst. eapply bblock_simu_test_correct; eauto.
+   apply unsafe_coerce_not_really_correct; eauto.
+Qed.
+
+Definition bblock_simub: Asmblock.bblock -> Asmblock.bblock -> bool := pure_bblock_simu_test true.
+
+Lemma bblock_simub_correct p1 p2 lk ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+ eapply (pure_bblock_simu_test_correct true).
+Qed.
diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v
new file mode 100644
index 00000000..acb5a1e6
--- /dev/null
+++ b/aarch64/Asmblockgen.v
@@ -0,0 +1,1252 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** * Translation from Machblock to AArch64 assembly block language (Asmblock) 
+    Inspired from the Mach->Asm pass of original Leroy's backend, but adapted to the block structure like the KVX backend. *)
+
+Require Import Recdef Coqlib Zwf Zbits.
+Require Import Errors AST Integers Floats Op.
+Require Import Locations Machblock Asm Asmblock.
+
+Local Open Scope string_scope.
+Local Open Scope list_scope.
+Local Open Scope error_monad_scope.
+
+(** Extracting integer or float registers. *)
+
+Definition ireg_of (r: mreg) : res ireg :=
+  match preg_of r with
+  |  IR irs => match irs with
+                | RR1 mr => OK mr
+                | _ => Error(msg "Asmgenblock.ireg_of")
+                end
+  | _ => Error(msg "Asmgenblock.iregsp_of")
+  end.
+
+Definition freg_of (r: mreg) : res freg :=
+  match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
+
+(** Recognition of immediate arguments for logical integer operations.*)
+
+(** Valid immediate arguments are repetitions of a bit pattern [B]
+  of length [e] = 2, 4, 8, 16, 32 or 64.
+  The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
+  but must not be all zeros or all ones. *)
+
+(** The following automaton recognizes [0*1*0*|1*0*1*].
+<<
+               0          1          0
+              / \        / \        / \
+              \ /        \ /        \ /
+        -0--> [B] --1--> [D] --0--> [F]
+       /
+     [A]
+       \
+        -1--> [C] --0--> [E] --1--> [G]
+              / \        / \        / \
+              \ /        \ /        \ /
+               1          0          1
+>>
+*)
+
+Module Automaton.
+
+Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
+
+Definition start := SA.
+
+Definition next (s: state) (b: bool) :=
+  match s, b with
+    | SA,false => SB      | SA,true => SC
+    | SB,false => SB      | SB,true => SD
+    | SC,false => SE      | SC,true => SC
+    | SD,false => SF      | SD,true => SD
+    | SE,false => SE      | SE,true => SG
+    | SF,false => SF      | SF,true => Sbad
+    | SG,false => Sbad    | SG,true => SG
+    | Sbad,_ => Sbad
+  end.
+
+Definition accepting (s: state) :=
+  match s with
+  | SA | SB | SC | SD | SE | SF | SG => true
+  | Sbad => false
+  end.
+
+Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
+  match len with
+  | Datatypes.O => accepting s
+  | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
+  end.
+
+End Automaton.
+
+(** The following function determines the candidate length [e],
+    ensuring that [x] is a repetition [BB...B] 
+    of a bit pattern [B] of length [e]. *)
+
+Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
+  (** [test n] checks that the low [2n] bits of [x] are of the
+      form [BB], that is, two occurrences of the same [n] bits *)
+  let test (n: Z) : bool :=
+    Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
+  (** If [test n] fails, we know that the candidate length [e] is
+      at least [2n].  Hence we test with decreasing values of [n]:
+      32, 16, 8, 4, 2. *)
+  if sixtyfour && negb (test 32) then 64%nat
+  else if negb (test 16) then 32%nat
+  else if negb (test 8) then 16%nat
+  else if negb (test 4) then 8%nat
+  else if negb (test 2) then 4%nat
+  else 2%nat.
+
+(** A valid logical immediate is 
+- neither [0] nor [-1];
+- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
+- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
+*)
+
+Definition is_logical_imm32 (x: int) : bool :=
+  negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
+  Automaton.run (logical_imm_length (Int.unsigned x) false)
+                Automaton.start (Int.unsigned x).
+
+Definition is_logical_imm64 (x: int64) : bool :=
+  negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
+  Automaton.run (logical_imm_length (Int64.unsigned x) true)
+                Automaton.start (Int64.unsigned x).
+
+(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *)
+
+Definition is_arith_imm32 (x: int) : bool :=
+  Int.eq x (Int.zero_ext 12 x)
+  || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)).
+
+Definition is_arith_imm64 (x: int64) : bool :=
+  Int64.eq x (Int64.zero_ext 12 x)
+  || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)).
+
+Definition bcode := list basic.
+
+Program Definition single_basic (bi: basic): bblock :=
+  {| header := nil; body:= bi::nil; exit := None |}.
+
+(* insert [bi] at the head of [k] *)
+Program Definition insert_basic (bi: basic) (k:bblocks): bblocks :=
+  match k with
+  | bb::k' =>
+    match bb.(header) with
+    | nil => {| header := nil; body := bi :: (body bb); exit := exit bb |}::k'
+    | _ => (single_basic bi)::k
+    end
+  | _ => (single_basic bi)::k
+  end.
+
+Notation "bi ::b k" := (insert_basic bi k) (at level 49, right associativity).
+
+(* NB: this notation helps the Coq typechecker to infer coercion [PArith] in [bcode] expressions *)
+(** Alignment check for symbols *)
+Notation "i ::bi k" := (cons (A:=basic) i k) (at level 49, right associativity).
+Notation "a @@ b" := (app a b) (at level 49, right associativity).
+
+(* The pop_bc and push_bc functions are used to adapt the output of some definitions
+   in bblocks format and avoid some redefinitions. *)
+
+(* pop_bc takes the body of the first bblock in the list if it does not have a header. *)
+Definition pop_bc (k: bblocks): bcode :=
+  match k with
+  | bb :: k' => match bb.(header) with
+                | nil => (body bb)
+                | _ => nil
+                end
+  | _ => nil
+  end.
+
+(* push_bc tries to overwrite code in the first bblock if it does not have a header,
+   otherwise, a new bblock is created and appended to the list. *)
+Program Definition push_bc (bc: bcode) (k:bblocks): bblocks :=
+  match bc with
+  | bi :: bc' => match k with
+                 | bb :: k' => match bb.(header) with
+                               | nil => {| header := nil; body := bc; exit := exit bb |} :: k'
+                               | _ => {| header := nil; body := bc; exit := None |} :: k
+                               end
+                 | _ => {| header := nil; body := bc; exit := None |} :: nil
+                 end
+  | nil => k
+  end.
+Next Obligation.
+  simpl; auto.
+Qed.
+Next Obligation.
+  simpl; auto.
+Qed.
+Next Obligation.
+  simpl; auto.
+Qed.
+
+Parameter symbol_is_aligned : ident -> Z -> bool.
+(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
+
+(***************************************************************************************)
+
+(** Decompose integer literals into 16-bit fragments *)
+
+Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) :=
+  match N with
+  | Datatypes.O => nil
+  | Datatypes.S N =>
+    let frag := Zzero_ext 16 (Z.shiftr n p) in
+    if Z.eqb frag 0 then
+      decompose_int N n (p + 16)
+    else
+      (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16)
+  end.
+
+Definition negate_decomposition (l: list (Z * Z)) :=
+  List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l.
+
+Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode :=
+    List.fold_right (fun np k => Pmovk sz (fst np) (snd np) rd rd ::bi k) k l.
+
+Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z))  (k: bcode) : bcode :=
+  match l with
+  | nil => Pmovz sz 0 0 rd ::bi k
+  | (n1, p1) :: l => (Pmovz sz n1 p1 rd) ::bi loadimm_k sz rd l k
+  end.
+
+Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode :=
+  match l with
+  | nil => Pmovn sz 0 0 rd ::bi k
+  | (n1, p1) :: l => Pmovn sz n1 p1 rd ::bi loadimm_k sz rd (negate_decomposition l) k
+  end.
+
+Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: bcode) : bcode :=
+  let N := match sz with W => 2%nat | X => 4%nat end in
+  let dz := decompose_int N n 0 in
+  let dn := decompose_int N (Z.lnot n) 0 in
+  if Nat.leb (List.length dz) (List.length dn)
+  then loadimm_z sz rd dz k
+  else loadimm_n sz rd dn k.
+
+Definition loadimm32 (rd: ireg) (n: int) (k: bcode) : bcode :=
+  if is_logical_imm32 n
+  then Porrimm W (Int.unsigned n) rd XZR ::bi k
+  else loadimm W rd (Int.unsigned n) k.
+
+Definition loadimm64 (rd: ireg) (n: int64) (k: bcode) : bcode :=
+  if is_logical_imm64 n
+  then Porrimm X (Int64.unsigned n) rd XZR ::bi k
+  else loadimm X rd (Int64.unsigned n) k.
+
+Definition offset_representable (sz: Z) (ofs: int64) : bool :=
+  let isz := Int64.repr sz in
+  (** either unscaled 9-bit signed *)
+  Int64.eq ofs (Int64.sign_ext 9 ofs) ||
+  (** or scaled 12-bit unsigned *)
+  (Int64.eq (Int64.modu ofs isz) Int64.zero
+   && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
+
+Definition indexed_memory_access_bc (insn: addressing -> basic)
+                                 (sz: Z) (base: iregsp) (ofs: ptrofs) (k: bcode) : bcode :=
+  let ofs := Ptrofs.to_int64 ofs in
+  if offset_representable sz ofs
+  then insn (ADimm base ofs) :: k
+  else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
+
+Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
+  match ty, preg_of dst with
+  | Tint,    IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw rd) 4 base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx rd) 8 base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrs rd) 4 base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd rd) 8 base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw_a rd) 4 base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx_a rd) 8 base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd_a rd) 8 base ofs k)
+  | _, _           => Error (msg "Asmgen.loadind")
+  end.
+
+Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode) :=
+  match ty, preg_of src with
+  | Tint,    IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw rd) 4 base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx rd) 8 base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrs rd) 4 base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd rd) 8 base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw_a rd) 4 base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx_a rd) 8 base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd_a rd) 8 base ofs k)
+  | _, _           => Error (msg "Asmgen.storeind")
+  end.
+
+Definition loadptr_bc (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: bcode): bcode :=
+  indexed_memory_access_bc (PLd_rd_a Pldrx dst) 8 base ofs k. 
+
+Definition storeptr_bc (src: ireg) (base: iregsp) (ofs: ptrofs) (k: bcode): bcode :=
+  indexed_memory_access_bc (PSt_rs_a Pstrx src) 8 base ofs k.
+
+(** Function epilogue *)
+
+Definition make_epilogue (f: Machblock.function) : bcode :=
+  loadptr_bc XSP f.(fn_retaddr_ofs) RA
+  (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs)::nil).
+
+(** Add immediate *)
+
+Definition addimm_aux (insn: Z -> arith_pp)
+                        (rd r1: iregsp) (n: Z) (k: bcode) :=
+  let nlo := Zzero_ext 12 n in
+  let nhi := n - nlo in
+  if Z.eqb nhi 0 then
+    insn nlo rd r1 ::bi k
+  else if Z.eqb nlo 0 then
+    insn nhi rd r1 ::bi k
+  else
+    insn nhi rd r1 ::bi insn nlo rd rd ::bi k.
+
+Definition addimm32 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+  let m := Int.neg n in
+  if Int.eq n (Int.zero_ext 24 n) then
+    addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k
+  else if Int.eq m (Int.zero_ext 24 m) then
+    addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k
+  else if Int.lt n Int.zero then
+    loadimm32 X16 m (Psub W SOnone rd r1 X16 ::bi k)
+  else
+    loadimm32 X16 n (Padd W SOnone rd r1 X16 ::bi k).
+
+Definition addimm64 (rd r1: iregsp) (n: int64) (k: bcode) : bcode :=
+  let m := Int64.neg n in
+  if Int64.eq n (Int64.zero_ext 24 n) then
+    addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k
+  else if Int64.eq m (Int64.zero_ext 24 m) then
+    addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k
+  else if Int64.lt n Int64.zero then
+    loadimm64 X16 m (Psubext (EOuxtx Int.zero) rd r1 X16 ::bi k)
+  else
+    loadimm64 X16 n (Paddext (EOuxtx Int.zero) rd r1 X16 ::bi k).
+
+(** Logical immediate *)
+
+Definition logicalimm32
+              (insn1: Z -> arith_rr0)
+              (insn2: shift_op -> arith_rr0r)
+              (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+  if is_logical_imm32 n
+  then insn1 (Int.unsigned n) rd r1 ::bi k
+  else loadimm32 X16 n (insn2 SOnone rd r1 X16 ::bi k).
+
+Definition logicalimm64
+              (insn1: Z -> arith_rr0)
+              (insn2: shift_op -> arith_rr0r)
+              (rd r1: ireg) (n: int64) (k: bcode) : bcode :=
+  if is_logical_imm64 n
+  then insn1 (Int64.unsigned n) rd r1 ::bi k
+  else loadimm64 X16 n (insn2 SOnone rd r1 X16 ::bi k).
+
+(** Sign- or zero-extended arithmetic *)
+
+Definition transl_extension (ex: extension) (a: int) : extend_op :=
+  match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
+
+Definition move_extended_base
+              (rd: ireg) (r1: ireg) (ex: extension) (k: bcode) : bcode :=
+  match ex with
+  | Xsgn32 => Pcvtsw2x rd r1 ::bi k
+  | Xuns32 => Pcvtuw2x rd r1 ::bi k
+  end.
+
+Definition move_extended
+              (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: bcode) : bcode :=
+  if Int.eq a Int.zero then
+    move_extended_base rd r1 ex k
+  else
+    move_extended_base rd r1 ex (Padd X (SOlsl a) rd XZR rd ::bi k).
+
+Definition arith_extended 
+              (insnX: extend_op -> arith_ppp)
+              (insnS: shift_op -> arith_rr0r)
+              (rd r1 r2: ireg) (ex: extension) (a: int) (k: bcode) : bcode :=
+  if Int.ltu a (Int.repr 5) then
+    insnX (transl_extension ex a) rd r1 r2 ::bi k
+  else
+    move_extended_base X16 r2 ex (insnS (SOlsl a) rd r1 X16 ::bi k).
+
+(** Extended right shift *)
+
+Definition shrx32 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 ::bi k
+  else
+    Porr W (SOasr (Int.repr 31)) X16 XZR r1 ::bi
+    Padd W (SOlsr (Int.sub Int.iwordsize n)) X16 r1 X16 ::bi
+    Porr W (SOasr n) rd XZR X16 ::bi k.
+
+Definition shrx64 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+  if Int.eq n Int.zero then
+    Pmov rd r1 ::bi k
+  else
+    Porr X (SOasr (Int.repr 63)) X16 XZR r1 ::bi
+    Padd X (SOlsr (Int.sub Int64.iwordsize' n)) X16 r1 X16 ::bi
+    Porr X (SOasr n) rd XZR X16 ::bi k.
+
+(** Load the address [id + ofs] in [rd] *)
+
+Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: bcode) : bcode :=
+  if Archi.pic_code tt then
+    if Ptrofs.eq ofs Ptrofs.zero then
+      Ploadsymbol rd id ::bi k
+    else
+      Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
+  else
+    Padrp id ofs rd ::bi Paddadr id ofs rd rd ::bi k.
+
+(** Translate a shifted operand *)
+
+Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op :=
+  match s with
+  | Slsl => SOlsl a
+  | Slsr => SOlsr a
+  | Sasr => SOasr a
+  | Sror => SOror a
+  end.
+
+(** Translation of a condition.  Prepends to [k] the instructions
+  that evaluate the condition and leave its boolean result in one of
+  the bits of the condition register.  The bit in question is
+  determined by the [crbit_for_cond] function. *)
+
+Definition transl_cond
+              (cond: condition) (args: list mreg) (k: bcode) :=
+  match cond, args with
+  | (Ccomp c | Ccompu c), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp W SOnone r1 r2 ::bi k)
+  | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp W (transl_shift s a) r1 r2 ::bi k)
+  | (Ccompimm c n | Ccompuimm c n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_arith_imm32 n then
+            Pcmpimm W (Int.unsigned n) r1 ::bi k
+          else if is_arith_imm32 (Int.neg n) then
+            Pcmnimm W (Int.unsigned (Int.neg n)) r1 ::bi k
+          else
+            loadimm32 X16 n (Pcmp W SOnone r1 X16 ::bi k))
+  | (Cmaskzero n | Cmasknotzero n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_logical_imm32 n then
+            Ptstimm W (Int.unsigned n) r1 ::bi k
+          else
+            loadimm32 X16 n (Ptst W SOnone r1 X16 ::bi k))
+  | (Ccompl c | Ccomplu c), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp X SOnone r1 r2 ::bi k)
+  | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pcmp X (transl_shift s a) r1 r2 ::bi k)
+  | (Ccomplimm c n | Ccompluimm c n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_arith_imm64 n then
+            Pcmpimm X (Int64.unsigned n) r1 ::bi k
+          else if is_arith_imm64 (Int64.neg n) then
+            Pcmnimm X (Int64.unsigned (Int64.neg n)) r1 ::bi k
+          else
+            loadimm64 X16 n (Pcmp X SOnone r1 X16 ::bi k))
+  | (Cmasklzero n | Cmasklnotzero n), a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (if is_logical_imm64 n then
+            Ptstimm X (Int64.unsigned n) r1 ::bi k
+          else
+            loadimm64 X16 n (Ptst X SOnone r1 X16 ::bi k))
+  | Ccompf cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp D r1 r2 ::bi k)
+  | Cnotcompf cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp D r1 r2 ::bi k)
+  | Ccompfzero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 D r1 ::bi k)
+  | Cnotcompfzero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 D r1 ::bi k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp S r1 r2 ::bi k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmp S r1 r2 ::bi k)
+  | Ccompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 S r1 ::bi k)
+  | Cnotcompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmp0 S r1 ::bi k)
+  | _, _ =>
+      Error(msg "Asmgenblock.transl_cond")
+  end.
+
+Definition cond_for_signed_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TClt
+  | Cle => TCle
+  | Cgt => TCgt
+  | Cge => TCge
+  end.
+
+Definition cond_for_unsigned_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TClo
+  | Cle => TCls
+  | Cgt => TChi
+  | Cge => TChs
+  end.
+
+Definition cond_for_float_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCeq
+  | Cne => TCne
+  | Clt => TCmi
+  | Cle => TCls
+  | Cgt => TCgt
+  | Cge => TCge
+  end.
+
+Definition cond_for_float_not_cmp (cmp: comparison) :=
+  match cmp with
+  | Ceq => TCne
+  | Cne => TCeq
+  | Clt => TCpl
+  | Cle => TChi
+  | Cgt => TCle
+  | Cge => TClt
+  end.
+
+Definition cond_for_cond (cond: condition) :=
+  match cond with
+  | Ccomp cmp => cond_for_signed_cmp cmp
+  | Ccompu cmp => cond_for_unsigned_cmp cmp
+  | Ccompshift cmp s a => cond_for_signed_cmp cmp
+  | Ccompushift cmp s a => cond_for_unsigned_cmp cmp
+  | Ccompimm cmp n => cond_for_signed_cmp cmp
+  | Ccompuimm cmp n => cond_for_unsigned_cmp cmp
+  | Cmaskzero n => TCeq
+  | Cmasknotzero n => TCne
+  | Ccompl cmp => cond_for_signed_cmp cmp
+  | Ccomplu cmp => cond_for_unsigned_cmp cmp
+  | Ccomplshift cmp s a => cond_for_signed_cmp cmp
+  | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp
+  | Ccomplimm cmp n => cond_for_signed_cmp cmp
+  | Ccompluimm cmp n => cond_for_unsigned_cmp cmp
+  | Cmasklzero n => TCeq
+  | Cmasklnotzero n => TCne
+  | Ccompf cmp => cond_for_float_cmp cmp
+  | Cnotcompf cmp => cond_for_float_not_cmp cmp
+  | Ccompfzero cmp => cond_for_float_cmp cmp
+  | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
+  | Ccompfs cmp => cond_for_float_cmp cmp
+  | Cnotcompfs cmp => cond_for_float_not_cmp cmp
+  | Ccompfszero cmp => cond_for_float_cmp cmp
+  | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
+  end.
+
+(** Translation of a conditional branch.  Prepends to [k] the instructions
+  that evaluate the condition and ranch to [lbl] if it holds.
+  We recognize some conditional branches that can be implemented
+  without setting then testing condition flags.  *)
+
+Definition transl_cond_branch_default
+  (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) :=
+  do ccode <- transl_cond c args k;
+  OK(ccode, Pbc (cond_for_cond c) lbl).
+ 
+Definition transl_cond_branch
+  (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) :=
+  match args, c with
+  | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) =>
+      if Int.eq n Int.zero
+      then (do r1 <- ireg_of a1; OK (k, Pcbnz W r1 lbl))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) =>
+      if Int.eq n Int.zero
+      then (do r1 <- ireg_of a1; OK (k, Pcbz W r1 lbl))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) =>
+      if Int64.eq n Int64.zero
+      then (do r1 <- ireg_of a1; OK (k, Pcbnz X r1 lbl))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) =>
+      if Int64.eq n Int64.zero
+      then (do r1 <- ireg_of a1; OK (k, Pcbz X r1 lbl))
+      else transl_cond_branch_default c args lbl k
+  | a1 :: nil, Cmaskzero n =>
+      match Int.is_power2 n with
+      | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz W r1 bit lbl)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasknotzero n =>
+      match Int.is_power2 n with
+      | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz W r1 bit lbl)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasklzero n =>
+      match Int64.is_power2' n with
+      | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz X r1 bit lbl)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | a1 :: nil, Cmasklnotzero n =>
+      match Int64.is_power2' n with
+      | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz X r1 bit lbl)
+      | None => transl_cond_branch_default c args lbl k
+      end
+  | _, _ =>
+      transl_cond_branch_default c args lbl k
+  end.
+
+(** Translation of the arithmetic operation [r <- op(args)].
+  The corresponding instructions are prepended to [k]. *)
+
+Definition transl_op
+              (op: operation) (args: list mreg) (res: mreg) (k: bcode) :=
+  match op, args with
+  | Omove, a1 :: nil =>
+      match preg_of res, preg_of a1 with
+      | IR r, IR a => OK (Pmov r a ::bi k)
+      | FR r, FR a => OK (Pfmov r a ::bi k)
+      |  _  ,  _   => Error(msg "Asmgenblock.Omove")
+      end
+  | Ointconst n, nil =>
+      do rd <- ireg_of res;
+      OK (loadimm32 rd n k)
+  | Olongconst n, nil =>
+      do rd <- ireg_of res;
+      OK (loadimm64 rd n k)
+  | Ofloatconst f, nil =>
+      do rd <- freg_of res;
+      OK (if Float.eq_dec f Float.zero
+          then Pfmovi D rd XZR ::bi k
+          else Pfmovimmd f rd ::bi k)
+  | Osingleconst f, nil =>
+      do rd <- freg_of res;
+      OK (if Float32.eq_dec f Float32.zero
+          then Pfmovi S rd XZR ::bi k
+          else Pfmovimms f rd ::bi k)
+  | Oaddrsymbol id ofs, nil =>
+      do rd <- ireg_of res;
+      OK (loadsymbol rd id ofs k)
+  | Oaddrstack ofs, nil =>
+      do rd <- ireg_of res;
+      OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k)
+  (** 32-bit integer arithmetic *)
+  | Oshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porr W (transl_shift s a) rd XZR r1 ::bi k)
+  | Oadd, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd W SOnone rd r1 r2 ::bi k)
+  | Oaddshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oaddimm n, a1 :: nil =>
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (addimm32 rd rs n k)
+  | Oneg, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub W SOnone rd XZR r1 ::bi k)
+  | Onegshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub W (transl_shift s a) rd XZR r1 ::bi k)
+  | Osub, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub W SOnone rd r1 r2 ::bi k)
+  | Osubshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub W (transl_shift s a) rd r1 r2 ::bi k)
+  | Omul, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmadd W rd rs1 rs2 XZR ::bi k)
+  | Omuladd, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmadd W rd r2 r3 r1 ::bi k)
+  | Omulsub, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmsub W rd r2 r3 r1 ::bi k)
+  | Odiv, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psdiv W rd r1 r2 ::bi k)
+  | Odivu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pudiv W rd r1 r2 ::bi k)
+  | Oand, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand W SOnone rd r1 r2 ::bi k)
+  | Oandshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oandimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k)
+  | Oor, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr W SOnone rd r1 r2 ::bi k)
+  | Oorshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oorimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k)      
+  | Oxor, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor W SOnone rd r1 r2 ::bi k)
+  | Oxorshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oxorimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k)      
+  | Onot, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn W SOnone rd XZR r1 ::bi k)
+  | Onotshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn W (transl_shift s a) rd XZR r1 ::bi k)
+  | Obic, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic W SOnone rd r1 r2 ::bi k)
+  | Obicshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oorn, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn W SOnone rd r1 r2 ::bi k)
+  | Oornshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oeqv, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon W SOnone rd r1 r2 ::bi k)
+  | Oeqvshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon W (transl_shift s a) rd r1 r2 ::bi k)
+  | Oshl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plslv W rd r1 r2 ::bi k)
+  | Oshr, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pasrv W rd r1 r2 ::bi k)
+  | Oshru, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plsrv W rd r1 r2 ::bi k)
+  | Oshrximm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (shrx32 rd r1 n k)
+  | Ozext s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz W Int.zero s rd r1 ::bi k)
+  | Osext s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz W Int.zero s rd r1 ::bi k)
+  | Oshlzext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Oshlsext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Ozextshr a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Osextshr a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  (** 64-bit integer arithmetic *)
+  | Oshiftl s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porr X (transl_shift s a) rd XZR r1 ::bi k)
+  | Oextend x a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (move_extended rd r1 x a k)
+  (* [Omakelong] and [Ohighlong] should not occur *)
+  | Olowlong, a1 :: nil => 
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      assertion (ireg_eq rd r1);
+      OK (Pcvtx2w rd ::bi k)
+  | Oaddl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd X SOnone rd r1 r2 ::bi k)
+  | Oaddlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Padd X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oaddlext x a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (arith_extended Paddext (Padd X) rd r1 r2 x a k)
+  | Oaddlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (addimm64 rd r1 n k)
+  | Onegl, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub X SOnone rd XZR r1 ::bi k)
+  | Oneglshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psub X (transl_shift s a) rd XZR r1 ::bi k)
+  | Osubl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub X SOnone rd r1 r2 ::bi k)
+  | Osublshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psub X (transl_shift s a) rd r1 r2 ::bi k)
+  | Osublext x a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (arith_extended Psubext (Psub X) rd r1 r2 x a k)
+  | Omull, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pmadd X rd r1 r2 XZR ::bi k)
+  | Omulladd, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmadd X rd r2 r3 r1 ::bi k)
+  | Omullsub, a1 :: a2 :: a3 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+      OK (Pmsub X rd r2 r3 r1 ::bi k)
+  | Omullhs, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psmulh rd r1 r2 ::bi k)
+  | Omullhu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pumulh rd r1 r2 ::bi k)
+  | Odivl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Psdiv X rd r1 r2 ::bi k)
+  | Odivlu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pudiv X rd r1 r2 ::bi k)
+  | Oandl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand X SOnone rd r1 r2 ::bi k)
+  | Oandlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pand X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oandlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k)      
+  | Oorl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr X SOnone rd r1 r2 ::bi k)
+  | Oorlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porr X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oorlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k)      
+  | Oxorl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor X SOnone rd r1 r2 ::bi k)
+  | Oxorlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peor X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oxorlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k)      
+  | Onotl, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn X SOnone rd XZR r1 ::bi k)
+  | Onotlshift s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Porn X (transl_shift s a) rd XZR r1 ::bi k)
+  | Obicl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic X SOnone rd r1 r2 ::bi k)
+  | Obiclshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pbic X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oornl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn X SOnone rd r1 r2 ::bi k)
+  | Oornlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Porn X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oeqvl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon X SOnone rd r1 r2 ::bi k)
+  | Oeqvlshift s a, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Peon X (transl_shift s a) rd r1 r2 ::bi k)
+  | Oshll, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plslv X rd r1 r2 ::bi k)
+  | Oshrl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Pasrv X rd r1 r2 ::bi k)
+  | Oshrlu, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (Plsrv X rd r1 r2 ::bi k)
+  | Oshrlximm n, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (shrx64 rd r1 n k)
+  | Ozextl s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz X Int.zero s rd r1 ::bi k)
+  | Osextl s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz X Int.zero s rd r1 ::bi k)
+  | Oshllzext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Oshllsext s a, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Ozextshrl a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Pubfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  | Osextshrl a s, a1 :: nil =>
+      do rd <- ireg_of res; do r1 <- ireg_of a1;
+      OK (Psbfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+  (** 64-bit floating-point arithmetic *)
+  | Onegf, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfneg D rd rs ::bi k)
+  | Oabsf, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabs D rd rs ::bi k)
+  | Oaddf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfadd D rd rs1 rs2 ::bi k)
+  | Osubf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsub D rd rs1 rs2 ::bi k)
+  | Omulf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmul D rd rs1 rs2 ::bi k)
+  | Odivf, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdiv D rd rs1 rs2 ::bi k)
+  (** 32-bit floating-point arithmetic *)
+  | Onegfs, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfneg S rd rs ::bi k)
+  | Oabsfs, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabs S rd rs ::bi k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfadd S rd rs1 rs2 ::bi k)
+  | Osubfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsub S rd rs1 rs2 ::bi k)
+  | Omulfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmul S rd rs1 rs2 ::bi k)
+  | Odivfs, a1 :: a2 :: nil =>
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdiv S rd rs1 rs2 ::bi k)
+  | Osingleoffloat, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtsd rd rs ::bi k)
+  | Ofloatofsingle, a1 :: nil =>
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtds rd rs ::bi k)
+  (** Conversions between int and float *)
+  | Ointoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs W D rd rs ::bi k)
+  | Ointuoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu W D rd rs ::bi k)
+  | Ofloatofint, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf D W rd rs ::bi k)
+  | Ofloatofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf D W rd rs ::bi k)
+  | Ointofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs W S rd rs ::bi k)
+  | Ointuofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu W S rd rs ::bi k)
+  | Osingleofint, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf S W rd rs ::bi k)
+  | Osingleofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf S W rd rs ::bi k)
+  | Olongoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs X D rd rs ::bi k)
+  | Olonguoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu X D rd rs ::bi k)
+  | Ofloatoflong, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf D X rd rs ::bi k)
+  | Ofloatoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf D X rd rs ::bi k)
+  | Olongofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzs X S rd rs ::bi k)
+  | Olonguofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtzu X S rd rs ::bi k)
+  | Osingleoflong, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pscvtf S X rd rs ::bi k)
+  | Osingleoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pucvtf S X rd rs ::bi k)
+  (** Boolean tests *)
+  | Ocmp c, _ =>
+      do rd <- ireg_of res;
+      transl_cond c args (Pcset rd (cond_for_cond c) ::bi k)
+  (** Conditional move *)
+  | Osel cmp ty, a1 :: a2 :: args =>
+      match preg_of res with
+      | IR r => 
+          do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+          transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
+      | FR r =>
+          do r1 <- freg_of a1; do r2 <- freg_of a2;
+          transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
+      | _ =>
+          Error(msg "Asmgenblock.Osel")
+      end
+  | _, _ => Error(msg "Asmgenblock.transl_op")
+  end.
+
+(** Translation of addressing modes *)
+ 
+Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
+                             (insn: Asm.addressing -> basic) (k: bcode) : res bcode :=
+  match addr, args with
+  | Aindexed ofs, a1 :: nil =>
+      do r1 <- ireg_of a1;
+       if offset_representable sz ofs then
+        OK (insn (ADimm r1 ofs) ::bi k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg r1 X16) ::bi k))
+  | Aindexed2, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (insn (ADreg r1 r2) ::bi k)
+  | Aindexed2shift a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero then
+        OK (insn (ADreg r1 r2) ::bi k)
+      else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (ADlsl r1 r2 a) ::bi k)
+      else
+        OK (Padd X (SOlsl a) X16 r1 r2 ::bi insn (ADimm X16 Int64.zero) ::bi k)
+  | Aindexed2ext x a, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
+        OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
+                             | Xuns32 => ADuxt r1 r2 a end) ::bi k)
+      else
+        OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
+                           (insn (ADimm X16 Int64.zero) ::bi k))
+  | Aglobal id ofs, nil =>
+      assertion (negb (Archi.pic_code tt));
+      if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
+      then OK (Padrp id ofs X16 ::bi insn (ADadr X16 id ofs) ::bi k)
+      else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) ::bi k))
+  | Ainstack ofs, nil =>
+      let ofs := Ptrofs.to_int64 ofs in
+      if offset_representable sz ofs then
+        OK (insn (ADimm XSP ofs) ::bi k)
+      else
+        OK (loadimm64 X16 ofs (insn (ADreg XSP X16) ::bi k))
+  | _, _ =>
+      Error(msg "Asmgen.transl_addressing")
+  end.
+
+(** Translation of loads and stores *)
+
+Definition transl_load (chunk: memory_chunk) (addr: Op.addressing)
+                       (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+  match chunk with
+  | Mint8unsigned =>
+      do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrb W) rd) k
+  | Mint8signed =>
+      do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrsb W) rd) k
+  | Mint16unsigned =>
+      do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrh W) rd) k
+  | Mint16signed =>
+      do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrsh W) rd) k
+  | Mint32 =>
+      do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw rd) k
+  | Mint64 =>
+      do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx rd) k
+  | Mfloat32 =>
+      do rd <- freg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrs rd) k
+  | Mfloat64 =>
+      do rd <- freg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrd rd) k
+  | Many32 =>
+      do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw_a rd) k
+  | Many64 =>
+      do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx_a rd) k
+  end.
+
+Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
+                        (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+  match chunk with
+  | Mint8unsigned | Mint8signed =>
+      do r1 <- ireg_of src; transl_addressing 1 addr args (PSt_rs_a Pstrb r1) k
+  | Mint16unsigned | Mint16signed =>
+      do r1 <- ireg_of src; transl_addressing 2 addr args (PSt_rs_a Pstrh r1) k
+  | Mint32 =>
+      do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw r1) k
+  | Mint64 =>
+      do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx r1) k
+  | Mfloat32 =>
+      do r1 <- freg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrs r1) k
+  | Mfloat64 =>
+      do r1 <- freg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrd r1) k
+  | Many32 =>
+      do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw_a r1) k
+  | Many64 =>
+      do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx_a r1) k
+  end.
+
+(** Translation of a Machblock instruction. *)
+
+Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
+                              (ep: bool) (k: bcode) :=
+  match i with
+  | MBgetstack ofs ty dst =>
+      loadind XSP ofs ty dst k
+  | MBsetstack src ofs ty =>
+      storeind src XSP ofs ty k
+  | MBgetparam ofs ty dst =>
+      do c <- loadind X29 ofs ty dst k;
+      OK (if ep then c else loadptr_bc XSP f.(fn_link_ofs) X29 c)
+  | MBop op args res =>
+      transl_op op args res k
+  | MBload t chunk addr args dst =>
+      match t with
+      | TRAP => transl_load chunk addr args dst k
+      | NOTRAP => Error(msg "Asmgenblock.transl_instr_basic: NOTRAP load not supported in aarch64.")
+      end
+  | MBstore chunk addr args src =>
+      transl_store chunk addr args src k
+  end.
+
+(** Translation of a code sequence *)
+
+Definition it1_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
+  match i with
+  (*| MBgetstack ofs ty dst => before && negb (mreg_eq dst R29)*)
+  | MBsetstack src ofs ty => before
+  | MBgetparam ofs ty dst => negb (mreg_eq dst R29)
+  | MBop op args res => before && negb (mreg_eq res R29)
+  (*| MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst R29)*)
+  | _ => false
+  end.
+
+Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
+  match il with
+  | nil => OK (nil)
+  | i1 :: il' =>
+      do k1 <- transl_basic_code f il' (it1_is_parent it1p i1);
+      transl_instr_basic f i1 it1p k1
+  end.
+
+Program Definition cons_bblocks (ll: list label) (bdy: list basic) (ex: option control): bblocks :=
+  match ex with
+  | None =>
+    match bdy with
+    | nil => {| header := ll; body:= Pnop::nil; exit := None |} :: nil
+    | _ => {| header := ll; body:= bdy; exit := None |} :: nil
+    end
+  | _ =>
+    match bdy with
+    | nil => {| header := ll; body:= nil; exit := ex |} :: nil
+    | _ => {| header := ll; body:= bdy; exit := ex |} :: nil
+    end
+  end.
+Next Obligation.
+  induction bdy. congruence.
+  simpl. auto.
+Qed.
+Next Obligation.
+  destruct ex. simpl. auto.
+  congruence.
+Qed.
+Next Obligation.
+  induction bdy. congruence.
+  simpl. auto.
+Qed.
+
+Definition transl_control (f: Machblock.function) (ctl: control_flow_inst) : res (bcode*control) :=
+  match ctl with
+  | MBcall sig (inl r) => do r1 <- ireg_of r;
+      OK (nil, PCtlFlow (Pblr r1 sig))
+  | MBcall sig (inr symb) => OK (nil, PCtlFlow (Pbl symb sig))
+  | MBtailcall sig (inr symb) => OK(make_epilogue f, PCtlFlow (Pbs symb sig))
+  | MBtailcall sig (inl r) => do r1 <- ireg_of r;
+      OK (make_epilogue f, PCtlFlow (Pbr r1 sig))
+  | MBbuiltin ef args res => OK (nil, Pbuiltin ef (List.map (map_builtin_arg dreg_of) args) (map_builtin_res dreg_of res))
+  | MBgoto lbl => OK (nil, PCtlFlow (Pb lbl))
+  | MBcond cond args lbl => do (bc, c) <- transl_cond_branch cond args lbl nil; OK (bc, PCtlFlow c)
+  | MBreturn => OK (make_epilogue f, PCtlFlow (Pret RA))
+  | MBjumptable arg tbl => do r <- ireg_of arg;
+      OK (nil, PCtlFlow (Pbtbl r tbl))
+  end.
+
+Definition transl_exit (f: Machblock.function) (ext: option control_flow_inst) : res (bcode*option control) :=
+  match ext with
+    Some ctl => do (b,c) <- transl_control f ctl; OK (b, Some c)
+  | None => OK (nil, None)
+  end.
+
+Definition transl_block (f: Machblock.function) (fb: Machblock.bblock) (ep: bool) : res (list bblock) :=
+  do (bdy2, ex) <- transl_exit f fb.(Machblock.exit);
+  do bdy1 <- transl_basic_code f fb.(Machblock.body) ep;
+  OK (cons_bblocks fb.(Machblock.header) (bdy1 @@ bdy2) ex) 
+  .
+
+Fixpoint transl_blocks (f: Machblock.function) (lmb: list Machblock.bblock) (ep: bool) :=
+  match lmb with
+  | nil => OK nil
+  | mb :: lmb => 
+      do lb <- transl_block f mb (if Machblock.header mb then ep else false);
+      do lb' <- transl_blocks f lmb false;
+      OK (lb @@ lb')
+  end
+.
+
+Program Definition make_prologue (f:  Machblock.function) (k:bblocks) :=
+  {| header := nil; body := Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::bi
+      ((PSt_rs_a Pstrx RA) (ADimm XSP (Ptrofs.to_int64 (f.(fn_retaddr_ofs))))) ::bi nil;
+      exit := None |} :: k.
+  
+Definition transl_function (f: Machblock.function) : res Asmblock.function :=
+  do lb <- transl_blocks f f.(Machblock.fn_code) true;
+  OK (mkfunction f.(Machblock.fn_sig)
+        (make_prologue f lb)).
+
+Definition transf_function (f: Machblock.function) : res Asmblock.function :=
+  do tf <- transl_function f;
+  if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+  then Error (msg "code size exceeded")
+  else OK tf.
+
+Definition transf_fundef (f: Machblock.fundef) : res Asmblock.fundef :=
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: Machblock.program) : res Asmblock.program :=
+  transform_partial_program transf_fundef p.
+
diff --git a/aarch64/Asmblockgenproof.v b/aarch64/Asmblockgenproof.v
new file mode 100644
index 00000000..6f7d39fa
--- /dev/null
+++ b/aarch64/Asmblockgenproof.v
@@ -0,0 +1,1546 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock IterList.
+Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops.
+
+Module MB := Machblock.
+Module AB := Asmblock.
+
+Definition match_prog (p: MB.program) (tp: AB.program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable lk: aarch64_linker.
+Variable prog: Machblock.program.
+Variable tprog: Asmblock.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+  forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+Lemma functions_translated:
+  forall b f,
+  Genv.find_funct_ptr ge b = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSF).
+
+Lemma functions_transl:
+  forall fb f tf,
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  transf_function f = OK tf ->
+  Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+  intros. exploit functions_translated; eauto. intros [tf' [A B]].
+  monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+Lemma transf_function_no_overflow:
+  forall f tf,
+  transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+  omega.
+Qed.
+
+Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs),
+  Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address tge id ofs.
+
+(** * Proof of semantic preservation *)
+
+(** Semantic preservation is proved using a complex simulation diagram
+  of the following form.
+<<
+                                     MB.step
+                      ---------------------------------------->
+                      header      body          exit
+                  st1 -----> st2 -----> st3 ------------------> st4
+                   |          |          |                       |
+                   |   (A)    |   (B)    |         (C)           |
+   match_codestate |          |          |                       |
+                   |  header  |   body1  |  body2                |  match_states
+                  cs1 -----> cs2 -----> cs3 ------> cs4          |
+                   |                  /                \  exit   |
+   match_asmstate  |   ---------------                  --->---  |
+                   |  /   match_asmstate                       \ |
+                  st'1 ---------------------------------------> st'2
+                                     AB.step                  *
+>>
+  The invariant between each MB.step/AB.step is the [match_states] predicate below.
+  However, we also need to introduce an intermediary state [Codestate] which allows
+  us to reason on a finer grain, executing header, body and exit separately.
+
+  This [Codestate] consists in a state like [Asmblock.State], except that the
+  code is directly stored in the state, much like [Machblock.State]. It also features
+  additional useful elements to keep track of while executing a bblock.
+*)
+
+Inductive match_states: Machblock.state -> Asm.state -> Prop :=
+  | match_states_intro:
+      forall s fb sp c ep ms m m' rs f tf tc
+        (STACKS: match_stack ge s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (MEXT: Mem.extends m m')
+        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
+        (AG: agree ms sp rs)
+        (DXP: ep = true -> rs#X29 = parent_sp s),
+      match_states (Machblock.State s fb sp c ms m)
+                   (Asm.State rs m')
+  | match_states_call:
+      forall s fb ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = Vptr fb Ptrofs.zero)
+        (ATLR: rs RA = parent_ra s),
+      match_states (Machblock.Callstate s fb ms m)
+                   (Asm.State rs m')
+  | match_states_return:
+      forall s ms m m' rs
+        (STACKS: match_stack ge s)
+        (MEXT: Mem.extends m m')
+        (AG: agree ms (parent_sp s) rs)
+        (ATPC: rs PC = parent_ra s),
+      match_states (Machblock.Returnstate s ms m)
+                   (Asm.State rs m').
+
+Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *)
+
+Lemma cons_bblocks_label:
+  forall hd bdy ex tbb tc,
+  cons_bblocks hd bdy ex = tbb::tc ->
+  header tbb = hd.
+Proof.
+  intros until tc. intros CONSB. unfold cons_bblocks in CONSB.
+  destruct ex; try destruct bdy; try destruct c; try destruct i.
+  all: inv CONSB; simpl; auto.
+Qed.
+
+Lemma cons_bblocks_label2:
+  forall hd bdy ex tbb1 tbb2,
+  cons_bblocks hd bdy ex = tbb1::tbb2::nil ->
+  header tbb2 = nil.
+Proof.
+  intros until tbb2. intros CONSB. unfold cons_bblocks in CONSB.
+  destruct ex; try destruct bdy; try destruct c; try destruct i.
+  all: inv CONSB; simpl; auto.
+Qed.
+
+Remark in_dec_transl:
+  forall lbl hd,
+  (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
+Proof.
+  intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
+Qed.
+
+Lemma transl_is_label:
+  forall lbl bb tbb f ep tc,
+  transl_block f bb ep = OK (tbb::tc) ->
+  is_label lbl tbb = MB.is_label lbl bb.
+Proof.
+  intros until tc. intros TLB.
+  destruct tbb as [thd tbdy tex]; simpl in *.
+  monadInv TLB.
+  unfold is_label. simpl.
+  apply cons_bblocks_label in H0. simpl in H0. subst.
+  rewrite in_dec_transl. auto.
+Qed.
+
+Lemma transl_is_label_false2:
+  forall lbl bb f ep tbb1 tbb2,
+  transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+  is_label lbl tbb2 = false.
+Proof.
+  intros until tbb2. intros TLB.
+  destruct tbb2 as [thd tbdy tex]; simpl in *.
+  monadInv TLB. apply cons_bblocks_label2 in H0. simpl in H0. subst.
+  apply is_label_correct_false. simpl. auto.
+Qed.
+
+Lemma transl_block_nonil:
+  forall f c ep tc,
+  transl_block f c ep = OK tc ->
+  tc <> nil.
+Proof.
+  intros. monadInv H. unfold cons_bblocks.
+  destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
+  all: discriminate.
+Qed.
+
+Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc,
+  ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc).
+Proof.
+  intros. intro. monadInv H.
+  unfold cons_bblocks in H0.
+  destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
+  all: discriminate.
+Qed.
+
+Lemma find_label_transl_false:
+  forall x f lbl bb ep x',
+  transl_block f bb ep = OK x ->
+  MB.is_label lbl bb = false ->
+  find_label lbl (x++x') = find_label lbl x'.
+Proof.
+  intros until x'. intros TLB MBis; simpl; auto.
+  destruct x as [|x0 x1]; simpl; auto.
+  destruct x1 as [|x1 x2]; simpl; auto.
+  - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
+  - destruct x2 as [|x2 x3]; simpl; auto.
+    + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
+      erewrite transl_is_label_false2; eauto.
+    + apply transl_block_limit in TLB. destruct TLB.
+Qed.
+
+Lemma transl_blocks_label:
+  forall lbl f c tc ep,
+  transl_blocks f c ep = OK tc ->
+  match MB.find_label lbl c with
+  | None => find_label lbl tc = None
+  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
+  end.
+Proof.
+  induction c; simpl; intros.
+  inv H. auto.
+  monadInv H.
+  destruct (MB.is_label lbl a) eqn:MBis.
+  - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. }
+    simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis.
+    rewrite ABis.
+    eexists. eexists. split; eauto. simpl transl_blocks.
+    assert (MB.header a <> nil).
+    { apply MB.is_label_correct_true in MBis.
+      destruct (MB.header a). contradiction. discriminate. }
+    destruct (MB.header a); try contradiction.
+    rewrite EQ. simpl. rewrite EQ1. simpl. auto.
+  - apply IHc in EQ1. destruct (MB.find_label lbl c).
+    + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto.
+      erewrite find_label_transl_false; eauto.
+    + erewrite find_label_transl_false; eauto.
+Qed.
+
+Lemma find_label_nil:
+  forall bb lbl c,
+  header bb = nil ->
+  find_label lbl (bb::c) = find_label lbl c.
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *. subst.
+  assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+  { erewrite <- is_label_correct_false. simpl. auto. }
+  rewrite H. auto.
+Qed.
+
+Theorem transl_find_label:
+  forall lbl f tf,
+  transf_function f = OK tf ->
+  match MB.find_label lbl f.(MB.fn_code) with
+  | None => find_label lbl tf.(fn_blocks) = None
+  | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
+  end.
+Proof.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
+  monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
+  eapply transl_blocks_label; eauto.
+Qed.
+
+End TRANSL_LABEL.
+
+(** A valid branch in a piece of Machblock code translates to a valid ``go to''
+  transition in the generated Asmblock code. *)
+
+Lemma find_label_goto_label:
+  forall f tf lbl rs m c' b ofs,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  transf_function f = OK tf ->
+  rs PC = Vptr b ofs ->
+  MB.find_label lbl f.(MB.fn_code) = Some c' ->
+  exists tc', exists rs',
+    goto_label tf lbl rs m = Next rs' m
+  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+  /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+  intros (tc & A & B).
+  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+  intros [pos' [P [Q R]]].
+  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+  split. unfold goto_label. rewrite P. rewrite H1. auto.
+  split. rewrite Pregmap.gss. constructor; auto.
+  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+  auto. omega.
+  generalize (transf_function_no_overflow _ _ H0). omega.
+  intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+
+Lemma return_address_exists:
+  forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros. eapply Asmblockgenproof0.return_address_exists; eauto.
+
+- intros. monadInv H0.
+  destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
+  exists x; exists true; split; auto.
+  repeat constructor.
+- exact transf_function_no_overflow.
+Qed.
+
+(* Useful for dealing with the many cases in some proofs *)
+Ltac exploreInst :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ |- context[if ?b then _ else _] ] => destruct b
+  | [ |- context[match ?m with | _ => _ end] ] => destruct m
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  end.
+
+(** Some translation properties *)
+
+Lemma transl_blocks_distrib:
+  forall c f bb tbb tc ep,
+  transl_blocks f (bb::c) ep = OK (tbb::tc)
+  -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil)
+  /\ transl_blocks f c false = OK tc.
+Proof.
+  intros until ep. intros TLBS.
+  destruct bb as [hd bdy ex].
+  monadInv TLBS. monadInv EQ.
+  unfold transl_block.
+  rewrite EQ0; simpl.
+  simpl in EQ; rewrite EQ; simpl.
+  unfold cons_bblocks in *. simpl in EQ0.
+  destruct ex.
+  - simpl in *. monadInv EQ0.
+    destruct (x3 @@ x1) eqn: CBDY; inv H0; inv EQ1; auto.
+  - simpl in *. inv EQ0. destruct (x3 @@ nil) eqn: CBDY; inv H0; inv EQ1; auto.
+Qed.
+
+Lemma cons_bblocks_decomp:
+  forall thd tbdy tex tbb,
+  (tbdy <> nil \/ tex <> None) ->
+  cons_bblocks thd tbdy tex = tbb :: nil ->
+     header tbb = thd
+  /\ body tbb = tbdy
+  /\ exit tbb = tex.
+Proof.
+  intros until tbb. intros Hnonil CONSB. unfold cons_bblocks in CONSB.
+  destruct (tex) eqn:ECTL.
+  - destruct tbdy; inv CONSB; simpl; auto.
+  - inversion Hnonil.
+    + destruct tbdy as [|bi tbdy]; [ contradiction H; auto | inv CONSB; auto].
+    + contradict H; simpl; auto.
+Qed.
+
+Lemma transl_blocks_nonil:
+  forall f bb c tc ep,
+  transl_blocks f (bb::c) ep = OK tc ->
+  exists tbb tc', tc = tbb :: tc'.
+Proof.
+  intros until ep. intros TLBS. monadInv TLBS. monadInv EQ. unfold cons_bblocks.
+  destruct (x2);
+  destruct (x3 @@ x1); simpl; eauto.
+Qed.
+
+Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.
+
+Definition mb_remove_body (bb: MB.bblock) := 
+  {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
+  
+Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.
+
+Lemma mbsize_eqz:
+  forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
+  remember (length _) as a. remember (length_opt _) as b.
+  assert (a = 0%nat) by omega. assert (b = 0%nat) by omega. subst. clear H.
+  inv H0. inv H1. destruct bdy; destruct ex; auto.
+  all: try discriminate.
+Qed.
+
+Lemma mbsize_neqz:
+  forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
+Proof.
+  intros. destruct bb as [hd bdy ex]; simpl in *.
+  destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
+  contradict H. unfold mbsize. simpl. auto.
+Qed.
+
+Record codestate :=
+  Codestate {     pstate: state;        (**r projection to Asmblock.state *)
+                  pheader: list label;
+                  pbody1: list basic;   (**r list of basic instructions coming from the translation of the Machblock body *)
+                  pbody2: list basic;   (**r list of basic instructions coming from the translation of the Machblock exit *)
+                  pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *)
+                  ep: bool;             (**r reflects the [ep] variable used in the translation *)
+                  rem: list AB.bblock;  (**r remaining bblocks to execute *)
+                  cur: bblock           (**r current bblock to execute - to keep track of its size when incrementing PC *)
+            }.
+
+(* The part that deals with Machblock <-> Codestate agreement
+ * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *)
+Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
+  | match_codestate_intro:
+      forall s sp ms m rs0 m0 f tc ep c bb tbb tbc1 tbc2 ex
+        (STACKS: match_stack ge s)
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (MEXT: Mem.extends m m0)
+        (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc1)
+        (TIC: transl_exit f (MB.exit bb) = OK (tbc2, ex))
+        (TBLS: transl_blocks f c false = OK tc)
+        (AG: agree ms sp rs0)
+        (DXP: (if MB.header bb then ep else false) = true -> rs0#X29 = parent_sp s)
+        ,
+      match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
+        {|  pstate := (Asm.State rs0 m0);
+            pheader := (MB.header bb);
+            pbody1 := tbc1;
+            pbody2 := tbc2;
+            pctl := ex;
+            ep := ep;
+            rem := tc;
+            cur := tbb
+        |}
+.
+
+(* The part ensuring that the code in Codestate actually resides at [rs PC] *)
+Inductive match_asmstate fb: codestate -> Asm.state -> Prop :=
+  | match_asmstate_some:
+      forall rs f tf tc m tbb ofs ep tbdy1 tbdy2 tex lhd
+        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+        (TRANSF: transf_function f = OK tf)
+        (PCeq: rs PC = Vptr fb ofs)
+        (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
+        ,
+      match_asmstate fb 
+        {|  pstate := (Asm.State rs m);
+            pheader := lhd;
+            pbody1 := tbdy1;
+            pbody2 := tbdy2;
+            pctl := tex;
+            ep := ep;
+            rem := tc;
+            cur := tbb |}
+        (Asm.State rs m)
+.
+
+Lemma indexed_memory_access_nonil: forall f ofs r i k,
+  indexed_memory_access_bc f ofs r i k <> nil.
+Proof.
+  intros.
+  unfold indexed_memory_access_bc, loadimm64, loadimm, loadimm_z, loadimm_n;
+  desif; try congruence.
+  all: destruct decompose_int; try destruct p; try congruence.
+Qed.
+
+Lemma loadimm_nonil: forall sz x n k,
+  loadimm sz x n k <> nil.
+Proof.
+  intros.
+  unfold loadimm. desif;
+  unfold loadimm_n, loadimm_z.
+  all: destruct decompose_int; try destruct p; try congruence.
+Qed.
+
+Lemma loadimm32_nonil: forall sz x n,
+  loadimm32 sz x n <> nil.
+Proof.
+  intros.
+  unfold loadimm32. desif; try congruence.
+  apply loadimm_nonil.
+Qed.
+
+Lemma loadimm64_nonil: forall sz x n,
+  loadimm64 sz x n <> nil.
+Proof.
+  intros.
+  unfold loadimm64. desif; try congruence.
+  apply loadimm_nonil.
+Qed.
+
+Lemma loadsymbol_nonil: forall sz x n k,
+  loadsymbol sz x n k <> nil.
+Proof.
+  intros.
+  unfold loadsymbol. desif; try congruence.
+Qed.
+
+Lemma move_extended_nonil: forall x0 x1 x2 a k,
+  move_extended x1 x2 x0 a k <> nil.
+Proof.
+  intros. unfold move_extended, move_extended_base.
+  desif; try congruence.
+Qed.
+
+Lemma arith_extended_nonil: forall insnX insnS x0 x1 x2 x3 a k,
+  arith_extended insnX insnS x1 x2 x3 x0 a k <> nil.
+Proof.
+  intros. unfold arith_extended, move_extended_base.
+  desif; try congruence.
+Qed.
+
+Lemma transl_instr_basic_nonil:
+  forall k f bi ep x,
+  transl_instr_basic f bi ep k = OK x ->
+  x <> nil.
+Proof.
+  intros until x. intros TIB.
+  destruct bi.
+  - simpl in TIB. unfold loadind in TIB;
+    exploreInst; try discriminate; apply indexed_memory_access_nonil.
+  - simpl in TIB. unfold storeind in TIB;
+    exploreInst; try discriminate; apply indexed_memory_access_nonil.
+  - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate;
+    unfold loadptr_bc; apply indexed_memory_access_nonil.
+  - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate;
+    unfold addimm32, addimm64, shrx32, shrx64,
+    logicalimm32, logicalimm64, addimm_aux.
+    all: desif; try congruence;
+    try apply loadimm32_nonil; try apply loadimm64_nonil; try apply loadsymbol_nonil;
+    try apply move_extended_nonil; try apply arith_extended_nonil.
+    all: unfold transl_cond in *; exploreInst; try discriminate;
+      try apply loadimm32_nonil; try apply loadimm64_nonil.
+  - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate;
+    unfold transl_addressing in *; exploreInst; try discriminate.
+    all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil.
+  - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate;
+    unfold transl_addressing in *; exploreInst; try discriminate.
+    all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil.
+Qed.
+
+Lemma transl_basic_code_nonil:
+  forall bdy f x ep,
+  bdy <> nil ->
+  transl_basic_code f bdy ep = OK x ->
+  x <> nil.
+Proof.
+  induction bdy as [|bi bdy].
+    intros. contradict H0; auto.
+  destruct bdy as [|bi2 bdy].
+  - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto.
+  - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
+    monadInv TBC.
+    assert (x0 <> nil).
+      eapply IHbdy; eauto. subst bdy'. discriminate.
+    eapply transl_instr_basic_nonil; eauto.
+Qed.
+
+Lemma transl_exit_nonil:
+  forall ex f bdy x,
+  ex <> None ->
+  transl_exit f ex = OK(bdy, x) ->
+  x <> None.
+Proof.
+  intros ex f bdy x Hnonil TIC.
+  destruct ex as [ex|].
+  - clear Hnonil. destruct ex.
+    all: try (simpl in TIC; try monadInv TIC; exploreInst; discriminate).
+  - contradict Hnonil; auto.
+Qed.
+
+Theorem app_nonil: forall A (l1 l2: list A),
+  l1 <> nil ->
+  l1 @@ l2 <> nil.
+Proof.
+  induction l2.
+  - intros; rewrite app_nil_r; auto.
+  - intros. unfold not. intros. symmetry in H0. 
+    generalize (app_cons_not_nil); intros. unfold not in H1.
+    generalize (H0). apply H1.
+Qed.
+
+Theorem match_state_codestate:
+  forall mbs abs s fb sp bb c ms m,
+  (MB.body bb <> nil \/ MB.exit bb <> None) ->
+  mbs = (Machblock.State s fb sp (bb::c) ms m) ->
+  match_states mbs abs ->
+  exists cs fb f tbb tc ep,
+    match_codestate fb mbs cs /\ match_asmstate fb cs abs
+    /\ Genv.find_funct_ptr ge fb = Some (Internal f)
+    /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
+    /\ body tbb = pbody1 cs ++ pbody2 cs
+    /\ exit tbb = pctl cs
+    /\ cur cs = tbb /\ rem cs = tc
+    /\ pstate cs = abs.
+Proof.
+  intros until m. intros Hnotempty Hmbs MS. subst. inv MS.
+  inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
+  exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2.
+  monadInv TLB. exploit cons_bblocks_decomp; eauto.
+    { inversion Hnotempty.
+      - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
+        left. apply app_nonil. eapply transl_basic_code_nonil; eauto.
+      - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
+        right. eapply transl_exit_nonil; eauto. }
+  intros (Hth & Htbdy & Htexit).
+  exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x1; pbody2 := x;
+            pctl := x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0.
+  repeat split. 1-2: econstructor; eauto.
+  { destruct (MB.header bb). eauto. discriminate. } eauto.
+  unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl.
+  rewrite TLBS. simpl. rewrite H2.
+  all: simpl; auto.
+Qed.
+
+Lemma exec_straight_body:
+  forall c c' rs1 m1 rs2 m2,
+  exec_straight tge lk c rs1 m1 c' rs2 m2 ->
+  exists l,
+     c = l ++ c'
+  /\ exec_body lk tge l rs1 m1 = Next rs2 m2.
+Proof.
+  induction c; try (intros; inv H; fail).
+  intros until m2. intros EXES. inv EXES.
+  - exists (a :: nil). repeat (split; simpl; auto). rewrite H6. auto.
+  - eapply IHc in H7; eauto. destruct H7 as (l' & Hc & EXECB). subst.
+    exists (a :: l'). repeat (split; simpl; auto).
+    rewrite H1. auto.
+Qed.
+
+Lemma exec_straight_body2:
+  forall c rs1 m1 c' rs2 m2,
+  exec_straight tge lk c rs1 m1 c' rs2 m2 ->
+  exists body,
+     exec_body lk tge body rs1 m1 = Next rs2 m2
+  /\ body ++ c' = c.
+Proof.
+  intros until m2. induction 1.
+  - exists (i1::nil). split; auto. simpl. rewrite H. auto.
+  - destruct IHexec_straight as (bdy & EXEB & BTC).
+    exists (i:: bdy). split; simpl.
+    + rewrite H. auto.
+    + congruence.
+Qed.
+
+Lemma exec_straight_opt_body2:
+  forall c rs1 m1 c' rs2 m2,
+  exec_straight_opt tge lk c rs1 m1 c' rs2 m2 ->
+  exists body,
+     exec_body lk tge body rs1 m1 = Next rs2 m2
+  /\ body ++ c' = c.
+Proof.
+  intros until m2. intros EXES.
+  inv EXES.
+  - exists nil. split; auto.
+  - eapply exec_straight_body2. auto.
+Qed.
+
+Lemma PC_not_data_preg: forall r ,
+  data_preg r = true ->
+  r <> PC.
+Proof.
+  intros. destruct (PregEq.eq r PC); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma X30_not_data_preg: forall r ,
+  data_preg r = true ->
+  r <> X30.
+Proof.
+  intros. destruct (PregEq.eq r X30); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma X16_not_data_preg: forall r ,
+  data_preg r = true ->
+  r <> X16.
+Proof.
+  intros. destruct (PregEq.eq r X16); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma undef_regs_other_2':
+  forall r rl rs,
+  data_preg r = true ->
+  preg_notin r rl ->
+  undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of rl) rs r = rs r.
+Proof.
+  intros. apply undef_regs_other. intros. simpl in H1.
+  destruct H1 as [HX16 | [HX30 | HDES]]; subst.
+  apply X16_not_data_preg; auto. apply X30_not_data_preg; auto.
+  exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+  rewrite preg_notin_charact in H0. auto.
+Qed.
+
+Ltac Simpl :=
+  rewrite Pregmap.gso; try apply PC_not_data_preg; try apply X30_not_data_preg.
+  
+(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are
+   unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by
+   yourself the steps *)
+Theorem step_simu_control:
+  forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2,
+  MB.body bb' = nil ->
+  Genv.find_funct_ptr tge fb = Some (Internal fn) ->
+  pstate cs2 = (State rs2 m2) ->
+  pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
+  cur cs2 = tbb ->
+  match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
+  match_asmstate fb cs2 (State rs1 m1) ->
+  exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' ->
+  (exists rs3 m3 rs4 m4,
+      exec_body lk tge tbdy2 rs2 m2 = Next rs3 m3
+  /\  exec_exit tge fn (Ptrofs.repr (size tbb)) rs3 m3 tex t rs4 m4
+  /\  match_states S'' (State rs4 m4)).
+Proof.
+  intros until cs2. intros Hbody FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
+  inv ESTEP.
+  - inv MCS. inv MAS. simpl in *.
+    inv Hpstate.
+    destruct ctl.
+    + (* MBcall *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      destruct s1 as [rf|fid]; simpl in H1.
+      * (* Indirect call *)
+        monadInv H1. monadInv EQ.
+        assert (ms' rf = Vptr f' Ptrofs.zero).
+        { unfold find_function_ptr in H12. destruct (ms' rf); try discriminate.
+          revert H12; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+        assert (rs2 x = Vptr f' Ptrofs.zero).
+        { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+        remember (Ptrofs.add _ _) as ofs'.
+        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+        { econstructor; eauto. }
+        assert (f1 = f) by congruence. subst f1.
+        exploit return_address_offset_correct; eauto. intros; subst ra.
+
+        repeat eexists.
+        econstructor; eauto. econstructor.
+          econstructor; eauto. econstructor; eauto.
+          eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros.
+          unfold incrPC; repeat Simpl; auto.
+          simpl. unfold incrPC; rewrite Pregmap.gso; auto; try discriminate.
+          rewrite !Pregmap.gss. rewrite PCeq. rewrite Heqofs'. simpl. auto.
+
+      * (* Direct call *)
+        monadInv H1.
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+        remember (Ptrofs.add _ _) as ofs'.
+        assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+          econstructor; eauto.
+        assert (f1 = f) by congruence. subst f1.
+        exploit return_address_offset_correct; eauto. intros; subst ra.
+        repeat eexists.
+        econstructor; eauto. econstructor.
+          econstructor; eauto. econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros.
+        unfold incrPC; repeat Simpl; auto. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H12. rewrite H12. auto.
+        unfold incrPC; simpl; rewrite Pregmap.gso; try discriminate. rewrite !Pregmap.gss.
+        subst. unfold Val.offset_ptr. rewrite PCeq. auto.
+    + (* MBtailcall *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence.  subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      exploit Mem.loadv_extends. eauto. eexact H13. auto. simpl. intros [parent' [A B]].
+      destruct s1 as [rf|fid]; simpl in H11. 
+      * monadInv H1. monadInv EQ.
+        assert (ms' rf = Vptr f' Ptrofs.zero).
+          { destruct (ms' rf); try discriminate. revert H11. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+        assert (rs2 x = Vptr f' Ptrofs.zero).
+          { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+
+        assert (f = f1) by congruence. subst f1. clear FIND1. clear H12.
+        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+        exploit exec_straight_body; eauto.
+        intros (l & MKEPI & EXEB).
+        repeat eexists. rewrite app_nil_r in MKEPI.
+        rewrite <- MKEPI in EXEB.
+        eauto. econstructor. simpl. unfold incrPC.
+        rewrite !Pregmap.gso; try discriminate. eauto.
+        econstructor; eauto.
+          { apply agree_set_other.
+            - econstructor; auto with asmgen.
+              + apply V.
+              + intro r. destruct r; apply V; auto.
+            - eauto with asmgen. }
+        rewrite Pregmap.gss. rewrite Z; auto; try discriminate.
+        eapply ireg_of_not_X30''; eauto.
+        eapply ireg_of_not_X16''; eauto.
+      * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H12.
+        exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+        exploit exec_straight_body; eauto.
+        intros (l & MKEPI & EXEB).
+        repeat eexists. inv EQ. rewrite app_nil_r in MKEPI.
+        rewrite <- MKEPI in EXEB.
+        eauto. inv EQ. econstructor. simpl. unfold incrPC.
+        eauto.
+        econstructor; eauto.
+        { apply agree_set_other.
+          - econstructor; auto with asmgen.
+            + apply V.
+            + intro r. destruct r; apply V; auto.
+          - eauto with asmgen. }
+        { rewrite Pregmap.gss. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H11. auto. }
+    + (* MBbuiltin *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      
+      assert (f0 = f) by congruence. subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+      eapply transf_function_no_overflow; eauto.
+      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+      remember (Ptrofs.add _ _) as ofs'.
+      assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+      econstructor; eauto.
+      
+      monadInv TBC. monadInv TIC. inv H0.
+      
+      exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+      exploit external_call_mem_extends; eauto.
+      intros [vres' [m2' [A [B [C D]]]]].
+  
+      repeat eexists. econstructor. erewrite <- sp_val by eauto.
+      eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+      eapply external_call_symbols_preserved; eauto. apply senv_preserved. eauto.
+      econstructor; eauto.
+      unfold incrPC. rewrite Pregmap.gss.
+      rewrite set_res_other. rewrite undef_regs_other. unfold Val.offset_ptr. rewrite PCeq.
+      eauto. 
+      intros; simpl in *; destruct H as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+      exploit list_in_map_inv; eauto; intros [mr [E F]]; subst; discriminate.
+      auto with asmgen. apply agree_nextblock. eapply agree_set_res; auto.
+      eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2'; auto.
+      intros. discriminate.
+    + (* MBgoto *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H9.
+      remember (incrPC (Ptrofs.repr (size tbb)) rs2) as rs2'.
+      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
+      assert (tf = fn) by congruence. subst tf.
+      exploit find_label_goto_label.
+        eauto. eauto.
+        instantiate (2 := rs2').
+        { subst. unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. eauto. }
+        eauto.
+      intros (tc' & rs' & GOTO & AT2 & INV).
+
+      eexists. eexists. repeat eexists. repeat split.
+      econstructor; eauto.
+        rewrite Heqrs2' in INV. unfold incrPC in INV.
+        rewrite Heqrs2' in GOTO; simpl; eauto.
+        econstructor; eauto.
+        eapply agree_exten; eauto with asmgen.
+        assert (forall r : preg, r <> PC -> rs' r = rs2 r).
+        { intros. rewrite Heqrs2' in INV.
+          rewrite INV; unfold incrPC; try rewrite Pregmap.gso; auto. }
+        eauto with asmgen.
+        congruence.
+    + (* MBcond *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0. monadInv H1. monadInv EQ.
+
+      * (* MBcond true *)
+        assert (f0 = f) by congruence. subst f0.
+        exploit eval_condition_lessdef.
+          eapply preg_vals; eauto.
+          all: eauto.
+        intros EC.
+        exploit transl_cbranch_correct_1; eauto. intros (rs', H).
+        destruct H as [ES [ECFI]].
+        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+        assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. }
+        rewrite PCeq' in PCeq.
+        assert (f1 = f) by congruence. subst f1.
+        exploit find_label_goto_label.
+          4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs')).
+          unfold incrPC, Val.offset_ptr. rewrite PCeq. rewrite Pregmap.gss. eauto.
+        intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
+        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+        assert (tf = fn) by congruence. subst tf.
+
+        repeat eexists.
+          rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
+          rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto.
+
+        econstructor; eauto.
+          eapply agree_exten with rs2; eauto with asmgen.
+          { intros. rewrite Hrs3; unfold incrPC. Simpl. rewrite H. all: auto. apply PC_not_data_preg; auto. }
+        intros. discriminate.
+      * (* MBcond false *)
+        assert (f0 = f) by congruence. subst f0. monadInv H1. monadInv EQ.
+        exploit eval_condition_lessdef.
+          eapply preg_vals; eauto.
+          all: eauto.
+        intros EC.
+
+        exploit transl_cbranch_correct_1; eauto. intros (rs', H).
+        destruct H as [ES [ECFI]].
+        exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+        assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. }
+        rewrite PCeq' in PCeq.
+        exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+        assert (tf = fn) by congruence. subst tf.
+
+        assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
+          eapply transf_function_no_overflow; eauto.
+        generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+
+        repeat eexists.
+          rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
+          rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto.
+
+        econstructor; eauto.
+          unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto.
+          eapply agree_exten with rs2; eauto with asmgen.
+          { intros. unfold incrPC. Simpl. rewrite H. all: auto. }
+        intros. discriminate.
+    + (* MBjumptable *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      monadInv H1. monadInv EQ.
+      generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
+      assert (f1 = f) by congruence. subst f1.
+      exploit find_label_goto_label. 4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs2) # X16 <- Vundef).
+        unfold incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity. discriminate.
+      exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn.
+
+      intros [tc' [rs' [A [B C]]]].
+      exploit ireg_val; eauto. rewrite H11. intros LD; inv LD.
+      
+      repeat eexists. econstructor. simpl. Simpl. 2: { eapply ireg_of_not_X16''; eauto. }
+        unfold incrPC. rewrite Pregmap.gso; try discriminate. rewrite <- H1.
+        simpl. unfold Mach.label in H12. unfold label. rewrite H12. eapply A.
+      econstructor; eauto.
+        eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
+        { unfold incrPC. repeat Simpl; auto. apply X16_not_data_preg; auto. }
+        discriminate.
+    + (* MBreturn *)
+      destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+      inv TBC. inv TIC. inv H0.
+
+      assert (f0 = f) by congruence. subst f0.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+      exploit exec_straight_body; eauto.
+        simpl. eauto.
+      intros EXEB. destruct EXEB as [l [MKEPI EXEB]].
+      assert (f1 = f) by congruence. subst f1.
+      
+      repeat eexists.
+        rewrite app_nil_r in MKEPI. rewrite <- MKEPI in EXEB. eauto.
+        econstructor. simpl. reflexivity.
+      econstructor; eauto.
+        unfold incrPC. repeat apply agree_set_other; auto with asmgen.
+
+  - inv MCS. inv MAS. simpl in *. subst. inv Hpstate.
+    destruct bb' as [hd' bdy' ex']; simpl in *. subst.
+    monadInv TBC. monadInv TIC. simpl in *.
+    simpl. repeat eexists.
+    econstructor. econstructor. 4: instantiate (3 := false). all:eauto.
+      unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq.
+      assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+        eapply transf_function_no_overflow; eauto.
+      assert (f = f0) by congruence. subst f0. econstructor; eauto.
+      generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
+    eapply agree_exten; eauto. intros. unfold incrPC; Simpl; auto.
+    discriminate.
+Qed.
+
+(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *)
+Theorem step_simu_basic:
+  forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
+  MB.header bb = nil -> MB.body bb = bi::(bdy) ->
+  bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
+  basic_step ge s fb sp ms m bi ms' m' ->
+  pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists rs2 m2 l cs2 tbdy',
+       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
+                pctl := pctl cs1; ep := it1_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |}
+    /\ tbdy = l ++ tbdy'
+    /\ exec_body lk tge l rs1 m1 = Next rs2 m2
+    /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
+Proof.
+  intros until bdy. intros Hheader Hbody (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
+  simpl in *. inv Hpstate.
+  rewrite Hbody in TBC. monadInv TBC.
+  inv BSTEP.
+
+  - (* MBgetstack *)
+    simpl in EQ0.
+    unfold Mach.load_stack in H.
+    exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+    rewrite (sp_val _ _ _ AG) in A.
+    exploit loadind_correct; eauto with asmgen.
+    intros (rs2 & EXECS & Hrs'1 & Hrs'2).
+    eapply exec_straight_body in EXECS.
+    destruct EXECS as (l & Hlbi & EXECB).
+    exists rs2, m1, l.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+    subst. simpl in Hheadereq.
+
+    eapply match_codestate_intro; eauto.
+    eapply agree_set_mreg; eauto with asmgen.
+    intro Hep. simpl in Hep. discriminate.
+  - (* MBsetstack *)
+    simpl in EQ0.
+    unfold Mach.store_stack in H.
+    assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
+    exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+    exploit storeind_correct; eauto with asmgen.
+    rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
+
+    eapply exec_straight_body in P.
+    destruct P as (l & ll & EXECB).
+    exists rs', m2', l.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+    eapply agree_undef_regs; eauto with asmgen.
+    simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
+  - (* MBgetparam *)
+    simpl in EQ0.
+
+    assert (f0 = f) by congruence; subst f0.
+    unfold Mach.load_stack in *.
+    exploit Mem.loadv_extends. eauto. eexact H0. auto.
+    intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+    exploit Mem.loadv_extends. eauto. eexact H1. auto.
+    intros [v' [C D]].
+
+    monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
+    destruct ep0 eqn:EPeq.
+
+  (* X29 contains parent *)
+    + exploit loadind_correct. eexact EQ1.
+      instantiate (2 := rs1). rewrite DXP; eauto. discriminate.
+      intros [rs2 [P [Q R]]].
+
+      eapply exec_straight_body in P.
+      destruct P as (l & ll & EXECB).
+      exists rs2, m1, l. eexists.
+      eexists. split. instantiate (1 := x). eauto.
+      repeat (split; auto).
+      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+      subst.
+      eapply match_codestate_intro; eauto.
+
+      eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+      simpl; intros. rewrite R; auto with asmgen. unfold preg_of.
+      apply preg_of_not_X29; auto.
+
+  (* X29 does not contain parent *)
+    + rewrite chunk_of_Tptr in A. 
+      exploit loadptr_correct. eexact A. discriminate. intros [rs2 [P [Q R]]].
+      exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
+      discriminate.
+      intros [rs3 [S [T U]]].
+
+      exploit exec_straight_trans.
+        eapply P.
+        eapply S.
+      intros EXES.
+
+      eapply exec_straight_body in EXES.
+      destruct EXES as (l & ll & EXECB).
+      exists rs3, m1, l.
+      eexists. eexists. split. instantiate (1 := x). eauto.
+      repeat (split; auto).
+      remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+      assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+      subst.
+      eapply match_codestate_intro; eauto.
+      eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+      instantiate (1 := rs2#X29 <- (rs3#X29)). intros.
+      rewrite Pregmap.gso; auto with asmgen.
+      congruence.
+      intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
+      simpl; intros. rewrite U; auto with asmgen.
+      apply preg_of_not_X29; auto.
+  - (* MBop *)
+    simpl in EQ0. rewrite Hheader in DXP.
+    
+    assert (eval_operation tge sp op (map ms args) m' = Some v).
+      rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
+    exploit eval_operation_lessdef.
+      eapply preg_vals; eauto.
+      2: eexact H0.
+      all: eauto.
+    intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+    destruct P as (l & ll & EXECB).
+    exists rs2, m1, l.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    subst.
+    eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+    apply agree_set_undef_mreg with rs1; auto. 
+    apply Val.lessdef_trans with v'; auto.
+    simpl; intros. destruct (andb_prop _ _ H1); clear H1.
+    rewrite R; auto. apply preg_of_not_X29; auto.
+Local Transparent destroyed_by_op.
+    destruct op; simpl; auto; try discriminate.
+  - (* MBload *)
+    simpl in EQ0. rewrite Hheader in DXP.
+
+    assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
+      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    exploit Mem.loadv_extends; eauto. intros [v' [C D]]. destruct trap; try discriminate.
+    exploit transl_load_correct; eauto.
+    intros [rs2 [P [Q R]]].
+
+    eapply exec_straight_body in P.
+    destruct P as (l & ll & EXECB).
+    exists rs2, m1, l.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+    subst.
+    eapply match_codestate_intro; eauto.
+    eapply agree_set_mreg; eauto with asmgen.
+    intro Hep. simpl in Hep. discriminate.
+  - (* MBload notrap1 *)
+    simpl in EQ0. unfold transl_load in EQ0. discriminate.
+  - (* MBload notrap2 *)
+    simpl in EQ0. unfold transl_load in EQ0. discriminate.
+  - (* MBstore *)
+    simpl in EQ0. rewrite Hheader in DXP.
+
+    assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
+      rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+    exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+    intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+    assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
+    exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+    exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+
+    eapply exec_straight_body in P.
+    destruct P as (l & ll & EXECB).
+    exists rs2, m2', l.
+    eexists. eexists. split. instantiate (1 := x). eauto.
+    repeat (split; auto).
+    remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+    assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+    subst.
+    eapply match_codestate_intro; eauto.
+    eapply agree_undef_regs; eauto with asmgen.
+    intro Hep. simpl in Hep. discriminate.
+Qed.
+
+Lemma exec_body_trans:
+  forall l l' rs0 m0 rs1 m1 rs2 m2,
+  exec_body lk tge l rs0 m0 = Next rs1 m1 ->
+  exec_body lk tge l' rs1 m1 = Next rs2 m2 ->
+  exec_body lk tge (l++l') rs0 m0 = Next rs2 m2.
+Proof.
+  induction l.
+  - simpl. induction l'. intros.
+    + simpl in *. congruence.
+    + intros. inv H. auto.
+  - intros until m2. intros EXEB1 EXEB2.
+    inv EXEB1. destruct (exec_basic _) eqn:EBI; try discriminate.
+    simpl. rewrite EBI. eapply IHl; eauto.
+Qed.
+
+Lemma exec_body_control:
+  forall b t rs1 m1 rs2 m2 rs3 m3 fn,
+  exec_body lk tge (body b) rs1 m1 = Next rs2 m2 ->
+  exec_exit tge fn (Ptrofs.repr (size b)) rs2 m2 (exit b) t rs3 m3 ->
+  exec_bblock lk tge fn b rs1 m1 t rs3 m3.
+Proof.
+  intros until fn. intros EXEB EXECTL.
+  econstructor; eauto.
+Qed.
+
+Inductive exec_header: codestate -> codestate -> Prop :=
+  | exec_header_cons: forall cs1,
+      exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
+                          pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1;
+                          cur := cur cs1 |}.
+
+(* Theorem (A) in the diagram, the easiest of all *)
+Theorem step_simu_header:
+  forall bb s fb sp c ms m rs1 m1 cs1,
+  pstate cs1 = (State rs1 m1) ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists cs1',
+       exec_header cs1 cs1'
+    /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
+Proof.
+  intros until cs1. intros Hpstate MCS.
+  eexists. split; eauto.
+  econstructor; eauto.
+  inv MCS. simpl in *. inv Hpstate.
+  econstructor; eauto.
+Qed.
+
+(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *)
+Theorem step_simu_body:
+  forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
+  MB.header bb = nil ->
+  body_step ge s fb sp (MB.body bb) ms m ms' m' ->
+  pstate cs1 = (State rs1 m1) ->
+  match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+  (exists rs2 m2 cs2 ep,
+       cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
+                pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |}
+    /\ exec_body lk tge (pbody1 cs1) rs1 m1 = Next rs2 m2
+    /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
+Proof.
+  intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
+  - intros until m'. intros Hheader BSTEP Hpstate MCS.
+    inv BSTEP.
+    exists rs1, m1, cs1, (ep cs1).
+    inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
+    econstructor; eauto.
+  - intros until m'. intros Hheader BSTEP Hpstate MCS. inv BSTEP.
+    rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
+    exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
+    intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
+    simpl in *.
+    exploit IHbdy. auto. eapply H6. 2: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
+    intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
+    exists rs3, m3, cs3, ep.
+    repeat (split; simpl; auto). subst. simpl in *. auto.
+    rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
+Qed.
+
+(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *)
+(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *)
+Lemma step_simulation_bblock':
+  forall t sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
+  bb' = mb_remove_header bb ->
+  body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
+  bb'' = mb_remove_body bb' ->
+  exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') t s'' ->
+  match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+  exists S2 : state, plus (step lk) tge S1 t S2 /\ match_states s'' S2.
+Proof.
+  intros until S1. intros Hbb' BSTEP Hbb'' ESTEP MS.
+  destruct (mbsize bb) eqn:SIZE.
+  - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
+    destruct bb as [hd bdy ex]; simpl in *; subst.
+    inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
+    monadInv H2. simpl in *. inv ESTEP. inv BSTEP.
+    eexists. split.
+    + eapply plus_one.
+      exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
+      assert (x = tf) by congruence. subst x.
+      eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
+      unfold exec_bblock. simpl.
+      eexists; eexists; split; eauto.
+      econstructor.
+    + econstructor.
+      1,2,3: eauto.
+      *
+      unfold incrPC. rewrite Pregmap.gss.
+      unfold Val.offset_ptr. rewrite <- H.
+    assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+      { eapply transf_function_no_overflow; eauto. }
+    econstructor; eauto.
+      generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
+      *
+    eapply agree_exten; eauto. intros. unfold incrPC. rewrite Pregmap.gso; auto.
+    unfold data_preg in H2. destruct r; try congruence.
+   *
+    intros. discriminate.
+  - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
+    intros Hnotempty.
+
+    (* initial setting *)
+    exploit match_state_codestate.
+      eapply Hnotempty.
+      all: eauto.
+    intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
+
+    (* step_simu_header part *)
+    assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
+    destruct H as (rs1 & m1 & Hpstate2). subst.
+    assert (f = fb). { inv MCS. auto. } subst fb.
+    exploit step_simu_header.
+      2: eapply MCS.
+      all: eauto.
+    intros (cs1' & EXEH & MCS2).
+
+    (* step_simu_body part *)
+    assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
+    exploit step_simu_body.
+      2: eapply BSTEP.
+      3: eapply MCS2.
+      all: eauto. rewrite Hpstate'. eauto.
+    intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').
+
+    (* step_simu_control part *)
+    assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
+    { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
+    destruct H as (tf & FIND').
+    inv EXEH. simpl in *.
+    subst. exploit step_simu_control.
+      8: eapply MCS'. all: simpl.
+      9: eapply ESTEP.
+      all: simpl; eauto.
+      { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto.
+        erewrite exec_body_pc; eauto. }
+    intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').
+
+    (* bringing the pieces together *)
+    exploit exec_body_trans.
+      eapply EXEB.
+      eauto.
+    intros EXEB2.
+    exploit exec_body_control; eauto.
+    rewrite <- Hbody in EXEB2. eauto.
+    rewrite Hexit. eauto.
+    intros EXECB. (* inv EXECB. *)
+    exists (State rs4 m4).
+    split; auto. eapply plus_one. rewrite Hpstate2.
+    assert (exists ofs, rs1 PC = Vptr f ofs).
+    { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
+    destruct H as (ofs & Hrs1pc).
+    eapply exec_step_internal; eauto.
+
+    (* proving the initial find_bblock *)
+    rewrite Hpstate2 in MAS. inv MAS. simpl in *. 
+    assert (f1 = f0) by congruence. subst f0.
+    rewrite PCeq in Hrs1pc. inv Hrs1pc.
+    exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
+    inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ.
+    eapply find_bblock_tail; eauto.
+Qed.
+
+Theorem step_simulation_bblock:
+  forall t sf f sp bb ms m ms' m' S2 c,
+  body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
+  exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') t S2 ->
+  forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+  exists S2' : state, plus (step lk) tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  intros until c. intros BSTEP ESTEP S1' MS.
+  eapply step_simulation_bblock'; eauto.
+  all: destruct bb as [hd bdy ex]; simpl in *; eauto.
+  inv ESTEP.
+  - econstructor. inv H; try (econstructor; eauto; fail).
+  - econstructor.
+Qed.
+
+(* Measure to prove finite stuttering, see the other backends *)
+Definition measure (s: MB.state) : nat :=
+  match s with
+  | MB.State _ _ _ _ _ _ => 0%nat
+  | MB.Callstate _ _ _ _ => 0%nat
+  | MB.Returnstate _ _ _ => 1%nat
+  end.
+
+Lemma next_sep:
+  forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+  congruence.
+Qed.
+
+(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs
+   for the internal and external function cases *)
+Theorem step_simulation:
+  forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
+  forall S1' (MS: match_states S1 S1'),
+  (exists S2', plus (step lk) tge S1' t S2' /\ match_states S2 S2')
+  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+  induction 1; intros.
+
+- (* bblock *)
+  left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
+  all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock; 
+            try (rewrite MBE; try discriminate); eauto). 
+  + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto.
+- (* internal function *)
+  inv MS.
+  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+  generalize EQ; intros EQ'. monadInv EQ'.
+  destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0.
+  unfold Mach.store_stack in *.
+  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+  intros [m1' [C D]].
+  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+  intros [m2' [F G]].
+  simpl chunk_of_type in F.
+  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+  intros [m3' [P Q]].
+  (* Execution of function prologue *)
+  monadInv EQ0.
+  set (tfbody := make_prologue f x0) in *.
+  set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *.
+  set (rs2 := rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef).
+  exploit (storeptr_correct lk tge XSP (fn_retaddr_ofs f) RA nil m2' m3' rs2).
+  { rewrite chunk_of_Tptr in P.
+    assert (rs0 X30 = rs2 RA) by auto.
+    rewrite <- H3.
+    rewrite ATLR.
+    change (rs2 XSP) with sp. eexact P. }
+  1-2: discriminate.
+  intros (rs3 & U & V).
+  assert (EXEC_PROLOGUE: exists rs3',
+            exec_straight_blocks tge lk tf
+              tf.(fn_blocks) rs0 m'
+              x0 rs3' m3'
+          /\ forall r, r <> PC -> r <> X16 -> rs3' r = rs3 r).
+  { eexists. split.
+    - change (fn_blocks tf) with tfbody; unfold tfbody.
+      econstructor; eauto.
+      assert (Archi.ptr64 = true) as SF; auto.
+      + unfold exec_bblock. simpl exec_body.
+        rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F.
+        assert (Mptr = Mint64) by auto. rewrite H3 in F. simpl in F. rewrite F. simpl.
+        unfold exec_store_rs_a. repeat Simpl; try discriminate.
+        exists rs2. exists m3'. split.
+        * unfold eval_addressing. Simpl; try discriminate. rewrite Pregmap.gss.
+          rewrite chunk_of_Tptr in P. rewrite H3 in P.
+          unfold Val.addl. unfold Val.offset_ptr in P.
+          destruct sp; simpl; try discriminate. rewrite SF; simpl.
+          rewrite Ptrofs.of_int64_to_int64. unfold Mem.storev in P. rewrite ATLR.
+          rewrite P. simpl. apply next_sep; eauto. apply SF.
+        * econstructor.
+      + eauto.
+    - intros. unfold incrPC.
+      rewrite Pregmap.gso; auto. rewrite V; auto.
+  } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
+  exploit exec_straight_steps_2; eauto using functions_transl.
+  simpl fn_blocks. simpl fn_blocks in g. omega. constructor.
+  intros (ofs' & X & Y).
+  left; exists (State rs3' m3'); split.
+  eapply exec_straight_steps_1; eauto.
+  simpl fn_blocks. simpl fn_blocks in g. omega.
+  constructor.
+  econstructor; eauto.
+  rewrite X; econstructor; eauto. 
+  apply agree_exten with rs2; eauto with asmgen.
+  unfold rs2. 
+  apply agree_set_other; auto with asmgen.
+  apply agree_change_sp with (parent_sp s). 
+  apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry.
+  simpl; intros; Simpl. auto.
+  assert (r' <> X29). { contradict H3; rewrite H3; unfold data_preg; auto. } auto.
+  unfold sp; congruence.
+
+  intros.
+
+  rewrite Heqrs3'. rewrite V. 2-5: try apply X16_not_data_preg; try apply PC_not_data_preg; auto.
+  auto.
+  intros. rewrite Heqrs3'; try discriminate. rewrite V by auto with asmgen. reflexivity.
+- (* external function *)
+  inv MS.
+  exploit functions_translated; eauto.
+  intros [tf [A B]]. simpl in B. inv B.
+  exploit extcall_arguments_match; eauto.
+  intros [args' [C D]].
+  exploit external_call_mem_extends; eauto.
+  intros [res' [m2' [P [Q [R S]]]]].
+  left; econstructor; split.
+  apply plus_one. eapply exec_step_external; eauto.
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  econstructor; eauto.
+  unfold loc_external_result.
+  apply agree_set_other; auto.
+  apply agree_set_pair; auto.
+  apply agree_undef_caller_save_regs; auto.
+
+- (* return *) 
+  inv MS.
+  inv STACKS. simpl in *.
+  right. split. omega. split. auto.
+  rewrite <- ATPC in H5.
+  econstructor; eauto. congruence.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, MB.initial_state prog st1 ->
+  exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inversion H. unfold ge0 in *.
+  econstructor; split.
+  econstructor.
+  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
+     with (Vptr fb Ptrofs.zero).
+  econstructor; eauto.
+  constructor.
+  apply Mem.extends_refl.
+  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
+  intros. rewrite Mach.Regmap.gi. auto.
+  unfold Genv.symbol_address.
+  rewrite (match_program_main TRANSF).
+  rewrite symbols_preserved.
+  unfold ge; rewrite H1. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
+Proof.
+  intros. inv H0. inv H. constructor. assumption.
+  compute in H1. inv H1.
+  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+Qed.
+
+Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop := 
+  Asmblockgenproof0.return_address_offset.
+
+Lemma transf_program_correct:
+  forward_simulation (MB.semantics return_address_offset prog) (AB.semantics lk tprog).
+Proof.
+  eapply forward_simulation_star with (measure := measure).
+  - apply senv_preserved.
+  - eexact transf_initial_states.
+  - eexact transf_final_states.
+  - exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/aarch64/Asmblockgenproof0.v b/aarch64/Asmblockgenproof0.v
new file mode 100644
index 00000000..03d863a3
--- /dev/null
+++ b/aarch64/Asmblockgenproof0.v
@@ -0,0 +1,884 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** * "block" version of Asmgenproof0
+
+    This module is largely adapted from Asmgenproof0.v of the other backends
+    It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+    It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
+Require Import Coqlib.
+Require Intv.
+Require Import AST.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Events.
+Require Import Smallstep.
+Require Import Locations.
+Require Import Machblock.
+Require Import Asmblock.
+Require Import Asmblockgen.
+Require Import Conventions1.
+Require Import Axioms.
+Require Import Asmblockprops.
+
+Module MB:=Machblock.
+Module AB:=Asmblock.
+
+(** * Agreement between Mach registers and processor registers *)
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma ireg_of_eq:
+  forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) as [[[rr1|]|]|xsp|]; inv H; auto.
+Qed.
+
+Lemma freg_of_eq:
+  forall r r', freg_of r = OK r' -> preg_of r = FR r'.
+Proof.
+  unfold freg_of; intros. destruct (preg_of r) as [[fr|]|xsp|]; inv H; auto.
+Qed.
+
+Lemma ireg_of_eq':
+  forall r r', ireg_of r = OK r' -> dreg_of r = IR r'.
+Proof.
+  unfold ireg_of; intros. destruct r; simpl in *; inv H; auto.
+Qed.
+
+Lemma freg_of_eq':
+  forall r r', freg_of r = OK r' -> dreg_of r = FR r'.
+Proof.
+  unfold freg_of; intros. destruct r; simpl in *; inv H; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+  match rl with
+  | nil => True
+  | r1 :: nil => r <> preg_of r1
+  | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+  end.
+
+Remark preg_notin_charact:
+  forall r rl,
+  preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+  induction rl; simpl; intros.
+  tauto.
+  destruct rl.
+  simpl. split. intros. intuition congruence. auto.
+  rewrite IHrl. split.
+  intros [A B]. intros. destruct H. congruence. auto.
+  auto.
+Qed.
+
+Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree {
+  agree_sp: rs#SP = sp;
+  agree_sp_def: sp <> Vundef;
+  agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r))
+}.
+
+Lemma agree_exten:
+  forall ms sp rs rs',
+  agree ms sp rs ->
+  (forall r, data_preg r = true -> rs'#r = rs#r) ->
+  agree ms sp rs'.
+Proof.
+  intros. destruct H. split; auto.
+  rewrite H0; auto. auto.
+  intros. rewrite H0; auto. apply preg_of_data.
+Qed.
+
+Lemma preg_val:
+  forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r).
+Proof.
+  intros. destruct H. auto.
+Qed.
+
+Lemma preg_vals:
+  forall ms sp rs, agree ms sp rs ->
+  forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)).
+Proof.
+  induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto.
+Qed.
+
+Lemma preg_of_injective:
+  forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
+Proof.
+  destruct r1; destruct r2; simpl; intros; reflexivity || discriminate.
+Qed.
+
+Lemma sp_val:
+  forall ms sp rs, agree ms sp rs -> sp = rs#SP.
+Proof.
+  intros. destruct H; auto.
+Qed.
+
+Lemma ireg_val:
+  forall ms sp rs r r',
+  agree ms sp rs ->
+  ireg_of r = OK r' ->
+  Val.lessdef (ms r) rs#r'.
+Proof.
+  intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma preg_of_not_X29: forall dst,
+  negb (mreg_eq dst R29) = true ->
+  DR (IR X29) <> preg_of dst.
+Proof.
+  intros. destruct dst; try discriminate.
+Qed.
+
+Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
+
+(** Preservation of register agreement under various assignments. *)
+
+Lemma agree_set_mreg:
+  forall ms sp rs r v rs',
+  agree ms sp rs ->
+  Val.lessdef v (rs'#(preg_of r)) ->
+  (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') ->
+  agree (Mach.Regmap.set r v ms) sp rs'.
+Proof.
+  intros. destruct H. split; auto.
+  rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP.
+  intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence.
+  rewrite H1. auto. apply preg_of_data.
+  red; intros; elim n. eapply preg_of_injective; eauto.
+Qed.
+
+Corollary agree_set_mreg_parallel:
+  forall ms sp rs r v v',
+  agree ms sp rs ->
+  Val.lessdef v v' ->
+  agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs).
+Proof.
+  intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto.
+Qed.
+
+Lemma agree_set_other:
+  forall ms sp rs r v,
+  agree ms sp rs ->
+  data_preg r = false ->
+  agree ms sp (rs#r <- v).
+Proof.
+  intros. apply agree_exten with rs. auto.
+  intros. apply Pregmap.gso. congruence.
+Qed.
+
+Lemma agree_nextblock:
+  forall ms sp rs b,
+  agree ms sp rs -> agree ms sp (incrPC (Ptrofs.repr (size b)) rs).
+Proof.
+  intros. unfold incrPC. apply agree_set_other. auto. auto.
+Qed.
+
+Lemma agree_set_pair:
+  forall sp p v v' ms rs,
+  agree ms sp rs ->
+  Val.lessdef v v' ->
+  agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs).
+Proof.
+  intros. destruct p; simpl.
+  - apply agree_set_mreg_parallel; auto.
+  - apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto.
+    apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_set_res:
+  forall res ms sp rs v v',
+  agree ms sp rs ->
+  Val.lessdef v v' ->
+  agree (Mach.set_res res v ms) sp (set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v' rs).
+Proof.
+  induction res; simpl; intros.
+  - eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto.
+    intros. apply Pregmap.gso; auto.
+  - auto.
+  - apply IHres2. apply IHres1. auto.
+    apply Val.hiword_lessdef; auto.
+    apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_undef_regs:
+  forall ms sp rl rs rs',
+  agree ms sp rs ->
+  (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
+  agree (Mach.undef_regs rl ms) sp rs'.
+Proof.
+  intros. destruct H. split; auto.
+  rewrite <- agree_sp0. apply H0; auto.
+  rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
+  intros. destruct (In_dec mreg_eq r rl).
+  rewrite Mach.undef_regs_same; auto.
+  rewrite Mach.undef_regs_other; auto. rewrite H0; auto.
+  apply preg_of_data.
+  rewrite preg_notin_charact. intros; red; intros. elim n.
+  exploit preg_of_injective; eauto. congruence.
+Qed.
+
+Lemma agree_set_undef_mreg:
+  forall ms sp rs r v rl rs',
+  agree ms sp rs ->
+  Val.lessdef v (rs'#(preg_of r)) ->
+  (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') ->
+  agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'.
+Proof.
+  intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto.
+  apply agree_undef_regs with rs; auto.
+  intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)).
+  congruence. auto.
+  intros. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma agree_undef_caller_save_regs:
+  forall ms sp rs,
+  agree ms sp rs ->
+  agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs).
+Proof.
+  intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split.
+  - unfold proj_sumbool; rewrite dec_eq_true. auto.
+  - auto.
+  - intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP). 
+    destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl.
+    + apply list_in_map_inv in i. destruct i as (mr & A & B). 
+      assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A.
+      apply List.filter_In in B. destruct B as [C D]. rewrite D. auto.
+    + destruct (is_callee_save r) eqn:CS; auto.
+      elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete. 
+Qed.
+
+Lemma agree_change_sp:
+  forall ms sp rs sp',
+  agree ms sp rs -> sp' <> Vundef ->
+  agree ms sp' (rs#SP <- sp').
+Proof.
+  intros. inv H. split; auto.
+  intros. rewrite Pregmap.gso; auto with asmgen.
+Qed.
+
+Remark builtin_arg_match:
+  forall ge (rs: regset) sp m a v,
+  eval_builtin_arg ge (fun r => rs (dreg_of r)) sp m a v ->
+  eval_builtin_arg ge (fun r => rs (DR r)) sp m (map_builtin_arg dreg_of a) v.
+Proof.
+  induction 1; simpl; eauto with barg. econstructor.
+Qed.
+
+Lemma builtin_args_match:
+  forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+  forall al vl, eval_builtin_args ge ms sp m al vl ->
+  exists vl', eval_builtin_args ge (fun r => rs (DR r)) sp m' (map (map_builtin_arg dreg_of) al) vl'
+           /\ Val.lessdef_list vl vl'.
+Proof.
+  induction 3; intros; simpl.
+  exists (@nil val); split; constructor.
+  exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto.
+  intros; eapply preg_val; eauto.
+  intros (v1' & A & B).
+  destruct IHlist_forall2 as [vl' [C D]].
+  exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto.
+Qed.
+
+(** Connection between Mach and Asm calling conventions for external
+    functions. *)
+
+Lemma extcall_arg_match:
+  forall ms sp rs m m' l v,
+  agree ms sp rs ->
+  Mem.extends m m' ->
+  Mach.extcall_arg ms m sp l v ->
+  exists v', extcall_arg rs m' l v' /\ Val.lessdef v v'.
+Proof.
+  intros. inv H1.
+  exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto.
+  unfold Mach.load_stack in H2.
+  exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+  rewrite (sp_val _ _ _ H) in A.
+  exists v'; split; auto.
+  econstructor. eauto. assumption.
+Qed.
+
+Lemma extcall_arg_pair_match:
+  forall ms sp rs m m' p v,
+  agree ms sp rs ->
+  Mem.extends m m' ->
+  Mach.extcall_arg_pair ms m sp p v ->
+  exists v', extcall_arg_pair rs m' p v' /\ Val.lessdef v v'.
+Proof.
+  intros. inv H1.
+  - exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto.
+  - exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1).
+    exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2).
+    exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto.
+Qed.
+
+Lemma extcall_args_match:
+  forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+  forall ll vl,
+  list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl ->
+  exists vl', list_forall2 (extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'.
+Proof.
+  induction 3; intros.
+  exists (@nil val); split. constructor. constructor.
+  exploit extcall_arg_pair_match; eauto. intros [v1' [A B]].
+  destruct IHlist_forall2 as [vl' [C D]].
+  exists (v1' :: vl'); split; constructor; auto.
+Qed.
+
+Lemma extcall_arguments_match:
+  forall ms m m' sp rs sg args,
+  agree ms sp rs -> Mem.extends m m' ->
+  Mach.extcall_arguments ms m sp sg args ->
+  exists args', extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'.
+Proof.
+  unfold Mach.extcall_arguments, extcall_arguments; intros.
+  eapply extcall_args_match; eauto.
+Qed.
+
+Lemma set_res_other:
+  forall r res v rs,
+  data_preg r = false ->
+  set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v rs r = rs r.
+Proof.
+  induction res; simpl; intros.
+  - apply Pregmap.gso. red; intros; subst r. rewrite dreg_of_data in H; discriminate.
+  - auto.
+  - rewrite IHres2, IHres1; auto.
+Qed.
+
+Lemma undef_regs_other:
+  forall r rl rs,
+  (forall r', In r' rl -> r <> r') ->
+  undef_regs rl rs r = rs r.
+Proof.
+  induction rl; simpl; intros. auto.
+  rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma undef_regs_other_2:
+  forall r rl rs,
+  preg_notin r rl ->
+  undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+  intros. apply undef_regs_other. intros.
+  exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+  rewrite preg_notin_charact in H. auto.
+Qed.
+
+Inductive code_tail: Z -> bblocks -> bblocks -> Prop :=
+  | code_tail_0: forall c,
+      code_tail 0 c c
+  | code_tail_S: forall pos bi c1 c2,
+      code_tail pos c1 c2 ->
+      code_tail (pos + (size bi)) (bi :: c1) c2.
+      
+Lemma code_tail_pos:
+  forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0.
+Proof.
+  induction 1. omega. generalize (size_positive bi); intros; omega.
+Qed.
+
+Lemma find_bblock_tail:
+  forall c1 bi c2 pos,
+  code_tail pos c1 (bi :: c2) ->
+  find_bblock pos c1 = Some bi.
+Proof.
+  induction c1; simpl; intros.
+  inversion H.  
+  destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; omega.
+  destruct (zeq pos 0). subst pos.
+  inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; omega.
+  inv H. congruence. replace (pos0 + size a - size a) with pos0 by omega.
+  eauto.
+Qed.
+
+Local Hint Resolve code_tail_0 code_tail_S: core.
+
+Lemma code_tail_next:
+  forall fn ofs c0,
+  code_tail ofs fn c0 ->
+  forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1.
+Proof.
+  induction 1; intros.
+  - subst; eauto.
+  - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto.
+    omega.
+Qed.
+
+Lemma size_blocks_pos c: 0 <= size_blocks c.
+Proof.
+  induction c as [| a l ]; simpl; try omega.
+  generalize (size_positive a); omega.
+Qed.
+
+Remark code_tail_positive:
+  forall fn ofs c,
+  code_tail ofs fn c -> 0 <= ofs.
+Proof.
+  induction 1; intros; simpl.
+  - omega.
+  - generalize (size_positive bi). omega.
+Qed.
+
+Remark code_tail_size:
+  forall fn ofs c,
+  code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c.
+Proof.
+  induction 1; intros; simpl; try omega.
+Qed.
+
+Remark code_tail_bounds fn ofs c:
+  code_tail ofs fn c -> 0 <= ofs <= size_blocks fn.
+Proof.
+  intro H; 
+  exploit code_tail_size; eauto.
+  generalize (code_tail_positive _ _ _ H), (size_blocks_pos c).
+  omega.
+Qed.
+
+Local Hint Resolve code_tail_next: core.
+
+Lemma code_tail_next_int:
+  forall fn ofs bi c,
+  size_blocks fn <= Ptrofs.max_unsigned ->
+  code_tail (Ptrofs.unsigned ofs) fn (bi :: c) ->
+  code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c.
+Proof.
+  intros. 
+  exploit code_tail_size; eauto.
+  simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c).
+  intros.
+  rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr.
+  - rewrite Ptrofs.unsigned_repr; eauto.
+    omega.
+  - rewrite Ptrofs.unsigned_repr; omega.
+Qed.
+
+(** The [find_label] function returns the code tail starting at the
+  given label.  A connection with [code_tail] is then established. *)
+
+Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks :=
+  match c with
+  | nil => None
+  | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl
+  end.
+
+(* inspired from Mach *)
+
+Lemma find_label_tail:
+  forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c.
+Proof.
+  induction c; simpl; intros. discriminate.
+  destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib.
+Qed.
+
+Lemma label_pos_code_tail:
+  forall lbl c pos c',
+  find_label lbl c = Some c' ->
+  exists pos',
+  label_pos lbl pos c = Some pos'
+  /\ code_tail (pos' - pos) c c'
+  /\ pos <= pos' <= pos + size_blocks c.
+Proof.
+  induction c.
+  simpl; intros. discriminate.
+  simpl; intros until c'.
+  case (is_label lbl a).
+  - intros. inv H. exists pos. split; auto. split.
+    replace (pos - pos) with 0 by omega. constructor. constructor; try omega.
+    generalize (size_blocks_pos c). generalize (size_positive a). omega.
+  - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]].
+  exists pos'. split. auto. split.
+  replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by omega.
+  constructor. auto. generalize (size_positive a). omega.
+Qed.
+
+(** Predictor for return addresses in generated Asm code.
+
+  The [return_address_offset] predicate defined here is used in the
+  semantics for Mach to determine the return addresses that are
+  stored in activation records. *)
+
+(** Consider a Mach function [f] and a sequence [c] of Mach instructions
+  representing the Mach code that remains to be executed after a
+  function call returns.  The predicate [return_address_offset f c ofs]
+  holds if [ofs] is the integer offset of the PPC instruction
+  following the call in the Asm code obtained by translating the
+  code of [f]. Graphically:
+<<
+     Mach function f    |--------- Mcall ---------|
+         Mach code c    |                |--------|
+                        |                 \        \
+                        |                  \        \
+                        |                   \        \
+     Asm code           |                    |--------|
+     Asm function       |------------- Pcall ---------|
+
+                        <-------- ofs ------->
+>>
+*)
+
+Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop :=
+  forall tf  tc,
+  transf_function f = OK tf ->
+  transl_blocks f c false = OK tc ->
+  code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc.
+
+Lemma transl_blocks_tail:
+  forall f c1 c2, is_tail c1 c2 ->
+  forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 ->
+  exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2.
+Proof.
+  induction 1; simpl; intros.
+  exists tc2; exists ep2; split; auto with coqlib.
+  monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B).
+  exists tc1; exists ep1; split. auto.
+  eapply is_tail_trans with x0; eauto with coqlib.
+Qed.
+
+Lemma is_tail_code_tail:
+  forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1.
+Proof.
+  induction 1; eauto.
+  destruct IHis_tail; eauto. 
+Qed.
+
+Section RETADDR_EXISTS.
+
+Hypothesis transf_function_inv:
+  forall f tf, transf_function f = OK tf ->
+  exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf).
+
+Hypothesis transf_function_len:
+  forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
+
+
+Lemma return_address_exists:
+  forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros. destruct (transf_function f) as [tf|] eqn:TF.
+  + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
+    exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
+    monadInv TR2.
+    assert (TL3: is_tail x0 (fn_blocks tf)).
+    { apply is_tail_trans with tc1; auto. 
+      apply is_tail_trans with (x++x0); auto. eapply is_tail_app.
+    }
+    exploit is_tail_code_tail. eexact TL3. intros [ofs CT].
+    exists (Ptrofs.repr ofs). red; intros.
+    rewrite Ptrofs.unsigned_repr. congruence.
+    exploit code_tail_bounds; eauto.
+    intros; apply transf_function_len in TF. omega.
+  + exists Ptrofs.zero; red; intros. congruence.
+Qed.
+
+End RETADDR_EXISTS.
+
+(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
+  within the Asmblock code generated by translating Machblock function [f],
+  and [tc] is the tail of the generated code at the position corresponding
+  to the code pointer [pc]. *)
+
+Inductive transl_code_at_pc (ge: MB.genv):
+    val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop :=
+  transl_code_at_pc_intro:
+    forall b ofs f c ep tf tc,
+    Genv.find_funct_ptr ge b = Some(Internal f) ->
+    transf_function f = Errors.OK tf ->
+    transl_blocks f c ep = OK tc ->
+    code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc ->
+    transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc.
+
+Remark code_tail_no_bigger:
+  forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat.
+Proof.
+  induction 1; simpl; omega.
+Qed.
+
+Remark code_tail_unique:
+  forall fn c pos pos',
+  code_tail pos fn c -> code_tail pos' fn c -> pos = pos'.
+Proof.
+  induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto.
+  generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+  generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+  f_equal. eauto.
+Qed.
+
+Lemma return_address_offset_correct:
+  forall ge b ofs fb f c tf tc ofs',
+  transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc ->
+  return_address_offset f c ofs' ->
+  ofs' = ofs.
+Proof.
+  intros. inv H. red in H0.
+  exploit code_tail_unique. eexact H12. eapply H0; eauto. intro.
+  rewrite <- (Ptrofs.repr_unsigned ofs).
+  rewrite <- (Ptrofs.repr_unsigned ofs').
+  congruence.
+Qed.
+
+Section STRAIGHTLINE.
+
+Variable ge: genv.
+Variable lk: aarch64_linker.
+Variable fn: function.
+
+(** Straight-line code is composed of processor instructions that execute
+  in sequence (no branches, no function calls and returns).
+  The following inductive predicate relates the machine states
+  before and after executing a straight-line sequence of instructions.
+  Instructions are taken from the first list instead of being fetched
+  from memory. *)
+
+Inductive exec_straight: list basic -> regset -> mem ->
+                         list basic -> regset -> mem -> Prop :=
+  | exec_straight_one:
+      forall i1 c rs1 m1 rs2 m2,
+      exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+      exec_straight (i1 :: c) rs1 m1 c rs2 m2
+  | exec_straight_step:
+      forall i c rs1 m1 rs2 m2 c' rs3 m3,
+      exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+      exec_straight c rs2 m2 c' rs3 m3 ->
+      exec_straight (i :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_trans:
+  forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+  exec_straight c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  induction 1; intros.
+  apply exec_straight_step with rs2 m2; auto.
+  apply exec_straight_step with rs2 m2; auto.
+Qed.
+
+Lemma exec_straight_two:
+  forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
+  exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+  exec_basic lk ge i2 rs2 m2 = Next rs3 m3 ->
+  exec_straight (i1 :: i2 :: c) rs1 m1 c rs3 m3.
+Proof.
+  intros. apply exec_straight_step with rs2 m2; auto.
+  apply exec_straight_one; auto.
+Qed.
+
+Lemma exec_straight_three:
+  forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4,
+  exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+  exec_basic lk ge i2 rs2 m2 = Next rs3 m3 ->
+  exec_basic lk ge i3 rs3 m3 = Next rs4 m4 ->
+  exec_straight (i1 :: i2 :: i3 :: c) rs1 m1 c rs4 m4.
+Proof.
+  intros. apply exec_straight_step with rs2 m2; auto.
+  eapply exec_straight_two; eauto.
+Qed.
+
+Inductive exec_straight_opt: list basic -> regset -> mem -> list basic -> regset -> mem -> Prop :=
+  | exec_straight_opt_refl: forall c rs m,
+      exec_straight_opt c rs m c rs m
+  | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2,
+      exec_straight c1 rs1 m1 c2 rs2 m2 ->
+      exec_straight_opt c1 rs1 m1 c2 rs2 m2.
+
+Remark exec_straight_opt_right:
+  forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+  exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+  destruct 1; intros. auto. eapply exec_straight_trans; eauto. 
+Qed.
+
+Lemma exec_straight_opt_step:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. inv H0.
+  - apply exec_straight_one; auto.
+  - eapply exec_straight_step; eauto.
+Qed.
+
+Lemma exec_straight_opt_step_opt:
+  forall i c rs1 m1 rs2 m2 c' rs3 m3,
+  exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+  exec_straight_opt c rs2 m2 c' rs3 m3 ->
+  exec_straight_opt (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+  intros. apply exec_straight_opt_intro. eapply exec_straight_opt_step; eauto.
+Qed.
+
+(** Like exec_straight predicate, but on blocks *)
+
+Inductive exec_straight_blocks: bblocks -> regset -> mem ->
+                                bblocks -> regset -> mem -> Prop :=
+  | exec_straight_blocks_one:
+      forall b1 c rs1 m1 rs2 m2,
+      exec_bblock lk ge fn b1 rs1 m1 E0 rs2 m2 ->
+      rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) ->
+      exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2
+  | exec_straight_blocks_step:
+      forall b c rs1 m1 rs2 m2 c' rs3 m3,
+      exec_bblock lk ge fn b rs1 m1 E0 rs2 m2 ->
+      rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) ->
+      exec_straight_blocks c rs2 m2 c' rs3 m3 ->
+      exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_blocks_trans:
+  forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+  exec_straight_blocks c1 rs1 m1 c2 rs2 m2 ->
+  exec_straight_blocks c2 rs2 m2 c3 rs3 m3 ->
+  exec_straight_blocks c1 rs1 m1 c3 rs3 m3.
+Proof.
+  induction 1; intros.
+  eapply exec_straight_blocks_step; eauto.
+  eapply exec_straight_blocks_step; eauto.
+Qed.
+
+(** Linking exec_straight with exec_straight_blocks *)
+
+Lemma exec_straight_pc:
+  forall c c' rs1 m1 rs2 m2,
+  exec_straight c rs1 m1 c' rs2 m2 ->
+  rs2 PC = rs1 PC.
+Proof.
+  induction c; intros; try (inv H; fail).
+  inv H.
+  - eapply exec_basic_instr_pc; eauto.
+  - rewrite (IHc c' rs3 m3 rs2 m2); auto.
+    erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Lemma exec_body_pc:
+  forall ge l rs1 m1 rs2 m2,
+  exec_body lk ge l rs1 m1 = Next rs2 m2 ->
+  rs2 PC = rs1 PC.
+Proof.
+  induction l.
+  - intros. inv H. auto.
+  - intros until m2. intro EXEB.
+    inv EXEB. destruct (exec_basic _ _ _ _ _) eqn:EBI; try discriminate.
+    eapply IHl in H0. rewrite H0.
+    destruct s.
+    erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+(** The following lemmas show that straight-line executions
+  (predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
+
+Lemma exec_straight_steps_1:
+  forall c rs m c' rs' m',
+  exec_straight_blocks c rs m c' rs' m' ->
+  size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+  forall b ofs,
+  rs#PC = Vptr b ofs ->
+  Genv.find_funct_ptr ge b = Some (Internal fn) ->
+  code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+  plus (step lk) ge (State rs m) E0 (State rs' m').
+Proof.
+  induction 1; intros.
+  apply plus_one.
+  econstructor; eauto.
+  eapply find_bblock_tail. eauto.
+  eapply plus_left'.
+  econstructor; eauto.
+  eapply find_bblock_tail. eauto.
+  apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))).
+  auto. rewrite H0. rewrite H3. reflexivity.
+  auto.
+  apply code_tail_next_int; auto.
+  traceEq.
+Qed.
+
+Lemma exec_straight_steps_2:
+  forall c rs m c' rs' m',
+  exec_straight_blocks c rs m c' rs' m' ->
+  size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+  forall b ofs,
+  rs#PC = Vptr b ofs ->
+  Genv.find_funct_ptr ge b = Some (Internal fn) ->
+  code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+  exists ofs',
+     rs'#PC = Vptr b ofs'
+  /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'.
+Proof.
+  induction 1; intros.
+  exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split.
+  rewrite H0. rewrite H2. auto.
+  apply code_tail_next_int; auto.
+  apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))).
+  auto. rewrite H0. rewrite H3. reflexivity. auto.
+  apply code_tail_next_int; auto.
+Qed.
+
+End STRAIGHTLINE.
+
+(** * Properties of the Machblock call stack *)
+
+Section MATCH_STACK.
+
+Variable ge: MB.genv.
+
+Inductive match_stack: list MB.stackframe -> Prop :=
+  | match_stack_nil:
+      match_stack nil
+  | match_stack_cons: forall fb sp ra c s f tf tc,
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      transl_code_at_pc ge ra fb f c false tf tc ->
+      sp <> Vundef ->
+      match_stack s ->
+      match_stack (Stackframe fb sp ra c :: s).
+
+Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef.
+Proof.
+  induction 1; simpl.
+  unfold Vnullptr; destruct Archi.ptr64; congruence.
+  auto.
+Qed.
+
+Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef.
+Proof.
+  induction 1; simpl.
+  unfold Vnullptr; destruct Archi.ptr64; congruence.
+  inv H0. congruence.
+Qed.
+
+Lemma lessdef_parent_sp:
+  forall s v,
+  match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s.
+Proof.
+  intros. inv H0. auto. exploit parent_sp_def; eauto. tauto.
+Qed.
+
+Lemma lessdef_parent_ra:
+  forall s v,
+  match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s.
+Proof.
+  intros. inv H0. auto. exploit parent_ra_def; eauto. tauto.
+Qed.
+
+End MATCH_STACK.
diff --git a/aarch64/Asmblockgenproof1.v b/aarch64/Asmblockgenproof1.v
new file mode 100644
index 00000000..61d77881
--- /dev/null
+++ b/aarch64/Asmblockgenproof1.v
@@ -0,0 +1,1955 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** * Proof of correctness for individual instructions *)
+
+Require Import Coqlib Errors Maps Zbits.
+Require Import AST Integers Floats Values Memory Globalenvs Linking.
+Require Import Op Locations Machblock Conventions Lia.
+Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
+
+Module MB := Machblock.
+Module AB := Asmblock.
+
+Section CONSTRUCTORS.
+
+Variable lk: aarch64_linker.
+Variable ge: genv.
+Variable fn: function.
+
+Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs),
+  Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address ge id ofs.
+
+Ltac Simplif :=
+  ((rewrite Pregmap.gss)
+  || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Ltac ArgsInv :=
+  repeat (match goal with
+  | [ H: Error _ = OK _ |- _ ] => discriminate
+  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+  | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  end);
+  subst;
+  repeat (match goal with
+  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+  end).
+  
+Ltac SimplEval H :=
+  match type of H with
+  | Some _ = None _ => discriminate
+  | Some _ = Some _ => inversion H; subst
+  | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
+
+Ltac TranslOpSimpl :=
+  econstructor; split;
+  [ apply exec_straight_one; reflexivity
+  | split; [ apply Val.lessdef_same; simpl; Simpl; fail | intros; simpl; Simpl; fail ] ].
+
+Ltac TranslOpSimplN :=
+  econstructor; split;
+  try apply exec_straight_one; try reflexivity; try split; try apply Val.lessdef_same;
+  Simpl; simpl; try destruct negb; Simpl; try intros; Simpl; simpl; try destruct negb; Simpl.
+
+Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
+Proof.
+  destruct r; simpl; try discriminate.
+Qed.
+Hint Resolve preg_of_iregsp_not_PC: asmgen.
+
+Lemma preg_of_not_X16: forall r, preg_of r <> X16.
+Proof.
+  destruct r; simpl; try discriminate.
+Qed.
+
+Lemma preg_of_not_X30: forall r, preg_of r <> X30.
+Proof.
+  destruct r; simpl; try discriminate.
+Qed.
+
+Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+  red; intros; subst x. elim (preg_of_not_X16 r); auto.
+  destruct d. destruct i. inv H1; auto.
+  all: discriminate.
+Qed.
+
+Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
+Proof.
+  intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X16'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X16).
+Proof.
+  intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X30: forall r x, ireg_of r = OK x -> x <> X30.
+Proof.
+  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+  red; intros; subst x. elim (preg_of_not_X30 r); auto.
+  destruct d. destruct i. inv H1; auto.
+  all: discriminate.
+Qed.
+
+Lemma ireg_of_not_X30': forall r x, ireg_of r = OK x -> IR x <> IR X30.
+Proof.
+  intros. apply ireg_of_not_X30 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X30'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X30).
+Proof.
+  intros. apply ireg_of_not_X30 in H. congruence.
+Qed.
+
+Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16' ireg_of_not_X16'': asmgen.
+Hint Resolve preg_of_not_X30 ireg_of_not_X30 ireg_of_not_X30' ireg_of_not_X30'': asmgen.
+
+Inductive wf_decomposition: list (Z * Z) -> Prop :=
+  | wf_decomp_nil:
+      wf_decomposition nil
+  | wf_decomp_cons: forall m n p l,
+      n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l ->
+      wf_decomposition ((n, p) :: l).
+
+Lemma decompose_int_wf:
+  forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p).
+Proof.
+Local Opaque Zzero_ext.
+  induction N as [ | N]; simpl; intros.
+  - constructor.
+  - set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0).
+    + apply IHN. lia.
+    + econstructor. reflexivity. lia. apply IHN; lia. 
+Qed.
+
+Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z :=
+  match l with
+  | nil => accu
+  | (n, p) :: l => recompose_int (Zinsert accu n p 16) l
+  end.
+
+Lemma decompose_int_correct:
+  forall N n p accu,
+  0 <= p ->
+  (forall i, p <= i -> Z.testbit accu i = false) ->
+  (forall i, 0 <= i < p + Z.of_nat N * 16 ->
+   Z.testbit (recompose_int accu (decompose_int N n p)) i =
+   if zlt i p then Z.testbit accu i else Z.testbit n i).
+Proof.
+  induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE.
+  - simpl. rewrite zlt_true; auto. lia.
+  - rewrite inj_S in RANGE. simpl.
+    set (frag := Zzero_ext 16 (Z.shiftr n p)).
+    assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)).
+    { unfold frag; intros. rewrite Zzero_ext_spec by lia. rewrite zlt_true by lia.
+      rewrite Z.shiftr_spec by lia. f_equal; lia. }
+    destruct (Z.eqb_spec frag 0).
+    + rewrite IHN.
+      * destruct (zlt i p). rewrite zlt_true by lia. auto.
+        destruct (zlt i (p + 16)); auto.
+        rewrite ABOVE by lia. rewrite FRAG by lia. rewrite e, Z.testbit_0_l. auto.
+      * lia.
+      * intros; apply ABOVE; lia.
+      * lia.
+    + simpl. rewrite IHN.
+      * destruct (zlt i (p + 16)).
+        ** rewrite Zinsert_spec by lia. unfold proj_sumbool.
+           rewrite zlt_true by lia.
+           destruct (zlt i p).
+           rewrite zle_false by lia. auto.
+           rewrite zle_true by lia. simpl. symmetry; apply FRAG; lia.
+        ** rewrite Z.ldiff_spec, Z.shiftl_spec by lia.
+           change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by lia.
+           rewrite zlt_false by lia. rewrite zlt_false by lia. apply andb_true_r. 
+      * lia.
+      * intros. rewrite Zinsert_spec by lia. unfold proj_sumbool.
+        rewrite zle_true by lia. rewrite zlt_false by lia. simpl.
+        apply ABOVE. lia.
+      * lia.
+Qed.
+
+Corollary decompose_int_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite decompose_int_correct. apply zlt_false; lia. 
+  lia. intros; apply Z.testbit_0_l. lia.
+Qed.
+
+Corollary decompose_notint_eqmod: forall N n,
+  eqmod (two_power_nat (N * 16)%nat)
+        (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n.
+Proof.
+  intros; apply eqmod_same_bits; intros.
+  rewrite Z.lnot_spec, decompose_int_correct.
+  rewrite zlt_false by lia. rewrite Z.lnot_spec by lia. apply negb_involutive.
+  lia. intros; apply Z.testbit_0_l. lia. lia.
+Qed.
+
+Lemma negate_decomposition_wf:
+  forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l).
+Proof.
+  induction 1; simpl; econstructor; auto.
+  instantiate (1 := (Z.lnot m)).
+  apply equal_same_bits; intros.
+  rewrite H. change 65535 with (two_p 16 - 1).
+  rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by lia.
+  destruct (zlt i 16).
+  apply xorb_true_r.
+  auto.
+Qed.
+
+Lemma Zinsert_eqmod:
+  forall n x1 x2 y p l, 0 <= p -> 0 <= l ->
+  eqmod (two_power_nat n) x1 x2 ->
+  eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l).
+Proof.
+  intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by lia.
+  destruct (zle p i && zlt i (p + l)); auto.
+  apply same_bits_eqmod with n; auto.
+Qed.
+
+Lemma Zinsert_0_l:
+  forall y p l,
+  0 <= p -> 0 <= l ->
+  Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l.
+Proof.
+  intros. apply equal_same_bits; intros.
+  rewrite Zinsert_spec by lia. unfold proj_sumbool.
+  destruct (zlt i p); [rewrite zle_false by lia|rewrite zle_true by lia]; simpl.
+  - rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto.
+  - rewrite Z.shiftl_spec by lia. 
+    destruct (zlt i (p + l)); auto.
+    rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by lia. auto.
+Qed.
+
+Lemma recompose_int_negated:
+  forall l, wf_decomposition l ->
+  forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l).
+Proof.
+  induction 1; intros accu; simpl.
+  - auto.
+  - rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. 
+    rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by lia.
+    unfold proj_sumbool.
+    destruct (zle p i); simpl; auto.
+    destruct (zlt i (p + 16)); simpl; auto.
+    change 65535 with (two_p 16 - 1).
+    rewrite Ztestbit_two_p_m1 by lia. rewrite zlt_true by lia.
+    apply xorb_true_r. 
+Qed.
+
+Lemma exec_loadimm_k_w:
+  forall (rd: ireg) k m l,
+  wf_decomposition l ->
+  forall (rs: regset) accu,
+  rs#rd = Vint (Int.repr accu) ->
+  exists rs',
+     exec_straight_opt ge lk (loadimm_k W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int accu l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  induction 1; intros rs accu ACCU; simpl.
+  - exists rs; split. apply exec_straight_opt_refl. auto.
+  - destruct (IHwf_decomposition
+                  ((rs#rd <- (insert_in_int rs#rd n p 16)))
+                  (Zinsert accu n p 16))
+    as (rs' & P & Q & R).
+    Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. 
+    apply Zinsert_eqmod. auto. lia. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
+    exists rs'; split.
+    eapply exec_straight_opt_step_opt. simpl. eauto. auto.
+    split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge lk (loadimm_z W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1.
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl.
+    intros; Simpl.
+  - set (accu0 := Zinsert 0 n p 16).
+    set (rs1 := rs#rd <- (Vint (Int.repr accu0))).
+    destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+    unfold rs1; Simpl.
+    exists rs2; split.
+    eapply exec_straight_opt_step; eauto.
+    simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+    split. exact Q. 
+    intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_w:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge lk (loadimm_n W rd l k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1.
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. 
+    intros; Simpl.
+  - set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+    set (rs1 := rs#rd <- (Vint (Int.repr accu0))).
+    destruct (exec_loadimm_k_w rd k m (negate_decomposition l) 
+                                      (negate_decomposition_wf l H1)
+                                      rs1 accu0) as (rs2 & P & Q & R).
+    unfold rs1; Simpl.
+    exists rs2; split.
+    eapply exec_straight_opt_step; eauto.
+    simpl. unfold rs1. do 5 f_equal.
+    unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+    split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+    intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm32:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge lk (loadimm32 rd n k) rs m k rs' m
+  /\ rs'#rd = Vint n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm32, loadimm; intros.
+  destruct (is_logical_imm32 n).
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto.
+    intros; Simpl.
+  - set (dz := decompose_int 2%nat (Int.unsigned n) 0).
+    set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0).
+    assert (A: Int.repr (recompose_int 0 dz) = n).
+    { transitivity (Int.repr (Int.unsigned n)).
+      apply Int.eqm_samerepr. apply decompose_int_eqmod. 
+      apply Int.repr_unsigned. }
+    assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n).
+    { transitivity (Int.repr (Int.unsigned n)).
+      apply Int.eqm_samerepr. apply decompose_notint_eqmod. 
+      apply Int.repr_unsigned. }
+    destruct Nat.leb.
+    + rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; lia.
+    + rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; lia.
+Qed.
+
+Lemma exec_loadimm_k_x:
+  forall (rd: ireg) k m l,
+  wf_decomposition l ->
+  forall (rs: regset) accu,
+  rs#rd = Vlong (Int64.repr accu) ->
+  exists rs',
+     exec_straight_opt ge lk (loadimm_k X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  induction 1; intros rs accu ACCU; simpl.
+  - exists rs; split. apply exec_straight_opt_refl. auto.
+  - destruct (IHwf_decomposition
+                  (rs#rd <- (insert_in_long rs#rd n p 16))
+                  (Zinsert accu n p 16))
+    as (rs' & P & Q & R).
+    Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. 
+    apply Zinsert_eqmod. auto. lia. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
+    exists rs'; split.
+    eapply exec_straight_opt_step_opt. simpl; eauto. auto.
+    split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge lk (loadimm_z X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_z; destruct 1.
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl.
+    intros; Simpl.
+  - set (accu0 := Zinsert 0 n p 16).
+    set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))).
+    destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+    unfold rs1; Simpl.
+    exists rs2; split.
+    eapply exec_straight_opt_step; eauto.
+    simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+    split. exact Q. 
+    intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_x:
+  forall rd l k rs m,
+  wf_decomposition l ->
+  exists rs',
+     exec_straight ge lk (loadimm_n X rd l k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm_n; destruct 1.
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. 
+    intros; Simpl.
+  - set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+    set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))).
+    destruct (exec_loadimm_k_x rd k m (negate_decomposition l) 
+                                      (negate_decomposition_wf l H1)
+                                      rs1 accu0) as (rs2 & P & Q & R).
+    unfold rs1; Simpl.
+    exists rs2; split.
+    eapply exec_straight_opt_step; eauto.
+    simpl. unfold rs1. do 5 f_equal.
+    unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+    split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+    intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm64:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge lk (loadimm64 rd n k) rs m k rs' m
+  /\ rs'#rd = Vlong n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadimm64, loadimm; intros.
+  destruct (is_logical_imm64 n).
+  - econstructor; split.
+    apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto.
+    intros; Simpl.
+  - set (dz := decompose_int 4%nat (Int64.unsigned n) 0).
+    set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0).
+    assert (A: Int64.repr (recompose_int 0 dz) = n).
+    { transitivity (Int64.repr (Int64.unsigned n)).
+      apply Int64.eqm_samerepr. apply decompose_int_eqmod. 
+      apply Int64.repr_unsigned. }
+    assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n).
+    { transitivity (Int64.repr (Int64.unsigned n)).
+      apply Int64.eqm_samerepr. apply decompose_notint_eqmod. 
+      apply Int64.repr_unsigned. }
+    destruct Nat.leb.
+    + rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; lia.
+    + rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; lia.
+Qed.
+
+(** Add immediate *)
+
+Lemma exec_addimm_aux_32:
+  forall (insn: Z -> arith_pp) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m =
+      Next (rs#rd <- (sem rs#r1 (Vint (Int.repr n)))) m) ->
+  (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
+  forall rd r1 n k rs m,
+  exists rs',
+     exec_straight ge lk (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
+  assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; lia).
+  rewrite <- (Int.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+  - econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+    split. Simpl. do 3 f_equal; lia.
+    intros; Simpl.
+  - econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+    split. Simpl. do 3 f_equal; lia.
+    intros; Simpl.
+  - econstructor; split. eapply exec_straight_two.
+    apply SEM. apply SEM. simpl. Simpl.
+    split. Simpl. simpl.  rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
+    rewrite E. auto with ints.
+    intros; Simpl.
+Qed.
+
+Lemma exec_addimm32:
+  forall (rd r1: ireg) n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (addimm32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.add rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros. unfold addimm32. set (nn := Int.neg n).
+  destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
+  - apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. 
+  - rewrite <- Val.sub_opp_add.
+    apply exec_addimm_aux_32 with (sem := Val.sub). auto.
+    intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
+  - destruct (Int.lt n Int.zero).
+    + rewrite <- Val.sub_opp_add; fold nn.
+      edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+      split. Simpl. rewrite B, C; eauto with asmgen.
+      intros; Simpl.
+    + edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+      split. Simpl. rewrite B, C; eauto with asmgen.
+      intros; Simpl.
+Qed.
+
+Lemma exec_addimm_aux_64:
+  forall (insn: Z -> arith_pp) (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m =
+      Next (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n)))) m) ->
+  (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
+  forall rd r1 n k rs m,
+  exists rs',
+     exec_straight ge lk (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+  set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
+  assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; lia).
+  rewrite <- (Int64.repr_unsigned n).
+  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+  - econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+    split. Simpl. do 3 f_equal; lia.
+    intros; Simpl.
+  - econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
+    split. Simpl. do 3 f_equal; lia.
+    intros; Simpl.
+  - econstructor; split. eapply exec_straight_two.
+    apply SEM. apply SEM. Simpl. Simpl.
+    split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
+    rewrite E. auto with ints.
+    intros; Simpl.
+Qed.
+
+Lemma exec_addimm64:
+  forall (rd r1: iregsp) n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (addimm64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Val.addl rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros. 
+  unfold addimm64. set (nn := Int64.neg n).
+  destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
+  - apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. 
+  - rewrite <- Val.subl_opp_addl.
+    apply exec_addimm_aux_64 with (sem := Val.subl). auto.
+    intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
+  - destruct (Int64.lt n Int64.zero).
+    + rewrite <- Val.subl_opp_addl; fold nn.
+      edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+      split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+      intros; Simpl.
+    + edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
+      split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+      intros; Simpl.
+Qed.
+
+(** Logical immediate *)
+
+Lemma exec_logicalimm32:
+  forall (insn1: Z -> arith_rr0)
+         (insn2: shift_op -> arith_rr0r)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m =
+      Next (rs#rd <- (sem rs##r1 (Vint (Int.repr n)))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m =
+      Next (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vint n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
+  destruct (is_logical_imm32 n).
+  - econstructor; split. 
+    apply exec_straight_one. apply SEM1.
+    split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl.
+  - edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A.  
+    apply exec_straight_one. apply SEM2.
+    split. Simpl. f_equal; auto. apply C; auto with asmgen.
+    intros; Simpl. 
+Qed.
+
+Lemma exec_logicalimm64:
+  forall (insn1: Z -> arith_rr0)
+         (insn2: shift_op -> arith_rr0r)
+         (sem: val -> val -> val),
+  (forall rd r1 n rs m,
+    exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m =
+      Next (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n)))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m =
+      Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) ->
+  forall rd r1 n k rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Vlong n)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
+  destruct (is_logical_imm64 n).
+  - econstructor; split. 
+    apply exec_straight_one. apply SEM1.
+    split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl.
+  - edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A.  
+    apply exec_straight_one. apply SEM2.
+    split. Simpl. f_equal; auto. apply C; auto with asmgen.
+    intros; Simpl. 
+Qed.
+
+(** Load address of symbol *)
+
+Lemma exec_loadsymbol: forall rd s ofs k rs m,
+  rd <> X16 \/ Archi.pic_code tt = false ->
+  exists rs',
+     exec_straight ge lk (loadsymbol rd s ofs k) rs m k rs' m
+  /\ rs'#rd = Genv.symbol_address ge s ofs
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold loadsymbol; intros. destruct (Archi.pic_code tt).
+  - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
+    + subst ofs. econstructor; split.
+      apply exec_straight_one. simpl; eauto.
+      split. Simpl. intros; Simpl.
+    + exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
+      intros (rs1 & A & B & C).
+      econstructor; split.
+      econstructor. simpl; eauto. auto. eexact A. 
+      split. simpl in B; rewrite B. Simpl. 
+      rewrite <- Genv.shift_symbol_address_64 by auto.
+      rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
+      intros. rewrite C by auto. Simpl.
+  - econstructor; split.
+    eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+    split. Simpl.
+    intros; Simpl.
+Qed.
+
+(** Shifted operands *)
+
+Remark transl_shift_not_none:
+  forall s a, transl_shift s a <> SOnone.
+Proof.
+  destruct s; intros; simpl; congruence.
+Qed.
+
+Remark or_zero_eval_shift_op_int:
+  forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto.
+Qed.
+
+Remark or_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto.
+Qed.
+
+Remark add_zero_eval_shift_op_long:
+  forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+  intros; destruct s; try congruence; destruct v; auto; simpl;
+  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto.
+Qed.
+
+Lemma transl_eval_shift: forall s v (a: amount32),
+  eval_shift_op_int v (transl_shift s a) = eval_shift s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shift': forall s v (a: amount32),
+  Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none).
+  apply transl_eval_shift.
+Qed.
+
+Lemma transl_eval_shiftl: forall s v (a: amount64),
+  eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a.
+Proof.
+  intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shiftl': forall s v (a: amount64),
+  Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+Lemma transl_eval_shiftl'': forall s v (a: amount64),
+  Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+  intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none).
+  apply transl_eval_shiftl.
+Qed.
+
+(** Zero- and Sign- extensions *)
+
+Lemma exec_move_extended_base: forall rd r1 ex k rs m,
+  exists rs',
+     exec_straight ge lk (move_extended_base rd r1 ex k) rs m k rs' m
+  /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended_base; destruct ex; econstructor;
+  (split; [apply exec_straight_one; simpl; eauto | split; [Simpl|intros;Simpl]]).
+Qed.
+
+Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m,
+  exists rs',
+     exec_straight ge lk (move_extended rd r1 ex a k) rs m k rs' m
+  /\ rs' rd = Op.eval_extend ex rs#r1 a
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero.
+  - exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B.
+    destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto.
+    auto.
+  - Local Opaque Val.addl.
+    exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto.
+    split. Simpl. rewrite B. auto. 
+    intros; Simpl.
+Qed.
+
+Lemma exec_arith_extended:
+  forall (sem: val -> val -> val)
+         (insnX: extend_op -> arith_ppp)
+         (insnS: shift_op -> arith_rr0r),
+  (forall rd r1 r2 x rs m,
+    exec_basic lk ge (PArith (PArithPPP (insnX x) rd r1 r2)) rs m =
+      Next (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x))) m) ->
+  (forall rd r1 r2 s rs m,
+    exec_basic lk ge (PArith (PArithRR0R (insnS s) rd r1 r2)) rs m =
+      Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) ->
+  forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: bcode) rs m,
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
+  /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
+  - econstructor; split. 
+    apply exec_straight_one. rewrite EX; eauto. auto.
+    split. Simpl. f_equal. destruct ex; auto.
+    intros; Simpl.
+  - exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+    rewrite ES. eauto. auto.
+    split. Simpl. unfold ir0. rewrite C by eauto with asmgen. f_equal. 
+    rewrite B. destruct ex; auto.
+    intros; Simpl.
+Qed. 
+
+(** Extended right shift *)
+
+Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold shrx32; intros. apply Val.shrx_shr_2 in H.
+  destruct (Int.eq n Int.zero) eqn:E0.
+  - econstructor; split. apply exec_straight_one; simpl; eauto. 
+    split. Simpl. subst v; auto. intros; Simpl.
+  - econstructor; split. eapply exec_straight_three.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    simpl; eauto.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    split. subst v; Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m,
+  Val.shrx rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.maketotal None) (rs' x)
+  /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r.
+Proof.
+  unfold shrx32; intros.
+  destruct (Int.eq n Int.zero) eqn:E0.
+  - econstructor; split. apply exec_straight_one; simpl; eauto. 
+    split. Simpl. auto. intros; Simpl.
+  - econstructor; split. eapply exec_straight_three.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    simpl; eauto.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_int by congruence. eauto.
+    split. Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = Some v ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+  unfold shrx64; intros. apply Val.shrxl_shrl_2 in H.
+  destruct (Int.eq n Int.zero) eqn:E.
+  - econstructor; split. apply exec_straight_one; simpl; eauto. 
+    split. Simpl. subst v; auto. intros; Simpl.
+  - econstructor; split. eapply exec_straight_three.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    simpl; eauto.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    split. subst v; Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m,
+  Val.shrxl rs#r1 (Vint n) = None ->
+  r1 <> X16 ->
+  exists rs',
+     exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.maketotal None) (rs' x)
+  /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r.
+Proof.
+  unfold shrx64; intros.
+  destruct (Int.eq n Int.zero) eqn:E.
+  - econstructor; split. apply exec_straight_one; simpl; eauto. 
+    split. Simpl. auto. intros; Simpl.
+  - econstructor; split. eapply exec_straight_three.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    simpl; eauto.
+    unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+    rewrite or_zero_eval_shift_op_long by congruence. eauto.
+    split. Simpl. intros; Simpl.
+Qed.
+
+Ltac TranslOpBase :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto ]
+  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
+           | intros; Simpl; fail ] ].
+
+(** Condition bits *)
+
+Lemma compare_int_spec: forall rs v1 v2,
+  let rs' := compare_int rs v1 v2 in
+     rs'#CN = (Val.negative (Val.sub v1 v2))
+  /\ rs'#CZ = (Val.mxcmpu Ceq v1 v2)
+  /\ rs'#CC = (Val.mxcmpu Cge v1 v2)
+  /\ rs'#CV = (Val.sub_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Lemma eval_testcond_compare_sint: forall c v1 v2 b rs,
+  Val.cmp_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2). 
+  set (rs' := compare_int rs v1 v2). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.mxcmpu; simpl. destruct c; simpl.
+  - destruct (Int.eq i i0); auto.
+  - destruct (Int.eq i i0); auto.
+  - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+  - rewrite Int.lt_sub_overflow, Int.not_lt.
+    destruct (Int.eq i i0), (Int.lt i i0); auto.
+  - rewrite Int.lt_sub_overflow, (Int.lt_not i). 
+    destruct (Int.eq i i0), (Int.lt i i0); auto.
+  - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_uint: forall c v1 v2 b rs,
+  Val.mxcmpu_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2) = Some b.
+Proof.
+  intros. generalize (compare_int_spec rs v1 v2). 
+  set (rs' := compare_int rs v1 v2). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.mxcmpu; simpl. destruct c; simpl.
+  - destruct (Int.eq i i0); auto.
+  - destruct (Int.eq i i0); auto.
+  - destruct (Int.ltu i i0); auto.
+  - rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+  - rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+  - destruct (Int.ltu i i0); auto.
+Qed.
+
+Lemma compare_long_spec: forall rs v1 v2,
+  let rs' := compare_long rs v1 v2 in
+     rs'#CN = (Val.negativel (Val.subl v1 v2))
+  /\ rs'#CZ = (Val.mxcmplu Ceq v1 v2)
+  /\ rs'#CC = (Val.mxcmplu Cge v1 v2)
+  /\ rs'#CV = (Val.subl_overflow v1 v2).
+Proof.
+  intros; unfold rs'; auto.
+Qed.
+
+Remark int64_sub_overflow:
+  forall x y,
+  Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
+          (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
+  (if Int64.lt x y then Int.one else Int.zero).
+Proof.
+  intros.
+  transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
+  rewrite <- (Int64.lt_sub_overflow x y).
+  unfold Int64.sub_overflow, Int64.negative.
+  set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
+  destruct (zle Int64.min_signed s && zle s Int64.max_signed);
+  destruct (Int64.lt (Int64.sub x y) Int64.zero);
+  auto.
+  destruct (Int64.lt x y); auto.
+Qed.
+
+Lemma eval_testcond_compare_slong: forall c v1 v2 b rs,
+  Val.cmpl_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2). 
+  set (rs' := compare_long rs v1 v2). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+  destruct v1; try discriminate; destruct v2; try discriminate.
+  simpl in H; inv H.
+  unfold Val.mxcmplu; simpl. destruct c; simpl.
+  - destruct (Int64.eq i i0); auto.
+  - destruct (Int64.eq i i0); auto.
+  - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+  - rewrite int64_sub_overflow, Int64.not_lt.
+    destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+  - rewrite int64_sub_overflow, (Int64.lt_not i). 
+    destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+  - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs,
+  Val.mxcmplu_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2) = Some b.
+Proof.
+  intros. generalize (compare_long_spec rs v1 v2). 
+  set (rs' := compare_long rs v1 v2). intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E; unfold Val.mxcmplu.
+  destruct v1; try discriminate; destruct v2; try discriminate; simpl in H.
+  - (* int-int *)
+    inv H. destruct c; simpl.
+    + destruct (Int64.eq i i0); auto.
+    + destruct (Int64.eq i i0); auto.
+    + destruct (Int64.ltu i i0); auto.
+    + rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
+    + rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
+    + destruct (Int64.ltu i i0); auto.
+  - (* int-ptr *)
+    simpl.
+    destruct (Archi.ptr64); simpl; try discriminate.
+    destruct (Int64.eq i Int64.zero); simpl; try discriminate.
+    destruct c; simpl in H; inv H; reflexivity.
+  - (* ptr-int *)
+    simpl.
+    destruct (Archi.ptr64); simpl; try discriminate.
+    destruct (Int64.eq i0 Int64.zero); try discriminate.
+    destruct c; simpl in H; inv H; reflexivity.
+  - (* ptr-ptr *)
+    simpl. 
+    destruct (eq_block b0 b1).
+    destruct (Archi.ptr64); simpl; try discriminate.
+    inv H.
+    destruct c; simpl.
+    * destruct (Ptrofs.eq i i0); auto.
+    * destruct (Ptrofs.eq i i0); auto.
+    * destruct (Ptrofs.ltu i i0); auto.
+    * rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+    * rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+    * destruct (Ptrofs.ltu i i0); auto.
+    * destruct c; simpl in H; inv H; reflexivity.
+Qed.
+
+Lemma compare_float_spec: forall rs f1 f2,
+  let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in
+     rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_float: forall c v1 v2 b rs,
+  Val.cmpf_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs,
+  option_map negb (Val.cmpf_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_float_spec rs f f0). 
+  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+  unfold Float.cmp, Float.ordered;
+  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma compare_single_spec: forall rs f1 f2,
+  let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in
+     rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2))
+  /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2))
+  /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+  /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))).
+Proof.
+  intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_single: forall c v1 v2 b rs,
+  Val.cmpfs_bool c v1 v2 = Some b ->
+  eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs,
+  option_map negb (Val.cmpfs_bool c v1 v2) = Some b ->
+  eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+  generalize (compare_single_spec rs f f0). 
+  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+  intros (B & C & D & E).
+  unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+  unfold Float32.cmp, Float32.ordered;
+  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Remark compare_float_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (compare_float rs v1 v2)#r = rs#r.
+Proof.
+  intros; unfold compare_float.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Remark compare_single_inv: forall rs v1 v2 r,
+  match r with CR _ => False | _ => True end ->
+  (compare_single rs v1 v2)#r = rs#r.
+Proof.
+  intros; unfold compare_single.
+  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Lemma transl_cond_correct:
+  forall cond args k c rs m,
+  transl_cond cond args k = OK c ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m
+  /\ (forall b,
+      eval_condition cond (map rs (map preg_of args)) m = Some b ->
+      eval_testcond (cond_for_cond cond) rs' = Some b)
+  /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+  intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
+  - (* Ccomp *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_sint; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Ccompu *)
+    econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros. apply eval_testcond_compare_uint; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Ccompimm *)
+    destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. 
+      destruct r; reflexivity || discriminate.
+    + econstructor; split.
+      apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+      split; intros. apply eval_testcond_compare_sint; auto. 
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one.
+      simpl. rewrite B, C by eauto with asmgen. eauto.
+      split; intros. apply eval_testcond_compare_sint; auto. 
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccompuimm *)
+    destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. 
+      destruct r; reflexivity || discriminate.
+    + econstructor; split.
+      apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+      split; intros. apply eval_testcond_compare_uint; auto. 
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one.
+      simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+      split; intros. apply eval_testcond_compare_uint; auto. 
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccompshift *)
+    econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Ccompushift *)
+    econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Cmaskzero *)
+    destruct (is_logical_imm32 n).
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A.
+      apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+      split; intros. apply (eval_testcond_compare_sint Ceq); auto.
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Cmasknotzero *)
+    destruct (is_logical_imm32 n).
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A.
+      apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+      split; intros. apply (eval_testcond_compare_sint Cne); auto.
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccompl *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_slong; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Ccomplu *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_ulong; auto.
+    erewrite Val.mxcmplu_bool_correct; eauto.
+    destruct r; reflexivity || discriminate.
+  - (* Ccomplimm *)
+    destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. 
+      destruct r; reflexivity || discriminate.
+    + econstructor; split.
+      apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto.
+      split; intros. apply eval_testcond_compare_slong; auto. 
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one.
+      simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+      split; intros. apply eval_testcond_compare_slong; auto. 
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccompluimm *)
+    destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+    + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+      split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto.
+      erewrite Val.mxcmplu_bool_correct; eauto.
+      destruct r; reflexivity || discriminate.
+    + econstructor; split.
+      apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto.
+      split; intros. apply eval_testcond_compare_ulong; auto.
+      erewrite Val.mxcmplu_bool_correct; eauto.
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one.
+      simpl. rewrite B, C by eauto with asmgen. eauto.
+      split; intros. apply eval_testcond_compare_ulong; auto.
+      erewrite Val.mxcmplu_bool_correct; eauto. 
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccomplshift *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. 
+    destruct r; reflexivity || discriminate.
+  - (* Ccomplushift *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto.
+    erewrite Val.mxcmplu_bool_correct; eauto. 
+    destruct r; reflexivity || discriminate.
+  - (* Cmasklzero *)
+    destruct (is_logical_imm64 n).
+    + econstructor; split. apply exec_straight_one. simpl; eauto.
+      split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A.
+      apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto.
+      split; intros. apply (eval_testcond_compare_slong Ceq); auto.
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Cmasknotzero *)
+    destruct (is_logical_imm64 n).
+    + econstructor; split. apply exec_straight_one. simpl; eauto.
+      split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
+      destruct r; reflexivity || discriminate.
+    + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A.
+      apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto.
+      split; intros. apply (eval_testcond_compare_slong Cne); auto.
+      transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+  - (* Ccompf *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_float; auto.
+    destruct r; discriminate || rewrite compare_float_inv; auto.
+  - (* Cnotcompf *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_not_float; auto.
+    destruct r; discriminate || rewrite compare_float_inv; auto.
+  - (* Ccompfzero *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_float; auto.
+    destruct r; discriminate || rewrite compare_float_inv; auto.
+  - (* Cnotcompfzero *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_not_float; auto.
+    destruct r; discriminate || rewrite compare_float_inv; auto.
+  - (* Ccompfs *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_single; auto.
+    destruct r; discriminate || rewrite compare_single_inv; auto.
+  - (* Cnotcompfs *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_not_single; auto.
+    destruct r; discriminate || rewrite compare_single_inv; auto.
+  - (* Ccompfszero *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_single; auto.
+    destruct r; discriminate || rewrite compare_single_inv; auto.
+  - (* Cnotcompfszero *)
+    econstructor; split. apply exec_straight_one. simpl; eauto.
+    split; intros. apply eval_testcond_compare_not_single; auto.
+    destruct r; discriminate || rewrite compare_single_inv; auto.
+Qed.
+
+Lemma transl_cond_correct':
+  forall cond args k c tbb rs m,
+  transl_cond cond args k = OK c ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m
+  /\ (forall b,
+      eval_condition cond (map rs (map preg_of args)) m = Some b ->
+      eval_testcond (cond_for_cond cond) (incrPC (Ptrofs.repr (size tbb)) rs') = Some b)
+  /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+  intros until m; intros TR.
+  eapply transl_cond_correct; eauto.
+Qed.
+  
+Lemma transl_cbranch_correct_1:
+  forall cond args lbl c k b m rs tbb bdy,
+  transl_cond_branch cond args lbl k = OK (bdy,c) ->
+  eval_condition cond (map rs (map preg_of args)) m = Some b ->
+  exists rs',
+     exec_straight_opt ge lk bdy rs m k rs' m
+  /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m =
+        (if b then goto_label fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m else
+        Next (incrPC (Ptrofs.repr (size tbb)) rs') m)
+  /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+intros until bdy; intros TR EV.
+  assert (Archi.ptr64 = true) as SF; auto.
+  assert (DFL:
+    transl_cond_branch_default cond args lbl k = OK (bdy,c) ->
+    exists rs',
+       exec_straight_opt ge lk bdy rs m k rs' m
+    /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m = eval_branch fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m (Some b)
+    /\ forall r, data_preg r = true -> rs'#r = rs#r).
+  {
+    unfold transl_cond_branch_default; intros. monadInv H.
+    exploit transl_cond_correct'; eauto. intros (rs' & A & B & C).
+    eexists; split.
+    apply exec_straight_opt_intro. eexact A.
+    split; auto. simpl. rewrite (B b) by auto. auto. 
+  }
+Local Opaque transl_cond transl_cond_branch_default.
+  destruct args as [ | a1 args]; simpl in TR; auto.
+  destruct args as [ | a2 args]; simpl in TR; auto.
+  destruct cond; simpl in TR; auto.
+  - (* Ccompimm *)
+    destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; 
+    apply Int.same_if_eq in N0; subst n; ArgsInv.
+    + (* Ccompimm Cne 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl. auto.
+    + (* Ccompimm Ceq 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl.
+      destruct (Int.eq i Int.zero); auto.
+  - (* Ccompuimm *)
+    destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto.
+    apply Int.same_if_eq in N0; subst n; ArgsInv.
+    + (* Ccompuimm Cne 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. apply Val.mxcmpu_bool_correct in EV.
+      unfold incrPC. Simpl. rewrite EV. auto.
+    + (* Ccompuimm Ceq 0 *)
+      monadInv TR. ArgsInv. simpl in *.
+      econstructor; split. econstructor.
+      split; auto. simpl. unfold incrPC. Simpl.
+      apply Int.same_if_eq in N0; subst.
+      rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV.
+      destruct b; auto.
+  - (* Cmaskzero *)
+    destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+    econstructor; split. econstructor.
+    split; auto. simpl.
+    erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+    unfold incrPC. Simpl.
+    rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto.
+  - (* Cmasknotzero *)
+    destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+    econstructor; split. econstructor.
+    split; auto. simpl.
+    erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+    unfold incrPC. Simpl.
+    rewrite EV. auto.
+  - (* Ccomplimm *)
+    destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; 
+    apply Int64.same_if_eq in N0; subst n; ArgsInv.
+    + (* Ccomplimm Cne 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl. auto.
+    + (* Ccomplimm Ceq 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl.
+      destruct (Int64.eq i Int64.zero); auto.
+  - (* Ccompluimm *)
+    destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
+    apply Int64.same_if_eq in N0; subst n; ArgsInv.
+    + (* Ccompluimm Cne 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. apply Val.mxcmplu_bool_correct in EV.
+      unfold incrPC. Simpl. rewrite EV. auto.
+    + (* Ccompluimm Ceq 0 *)
+      econstructor; split. econstructor.
+      split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl.
+      destruct (Int64.eq i Int64.zero); auto.
+      unfold incrPC. Simpl. rewrite EQRSX. simpl.
+      rewrite SF in *; simpl in *.
+      rewrite Int64.eq_true in *.
+      destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i) || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); simpl in *.
+      assert (b = true). { destruct b; try congruence. }
+      rewrite H; auto. discriminate.
+  - (* Cmasklzero *)
+    destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+    econstructor; split. econstructor.
+    split; auto.
+    erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+    unfold incrPC. Simpl.
+    rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto.
+  - (* Cmasklnotzero *)
+    destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+    econstructor; split. econstructor.
+    split; auto.
+    erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+    unfold incrPC. Simpl.
+    rewrite EV. auto.
+Qed.
+
+Lemma transl_op_correct:
+  forall op args res k (rs: regset) m v c,
+  transl_op op args res k = OK c ->
+  eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m
+  /\ Val.lessdef v rs'#(preg_of res)
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+Proof.
+  (* assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. } *)
+Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
+  intros until c; intros TR EV.
+  unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl;
+  try (rewrite <- transl_eval_shift; TranslOpSimpl).
+  - (* move *)
+    destruct (preg_of res), (preg_of m0); try destruct d; try destruct d0; inv TR; TranslOpSimpl.
+  - (* intconst *)
+    exploit exec_loadimm32. intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+  - (* longconst *)
+    exploit exec_loadimm64. intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+  - (* floatconst *)
+    destruct (Float.eq_dec n Float.zero).
+    + subst n. TranslOpSimpl. 
+    + TranslOpSimplN.
+  - (* singleconst *)
+    destruct (Float32.eq_dec n Float32.zero).
+    + subst n. TranslOpSimpl. 
+    + TranslOpSimplN.
+  - (* loadsymbol *)
+    exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* addrstack *)
+    exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)); try discriminate. simpl; eauto with asmgen.
+    intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. replace (DR XSP) with (SP) in B by auto. rewrite B.
+  Local Transparent Val.addl.
+    destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+    auto.
+  - (* shift *)
+    rewrite <- transl_eval_shift'. TranslOpSimpl.
+  - (* addimm *)
+    exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* mul *)
+    TranslOpBase.
+  Local Transparent Val.add.
+    destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
+  - (* andimm *)
+    exploit (exec_logicalimm32 (Pandimm W) (Pand W)).
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* orimm *)
+    exploit (exec_logicalimm32 (Porrimm W) (Porr W)). 
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* xorimm *)
+    exploit (exec_logicalimm32 (Peorimm W) (Peor W)). 
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* not *)
+    TranslOpBase.
+    destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto. 
+  - (* notshift *)
+    TranslOpBase.
+    destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. 
+  - (* shrx *)
+    assert (Val.maketotal (Val.shrx (rs x0) (Vint n)) = Val.maketotal (Val.shrx (rs x0) (Vint n))) by eauto.
+    destruct (Val.shrx) eqn:E.
+    + exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+      econstructor; split. eexact A. split. rewrite B; auto. auto.
+    + exploit (exec_shrx32_none x x0 n); eauto with asmgen.
+  - (* zero-ext *)
+    TranslOpBase.
+    destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+  - (* sign-ext *)
+    TranslOpBase.
+    destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+  - (* shlzext *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range.
+  - (* shlsext *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range.
+  - (* zextshr *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range.
+  - (* sextshr *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range.
+  - (* shiftl *)
+    rewrite <- transl_eval_shiftl'. TranslOpSimpl.
+  - (* extend *)
+    exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
+    econstructor; split. eexact A. 
+    split. rewrite B; auto. eauto with asmgen.
+  - (* addlshift *)
+    TranslOpBase.
+  - (* addext *)
+    exploit (exec_arith_extended Val.addl Paddext (Padd X)).
+    auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+    econstructor; split. eexact A. split. rewrite B; auto. auto.
+  - (* addlimm *)
+    exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
+    intros (rs' & A & B & C).
+    exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
+  - (* neglshift *)
+    TranslOpBase.
+  - (* sublshift *)
+    TranslOpBase.
+  - (* subext *)
+    exploit (exec_arith_extended Val.subl Psubext (Psub X)).
+    auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+    econstructor; split. eexact A. split. rewrite B; auto. auto.
+  - (* mull *)
+    TranslOpBase.
+    destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
+  - (* andlshift *)
+    TranslOpBase.
+  - (* andlimm *)
+    exploit (exec_logicalimm64 (Pandimm X) (Pand X)). 
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* orlshift *)
+    TranslOpBase.
+  - (* orlimm *)
+    exploit (exec_logicalimm64 (Porrimm X) (Porr X)). 
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* orlshift *)
+    TranslOpBase.
+  - (* xorlimm *)
+    exploit (exec_logicalimm64 (Peorimm X) (Peor X)). 
+    intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+    intros (rs' & A & B & C). 
+    exists rs'; split. eexact A. split. rewrite B; auto. auto.
+  - (* notl *)
+    TranslOpBase.
+    destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto.
+  - (* notlshift *)
+    TranslOpBase.
+    destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
+  - (* biclshift *)
+    TranslOpBase.
+  - (* ornlshift *)
+    TranslOpBase.
+  - (* eqvlshift *)
+    TranslOpBase.
+  - (* shrx *)
+    assert (Val.maketotal (Val.shrxl (rs x0) (Vint n)) = Val.maketotal (Val.shrxl (rs x0) (Vint n))) by eauto.
+    destruct (Val.shrxl) eqn:E.
+    + exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+      econstructor; split. eexact A. split. rewrite B; auto. auto.
+    + exploit (exec_shrx64_none x x0 n); eauto with asmgen.
+  - (* zero-ext-l *)
+    TranslOpBase.
+    destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+  - (* sign-ext-l *)
+    TranslOpBase.
+    destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+  - (* shllzext *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range.
+  - (* shllsext *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range.
+  - (* zextshrl *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range.
+  - (* sextshrl *)
+    TranslOpBase.
+    destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
+  - (* condition *)
+    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+    econstructor; split.
+    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+    rewrite (B b) by auto. auto. 
+    auto.
+    intros; Simpl.
+  - (* select *)
+    destruct (preg_of res) as [[ir|fr]|cr|] eqn:RES; monadInv TR.
+    + (* integer *)
+      generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+      exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+      split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+      rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+      rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+      auto.
+      intros; Simpl.
+    + (* FP *)
+      generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+      exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+      econstructor; split.
+      eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+      split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+      rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+      rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+      auto.
+      intros; Simpl.
+Qed.
+
+(** Translation of addressing modes, loads, stores *)
+
+Lemma transl_addressing_correct:
+  forall sz addr args (insn: Asm.addressing -> basic) k (rs: regset) m c b o,
+  transl_addressing sz addr args insn k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) ->
+  exists ad rs',
+     exec_straight_opt ge lk c rs m (insn ad :: k) rs' m
+  /\ eval_addressing lk ad rs' = Vptr b o
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros until o; intros TR EV.
+  unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
+  - (* Aindexed *)
+    destruct (offset_representable sz ofs); inv EQ0.
+    + econstructor; econstructor; split. apply exec_straight_opt_refl.
+      auto.
+    + exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C).
+      econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
+      split. simpl. rewrite B, C by eauto with asmgen. auto.
+      eauto with asmgen.
+  - (* Aindexed2 *)
+    econstructor; econstructor; split. apply exec_straight_opt_refl.
+    auto.
+  - (* Aindexed2shift *)
+    destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2.
+    + apply Int.same_if_eq in E. rewrite E.
+      econstructor; econstructor; split. apply exec_straight_opt_refl.
+      split; auto. simpl.
+      rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate.
+      unfold Val.shll. rewrite Int64.shl'_zero. auto.
+    + econstructor; econstructor; split. apply exec_straight_opt_refl.
+      auto. 
+    + econstructor; econstructor; split.
+      apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto.
+      split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
+      intros; Simpl.
+  - (* Aindexed2ext *)
+    destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
+    + econstructor; econstructor; split. apply exec_straight_opt_refl.
+      split; auto. destruct x; auto.
+    + exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
+      instantiate (1 := x0). eauto with asmgen.
+      intros (rs' & A & B & C).
+      econstructor; exists rs'; split.
+      apply exec_straight_opt_intro. eexact A. 
+      split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
+      unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
+      simpl; rewrite Int64.add_zero; auto.
+      intros. apply C; eauto with asmgen.
+  - (* Aglobal *)
+    destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
+    + econstructor; econstructor; split.
+      apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto.
+      split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
+      intros; Simpl.
+    + exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C).
+      econstructor; exists rs'; split.
+      apply exec_straight_opt_intro. eexact A.
+      split. simpl. 
+      rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. 
+      simpl in EV. congruence. 
+      auto with asmgen.
+  - (* Ainstrack *)
+    assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
+    { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. 
+      rewrite Ptrofs.of_int64_to_int64 by auto. auto. }   
+    destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
+    + econstructor; econstructor; split. apply exec_straight_opt_refl.
+      auto.
+    + exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C).
+      econstructor; exists rs'; split.
+      apply exec_straight_opt_intro. eexact A.
+      split. simpl. rewrite B, C by eauto with asmgen. auto.
+      auto with asmgen.
+Qed.
+
+Lemma transl_load_correct:
+  forall chunk addr args dst k c (rs: regset) m vaddr v,
+  transl_load chunk addr args dst k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.loadv chunk m vaddr = Some v ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+Proof.
+  intros. destruct vaddr; try discriminate.
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+                              exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m)).
+  {
+    destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq' _ _ EQ); try rewrite (freg_of_eq' _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+  assert (X: exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m =
+             Next (rs'#(preg_of dst) <- v) m).
+  { unfold exec_load_rd_a. rewrite Q, H1. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, X; eauto. Simpl. 
+  split. auto. intros; Simpl.
+Qed.
+
+Lemma transl_store_correct:
+  forall chunk addr args src k c (rs: regset) m vaddr m',
+  transl_store chunk addr args src k = OK c ->
+  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+  Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m'
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros. destruct vaddr; try discriminate. 
+  set (chunk' := match chunk with Mint8signed => Mint8unsigned
+                                | Mint16signed => Mint16unsigned
+                                | _ => chunk end).
+  assert (A: exists sz insn,
+                transl_addressing sz addr args insn k = OK c
+             /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+                              exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m)).
+  {
+    unfold chunk'; destruct chunk; monadInv H;
+    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+    do 2 econstructor; (split; [eassumption|auto]).
+  }
+  destruct A as (sz & insn & B & C).
+  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+  assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
+  { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
+    apply Mem.store_signed_unsigned_8.
+    apply Mem.store_signed_unsigned_16. }
+  assert (Y: exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m =
+             Next rs' m').
+  { unfold exec_store_rs_a. rewrite Q, R, X by auto with asmgen. auto. }
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact P.
+  apply exec_straight_one. rewrite C, Y; eauto. 
+  intros; Simpl. rewrite R; auto.
+Qed.
+
+(** Memory accesses *)
+
+Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i,
+  preg_of_iregsp base <> X16 ->
+  Val.offset_ptr rs#base ofs = Vptr b i ->
+  exists ad rs',
+     exec_straight_opt ge lk (indexed_memory_access_bc insn sz base ofs k) rs m (insn ad :: k) rs' m
+  /\ eval_addressing lk ad rs' = Vptr b i
+  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+  unfold indexed_memory_access_bc; intros.
+  assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i).
+  { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
+  destruct offset_representable.
+  - econstructor; econstructor; split. apply exec_straight_opt_refl. auto. 
+  - exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C).
+    econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
+    split. simpl. rewrite B, C; eauto; try discriminate.
+    unfold preg_of_iregsp in H. destruct base; auto. auto.
+Qed.
+
+Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
+  Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge lk (loadptr_bc base ofs dst k) rs m k rs' m
+  /\ rs'#dst = v
+  /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_load_rd_a. rewrite B, H. eauto.
+  split. Simpl. intros; Simpl.
+Qed.
+
+Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
+  Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  (DR (IR (RR1 src))) <> (DR (IR (RR1 X16))) ->
+  exists rs',
+     exec_straight ge lk (storeptr_bc src base ofs k) rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. simpl. unfold exec_store_rs_a. rewrite B, C, H. eauto.
+  discriminate. auto.
+  intros; Simpl. rewrite C; auto.
+Qed.
+
+Lemma loadind_correct:
+  forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
+  loadind base ofs ty dst k = OK c ->
+  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+  intros.
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz (insn: addressing -> ld_instruction),
+                c = indexed_memory_access_bc insn sz base ofs k
+             /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+                              exec_load_rd_a lk (chunk_of_type ty) (fun v => v) ad (dreg_of dst) rs' m)).
+  {
+    unfold loadind in H; destruct ty; destruct (dst); inv H;
+    do 2 econstructor; split; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM. unfold exec_load.
+  unfold exec_load_rd_a. rewrite B, H0. eauto. Simpl.
+  split. auto. intros; Simpl.
+Qed.
+
+Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
+  storeind src base ofs ty k = OK c ->
+  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+  preg_of_iregsp base <> IR X16 ->
+  exists rs',
+     exec_straight ge lk c rs m k rs' m'
+  /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+  intros. 
+  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+  assert (X: exists sz (insn: addressing -> st_instruction),
+                c = indexed_memory_access_bc insn sz base ofs k
+             /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+                              exec_store_rs_a lk (chunk_of_type ty) ad rs'#(preg_of src) rs' m)).
+  {
+    unfold storeind in H; destruct ty; destruct (preg_of src) as [[ir|fr]|cr|]; inv H; do 2 econstructor; split; eauto.
+  }
+  destruct X as (sz & insn & EQ & SEM). subst c.
+  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A.
+  apply exec_straight_one. rewrite SEM.
+  unfold exec_store. unfold exec_store_rs_a.
+  rewrite B, C, H0 by eauto with asmgen. eauto.
+  intros; Simpl. unfold data_preg in H2. destruct r as [[[ir|]|fr]|cr|].
+  all: rewrite C; auto; try discriminate;
+       destruct ir; try discriminate.
+Qed.
+
+Lemma make_epilogue_correct:
+  forall ge0 f m stk soff cs m' ms rs tm,
+  Mach.load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+  Mach.load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+  Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+  agree ms (Vptr stk soff) rs ->
+  Mem.extends m tm ->
+  match_stack ge0 cs ->
+  exists rs', exists tm',
+     exec_straight ge lk (make_epilogue f) rs tm nil rs' tm'
+  /\ agree ms (parent_sp cs) rs'
+  /\ Mem.extends m' tm'
+  /\ rs'#RA = parent_ra cs
+  /\ rs'#SP = parent_sp cs
+  /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r).
+Proof.
+  assert (Archi.ptr64 = true) as SF; auto.
+  intros until tm; intros LP LRA FREE AG MEXT MCS.
+  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+  exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+  exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+  unfold make_epilogue.
+  rewrite chunk_of_Tptr in *. unfold Mptr in *. rewrite SF in *.
+
+  exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
+    instantiate (2 := rs). simpl. 
+    replace (rs XSP) with (rs SP) by auto.
+    rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; discriminate.
+    
+    intros (rs1 & A1 & B1 & C1).
+  econstructor; econstructor; split.
+  eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. 
+    simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
+    rewrite FREE'. eauto.
+  split. apply agree_set_other; auto.
+  apply agree_change_sp with (Vptr stk soff).
+  apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
+  eapply parent_sp_def; eauto.
+  split. auto.
+  split. Simpl. 
+  split. Simpl. 
+  intros. Simpl.
+Qed.
+
+End CONSTRUCTORS.
diff --git a/aarch64/Asmblockprops.v b/aarch64/Asmblockprops.v
new file mode 100644
index 00000000..38fbd6d3
--- /dev/null
+++ b/aarch64/Asmblockprops.v
@@ -0,0 +1,119 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** Common definition and proofs on Asmblock required by various modules *)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Values.
+Require Import Asmblock.
+Require Import Axioms.
+
+Definition bblock_simu (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+  forall rs m rs' m' t,
+    exec_bblock lk ge f bb rs m t rs' m' -> exec_bblock lk ge f bb' rs m t rs' m'.
+    
+Definition bblock_simu_aux (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+  forall rs m,
+    bbstep lk ge f bb rs m <> Stuck ->
+    bbstep lk ge f bb rs m = bbstep lk ge f bb' rs m.
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma preg_of_data:
+  forall r, data_preg (preg_of r) = true.
+Proof.
+  intros. destruct r; reflexivity.
+Qed.
+
+Lemma dreg_of_data:
+  forall r, data_preg (dreg_of r) = true.
+Proof.
+  intros. destruct r; reflexivity.
+Qed.
+Hint Resolve preg_of_data dreg_of_data: asmgen.
+
+Lemma data_diff:
+  forall r r',
+  data_preg r = true -> data_preg r' = false -> r <> r'.
+Proof.
+  congruence.
+Qed.
+Hint Resolve data_diff: asmgen.
+
+Lemma preg_of_not_PC:
+  forall r, preg_of r <> PC.
+Proof.
+  intros. apply data_diff; auto with asmgen.
+Qed.
+
+Lemma preg_of_not_SP:
+  forall r, preg_of r <> SP.
+Proof.
+  intros. unfold preg_of; destruct r; simpl; try discriminate.
+Qed.
+
+Ltac desif :=
+  repeat match goal with
+  | [ |- context[ if ?f then _ else _ ] ] => destruct f
+  end.
+  
+Ltac decomp :=
+  repeat match goal with
+  | [ |- context[ match (?rs ?r) with | _ => _ end ] ] => destruct (rs r)
+  end.
+
+Ltac Simplif :=
+  ((desif)
+  || (try unfold compare_long)
+  || (try unfold compare_int)
+  || (try unfold compare_float)
+  || (try unfold compare_single)
+  || decomp
+  || (rewrite Pregmap.gss)
+  || (rewrite Pregmap.gso by eauto with asmgen)
+  ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Section EPC.
+
+Variable lk: aarch64_linker.
+
+(* For Asmblockgenproof0 *)
+
+Theorem exec_basic_instr_pc:
+  forall ge b rs1 m1 rs2 m2,
+  exec_basic lk ge b rs1 m1 = Next rs2 m2 ->
+  rs2 PC = rs1 PC.
+Proof.
+  intros. destruct b; try destruct i; try destruct i; try destruct ld; try destruct ld;
+  try destruct st; try destruct st; try destruct a.
+  all: try (inv H; simpl in *; auto; Simpl).
+  all: try (try unfold exec_load_rd_a in H1; try destruct (Mem.loadv _ _ _); inv H1; Simpl).
+  all: try (try unfold exec_load_double in H0; destruct (Mem.loadv _ _ _); simpl in *;
+            try destruct (Mem.loadv _ _ _); simpl in *; inv H0; Simpl).
+  all: try (try unfold exec_store_rs_a in H0; try destruct (Mem.storev _ _ _); inv H0; auto; Simpl).
+  all: try (try unfold exec_store_double in H1; destruct (Mem.storev _ _ _); simpl in *;
+            try destruct (Mem.storev _ _ _); simpl in *; inv H1; auto; Simpl).
+  - (* alloc *)
+    destruct (Mem.alloc _ _ _); destruct (Mem.store _ _ _); inv H1; auto; Simpl.
+  - (* free *)
+    destruct (rs1 SP); try discriminate; destruct (Mem.free _ _ _ _); inv H1; Simpl.
+Qed.
+
+End EPC.
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index b0787d0a..8187e077 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -34,13 +34,13 @@ let _m1 = Z.of_sint (-1)
 (* Emit instruction sequences that set or offset a register by a constant. *)
 
 let expand_loadimm32 (dst: ireg) n =
-  List.iter emit (Asmgen.loadimm32 dst n [])
+  List.iter emit (Asmgen.Asmgen_expand.loadimm32 dst n [])
 
 let expand_addimm64 (dst: iregsp) (src: iregsp) n =
-  List.iter emit (Asmgen.addimm64 dst src n [])
+  List.iter emit (Asmgen.Asmgen_expand.addimm64 dst src n [])
 
 let expand_storeptr (src: ireg) (base: iregsp) ofs =
-  List.iter emit (Asmgen.storeptr src base ofs [])
+  List.iter emit (Asmgen.Asmgen_expand.storeptr src base ofs [])
 
 (* Handling of varargs *)
 
@@ -73,8 +73,8 @@ let save_parameter_registers ir fr =
   while !i < 8 do
     let pos = 8*16 + !i*8 in
     if !i land 1 = 0 then begin
-      emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1),
-                 ADimm(XSP, Z.of_uint pos)));
+      emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1),
+                 Mint64, Mint64, ADimm(XSP, Z.of_uint pos)));
       i := !i + 2
     end else begin
       emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
@@ -132,9 +132,9 @@ let expand_builtin_va_start r =
 let expand_annot_val kind txt targ args res =
   emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
   match args, res with
-  | [BA(IR src)], BR(IR dst) ->
-     if dst <> src then emit (Pmov (RR1 dst, RR1 src))
-  | [BA(FR src)], BR(FR dst) ->
+  | [BA(DR(IR src))], BR(DR(IR dst)) ->
+     if dst <> src then emit (Pmov (dst, src))
+  | [BA(DR(FR src))], BR(DR(FR dst)) ->
      if dst <> src then emit (Pfmov (dst, src))
   | _, _ ->
      raise (Error "ill-formed __builtin_annot_val")
@@ -152,8 +152,8 @@ let offset_in_range ofs =
   
 let memcpy_small_arg sz arg tmp =
   match arg with
-  | BA (IR r) ->
-      (RR1 r, _0)
+  | BA (DR(IR r)) ->
+      (r, _0)
   | BA_addrstack ofs ->
       if offset_in_range ofs
       && offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
@@ -164,13 +164,13 @@ let memcpy_small_arg sz arg tmp =
 
 let expand_builtin_memcpy_small sz al src dst =
   let (tsrc, tdst) =
-    if dst <> BA (IR X17) then (X17, X29) else (X29, X17) in
+    if dst <> BA (DR(IR(RR1 X17))) then (X17, X29) else (X29, X17) in
   let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
   let (rdst, odst) = memcpy_small_arg sz dst tdst in
   let rec copy osrc odst sz =
     if sz >= 16 then begin
-      emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
-      emit (Pstp(X16, X30, ADimm(rdst, odst)));
+      emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc)));
+      emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst)));
       copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
     end
     else if sz >= 8 then begin
@@ -197,7 +197,7 @@ let expand_builtin_memcpy_small sz al src dst =
 
 let memcpy_big_arg arg tmp =
   match arg with
-  | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r))
+  | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r))
   | BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
   | _ -> assert false
 
@@ -208,8 +208,8 @@ let expand_builtin_memcpy_big sz al src dst =
   let lbl = new_label () in
   expand_loadimm32 X15 (Z.of_uint (sz / 16));
   emit (Plabel lbl);
-  emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
-  emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
+  emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16)));
+  emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16)));
   emit (Psubimm(W, RR1 X15, RR1 X15, _1));
   emit (Pcbnz(W, X15, lbl));
   if sz mod 16 >= 8 then begin
@@ -241,29 +241,29 @@ let expand_builtin_memcpy  sz al args =
 let expand_builtin_vload_common chunk base ofs res =
   let addr = ADimm(base, ofs) in
   match chunk, res with
-  | Mint8unsigned, BR(IR res) ->
+  | Mint8unsigned, BR(DR(IR(RR1 res))) ->
      emit (Pldrb(W, res, addr))
-  | Mint8signed, BR(IR res) ->
+  | Mint8signed, BR(DR(IR(RR1 res))) ->
      emit (Pldrsb(W, res, addr))
-  | Mint16unsigned, BR(IR res) ->
+  | Mint16unsigned, BR(DR(IR(RR1 res))) ->
      emit (Pldrh(W, res, addr))
-  | Mint16signed, BR(IR res) ->
+  | Mint16signed, BR(DR(IR(RR1 res))) ->
      emit (Pldrsh(W, res, addr))
-  | Mint32, BR(IR res) ->
+  | Mint32, BR(DR(IR(RR1 res))) ->
      emit (Pldrw(res, addr))
-  | Mint64, BR(IR res) ->
+  | Mint64, BR(DR(IR(RR1 res))) ->
      emit (Pldrx(res, addr))
-  | Mfloat32, BR(FR res) ->
+  | Mfloat32, BR(DR(FR res)) ->
      emit (Pldrs(res, addr))
-  | Mfloat64, BR(FR res) ->
+  | Mfloat64, BR(DR(FR res)) ->
      emit (Pldrd(res, addr))
   | _ ->
      assert false
 
 let expand_builtin_vload chunk args res =
   match args with
-  | [BA(IR addr)] ->
-      expand_builtin_vload_common chunk (RR1 addr) _0 res
+  | [BA(DR(IR addr))] ->
+      expand_builtin_vload_common chunk addr _0 res
   | [BA_addrstack ofs] ->
       if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
         expand_builtin_vload_common chunk XSP ofs res
@@ -271,11 +271,11 @@ let expand_builtin_vload chunk args res =
         expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
         expand_builtin_vload_common chunk (RR1 X16) _0 res
       end
-  | [BA_addptr(BA(IR addr), BA_long ofs)] ->
+  | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] ->
       if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
-        expand_builtin_vload_common chunk (RR1 addr) ofs res
+        expand_builtin_vload_common chunk addr ofs res
       else begin
-        expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+        expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
         expand_builtin_vload_common chunk (RR1 X16) _0 res
       end
   | _ ->
@@ -284,25 +284,25 @@ let expand_builtin_vload chunk args res =
 let expand_builtin_vstore_common chunk base ofs src =
   let addr = ADimm(base, ofs) in
   match chunk, src with
-  | (Mint8signed | Mint8unsigned), BA(IR src) ->
+  | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) ->
      emit (Pstrb(src, addr))
-  | (Mint16signed | Mint16unsigned), BA(IR src) ->
+  | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) ->
      emit (Pstrh(src, addr))
-  | Mint32, BA(IR src) ->
+  | Mint32, BA(DR(IR(RR1 src))) ->
      emit (Pstrw(src, addr))
-  | Mint64, BA(IR src) ->
+  | Mint64, BA(DR(IR(RR1 src))) ->
      emit (Pstrx(src, addr))
-  | Mfloat32, BA(FR src) ->
+  | Mfloat32, BA(DR(FR src)) ->
      emit (Pstrs(src, addr))
-  | Mfloat64, BA(FR src) ->
+  | Mfloat64, BA(DR(FR src)) ->
      emit (Pstrd(src, addr))
   | _ ->
      assert false
 
 let expand_builtin_vstore chunk args =
   match args with
-  | [BA(IR addr); src] ->
-      expand_builtin_vstore_common chunk (RR1 addr) _0 src
+  | [BA(DR(IR addr)); src] ->
+      expand_builtin_vstore_common chunk addr _0 src
   | [BA_addrstack ofs; src] ->
       if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
         expand_builtin_vstore_common chunk XSP ofs src
@@ -310,11 +310,11 @@ let expand_builtin_vstore chunk args =
         expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
         expand_builtin_vstore_common chunk (RR1 X16) _0 src
       end
-  | [BA_addptr(BA(IR addr), BA_long ofs); src] ->
+  | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] ->
       if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
-        expand_builtin_vstore_common chunk (RR1 addr) ofs src
+        expand_builtin_vstore_common chunk addr ofs src
       else begin
-        expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+        expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
         expand_builtin_vstore_common chunk (RR1 X16) _0 src
       end
   | _ ->
@@ -330,37 +330,47 @@ let expand_builtin_inline name args res =
   | "__builtin_nop", [], _ ->
      emit Pnop
   (* Byte swap *)
-  | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
+  | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Prev(W, res, a1))
-  | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
+  | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Prev(X, res, a1))
-  | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+  | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Prev16(W, res, a1));
      emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
-  (* Count leading zeros and leading sign bits *)
-  | "__builtin_clz",  [BA(IR a1)], BR(IR res) ->
+  (* Count leading zeros, leading sign bits, trailing zeros *)
+  | "__builtin_clz",  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pclz(W, res, a1))
-  | ("__builtin_clzl" | "__builtin_clzll"),  [BA(IR a1)], BR(IR res) ->
+  | ("__builtin_clzl" | "__builtin_clzll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pclz(X, res, a1)) 
-  | "__builtin_cls",  [BA(IR a1)], BR(IR res) ->
+  | "__builtin_cls",  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pcls(W, res, a1))
-  | ("__builtin_clsl" | "__builtin_clsll"),  [BA(IR a1)], BR(IR res) ->
+  | ("__builtin_clsl" | "__builtin_clsll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
      emit (Pcls(X, res, a1))
+  | "__builtin_ctz",  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+     emit (Prbit(W, res, a1));
+     emit (Pclz(W, res, res))
+  | ("__builtin_ctzl" | "__builtin_ctzll"),  [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+     emit (Prbit(X, res, a1));
+     emit (Pclz(X, res, res))
  (* Float arithmetic *)
-  | "__builtin_fabs",  [BA(FR a1)], BR(FR res) ->
+  | "__builtin_fabs",  [BA(DR(FR a1))], BR(DR(FR res)) ->
      emit (Pfabs(D, res, a1))
-  | "__builtin_fsqrt",  [BA(FR a1)], BR(FR res) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"),  [BA(DR(FR a1))], BR(DR(FR res)) ->
      emit (Pfsqrt(D, res, a1))
-  | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+  | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfmadd(D, res, a1, a2, a3))
-  | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+  | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfmsub(D, res, a1, a2, a3))
-  | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+  | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfnmadd(D, res, a1, a2, a3))
-  | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+  | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
       emit (Pfnmsub(D, res, a1, a2, a3))
+  | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+      emit (Pfmax (D, res, a1, a2))
+  | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+      emit (Pfmin (D, res, a1, a2))
   (* Vararg *)
-  | "__builtin_va_start", [BA(IR a)], _ ->
+  | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
       expand_builtin_va_start a
   (* Catch-all *)
   | _ ->
@@ -427,9 +437,9 @@ let float_reg_to_dwarf = function
   | D30 -> 94 | D31 -> 95
 
 let preg_to_dwarf = function
-   | IR r -> int_reg_to_dwarf r
-   | FR r -> float_reg_to_dwarf r
-   | SP -> 31
+   | DR(IR(RR1 r)) -> int_reg_to_dwarf r
+   | DR(FR r) -> float_reg_to_dwarf r
+   | DR(IR(XSP)) -> 31
    | _ -> assert false
 
 let expand_function id fn =
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index 024c9a17..45205158 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -1,167 +1,47 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*         Xavier Leroy, Collège de France and INRIA Paris             *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Translation from Mach to AArch64. *)
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Justus Fasse       UGA, VERIMAG                   *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
 
 Require Import Recdef Coqlib Zwf Zbits.
 Require Import Errors AST Integers Floats Op.
-Require Import Locations Mach Asm.
-
-Local Open Scope string_scope.
-Local Open Scope list_scope.
-Local Open Scope error_monad_scope.
-
-(** Alignment check for symbols *)
-
-Parameter symbol_is_aligned : ident -> Z -> bool.
-(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
-
-(** Extracting integer or float registers. *)
-
-Definition ireg_of (r: mreg) : res ireg :=
-  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgen.ireg_of") end.
-
-Definition freg_of (r: mreg) : res freg :=
-  match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
-
-(** Recognition of immediate arguments for logical integer operations.*)
-
-(** Valid immediate arguments are repetitions of a bit pattern [B]
-  of length [e] = 2, 4, 8, 16, 32 or 64.
-  The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
-  but must not be all zeros or all ones. *)
-
-(** The following automaton recognizes [0*1*0*|1*0*1*].
-<<
-               0          1          0
-              / \        / \        / \
-              \ /        \ /        \ /
-        -0--> [B] --1--> [D] --0--> [F]
-       /
-     [A]
-       \
-        -1--> [C] --0--> [E] --1--> [G]
-              / \        / \        / \
-              \ /        \ /        \ /
-               1          0          1
->>
-*)
-
-Module Automaton.
-
-Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
-
-Definition start := SA.
-
-Definition next (s: state) (b: bool) :=
-  match s, b with
-    | SA,false => SB      | SA,true => SC
-    | SB,false => SB      | SB,true => SD
-    | SC,false => SE      | SC,true => SC
-    | SD,false => SF      | SD,true => SD
-    | SE,false => SE      | SE,true => SG
-    | SF,false => SF      | SF,true => Sbad
-    | SG,false => Sbad    | SG,true => SG
-    | Sbad,_ => Sbad
-  end.
-
-Definition accepting (s: state) :=
-  match s with
-  | SA | SB | SC | SD | SE | SF | SG => true
-  | Sbad => false
-  end.
-
-Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
-  match len with
-  | Datatypes.O => accepting s
-  | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
-  end.
-
-End Automaton.
-
-(** The following function determines the candidate length [e],
-    ensuring that [x] is a repetition [BB...B] 
-    of a bit pattern [B] of length [e]. *)
-
-Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
-  (** [test n] checks that the low [2n] bits of [x] are of the
-      form [BB], that is, two occurrences of the same [n] bits *)
-  let test (n: Z) : bool :=
-    Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
-  (** If [test n] fails, we know that the candidate length [e] is
-      at least [2n].  Hence we test with decreasing values of [n]:
-      32, 16, 8, 4, 2. *)
-  if sixtyfour && negb (test 32) then 64%nat
-  else if negb (test 16) then 32%nat
-  else if negb (test 8) then 16%nat
-  else if negb (test 4) then 8%nat
-  else if negb (test 2) then 4%nat
-  else 2%nat.
-
-(** A valid logical immediate is 
-- neither [0] nor [-1];
-- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
-- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
-*)
-
-Definition is_logical_imm32 (x: int) : bool :=
-  negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
-  Automaton.run (logical_imm_length (Int.unsigned x) false)
-                Automaton.start (Int.unsigned x).
-
-Definition is_logical_imm64 (x: int64) : bool :=
-  negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
-  Automaton.run (logical_imm_length (Int64.unsigned x) true)
-                Automaton.start (Int64.unsigned x).
+Require Import Locations Compopts.
+Require Import Mach Asm Asmblock Asmblockgen Machblockgen PostpassScheduling.
 
-(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *)
 
-Definition is_arith_imm32 (x: int) : bool :=
-  Int.eq x (Int.zero_ext 12 x)
-  || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)).
-
-Definition is_arith_imm64 (x: int64) : bool :=
-  Int64.eq x (Int64.zero_ext 12 x)
-  || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)).
+Local Open Scope error_monad_scope.
 
-(** Decompose integer literals into 16-bit fragments *)
+(** Functions called by the Asmexpand ocaml file, inspired and adapted from Asmblockgen.v *)
 
-Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) :=
-  match N with
-  | Datatypes.O => nil
-  | Datatypes.S N =>
-    let frag := Zzero_ext 16 (Z.shiftr n p) in
-    if Z.eqb frag 0 then
-      decompose_int N n (p + 16)
-    else
-      (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16)
-  end.
+Module Asmgen_expand.
 
-Definition negate_decomposition (l: list (Z * Z)) :=
-  List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l.
+(* Load immediate *)
 
 Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
-  List.fold_right (fun np k => Pmovk sz rd (fst np) (snd np) :: k) k l.
+  List.fold_right (fun np k => Asm.Pmovk sz rd (fst np) (snd np) :: k) k l.
 
 Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
   match l with
-  | nil => Pmovz sz rd 0 0 :: k
-  | (n1, p1) :: l => Pmovz sz rd n1 p1 :: loadimm_k sz rd l k
+  | nil => Asm.Pmovz sz rd 0 0 :: k
+  | (n1, p1) :: l => Asm.Pmovz sz rd n1 p1 :: loadimm_k sz rd l k
   end.
 
 Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
   match l with
-  | nil => Pmovn sz rd 0 0 :: k
-  | (n1, p1) :: l => Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k
+  | nil => Asm.Pmovn sz rd 0 0 :: k
+  | (n1, p1) :: l => Asm.Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k
   end.
 
 Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code :=
@@ -174,15 +54,15 @@ Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code :=
 
 Definition loadimm32 (rd: ireg) (n: int) (k: code) : code :=
   if is_logical_imm32 n
-  then Porrimm W rd XZR (Int.unsigned n) :: k
+  then Asm.Porrimm W rd XZR (Int.unsigned n) :: k
   else loadimm W rd (Int.unsigned n) k.
 
 Definition loadimm64 (rd: ireg) (n: int64) (k: code) : code :=
   if is_logical_imm64 n
-  then Porrimm X rd XZR (Int64.unsigned n) :: k
+  then Asm.Porrimm X rd XZR (Int64.unsigned n) :: k
   else loadimm X rd (Int64.unsigned n) k.
 
-(** Add immediate *)
+(* Add immediate *)
 
 Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction)
                         (rd r1: iregsp) (n: Z) (k: code) :=
@@ -195,978 +75,393 @@ Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction)
   else
     insn rd r1 nhi :: insn rd rd nlo :: k.
 
-Definition addimm32 (rd r1: ireg) (n: int) (k: code) : code :=
-  let m := Int.neg n in
-  if Int.eq n (Int.zero_ext 24 n) then
-    addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k
-  else if Int.eq m (Int.zero_ext 24 m) then
-    addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k
-  else if Int.lt n Int.zero then
-    loadimm32 X16 m (Psub W rd r1 X16 SOnone :: k)
-  else
-    loadimm32 X16 n (Padd W rd r1 X16 SOnone :: k).
-
 Definition addimm64 (rd r1: iregsp) (n: int64) (k: code) : code :=
   let m := Int64.neg n in
   if Int64.eq n (Int64.zero_ext 24 n) then
-    addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k
+    addimm_aux (Asm.Paddimm X) rd r1 (Int64.unsigned n) k
   else if Int64.eq m (Int64.zero_ext 24 m) then
-    addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k
+    addimm_aux (Asm.Psubimm X) rd r1 (Int64.unsigned m) k
   else if Int64.lt n Int64.zero then
-    loadimm64 X16 m (Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
+    loadimm64 X16 m (Asm.Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
   else
-    loadimm64 X16 n (Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
-
-(** Logical immediate *)
+    loadimm64 X16 n (Asm.Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
 
-Definition logicalimm32
-              (insn1: ireg -> ireg0 -> Z -> instruction)
-              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
-              (rd r1: ireg) (n: int) (k: code) : code :=
-  if is_logical_imm32 n
-  then insn1 rd r1 (Int.unsigned n) :: k
-  else loadimm32 X16 n (insn2 rd r1 X16 SOnone :: k).
-
-Definition logicalimm64
-              (insn1: ireg -> ireg0 -> Z -> instruction)
-              (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
-              (rd r1: ireg) (n: int64) (k: code) : code :=
-  if is_logical_imm64 n
-  then insn1 rd r1 (Int64.unsigned n) :: k
-  else loadimm64 X16 n (insn2 rd r1 X16 SOnone :: k).
-
-(** Sign- or zero-extended arithmetic *)
-
-Definition transl_extension (ex: extension) (a: int) : extend_op :=
-  match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
-
-Definition move_extended_base
-              (rd: ireg) (r1: ireg) (ex: extension) (k: code) : code :=
-  match ex with
-  | Xsgn32 => Pcvtsw2x rd r1 :: k
-  | Xuns32 => Pcvtuw2x rd r1 :: k
-  end.
+(** Register-indexed stores *)
 
-Definition move_extended
-              (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: code) : code :=
-  if Int.eq a Int.zero then
-    move_extended_base rd r1 ex k
-  else
-    move_extended_base rd r1 ex (Padd X rd XZR rd (SOlsl a) :: k).
-
-Definition arith_extended 
-              (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
-              (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction)
-              (rd r1 r2: ireg) (ex: extension) (a: int) (k: code) : code :=
-  if Int.ltu a (Int.repr 5) then
-    insnX rd r1 r2 (transl_extension ex a) :: k
-  else
-    move_extended_base X16 r2 ex (insnS rd r1 X16 (SOlsl a) :: k).
-
-(** Extended right shift *)
-
-Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code :=
-  if Int.eq n Int.zero then
-    Pmov rd r1 :: k
-  else if Int.eq n Int.one then
-         Padd W X16 r1 r1 (SOlsr (Int.repr 31)) ::
-         Porr W rd XZR X16 (SOasr n) :: k
-       else
-         Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
-         Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
-         Porr W rd XZR X16 (SOasr n) :: k.
-
-Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code :=
-  if Int.eq n Int.zero then
-    Pmov rd r1 :: k
-  else if Int.eq n Int.one then
-         Padd X X16 r1 r1 (SOlsr (Int.repr 63)) ::
-         Porr X rd XZR X16 (SOasr n) :: k
-       else
-         Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
-         Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
-         Porr X rd XZR X16 (SOasr n) :: k.
-
-(** Load the address [id + ofs] in [rd] *)
-
-Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code :=
-  if Archi.pic_code tt then
-    if Ptrofs.eq ofs Ptrofs.zero then
-      Ploadsymbol rd id :: k
-    else
-      Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
-  else
-    Padrp rd id ofs :: Paddadr rd rd id ofs :: k.
-
-(** Translate a shifted operand *)
-
-Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op :=
-  match s with
-  | Slsl => SOlsl a
-  | Slsr => SOlsr a
-  | Sasr => SOasr a
-  | Sror => SOror a
-  end.
+Definition indexed_memory_access (insn: Asm.addressing -> instruction)
+                                 (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) :=
+  let ofs := Ptrofs.to_int64 ofs in
+  if offset_representable sz ofs
+  then insn (ADimm base ofs) :: k
+  else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
 
-(** Translation of a condition.  Prepends to [k] the instructions
-  that evaluate the condition and leave its boolean result in one of
-  the bits of the condition register.  The bit in question is
-  determined by the [crbit_for_cond] function. *)
+Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
+  indexed_memory_access (Asm.Pstrx src) 8 base ofs k.
 
-Definition transl_cond
-              (cond: condition) (args: list mreg) (k: code) :=
-  match cond, args with
-  | (Ccomp c | Ccompu c), a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pcmp W r1 r2 SOnone :: k)
-  | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pcmp W r1 r2 (transl_shift s a) :: k)
-  | (Ccompimm c n | Ccompuimm c n), a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if is_arith_imm32 n then
-            Pcmpimm W r1 (Int.unsigned n) :: k
-          else if is_arith_imm32 (Int.neg n) then
-            Pcmnimm W r1 (Int.unsigned (Int.neg n)) :: k
-          else
-            loadimm32 X16 n (Pcmp W r1 X16 SOnone :: k))
-  | (Cmaskzero n | Cmasknotzero n), a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if is_logical_imm32 n then
-            Ptstimm W r1 (Int.unsigned n) :: k
-          else
-            loadimm32 X16 n (Ptst W r1 X16 SOnone :: k))
-  | (Ccompl c | Ccomplu c), a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pcmp X r1 r2 SOnone :: k)
-  | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pcmp X r1 r2 (transl_shift s a) :: k)
-  | (Ccomplimm c n | Ccompluimm c n), a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if is_arith_imm64 n then
-            Pcmpimm X r1 (Int64.unsigned n) :: k
-          else if is_arith_imm64 (Int64.neg n) then
-            Pcmnimm X r1 (Int64.unsigned (Int64.neg n)) :: k
-          else
-            loadimm64 X16 n (Pcmp X r1 X16 SOnone :: k))
-  | (Cmasklzero n | Cmasklnotzero n), a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if is_logical_imm64 n then
-            Ptstimm X r1 (Int64.unsigned n) :: k
-          else
-            loadimm64 X16 n (Ptst X r1 X16 SOnone :: k))
-  | Ccompf cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2;
-      OK (Pfcmp D r1 r2 :: k)
-  | Cnotcompf cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2;
-      OK (Pfcmp D r1 r2 :: k)
-  | Ccompfzero cmp, a1 :: nil =>
-      do r1 <- freg_of a1;
-      OK (Pfcmp0 D r1 :: k)
-  | Cnotcompfzero cmp, a1 :: nil =>
-      do r1 <- freg_of a1;
-      OK (Pfcmp0 D r1 :: k)
-  | Ccompfs cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2;
-      OK (Pfcmp S r1 r2 :: k)
-  | Cnotcompfs cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2;
-      OK (Pfcmp S r1 r2 :: k)
-  | Ccompfszero cmp, a1 :: nil =>
-      do r1 <- freg_of a1;
-      OK (Pfcmp0 S r1 :: k)
-  | Cnotcompfszero cmp, a1 :: nil =>
-      do r1 <- freg_of a1;
-      OK (Pfcmp0 S r1 :: k)
-  | _, _ =>
-      Error(msg "Asmgen.transl_cond")
-  end.
+End Asmgen_expand.
 
-Definition cond_for_signed_cmp (cmp: comparison) :=
-  match cmp with
-  | Ceq => TCeq
-  | Cne => TCne
-  | Clt => TClt
-  | Cle => TCle
-  | Cgt => TCgt
-  | Cge => TCge
-  end.
+(** * Translation from Asmblock to assembly language
+      Inspired from the KVX backend (see kvx/Asm.v and kvx/Asmgen.v) *)
 
-Definition cond_for_unsigned_cmp (cmp: comparison) :=
-  match cmp with
-  | Ceq => TCeq
-  | Cne => TCne
-  | Clt => TClo
-  | Cle => TCls
-  | Cgt => TChi
-  | Cge => TChs
-  end.
+Module Asmblock_TRANSF.
+(* STUB *)
 
-Definition cond_for_float_cmp (cmp: comparison) :=
-  match cmp with
-  | Ceq => TCeq
-  | Cne => TCne
-  | Clt => TCmi
-  | Cle => TCls
-  | Cgt => TCgt
-  | Cge => TCge
+Definition ireg_of_preg (p : Asm.preg) : res ireg :=
+  match p with
+  | DR (IR (RR1 r)) => OK r
+  | _ => Error (msg "Asmgen.ireg_of_preg")
   end.
 
-Definition cond_for_float_not_cmp (cmp: comparison) :=
-  match cmp with
-  | Ceq => TCne
-  | Cne => TCeq
-  | Clt => TCpl
-  | Cle => TChi
-  | Cgt => TCle
-  | Cge => TClt
+Definition freg_of_preg (p : Asm.preg) : res freg :=
+  match p with
+  | DR (FR r) => OK r
+  | _ => Error (msg "Asmgen.freg_of_preg")
   end.
 
-Definition cond_for_cond (cond: condition) :=
-  match cond with
-  | Ccomp cmp => cond_for_signed_cmp cmp
-  | Ccompu cmp => cond_for_unsigned_cmp cmp
-  | Ccompshift cmp s a => cond_for_signed_cmp cmp
-  | Ccompushift cmp s a => cond_for_unsigned_cmp cmp
-  | Ccompimm cmp n => cond_for_signed_cmp cmp
-  | Ccompuimm cmp n => cond_for_unsigned_cmp cmp
-  | Cmaskzero n => TCeq
-  | Cmasknotzero n => TCne
-  | Ccompl cmp => cond_for_signed_cmp cmp
-  | Ccomplu cmp => cond_for_unsigned_cmp cmp
-  | Ccomplshift cmp s a => cond_for_signed_cmp cmp
-  | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp
-  | Ccomplimm cmp n => cond_for_signed_cmp cmp
-  | Ccompluimm cmp n => cond_for_unsigned_cmp cmp
-  | Cmasklzero n => TCeq
-  | Cmasklnotzero n => TCne
-  | Ccompf cmp => cond_for_float_cmp cmp
-  | Cnotcompf cmp => cond_for_float_not_cmp cmp
-  | Ccompfzero cmp => cond_for_float_cmp cmp
-  | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
-  | Ccompfs cmp => cond_for_float_cmp cmp
-  | Cnotcompfs cmp => cond_for_float_not_cmp cmp
-  | Ccompfszero cmp => cond_for_float_cmp cmp
-  | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
+Definition iregsp_of_preg (p : Asm.preg) : res iregsp :=
+  match p with
+  | DR (IR r) => OK r
+  | _ => Error (msg "Asmgen.iregsp_of_preg")
   end.
 
-(** Translation of a conditional branch.  Prepends to [k] the instructions
-  that evaluate the condition and ranch to [lbl] if it holds.
-  We recognize some conditional branches that can be implemented
-  without setting then testing condition flags.  *)
-
-Definition transl_cond_branch_default
-              (c: condition) (args: list mreg) (lbl: label) (k: code) :=
-  transl_cond c args (Pbc (cond_for_cond c) lbl :: k).
- 
-Definition transl_cond_branch
-              (c: condition) (args: list mreg) (lbl: label) (k: code) :=
-  match args, c with
-  | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) =>
-      if Int.eq n Int.zero
-      then (do r1 <- ireg_of a1; OK (Pcbnz W r1 lbl :: k))
-      else transl_cond_branch_default c args lbl k
-  | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) =>
-      if Int.eq n Int.zero
-      then (do r1 <- ireg_of a1; OK (Pcbz W r1 lbl :: k))
-      else transl_cond_branch_default c args lbl k
-  | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) =>
-      if Int64.eq n Int64.zero
-      then (do r1 <- ireg_of a1; OK (Pcbnz X r1 lbl :: k))
-      else transl_cond_branch_default c args lbl k
-  | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) =>
-      if Int64.eq n Int64.zero
-      then (do r1 <- ireg_of a1; OK (Pcbz X r1 lbl :: k))
-      else transl_cond_branch_default c args lbl k
-  | a1 :: nil, Cmaskzero n =>
-      match Int.is_power2 n with
-      | Some bit => do r1 <- ireg_of a1; OK (Ptbz W r1 bit lbl :: k)
-      | None => transl_cond_branch_default c args lbl k
-      end
-  | a1 :: nil, Cmasknotzero n =>
-      match Int.is_power2 n with
-      | Some bit => do r1 <- ireg_of a1; OK (Ptbnz W r1 bit lbl :: k)
-      | None => transl_cond_branch_default c args lbl k
-      end
-  | a1 :: nil, Cmasklzero n =>
-      match Int64.is_power2' n with
-      | Some bit => do r1 <- ireg_of a1; OK (Ptbz X r1 bit lbl :: k)
-      | None => transl_cond_branch_default c args lbl k
+Definition basic_to_instruction (b: basic) : res Asm.instruction :=
+  match b with
+  (* Aithmetic instructions *)
+  | PArith (PArithP (Padrp id ofs)   rd) => do rd' <- ireg_of_preg rd;
+                                            OK (Asm.Padrp rd' id ofs)
+  | PArith (PArithP (Pmovz sz n pos) rd) => do rd' <- ireg_of_preg rd;
+                                            OK (Asm.Pmovz sz rd' n pos)
+  | PArith (PArithP (Pmovn sz n pos) rd) => do rd' <- ireg_of_preg rd;
+                                            OK (Asm.Pmovn sz rd' n pos)
+  | PArith (PArithP (Pfmovimms f)    rd) => do rd' <- freg_of_preg rd;
+                                            OK (Asm.Pfmovimms rd' f)
+  | PArith (PArithP (Pfmovimmd f)    rd) => do rd' <- freg_of_preg rd;
+                                            OK (Asm.Pfmovimmd rd' f)
+
+  | PArith (PArithPP (Pmovk sz n pos) rd rs) =>
+    if (Asm.preg_eq rd rs) then (
+      do rd' <- ireg_of_preg rd;
+      OK (Asm.Pmovk sz rd' n pos)
+    ) else
+      Error (msg "Asmgen.basic_to_instruction: Pmovk uses a single register as both source and target")
+  | PArith (PArithPP Pmov rd rs)              => do rd' <- iregsp_of_preg rd;
+                                                 do rs' <- iregsp_of_preg rs;
+                                                 OK (Asm.Pmov rd' rs')
+  | PArith (PArithPP (Paddadr id ofs)  rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Paddadr rd' rs' id ofs)
+  | PArith (PArithPP (Psbfiz sz r s)   rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Psbfiz sz rd' rs' r s)
+  | PArith (PArithPP (Psbfx sz r s)    rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Psbfx sz rd' rs' r s)
+  | PArith (PArithPP (Pubfiz sz r s)   rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Pubfiz sz rd' rs' r s)
+  | PArith (PArithPP (Pubfx sz r s)    rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Pubfx sz rd' rs' r s)
+  | PArith (PArithPP Pfmov             rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfmov rd' rs')
+  | PArith (PArithPP Pfcvtds           rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfcvtds rd' rs')
+  | PArith (PArithPP Pfcvtsd           rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfcvtsd rd' rs')
+  | PArith (PArithPP (Pfabs sz)        rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfabs sz rd' rs')
+  | PArith (PArithPP (Pfneg sz)        rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfneg sz rd' rs')
+  | PArith (PArithPP (Pscvtf fsz isz)  rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Pscvtf fsz isz rd' rs')
+  | PArith (PArithPP (Pucvtf fsz isz)  rd rs) => do rd' <- freg_of_preg rd;
+                                                 do rs' <- ireg_of_preg rs;
+                                                 OK (Asm.Pucvtf fsz isz rd' rs')
+  | PArith (PArithPP (Pfcvtzs isz fsz) rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfcvtzs isz fsz rd' rs')
+  | PArith (PArithPP (Pfcvtzu isz fsz) rd rs) => do rd' <- ireg_of_preg rd;
+                                                 do rs' <- freg_of_preg rs;
+                                                 OK (Asm.Pfcvtzu isz fsz rd' rs')
+  | PArith (PArithPP (Paddimm sz n)    rd rs) => do rd' <- iregsp_of_preg rd;
+                                                 do rs' <- iregsp_of_preg rs;
+                                                 OK (Asm.Paddimm sz rd' rs' n)
+  | PArith (PArithPP (Psubimm sz n)    rd rs) => do rd' <- iregsp_of_preg rd;
+                                                 do rs' <- iregsp_of_preg rs;
+                                                 OK (Asm.Psubimm sz rd' rs' n)
+
+  | PArith (PArithPPP (Pasrv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Pasrv sz rd' r1' r2')
+  | PArith (PArithPPP (Plslv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Plslv sz rd' r1' r2')
+  | PArith (PArithPPP (Plsrv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Plsrv sz rd' r1' r2')
+  | PArith (PArithPPP (Prorv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Prorv sz rd' r1' r2')
+  | PArith (PArithPPP Psmulh      rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Psmulh rd' r1' r2')
+  | PArith (PArithPPP Pumulh      rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Pumulh rd' r1' r2')
+  | PArith (PArithPPP (Psdiv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Psdiv sz rd' r1' r2')
+  | PArith (PArithPPP (Pudiv sz)  rd r1 r2) => do rd' <- ireg_of_preg rd;
+                                               do r1' <- ireg_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Pudiv sz rd' r1' r2')
+  | PArith (PArithPPP (Paddext x) rd r1 r2) => do rd' <- iregsp_of_preg rd;
+                                               do r1' <- iregsp_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Paddext rd' r1' r2' x)
+  | PArith (PArithPPP (Psubext x) rd r1 r2) => do rd' <- iregsp_of_preg rd;
+                                               do r1' <- iregsp_of_preg r1;
+                                               do r2' <- ireg_of_preg r2;
+                                               OK (Asm.Psubext rd' r1' r2' x)
+  | PArith (PArithPPP (Pfadd sz)  rd r1 r2) => do rd' <- freg_of_preg rd;
+                                               do r1' <- freg_of_preg r1;
+                                               do r2' <- freg_of_preg r2;
+                                               OK (Asm.Pfadd sz rd' r1' r2')
+  | PArith (PArithPPP (Pfdiv sz)  rd r1 r2) => do rd' <- freg_of_preg rd;
+                                               do r1' <- freg_of_preg r1;
+                                               do r2' <- freg_of_preg r2;
+                                               OK (Asm.Pfdiv sz rd' r1' r2')
+  | PArith (PArithPPP (Pfmul sz)  rd r1 r2) => do rd' <- freg_of_preg rd;
+                                               do r1' <- freg_of_preg r1;
+                                               do r2' <- freg_of_preg r2;
+                                               OK (Asm.Pfmul sz rd' r1' r2')
+  | PArith (PArithPPP (Pfsub sz)  rd r1 r2) => do rd' <- freg_of_preg rd;
+                                               do r1' <- freg_of_preg r1;
+                                               do r2' <- freg_of_preg r2;
+                                               OK (Asm.Pfsub sz rd' r1' r2')
+
+  | PArith (PArithRR0 (Pandimm sz n) rd r1) => OK (Asm.Pandimm sz rd r1 n)
+  | PArith (PArithRR0 (Peorimm sz n) rd r1) => OK (Asm.Peorimm sz rd r1 n)
+  | PArith (PArithRR0 (Porrimm sz n) rd r1) => OK (Asm.Porrimm sz rd r1 n)
+
+
+  | PArith (PArithRR0R (Padd sz s) rd r1 r2) => OK (Asm.Padd sz rd r1 r2 s)
+  | PArith (PArithRR0R (Psub sz s) rd r1 r2) => OK (Asm.Psub sz rd r1 r2 s)
+  | PArith (PArithRR0R (Pand sz s) rd r1 r2) => OK (Asm.Pand sz rd r1 r2 s)
+  | PArith (PArithRR0R (Pbic sz s) rd r1 r2) => OK (Asm.Pbic sz rd r1 r2 s)
+  | PArith (PArithRR0R (Peon sz s) rd r1 r2) => OK (Asm.Peon sz rd r1 r2 s)
+  | PArith (PArithRR0R (Peor sz s) rd r1 r2) => OK (Asm.Peor sz rd r1 r2 s)
+  | PArith (PArithRR0R (Porr sz s) rd r1 r2) => OK (Asm.Porr sz rd r1 r2 s)
+  | PArith (PArithRR0R (Porn sz s) rd r1 r2) => OK (Asm.Porn sz rd r1 r2 s)
+
+  | PArith (PArithARRRR0 (Pmadd sz) rd r1 r2 r3) => OK (Asm.Pmadd sz rd r1 r2 r3)
+  | PArith (PArithARRRR0 (Pmsub sz) rd r1 r2 r3) => OK (Asm.Pmsub sz rd r1 r2 r3)
+
+  | PArith (PArithComparisonPP (Pcmpext x) r1 r2) => do r1' <- ireg_of_preg r1;
+                                                     do r2' <- ireg_of_preg r2;
+                                                     OK (Asm.Pcmpext r1' r2' x)
+  | PArith (PArithComparisonPP (Pcmnext x) r1 r2) => do r1' <- ireg_of_preg r1;
+                                                     do r2' <- ireg_of_preg r2;
+                                                     OK (Asm.Pcmnext r1' r2' x)
+  | PArith (PArithComparisonPP (Pfcmp sz)  r1 r2) => do r1' <- freg_of_preg r1;
+                                                     do r2' <- freg_of_preg r2;
+                                                     OK (Asm.Pfcmp sz r1' r2')
+
+  | PArith (PArithComparisonR0R (Pcmp is s) r1 r2) => OK (Asm.Pcmp is r1 r2 s)
+  | PArith (PArithComparisonR0R (Pcmn is s) r1 r2) => OK (Asm.Pcmn is r1 r2 s)
+  | PArith (PArithComparisonR0R (Ptst is s) r1 r2) => OK (Asm.Ptst is r1 r2 s)
+
+  | PArith (PArithComparisonP (Pcmpimm sz n) r1) => do r1' <- ireg_of_preg r1;
+                                                    OK (Asm.Pcmpimm sz r1' n)
+  | PArith (PArithComparisonP (Pcmnimm sz n) r1) => do r1' <- ireg_of_preg r1;
+                                                    OK (Asm.Pcmnimm sz r1' n)
+  | PArith (PArithComparisonP (Ptstimm sz n) r1) => do r1' <- ireg_of_preg r1;
+                                                    OK (Asm.Ptstimm sz r1' n)
+  | PArith (PArithComparisonP (Pfcmp0 sz)    r1) => do r1' <- freg_of_preg r1;
+                                                    OK (Asm.Pfcmp0 sz r1')
+
+  | PArith (Pcset rd c) => OK (Asm.Pcset rd c)
+  | PArith (Pfmovi fsz rd r1) => OK (Asm.Pfmovi fsz rd r1)
+  | PArith (Pcsel rd r1 r2 c) =>
+      match r1, r2 with
+      | IR r1', IR r2' => do rd' <- ireg_of_preg rd;
+                          do r1'' <- ireg_of_preg r1';
+                          do r2'' <- ireg_of_preg r2';
+                          OK (Asm.Pcsel rd' r1'' r2'' c)
+      | FR r1', FR r2' => do rd' <- freg_of_preg rd;
+                          do r1'' <- freg_of_preg r1';
+                          do r2'' <- freg_of_preg r2';
+                          OK (Asm.Pfsel rd' r1'' r2'' c)
+      | _, _ => Error (msg "Asmgen.basic_to_instruction: Pcsel is only defind on iregs and fregs.")
       end
-  | a1 :: nil, Cmasklnotzero n =>
-      match Int64.is_power2' n with
-      | Some bit => do r1 <- ireg_of a1; OK (Ptbnz X r1 bit lbl :: k)
-      | None => transl_cond_branch_default c args lbl k
-      end
-  | _, _ =>
-      transl_cond_branch_default c args lbl k
-  end.
-  
-(** Translation of the arithmetic operation [res <- op(args)].
-    The corresponding instructions are prepended to [k]. *)
-
-Definition transl_op
-              (op: operation) (args: list mreg) (res: mreg) (k: code) :=
-  match op, args with
-  | Omove, a1 :: nil =>
-      match preg_of res, preg_of a1 with
-      | IR r, IR a => OK (Pmov r a :: k)
-      | FR r, FR a => OK (Pfmov r a :: k)
-      |  _  ,  _   => Error(msg "Asmgen.Omove")
-      end
-  | Ointconst n, nil =>
-      do rd <- ireg_of res;
-      OK (loadimm32 rd n k)
-  | Olongconst n, nil =>
-      do rd <- ireg_of res;
-      OK (loadimm64 rd n k)
-  | Ofloatconst f, nil =>
-      do rd <- freg_of res;
-      OK (if Float.eq_dec f Float.zero
-          then Pfmovi D rd XZR :: k
-          else Pfmovimmd rd f :: k)
-  | Osingleconst f, nil =>
-      do rd <- freg_of res;
-      OK (if Float32.eq_dec f Float32.zero
-          then Pfmovi S rd XZR :: k
-          else Pfmovimms rd f :: k)
-  | Oaddrsymbol id ofs, nil =>
-      do rd <- ireg_of res;
-      OK (loadsymbol rd id ofs k)
-  | Oaddrstack ofs, nil =>
-      do rd <- ireg_of res;
-      OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k)
-(** 32-bit integer arithmetic *)
-  | Oshift s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porr W rd XZR r1 (transl_shift s a) :: k)
-  | Oadd, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Padd W rd r1 r2 SOnone :: k)
-  | Oaddshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Padd W rd r1 r2 (transl_shift s a) :: k)
-  | Oaddimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (addimm32 rd r1 n k)
-  | Oneg, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psub W rd XZR r1 SOnone :: k)
-  | Onegshift s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psub W rd XZR r1 (transl_shift s a) :: k)
-  | Osub, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psub W rd r1 r2 SOnone :: k)
-  | Osubshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psub W rd r1 r2 (transl_shift s a) :: k)
-  | Omul, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pmadd W rd r1 r2 XZR :: k)
-  | Omuladd, a1 :: a2 :: a3 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
-      OK (Pmadd W rd r2 r3 r1 :: k)
-  | Omulsub, a1 :: a2 :: a3 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
-      OK (Pmsub W rd r2 r3 r1 :: k)
-  | Odiv, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psdiv W rd r1 r2 :: k)
-  | Odivu, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pudiv W rd r1 r2 :: k)
-  | Oand, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pand W rd r1 r2 SOnone :: k)
-  | Oandshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pand W rd r1 r2 (transl_shift s a) :: k)
-  | Oandimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k)      
-  | Oor, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porr W rd r1 r2 SOnone :: k)
-  | Oorshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porr W rd r1 r2 (transl_shift s a) :: k)
-  | Oorimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k)      
-  | Oxor, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peor W rd r1 r2 SOnone :: k)
-  | Oxorshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peor W rd r1 r2 (transl_shift s a) :: k)
-  | Oxorimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k)      
-  | Onot, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porn W rd XZR r1 SOnone :: k)
-  | Onotshift s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porn W rd XZR r1 (transl_shift s a) :: k)
-  | Obic, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pbic W rd r1 r2 SOnone :: k)
-  | Obicshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pbic W rd r1 r2 (transl_shift s a) :: k)
-  | Oorn, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porn W rd r1 r2 SOnone :: k)
-  | Oornshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porn W rd r1 r2 (transl_shift s a) :: k)
-  | Oeqv, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peon W rd r1 r2 SOnone :: k)
-  | Oeqvshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peon W rd r1 r2 (transl_shift s a) :: k)
-  | Oshl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Plslv W rd r1 r2 :: k)
-  | Oshr, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pasrv W rd r1 r2 :: k)
-  | Oshru, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Plsrv W rd r1 r2 :: k)
-  | Oshrximm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (shrx32 rd r1 n k)
-  | Ozext s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfiz W rd r1 Int.zero s :: k)
-  | Osext s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfiz W rd r1 Int.zero s :: k)
-  | Oshlzext s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
-  | Oshlsext s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
-  | Ozextshr a s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
-  | Osextshr a s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
-(** 64-bit integer arithmetic *)
-  | Oshiftl s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porr X rd XZR r1 (transl_shift s a) :: k)
-  | Oextend x a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (move_extended rd r1 x a k)
-  (* [Omakelong] and [Ohighlong] should not occur *)
-  | Olowlong, a1 :: nil => 
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      assertion (ireg_eq rd r1);
-      OK (Pcvtx2w rd :: k)
-  | Oaddl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Padd X rd r1 r2 SOnone :: k)
-  | Oaddlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Padd X rd r1 r2 (transl_shift s a) :: k)
-  | Oaddlext x a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (arith_extended Paddext (Padd X) rd r1 r2 x a k)
-  | Oaddlimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (addimm64 rd r1 n k)
-  | Onegl, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psub X rd XZR r1 SOnone :: k)
-  | Oneglshift s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psub X rd XZR r1 (transl_shift s a) :: k)
-  | Osubl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psub X rd r1 r2 SOnone :: k)
-  | Osublshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psub X rd r1 r2 (transl_shift s a) :: k)
-  | Osublext x a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (arith_extended Psubext (Psub X) rd r1 r2 x a k)
-  | Omull, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pmadd X rd r1 r2 XZR :: k)
-  | Omulladd, a1 :: a2 :: a3 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
-      OK (Pmadd X rd r2 r3 r1 :: k)
-  | Omullsub, a1 :: a2 :: a3 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
-      OK (Pmsub X rd r2 r3 r1 :: k)
-  | Omullhs, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psmulh rd r1 r2 :: k)
-  | Omullhu, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pumulh rd r1 r2 :: k)
-  | Odivl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psdiv X rd r1 r2 :: k)
-  | Odivlu, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pudiv X rd r1 r2 :: k)
-  | Oandl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pand X rd r1 r2 SOnone :: k)
-  | Oandlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pand X rd r1 r2 (transl_shift s a) :: k)
-  | Oandlimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k)      
-  | Oorl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porr X rd r1 r2 SOnone :: k)
-  | Oorlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porr X rd r1 r2 (transl_shift s a) :: k)
-  | Oorlimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k)      
-  | Oxorl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peor X rd r1 r2 SOnone :: k)
-  | Oxorlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peor X rd r1 r2 (transl_shift s a) :: k)
-  | Oxorlimm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k)      
-  | Onotl, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porn X rd XZR r1 SOnone :: k)
-  | Onotlshift s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Porn X rd XZR r1 (transl_shift s a) :: k)
-  | Obicl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pbic X rd r1 r2 SOnone :: k)
-  | Obiclshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pbic X rd r1 r2 (transl_shift s a) :: k)
-  | Oornl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porn X rd r1 r2 SOnone :: k)
-  | Oornlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Porn X rd r1 r2 (transl_shift s a) :: k)
-  | Oeqvl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peon X rd r1 r2 SOnone :: k)
-  | Oeqvlshift s a, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Peon X rd r1 r2 (transl_shift s a) :: k)
-  | Oshll, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Plslv X rd r1 r2 :: k)
-  | Oshrl, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pasrv X rd r1 r2 :: k)
-  | Oshrlu, a1 :: a2 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Plsrv X rd r1 r2 :: k)
-  | Oshrlximm n, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (shrx64 rd r1 n k)
-  | Ozextl s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfiz X rd r1 Int.zero s :: k)
-  | Osextl s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfiz X rd r1 Int.zero s :: k)
-  | Oshllzext s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
-  | Oshllsext s a, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
-  | Ozextshrl a s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Pubfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
-  | Osextshrl a s, a1 :: nil =>
-      do rd <- ireg_of res; do r1 <- ireg_of a1;
-      OK (Psbfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
-(** 64-bit floating-point arithmetic *)
-  | Onegf, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfneg D rd rs :: k)
-  | Oabsf, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfabs D rd rs :: k)
-  | Oaddf, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfadd D rd rs1 rs2 :: k)
-  | Osubf, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfsub D rd rs1 rs2 :: k)
-  | Omulf, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfmul D rd rs1 rs2 :: k)
-  | Odivf, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfdiv D rd rs1 rs2 :: k)
-(** 32-bit floating-point arithmetic *)
-  | Onegfs, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfneg S rd rs :: k)
-  | Oabsfs, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfabs S rd rs :: k)
-  | Oaddfs, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfadd S rd rs1 rs2 :: k)
-  | Osubfs, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfsub S rd rs1 rs2 :: k)
-  | Omulfs, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfmul S rd rs1 rs2 :: k)
-  | Odivfs, a1 :: a2 :: nil =>
-      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
-      OK (Pfdiv S rd rs1 rs2 :: k)
-  | Osingleoffloat, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfcvtsd rd rs :: k)
-  | Ofloatofsingle, a1 :: nil =>
-      do rd <- freg_of res; do rs <- freg_of a1;
-      OK (Pfcvtds rd rs :: k)
-(** Conversions between int and float *)
-  | Ointoffloat, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzs W D rd rs :: k)
-  | Ointuoffloat, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzu W D rd rs :: k)
-  | Ofloatofint, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pscvtf D W rd rs :: k)
-  | Ofloatofintu, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pucvtf D W rd rs :: k)
-  | Ointofsingle, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzs W S rd rs :: k)
-  | Ointuofsingle, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzu W S rd rs :: k)
-  | Osingleofint, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pscvtf S W rd rs :: k)
-  | Osingleofintu, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pucvtf S W rd rs :: k)
-  | Olongoffloat, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzs X D rd rs :: k)
-  | Olonguoffloat, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzu X D rd rs :: k)
-  | Ofloatoflong, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pscvtf D X rd rs :: k)
-  | Ofloatoflongu, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pucvtf D X rd rs :: k)
-  | Olongofsingle, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzs X S rd rs :: k)
-  | Olonguofsingle, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- freg_of a1;
-      OK (Pfcvtzu X S rd rs :: k)
-  | Osingleoflong, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pscvtf S X rd rs :: k)
-  | Osingleoflongu, a1 :: nil =>
-      do rd <- freg_of res; do rs <- ireg_of a1;
-      OK (Pucvtf S X rd rs :: k)
-(** Boolean tests *)
-  | Ocmp c, _ =>
-      do rd <- ireg_of res;
-      transl_cond c args (Pcset rd (cond_for_cond c) :: k)
-(** Conditional move *)
-  | Osel cmp ty, a1 :: a2 :: args =>
-      match preg_of res with
-      | IR r => 
-          do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-          transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) :: k)
-      | FR r =>
-          do r1 <- freg_of a1; do r2 <- freg_of a2;
-          transl_cond cmp args (Pfsel r r1 r2 (cond_for_cond cmp) :: k)
-      | _ =>
-          Error(msg "Asmgen.Osel")
-      end
-  | _, _ =>
-      Error(msg "Asmgen.transl_op")
-  end.
-
-(** Translation of addressing modes *)
-
-Definition offset_representable (sz: Z) (ofs: int64) : bool :=
-  let isz := Int64.repr sz in
-  (** either unscaled 9-bit signed *)
-  Int64.eq ofs (Int64.sign_ext 9 ofs) ||
-  (** or scaled 12-bit unsigned *)
-  (Int64.eq (Int64.modu ofs isz) Int64.zero
-   && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
- 
-Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
-                             (insn: Asm.addressing -> instruction) (k: code) : res code :=
-  match addr, args with
-  | Aindexed ofs, a1 :: nil =>
-      do r1 <- ireg_of a1;
-       if offset_representable sz ofs then
-        OK (insn (ADimm r1 ofs) :: k)
-      else
-        OK (loadimm64 X16 ofs (insn (ADreg r1 X16) :: k))
-  | Aindexed2, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (insn (ADreg r1 r2) :: k)
-  | Aindexed2shift a, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      if Int.eq a Int.zero then
-        OK (insn (ADreg r1 r2) :: k)
-      else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
-        OK (insn (ADlsl r1 r2 a) :: k)
-      else
-        OK (Padd X X16 r1 r2 (SOlsl a) :: insn (ADimm X16 Int64.zero) :: k)
-  | Aindexed2ext x a, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
-        OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
-                             | Xuns32 => ADuxt r1 r2 a end) :: k)
-      else
-        OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
-                           (insn (ADimm X16 Int64.zero) :: k))
-  | Aglobal id ofs, nil =>
-      assertion (negb (Archi.pic_code tt));
-      if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
-      then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k)
-      else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k))
-  | Ainstack ofs, nil =>
-      let ofs := Ptrofs.to_int64 ofs in
-      if offset_representable sz ofs then
-        OK (insn (ADimm XSP ofs) :: k)
-      else
-        OK (loadimm64 X16 ofs (insn (ADreg XSP X16) :: k))
-  | _, _ =>
-      Error(msg "Asmgen.transl_addressing")
-  end.
-
-(** Translation of loads and stores *)
-
-Definition transl_load (trap: trapping_mode)
-                       (chunk: memory_chunk) (addr: Op.addressing)
-                       (args: list mreg) (dst: mreg) (k: code) : res code :=
-  match trap with
-  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on aarch64")
-  | TRAP =>
-  match chunk with
-  | Mint8unsigned =>
-      do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrb W rd) k
-  | Mint8signed =>
-      do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrsb W rd) k
-  | Mint16unsigned =>
-      do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrh W rd) k
-  | Mint16signed =>
-      do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrsh W rd) k
-  | Mint32 =>
-      do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw rd) k
-  | Mint64 =>
-      do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx rd) k
-  | Mfloat32 =>
-      do rd <- freg_of dst; transl_addressing 4 addr args (Pldrs rd) k
-  | Mfloat64 =>
-      do rd <- freg_of dst; transl_addressing 8 addr args (Pldrd rd) k
-  | Many32 =>
-      do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw_a rd) k
-  | Many64 =>
-      do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx_a rd) k
-  end
-  end.
-
-Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
-                        (args: list mreg) (src: mreg) (k: code) : res code :=
-  match chunk with
-  | Mint8unsigned | Mint8signed =>
-      do r1 <- ireg_of src; transl_addressing 1 addr args (Pstrb r1) k
-  | Mint16unsigned | Mint16signed =>
-      do r1 <- ireg_of src; transl_addressing 2 addr args (Pstrh r1) k
-  | Mint32 =>
-      do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw r1) k
-  | Mint64 =>
-      do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx r1) k
-  | Mfloat32 =>
-      do r1 <- freg_of src; transl_addressing 4 addr args (Pstrs r1) k
-  | Mfloat64 =>
-      do r1 <- freg_of src; transl_addressing 8 addr args (Pstrd r1) k
-  | Many32 =>
-      do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw_a r1) k
-  | Many64 =>
-      do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx_a r1) k
+  | PArith (Pfnmul fsz rd r1 r2) => OK (Asm.Pfnmul fsz rd r1 r2)
+
+  | PLoad (PLd_rd_a Pldrw       rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw     rd' a)
+  | PLoad (PLd_rd_a Pldrw_a     rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw_a   rd' a)
+  | PLoad (PLd_rd_a Pldrx       rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx     rd' a)
+  | PLoad (PLd_rd_a Pldrx_a     rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx_a   rd' a)
+  | PLoad (PLd_rd_a (Pldrb sz)  rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrb sz  rd' a)
+  | PLoad (PLd_rd_a (Pldrsb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsb sz rd' a)
+  | PLoad (PLd_rd_a (Pldrh sz)  rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrh sz  rd' a)
+  | PLoad (PLd_rd_a (Pldrsh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsh sz rd' a)
+  | PLoad (PLd_rd_a Pldrzw      rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrzw    rd' a)
+  | PLoad (PLd_rd_a Pldrsw      rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsw    rd' a)
+
+  | PLoad (PLd_rd_a Pldrs   rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrs   rd' a)
+  | PLoad (PLd_rd_a Pldrd   rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd   rd' a)
+  | PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a)
+
+  | PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
+                                    do rd2' <- ireg_of_preg rd2;
+                                    OK (Asm.Pldpw rd1' rd2' chk1 chk2 a)
+  | PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
+                                    do rd2' <- ireg_of_preg rd2;
+                                    OK (Asm.Pldpx rd1' rd2' chk1 chk2 a)
+  | PLoad (Pldp Pldps rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+                                    do rd2' <- freg_of_preg rd2;
+                                    OK (Asm.Pldps rd1' rd2' chk1 chk2 a)
+  | PLoad (Pldp Pldpd rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+                                    do rd2' <- freg_of_preg rd2;
+                                    OK (Asm.Pldpd rd1' rd2' chk1 chk2 a)
+
+  | PStore (PSt_rs_a Pstrw   r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrw   r' a)
+  | PStore (PSt_rs_a Pstrw_a r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
+  | PStore (PSt_rs_a Pstrx   r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrx   r' a)
+  | PStore (PSt_rs_a Pstrx_a r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrx_a r' a)
+  | PStore (PSt_rs_a Pstrb   r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrb   r' a)
+  | PStore (PSt_rs_a Pstrh   r a) => do r'  <- ireg_of_preg r; OK (Asm.Pstrh   r' a)
+
+  | PStore (PSt_rs_a Pstrs   r a) => do r' <- freg_of_preg r; OK (Asm.Pstrs    r' a)
+  | PStore (PSt_rs_a Pstrd   r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd    r' a)
+  | PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a  r' a)
+
+  | PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
+                                    do rs2' <- ireg_of_preg rs2;
+                                    OK (Asm.Pstpw rs1' rs2' chk1 chk2 a)
+  | PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
+                                    do rs2' <- ireg_of_preg rs2;
+                                    OK (Asm.Pstpx rs1' rs2' chk1 chk2 a)
+  | PStore (Pstp Pstps rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+                                    do rs2' <- freg_of_preg rs2;
+                                    OK (Asm.Pstps rs1' rs2' chk1 chk2 a)
+  | PStore (Pstp Pstpd rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+                                    do rs2' <- freg_of_preg rs2;
+                                    OK (Asm.Pstpd rs1' rs2' chk1 chk2 a)
+
+  | Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs)
+  | Pfreeframe  sz linkofs => OK (Asm.Pfreeframe  sz linkofs)
+
+  | Ploadsymbol rd id      => OK (Asm.Ploadsymbol rd id)
+
+  | Pcvtsw2x rd r1         => OK (Asm.Pcvtsw2x rd r1)
+
+  | Pcvtuw2x rd r1         => OK (Asm.Pcvtuw2x rd r1)
+
+  | Pcvtx2w  rd            => OK (Asm.Pcvtx2w rd)
+  | Pnop                   => OK (Asm.Pnop)
   end.
 
-(** Register-indexed loads and stores *)
-
-Definition indexed_memory_access (insn: Asm.addressing -> instruction)
-                                 (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) :=
-  let ofs := Ptrofs.to_int64 ofs in
-  if offset_representable sz ofs
-  then insn (ADimm base ofs) :: k
-  else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
-
-Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
-  match ty, preg_of dst with
-  | Tint,    IR rd => OK (indexed_memory_access (Pldrw rd) 4 base ofs k)
-  | Tlong,   IR rd => OK (indexed_memory_access (Pldrx rd) 8 base ofs k)
-  | Tsingle, FR rd => OK (indexed_memory_access (Pldrs rd) 4 base ofs k)
-  | Tfloat,  FR rd => OK (indexed_memory_access (Pldrd rd) 8 base ofs k)
-  | Tany32,  IR rd => OK (indexed_memory_access (Pldrw_a rd) 4 base ofs k)
-  | Tany64,  IR rd => OK (indexed_memory_access (Pldrx_a rd) 8 base ofs k)
-  | Tany64,  FR rd => OK (indexed_memory_access (Pldrd_a rd) 8 base ofs k)
-  | _, _           => Error (msg "Asmgen.loadind")
+Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction :=
+  match cfi with
+  | Pb l => Asm.Pb l
+  | Pbc c lbl => Asm.Pbc c lbl
+  | Pbl id sg => Asm.Pbl id sg
+  | Pbs id sg => Asm.Pbs id sg
+  | Pblr r sg => Asm.Pblr r sg
+  | Pbr r sg => Asm.Pbr r sg
+  | Pret r => Asm.Pret r
+  | Pcbnz sz r lbl => Asm.Pcbnz sz r lbl
+  | Pcbz sz r lbl => Asm.Pcbz sz r lbl
+  | Ptbnz sz r n lbl => Asm.Ptbnz sz r n lbl
+  | Ptbz sz r n lbl => Asm.Ptbz sz r n lbl
+  | Pbtbl r1 tbl => Asm.Pbtbl r1 tbl
   end.
 
-Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: code) :=
-  match ty, preg_of src with
-  | Tint,    IR rd => OK (indexed_memory_access (Pstrw rd) 4 base ofs k)
-  | Tlong,   IR rd => OK (indexed_memory_access (Pstrx rd) 8 base ofs k)
-  | Tsingle, FR rd => OK (indexed_memory_access (Pstrs rd) 4 base ofs k)
-  | Tfloat,  FR rd => OK (indexed_memory_access (Pstrd rd) 8 base ofs k)
-  | Tany32,  IR rd => OK (indexed_memory_access (Pstrw_a rd) 4 base ofs k)
-  | Tany64,  IR rd => OK (indexed_memory_access (Pstrx_a rd) 8 base ofs k)
-  | Tany64,  FR rd => OK (indexed_memory_access (Pstrd_a rd) 8 base ofs k)
-  | _, _           => Error (msg "Asmgen.storeind")
+Definition control_to_instruction (c: control) :=
+  match c with
+  | PCtlFlow i => cf_instruction_to_instruction i
+  | Pbuiltin ef args res => Asm.Pbuiltin ef (List.map (map_builtin_arg DR) args) (map_builtin_res DR res)
   end.
 
-Definition loadptr (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: code) :=
-  indexed_memory_access (Pldrx dst) 8 base ofs k.
-
-Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
-  indexed_memory_access (Pstrx src) 8 base ofs k.
-
-(** Function epilogue *)
-
-Definition make_epilogue (f: Mach.function) (k: code) :=
-  (* FIXME
-     Cannot be used because memcpy destroys X30;
-     issue being discussed with X. Leroy *)
-  (* if is_leaf_function f
-  then Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k
-  else*) loadptr XSP f.(fn_retaddr_ofs) RA
-         (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
-  
-(** Translation of a Mach instruction. *)
-
-Definition transl_instr (f: Mach.function) (i: Mach.instruction)
-                        (r29_is_parent: bool) (k: code) : res code :=
-  match i with
-  | Mgetstack ofs ty dst =>
-      loadind XSP ofs ty dst k
-  | Msetstack src ofs ty =>
-      storeind src XSP ofs ty k
-  | Mgetparam ofs ty dst =>
-      (* load via the frame pointer if it is valid *)
-      do c <- loadind X29 ofs ty dst k;
-      OK (if r29_is_parent then c else loadptr XSP f.(fn_link_ofs) X29 c)
-  | Mop op args res =>
-      transl_op op args res k
-  | Mload trap chunk addr args dst =>
-      transl_load trap chunk addr args dst k
-  | Mstore chunk addr args src =>
-      transl_store chunk addr args src k
-  | Mcall sig (inl r) =>
-      do r1 <- ireg_of r; OK (Pblr r1 sig :: k)
-  | Mcall sig (inr symb) =>
-      OK (Pbl symb sig :: k)
-  | Mtailcall sig (inl r) =>
-      do r1 <- ireg_of r;
-      OK (make_epilogue f (Pbr r1 sig :: k))
-  | Mtailcall sig (inr symb) =>
-      OK (make_epilogue f (Pbs symb sig :: k))
-  | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k)
-  | Mlabel lbl =>
-      OK (Plabel lbl :: k)
-  | Mgoto lbl =>
-      OK (Pb lbl :: k)
-  | Mcond cond args lbl =>
-      transl_cond_branch cond args lbl k
-  | Mjumptable arg tbl =>
-      do r <- ireg_of arg;
-      OK (Pbtbl r tbl :: k)
-  | Mreturn =>
-      OK (make_epilogue f (Pret RA :: k))
-  end.
-
-(** Translation of a code sequence *)
-
-Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
-  match i with
-  | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst R29)
-  | Mop op args res => before && negb (mreg_eq res R29)
-  | _ => false
+Fixpoint unfold_label (ll: list label) :=
+  match ll with
+  | nil => nil
+  | l :: ll => Plabel l :: unfold_label ll
   end.
 
-(** This is the naive definition that we no longer use because it
-  is not tail-recursive.  It is kept as specification. *)
-
-Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
-  match il with
+Fixpoint unfold_body (lb: list basic) : res Asm.code :=
+  match lb with
   | nil => OK nil
-  | i1 :: il' =>
-      do k <- transl_code f il' (it1_is_parent it1p i1);
-      transl_instr f i1 it1p k
+  | b :: lb =>
+    (* x_is: x's instructions *)
+    do b_is  <- basic_to_instruction b;
+    do lb_is <- unfold_body lb;
+    OK (b_is :: lb_is)
   end.
 
-(** This is an equivalent definition in continuation-passing style
-  that runs in constant stack space. *)
-
-Fixpoint transl_code_rec (f: Mach.function) (il: list Mach.instruction)
-                         (it1p: bool) (k: code -> res code) :=
-  match il with
-  | nil => k nil
-  | i1 :: il' =>
-      transl_code_rec f il' (it1_is_parent it1p i1)
-        (fun c1 => do c2 <- transl_instr f i1 it1p c1; k c2)
+Definition unfold_exit (oc: option control) :=
+  match oc with
+  | None => nil
+  | Some c => control_to_instruction c :: nil
   end.
 
-Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
-  transl_code_rec f il it1p (fun c => OK c).
+Definition unfold_bblock (bb: bblock) :=
+  let lbl := unfold_label (header bb) in
+    (*
+     * With this dynamically checked assumption on a previous optimization we
+     * can show that [Asmblock.label_pos] and [Asm.label_pos] retrieve the same
+     * exact address. Maintaining this property allows us to use the simple
+     * formulation of match_states defined as equality.
+     * Otherwise we would have to deal with the case of a basic block header
+     * that has multiple labels. Asmblock.label_pos will, for all labels, point
+     * to the same location at the beginning of the basic block. Asm.label_pos
+     * on the other hand could return a position pointing into the original
+     * basic block.
+     *)
+  if zle (list_length_z (header bb)) 1 then
+    do bo_is <- unfold_body (body bb);
+    OK (lbl ++ bo_is ++ unfold_exit (exit bb))
+  else
+    Error (msg "Asmgen.unfold_bblock: Multiple labels were generated.").
+
+Fixpoint unfold (bbs: Asmblock.bblocks) : res Asm.code :=
+  match bbs with
+  | nil => OK (nil)
+  | bb :: bbs' =>
+    do bb_is <- unfold_bblock bb;
+    do bbs'_is <- unfold bbs';
+    OK (bb_is ++ bbs'_is)
+  end.
 
-(** Translation of a whole function.  Note that we must check
-  that the generated code contains less than [2^32] instructions,
-  otherwise the offset part of the [PC] code pointer could wrap
-  around, leading to incorrect executions. *)
+Definition transf_function (f: Asmblock.function) : res Asm.function :=
+  do c <- unfold (Asmblock.fn_blocks f);
+  if zlt Ptrofs.max_unsigned (list_length_z c)
+  then Error (msg "Asmgen.trans_function: code size exceeded")
+  else OK {| Asm.fn_sig := Asmblock.fn_sig f; Asm.fn_code := c |}.
 
-Definition transl_function (f: Mach.function) :=
-  do c <- transl_code' f f.(Mach.fn_code) true;
-  OK (mkfunction f.(Mach.fn_sig)
-        (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
-         storeptr RA XSP f.(fn_retaddr_ofs) c)).
+Definition transf_fundef (f: Asmblock.fundef) : res Asm.fundef :=
+  transf_partial_fundef transf_function f.
 
-Definition transf_function (f: Mach.function) : res Asm.function :=
-  do tf <- transl_function f;
-  if zlt Ptrofs.max_unsigned (list_length_z tf.(fn_code))
-  then Error (msg "code size exceeded")
-  else OK tf.
+Definition transf_program (p: Asmblock.program) : res Asm.program :=
+  transform_partial_program transf_fundef p.
 
-Definition transf_fundef (f: Mach.fundef) : res Asm.fundef :=
-  transf_partial_fundef transf_function f.
+End Asmblock_TRANSF.
 
 Definition transf_program (p: Mach.program) : res Asm.program :=
-  transform_partial_program transf_fundef p.
+  let mbp := Machblockgen.transf_program p in
+  do abp <- Asmblockgen.transf_program mbp;
+  do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;
+  Asmblock_TRANSF.transf_program abp'.
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index 6831509f..d27b3f8c 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1,24 +1,35 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*         Xavier Leroy, Collège de France and INRIA Paris             *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for AArch64 code generation. *)
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*           Justus Fasse       UGA, VERIMAG                   *)
+(*           Xavier Leroy       INRIA Paris-Rocquencourt       *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
 
 Require Import Coqlib Errors.
 Require Import Integers Floats AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations Mach Conventions Asm.
-Require Import Asmgen Asmgenproof0 Asmgenproof1.
+Require Import Op Locations Machblock Conventions Asm Asmblock.
+Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
+Require Import Asmgen.
+Require Import Axioms.
+Require Import IterList.
+Require Import Ring Lia.
 
-Definition match_prog (p: Mach.program) (tp: Asm.program) :=
+Module Asmblock_PRESERVATION.
+
+Import Asmblock_TRANSF.
+
+Definition match_prog (p: Asmblock.program) (tp: Asm.program) :=
   match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
 
 Lemma transf_program_match:
@@ -29,20 +40,36 @@ Qed.
 
 Section PRESERVATION.
 
-Variable prog: Mach.program.
+Variable prog: Asmblock.program.
 Variable tprog: Asm.program.
 Hypothesis TRANSF: match_prog prog tprog.
 Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
+Definition lk :aarch64_linker := {| Asmblock.symbol_low:=Asm.symbol_low tge; Asmblock.symbol_high:=Asm.symbol_high tge|}.
+
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
 Proof (Genv.find_symbol_match TRANSF).
 
+Lemma symbol_addresses_preserved:
+  forall (s: ident) (ofs: ptrofs),
+  Genv.symbol_address tge s ofs = Genv.symbol_address ge s ofs.
+Proof.
+  intros; unfold Genv.symbol_address; rewrite symbols_preserved; reflexivity.
+Qed.
+
 Lemma senv_preserved:
   Senv.equiv ge tge.
 Proof (Genv.senv_match TRANSF).
 
+Lemma symbol_high_low: forall (id: ident) (ofs: ptrofs),
+  Val.addl (Asmblock.symbol_high lk id ofs) (Asmblock.symbol_low lk id ofs) = Genv.symbol_address ge id ofs.
+Proof.
+  unfold lk; simpl. intros; rewrite Asm.symbol_high_low; unfold Genv.symbol_address;
+  rewrite symbols_preserved; reflexivity.
+Qed.
+
 Lemma functions_translated:
   forall b f,
   Genv.find_funct_ptr ge b = Some f ->
@@ -50,1052 +77,2242 @@ Lemma functions_translated:
   Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
 Proof (Genv.find_funct_ptr_transf_partial TRANSF).
 
-Lemma functions_transl:
-  forall fb f tf,
+Lemma internal_functions_translated:
+  forall b f,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  exists tf,
+  Genv.find_funct_ptr tge b = Some (Internal tf) /\ transf_function f = OK tf.
+Proof.
+  intros; exploit functions_translated; eauto.
+  intros (x & FIND & TRANSf).
+  apply bind_inversion in TRANSf.
+  destruct TRANSf as (tf & TRANSf & X).
+  inv X.
+  eauto.
+Qed.
+
+Lemma internal_functions_unfold:
+  forall b f,
+  Genv.find_funct_ptr ge b = Some (Internal f) ->
+  exists tc,
+  Genv.find_funct_ptr tge b = Some (Internal (Asm.mkfunction (fn_sig f) tc))
+  /\ unfold (fn_blocks f) = OK tc
+  /\ list_length_z tc <= Ptrofs.max_unsigned.
+Proof.
+  intros.
+  exploit internal_functions_translated; eauto.
+  intros (tf & FINDtf & TRANStf).
+  unfold transf_function in TRANStf.
+  monadInv TRANStf.
+  destruct (zlt _ _); try congruence.
+  inv EQ. inv EQ0.
+  eexists; intuition eauto.
+  lia.
+Qed.
+
+
+Inductive is_nth_inst (bb: bblock) (n:Z) (i:Asm.instruction): Prop :=
+  | is_nth_label l:
+     list_nth_z (header bb) n = Some l ->
+     i = Asm.Plabel l ->
+     is_nth_inst bb n i
+  | is_nth_basic bi:
+     list_nth_z (body bb) (n - list_length_z (header bb)) = Some bi ->
+     basic_to_instruction bi = OK i ->
+     is_nth_inst bb n i
+  | is_nth_ctlflow cfi:
+     (exit bb) = Some cfi ->
+     n = size bb - 1 ->
+     i = control_to_instruction cfi ->
+     is_nth_inst bb n i.
+
+(* Asmblock and Asm share the same definition of state *)
+Definition match_states (s1 s2 : state) := s1 = s2.
+
+Inductive match_internal: forall n, state -> state -> Prop :=
+  | match_internal_intro n rs1 m1 rs2 m2
+    (MEM: m1 = m2)
+    (AG: forall r, r <> PC -> rs1 r = rs2 r)
+    (AGPC: Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC)
+    : match_internal n (State rs1 m1) (State rs2 m2).
+
+Lemma match_internal_set_parallel:
+  forall n rs1 m1 rs2 m2 r val,
+  match_internal n (State rs1 m1) (State rs2 m2) ->
+  r <> PC ->
+  match_internal n (State (rs1#r <- val) m1) (State (rs2#r <- val ) m2).
+Proof.
+  intros n rs1 m1 rs2 m2 r v MI.
+  inversion MI; constructor; auto.
+  - intros r' NOTPC.
+    unfold Pregmap.set; rewrite AG. reflexivity. assumption.
+  - unfold Pregmap.set; destruct (PregEq.eq PC r); congruence.
+Qed.
+
+Lemma agree_match_states:
+  forall rs1 m1 rs2 m2,
+  match_states (State rs1 m1) (State rs2 m2) ->
+  forall r : preg, rs1#r = rs2#r.
+Proof.
+  intros.
+  unfold match_states in *.
+  assert (rs1 = rs2) as EQ. { congruence. }
+  rewrite EQ. reflexivity.
+Qed.
+
+Lemma match_states_set_parallel:
+  forall rs1 m1 rs2 m2 r v,
+  match_states (State rs1 m1) (State rs2 m2) ->
+  match_states (State (rs1#r <- v) m1) (State (rs2#r <- v) m2).
+Proof.
+  intros; unfold match_states in *.
+  assert (rs1 = rs2) as RSEQ. { congruence. }
+  assert (m1 = m2) as MEQ. { congruence. }
+  rewrite RSEQ in *; rewrite MEQ in *; unfold Pregmap.set; reflexivity.
+Qed.
+
+(* match_internal from match_states *)
+Lemma mi_from_ms:
+  forall rs1 m1 rs2 m2 b ofs,
+  match_states (State rs1 m1) (State rs2 m2) ->
+  rs1#PC = Vptr b ofs ->
+  match_internal 0 (State rs1 m1) (State rs2 m2).
+Proof.
+  intros rs1 m1 rs2 m2 b ofs MS PCVAL.
+  inv MS; constructor; auto; unfold Val.offset_ptr;
+  rewrite PCVAL; rewrite Ptrofs.add_zero; reflexivity.
+Qed.
+
+Lemma transf_initial_states:
+  forall s1, Asmblock.initial_state prog s1 ->
+  exists s2, Asm.initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros ? INIT_s1.
+  inversion INIT_s1 as (m, ?, ge0, rs). unfold ge0 in *.
+  econstructor; split.
+  - econstructor.
+    eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+  - rewrite (match_program_main TRANSF); rewrite symbol_addresses_preserved.
+    unfold rs; reflexivity.
+Qed.
+
+Lemma transf_final_states:
+  forall s1 s2 r,
+  match_states s1 s2 -> Asmblock.final_state s1 r -> Asm.final_state s2 r.
+Proof.
+  intros s1 s2 r MATCH FINAL_s1.
+  inv FINAL_s1; inv MATCH; constructor; assumption.
+Qed.
+
+Definition max_pos (f : Asm.function) := list_length_z f.(Asm.fn_code).
+
+Lemma functions_bound_max_pos: forall fb f tf,
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
   transf_function f = OK tf ->
-  Genv.find_funct_ptr tge fb = Some (Internal tf).
-Proof.
-  intros. exploit functions_translated; eauto. intros [tf' [A B]].
-  monadInv B. rewrite H0 in EQ; inv EQ; auto.
-Qed.
-
-(** * Properties of control flow *)
-
-Lemma transf_function_no_overflow:
-  forall f tf,
-  transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned.
-Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
-  omega.
-Qed.
-
-Lemma exec_straight_exec:
-  forall fb f c ep tf tc c' rs m rs' m',
-  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
-  exec_straight tge tf tc rs m c' rs' m' ->
-  plus step tge (State rs m) E0 (State rs' m').
-Proof.
-  intros. inv H.
-  eapply exec_straight_steps_1; eauto.
-  eapply transf_function_no_overflow; eauto.
-  eapply functions_transl; eauto.
-Qed.
-
-Lemma exec_straight_at:
-  forall fb f c ep tf tc c' ep' tc' rs m rs' m',
-  transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
-  transl_code f c' ep' = OK tc' ->
-  exec_straight tge tf tc rs m tc' rs' m' ->
-  transl_code_at_pc ge (rs' PC) fb f c' ep' tf tc'.
-Proof.
-  intros. inv H.
-  exploit exec_straight_steps_2; eauto.
-  eapply transf_function_no_overflow; eauto.
-  eapply functions_transl; eauto.
-  intros [ofs' [PC' CT']].
-  rewrite PC'. constructor; auto.
-Qed.
-
-(** The following lemmas show that the translation from Mach to Asm
-  preserves labels, in the sense that the following diagram commutes:
-<<
-                          translation
-        Mach code ------------------------ Asm instr sequence
-            |                                          |
-            | Mach.find_label lbl       find_label lbl |
-            |                                          |
-            v                                          v
-        Mach code tail ------------------- Asm instr seq tail
-                          translation
->>
-  The proof demands many boring lemmas showing that Asm constructor
-  functions do not introduce new labels.
-*)
+  max_pos tf <= Ptrofs.max_unsigned.
+Proof.
+  intros fb f tf FINDf TRANSf.
+  unfold transf_function in TRANSf.
+  apply bind_inversion in TRANSf.
+  destruct TRANSf as (c & TRANSf).
+  destruct TRANSf as (_ & TRANSf).
+  destruct (zlt _ _).
+  - inversion TRANSf.
+  - unfold max_pos.
+    assert (Asm.fn_code tf = c) as H. { inversion TRANSf as (H'); auto. }
+    rewrite H; lia.
+Qed.
 
-Section TRANSL_LABEL.
+Lemma one_le_max_unsigned:
+  1 <= Ptrofs.max_unsigned.
+Proof.
+  unfold Ptrofs.max_unsigned; simpl; unfold Ptrofs.wordsize;
+  unfold Wordsize_Ptrofs.wordsize; destruct Archi.ptr64; simpl; lia.
+Qed.
 
-Remark loadimm_z_label: forall sz rd l k, tail_nolabel k (loadimm_z sz rd l k).
-Proof. 
-  intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
-  induction l as [ | [n p] l]; simpl; TailNoLabel.
+(* NB: does not seem useful anymore, with the [exec_header_simulation] proof below
+Lemma match_internal_exec_label:
+  forall n rs1 m1 rs2 m2 l fb f tf,
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  transf_function f = OK tf ->
+  match_internal n (State rs1 m1) (State rs2 m2) ->
+  n >= 0 ->
+  (* There is no step if n is already max_pos *)
+  n < (max_pos tf) ->
+  exists rs2' m2', Asm.exec_instr tge tf (Asm.Plabel l) rs2 m2 = Next rs2' m2'
+                   /\ match_internal (n+1) (State rs1 m1) (State rs2' m2').
+Proof.
+  intros. (* XXX auto generated names *)
+  unfold Asm.exec_instr.
+  eexists; eexists; split; eauto.
+  inversion H1; constructor; auto.
+  - intros; unfold Asm.nextinstr; unfold Pregmap.set;
+    destruct (PregEq.eq r PC); auto; contradiction.
+  - unfold Asm.nextinstr; rewrite Pregmap.gss; unfold Ptrofs.one.
+    rewrite <- AGPC; rewrite Val.offset_ptr_assoc; unfold Ptrofs.add;
+    rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; trivial.
+    + split.
+      * apply Z.le_0_1.
+      * apply one_le_max_unsigned.
+    + split.
+      * apply Z.ge_le; assumption.
+      * rewrite <- functions_bound_max_pos; eauto; lia.
 Qed.
+*)
 
-Remark loadimm_n_label: forall sz rd l k, tail_nolabel k (loadimm_n sz rd l k).
-Proof. 
-  intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
-  induction l as [ | [n p] l]; simpl; TailNoLabel.
+Lemma incrPC_agree_but_pc:
+  forall rs r ofs,
+  r <> PC ->
+  (incrPC ofs rs)#r = rs#r.
+Proof.
+  intros rs r ofs NOTPC.
+  unfold incrPC; unfold Pregmap.set; destruct (PregEq.eq r PC).
+  - contradiction.
+  - reflexivity.
 Qed.
 
-Remark loadimm_label: forall sz rd n k, tail_nolabel k (loadimm sz rd n k).
+Lemma bblock_non_empty bb: body bb <> nil \/ exit bb <> None.
 Proof.
-  unfold loadimm; intros. destruct Nat.leb; [apply loadimm_z_label|apply loadimm_n_label].
+  destruct bb. simpl.
+  unfold non_empty_bblockb in correct.
+  unfold non_empty_body, non_empty_exit, Is_true in correct.
+  destruct body, exit.
+  - right. discriminate.
+  - contradiction.
+  - right. discriminate.
+  - left. discriminate.
 Qed.
-Hint Resolve loadimm_label: labels.
 
-Remark loadimm32_label: forall r n k, tail_nolabel k (loadimm32 r n k).
+Lemma list_length_z_aux_increase A (l: list A): forall acc,
+  list_length_z_aux l acc >= acc.
 Proof.
-  unfold loadimm32; intros. destruct (is_logical_imm32 n); TailNoLabel.
+  induction l; simpl; intros.
+  - lia.
+  - generalize (IHl (Z.succ acc)). lia.
 Qed.
-Hint Resolve loadimm32_label: labels.
 
-Remark loadimm64_label: forall r n k, tail_nolabel k (loadimm64 r n k).
+Lemma bblock_size_aux_pos bb: list_length_z (body bb) + Z.of_nat (length_opt (exit bb)) >= 1.
 Proof.
-  unfold loadimm64; intros. destruct (is_logical_imm64 n); TailNoLabel.
+  destruct (bblock_non_empty bb), (body bb) as [|hd tl], (exit bb); simpl;
+  try (congruence || lia);
+  unfold list_length_z; simpl;
+  generalize (list_length_z_aux_increase _ tl 1); lia.
 Qed.
-Hint Resolve loadimm64_label: labels.
 
-Remark addimm_aux: forall insn rd r1 n k,
-  (forall rd r1 n, nolabel (insn rd r1 n)) ->
-  tail_nolabel k (addimm_aux insn rd r1 n k).
+
+Lemma list_length_add_acc A (l : list A) acc:
+  list_length_z_aux l acc = (list_length_z l) + acc.
 Proof.
-  unfold addimm_aux; intros. 
-  destruct Z.eqb. TailNoLabel. destruct Z.eqb; TailNoLabel.
+    unfold list_length_z, list_length_z_aux. simpl.
+    fold list_length_z_aux.
+    rewrite (list_length_z_aux_shift l acc 0).
+    lia.
 Qed.
 
-Remark addimm32_label: forall rd r1 n k, tail_nolabel k (addimm32 rd r1 n k).
+Lemma list_length_z_cons A hd (tl : list A):
+  list_length_z (hd :: tl) = list_length_z tl + 1.
 Proof.
-  unfold addimm32; intros. 
-  destruct Int.eq. apply addimm_aux; intros; red; auto.
-  destruct Int.eq. apply addimm_aux; intros; red; auto.
-  destruct Int.lt; eapply tail_nolabel_trans; TailNoLabel.
+  unfold list_length_z; simpl; rewrite list_length_add_acc; reflexivity.
 Qed.
-Hint Resolve addimm32_label: labels.
 
-Remark addimm64_label: forall rd r1 n k, tail_nolabel k (addimm64 rd r1 n k).
+Lemma bblock_size_aux bb: size bb = list_length_z (header bb) + list_length_z (body bb) + Z.of_nat (length_opt (exit bb)).
 Proof.
-  unfold addimm64; intros. 
-  destruct Int64.eq. apply addimm_aux; intros; red; auto.
-  destruct Int64.eq. apply addimm_aux; intros; red; auto.
-  destruct Int64.lt; eapply tail_nolabel_trans; TailNoLabel.
+  unfold size.
+  repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). reflexivity.
 Qed.
-Hint Resolve addimm64_label: labels.
 
-Remark logicalimm32_label: forall insn1 insn2 rd r1 n k,
-  (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
-  (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
-  tail_nolabel k (logicalimm32 insn1 insn2 rd r1 n k).
+Lemma header_size_lt_block_size bb:
+  list_length_z (header bb) < size bb.
 Proof.
-  unfold logicalimm32; intros.
-  destruct (is_logical_imm32 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+  rewrite bblock_size_aux.
+  generalize (bblock_non_empty bb); intros NEMPTY; destruct NEMPTY as [HDR|EXIT].
+  - destruct (body bb); try contradiction; rewrite list_length_z_cons;
+    repeat rewrite list_length_z_nat; lia.
+  - destruct (exit bb); try contradiction; simpl; repeat rewrite list_length_z_nat; lia.
 Qed.
 
-Remark logicalimm64_label: forall insn1 insn2 rd r1 n k,
-  (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
-  (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
-  tail_nolabel k (logicalimm64 insn1 insn2 rd r1 n k).
+Lemma body_size_le_block_size bb:
+  list_length_z (body bb) <= size bb.
 Proof.
-  unfold logicalimm64; intros.
-  destruct (is_logical_imm64 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+  rewrite bblock_size_aux; repeat rewrite list_length_z_nat; lia.
 Qed.
 
-Remark move_extended_label: forall rd r1 ex a k, tail_nolabel k (move_extended rd r1 ex a k).
+
+Lemma bblock_size_pos bb: size bb >= 1.
 Proof.
-  unfold move_extended, move_extended_base; intros. destruct Int.eq, ex; TailNoLabel.
+  rewrite (bblock_size_aux bb).
+  generalize (bblock_size_aux_pos bb).
+  generalize (list_length_z_pos (header bb)).
+  lia.
 Qed.
-Hint Resolve move_extended_label: labels.
 
-Remark arith_extended_label: forall insnX insnS rd r1 r2 ex a k,
-  (forall rd r1 r2 x, nolabel (insnX rd r1 r2 x)) ->
-  (forall rd r1 r2 s, nolabel (insnS rd r1 r2 s)) ->
-  tail_nolabel k (arith_extended insnX insnS rd r1 r2 ex a k).
+Lemma unfold_car_cdr bb bbs tc:
+  unfold (bb :: bbs) = OK tc ->
+  exists tbb tc', unfold_bblock bb = OK tbb
+                  /\ unfold bbs = OK tc'
+                  /\ unfold (bb :: bbs) = OK (tbb ++ tc').
 Proof.
-  unfold arith_extended; intros. destruct Int.ltu.
-  TailNoLabel.
-  destruct ex; simpl; TailNoLabel.
+  intros UNFOLD.
+  assert (UF := UNFOLD).
+  unfold unfold in UNFOLD.
+  apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UBB). destruct UBB as (UBB & REST).
+  apply bind_inversion in REST. destruct REST as (? & UNFOLD').
+  fold unfold in UNFOLD'. destruct UNFOLD' as (UNFOLD' & UNFOLD).
+  rewrite <- UNFOLD in UF.
+  eauto.
 Qed.
 
-Remark loadsymbol_label: forall r id ofs k, tail_nolabel k (loadsymbol r id ofs k).
+Lemma unfold_cdr bb bbs tc:
+  unfold (bb :: bbs) = OK tc ->
+  exists tc', unfold bbs = OK tc'.
 Proof.
-  intros; unfold loadsymbol.
-  destruct (Archi.pic_code tt); TailNoLabel. destruct Ptrofs.eq; TailNoLabel.
-Qed. 
-Hint Resolve loadsymbol_label: labels.
+  intros; exploit unfold_car_cdr; eauto. intros (_ & ? & _ & ? & _).
+  eexists; eauto.
+Qed.
 
-Remark transl_cond_label: forall cond args k c,
-  transl_cond cond args k = OK c -> tail_nolabel k c.
+Lemma unfold_car bb bbs tc:
+  unfold (bb :: bbs) = OK tc ->
+  exists tbb, unfold_bblock bb = OK tbb.
 Proof.
-  unfold transl_cond; intros; destruct cond; TailNoLabel.
-- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+  intros; exploit unfold_car_cdr; eauto. intros (? & _ & ? & _ & _).
+  eexists; eauto.
 Qed.
 
-Remark transl_cond_branch_default_label: forall cond args lbl k c,
-  transl_cond_branch_default cond args lbl k = OK c -> tail_nolabel k c.
+Lemma all_blocks_translated:
+  forall bbs tc,
+  unfold bbs = OK tc ->
+  forall bb, In bb bbs ->
+  exists c, unfold_bblock bb = OK c.
 Proof.
-  unfold transl_cond_branch_default; intros. 
-  eapply tail_nolabel_trans; [eapply transl_cond_label;eauto|TailNoLabel].
+  induction bbs as [| bb bbs IHbbs].
+  - contradiction.
+  - intros ? UNFOLD ? IN.
+    (* unfold proceeds by unfolding the basic block at the head of the list and
+     * then recurring *)
+    exploit unfold_car_cdr; eauto. intros (? & ? & ? & ? & _).
+    (* basic block is either in head or tail *)
+    inversion IN as [EQ | NEQ].
+    + rewrite <- EQ; eexists; eauto.
+    + eapply IHbbs; eauto.
 Qed.
-Hint Resolve transl_cond_branch_default_label: labels.
 
-Remark transl_cond_branch_label: forall cond args lbl k c,
-  transl_cond_branch cond args lbl k = OK c -> tail_nolabel k c.
+Lemma entire_body_translated:
+  forall lbi tc,
+  unfold_body lbi = OK tc ->
+  forall bi, In bi lbi ->
+  exists bi', basic_to_instruction bi = OK bi'.
 Proof.
-  unfold transl_cond_branch; intros; destruct args; TailNoLabel; destruct cond; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct (Int.is_power2 n); TailNoLabel.
-- destruct (Int.is_power2 n); TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct (Int64.is_power2' n); TailNoLabel.
-- destruct (Int64.is_power2' n); TailNoLabel.
+  induction lbi as [| a lbi IHlbi].
+  - intros. contradiction.
+  - intros tc UNFOLD_BODY bi IN.
+    unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
+    destruct UNFOLD_BODY as (? & TRANSbi & REST).
+    apply bind_inversion in REST. destruct REST as (? & UNFOLD_BODY' & ?).
+    fold unfold_body in UNFOLD_BODY'.
+
+    inversion IN as [EQ | NEQ].
+    + rewrite <- EQ; eauto.
+    + eapply IHlbi; eauto.
 Qed.
 
-Remark transl_op_label:
-  forall op args r k c,
-  transl_op op args r k = OK c -> tail_nolabel k c.
+Lemma bblock_in_bblocks bbs bb: forall
+  tc pos
+  (UNFOLD: unfold bbs = OK tc)
+  (FINDBB: find_bblock pos bbs = Some bb),
+  In bb bbs.
 Proof.
-  unfold transl_op; intros; destruct op; TailNoLabel.
-- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
-- destruct (Float.eq_dec n Float.zero); TailNoLabel.
-- destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
-- apply logicalimm32_label; unfold nolabel; auto.
-- apply logicalimm32_label; unfold nolabel; auto.
-- apply logicalimm32_label; unfold nolabel; auto.
-- unfold shrx32. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
-- apply arith_extended_label; unfold nolabel; auto.
-- apply arith_extended_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- unfold shrx64. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel.
-- eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
-- destruct (preg_of r); try discriminate; TailNoLabel;
-  (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]).
+  induction bbs as [| b bbs IH].
+  - intros. inversion FINDBB.
+  - destruct pos.
+    + intros. inversion FINDBB as (EQ). rewrite <- EQ. apply in_eq.
+    + intros.
+      exploit unfold_cdr; eauto. intros (tc' & UNFOLD').
+      unfold find_bblock in FINDBB. simpl in FINDBB.
+      fold find_bblock in FINDBB.
+      apply in_cons. eapply IH; eauto.
+    + intros. inversion FINDBB.
 Qed.
 
-Remark transl_addressing_label:
-  forall sz addr args insn k c,
-  transl_addressing sz addr args insn k = OK c ->
-  (forall ad, nolabel (insn ad)) ->
-  tail_nolabel k c.
+Lemma blocks_translated tc pos bbs bb: forall
+  (UNFOLD: unfold bbs = OK tc)
+  (FINDBB: find_bblock pos bbs = Some bb),
+  exists tbb, unfold_bblock bb = OK tbb.
 Proof.
-  unfold transl_addressing; intros; destruct addr; TailNoLabel;
-  eapply tail_nolabel_trans; TailNoLabel.
-  eapply tail_nolabel_trans. apply arith_extended_label; unfold nolabel; auto. TailNoLabel.
+  intros; exploit bblock_in_bblocks; eauto; intros;
+  eapply all_blocks_translated; eauto.
 Qed.
 
-Remark transl_load_label:
-  forall trap chunk addr args dst k c,
-  transl_load trap chunk addr args dst k = OK c -> tail_nolabel k c.
+Lemma size_header b pos f bb: forall
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock pos (fn_blocks f) = Some bb),
+  list_length_z (header bb) <= 1.
 Proof.
-  unfold transl_load; intros; destruct trap; try discriminate; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+  intros.
+  exploit internal_functions_unfold; eauto.
+  intros (tc & FINDtf & TRANStf & ?).
+  exploit blocks_translated; eauto. intros TBB.
+
+  unfold unfold_bblock in TBB.
+  destruct (zle (list_length_z (header bb)) 1).
+  - assumption.
+  - destruct TBB as (? & TBB). discriminate TBB.
 Qed.
 
-Remark transl_store_label:
-  forall chunk addr args src k c,
-  transl_store chunk addr args src k = OK c -> tail_nolabel k c.
+Lemma list_nth_z_neg A (l: list A): forall n,
+  n < 0 -> list_nth_z l n = None.
 Proof.
-  unfold transl_store; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+  induction l; simpl; auto.
+  intros n H; destruct (zeq _ _); (try eapply IHl); lia.
 Qed.
 
-Remark indexed_memory_access_label:
-  forall insn sz base ofs k,
-  (forall ad, nolabel (insn ad)) ->
-  tail_nolabel k (indexed_memory_access insn sz base ofs k).
+Lemma find_bblock_neg bbs: forall pos,
+  pos < 0 -> find_bblock pos bbs = None.
 Proof.
-  unfold indexed_memory_access; intros. destruct offset_representable.
-  TailNoLabel.
-  eapply tail_nolabel_trans; TailNoLabel.
+  induction bbs; simpl; auto.
+  intros. destruct (zlt pos 0). { reflexivity. }
+  destruct (zeq pos 0); contradiction.
 Qed.
 
-Remark loadind_label:
-  forall base ofs ty dst k c,
-  loadind base ofs ty dst k = OK c -> tail_nolabel k c.
+Lemma equal_header_size bb:
+  length (header bb) = length (unfold_label (header bb)).
 Proof.
-  unfold loadind; intros.
-  destruct ty, (preg_of dst); inv H; apply indexed_memory_access_label; intros; exact I.
+  induction (header bb); auto.
+  simpl. rewrite IHl. auto.
 Qed.
 
-Remark storeind_label:
-  forall src base ofs ty k c,
-  storeind src base ofs ty k = OK c -> tail_nolabel k c.
+Lemma equal_body_size:
+  forall bb tb,
+  unfold_body (body bb) = OK tb ->
+  length (body bb) = length tb.
 Proof.
-  unfold storeind; intros.
-  destruct ty, (preg_of src); inv H; apply indexed_memory_access_label; intros; exact I.
+  intros bb. induction (body bb).
+  - simpl. intros ? H. inversion H. auto.
+  - intros tb H. simpl in H. apply bind_inversion in H. destruct H as (? & BI & TAIL).
+    apply bind_inversion in TAIL. destruct TAIL as (tb' & BODY' & CONS). inv CONS.
+    simpl. specialize (IHl tb' BODY'). rewrite IHl. reflexivity.
 Qed.
 
-Remark loadptr_label:
-  forall base ofs dst k, tail_nolabel k (loadptr base ofs dst k).
+Lemma equal_exit_size bb:
+  length_opt (exit bb) = length (unfold_exit (exit bb)).
 Proof.
-  intros. apply indexed_memory_access_label. unfold nolabel; auto.
+  destruct (exit bb); trivial.
 Qed.
 
-Remark storeptr_label:
-  forall src base ofs k, tail_nolabel k (storeptr src base ofs k).
+Lemma bblock_size_preserved bb tb:
+  unfold_bblock bb = OK tb ->
+  size bb = list_length_z tb.
 Proof.
-  intros. apply indexed_memory_access_label. unfold nolabel; auto.
+  unfold unfold_bblock. intros UNFOLD_BBLOCK.
+  destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
+  apply bind_inversion in UNFOLD_BBLOCK. destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & CONS).
+  inversion CONS.
+  unfold size.
+  rewrite equal_header_size, equal_exit_size.
+  erewrite equal_body_size; eauto.
+  rewrite list_length_z_nat.
+  repeat (rewrite app_length).
+  rewrite plus_assoc. auto.
 Qed.
 
-Remark make_epilogue_label:
-  forall f k, tail_nolabel k (make_epilogue f k).
+Lemma size_of_blocks_max_pos_aux:
+  forall bbs tbbs pos bb,
+  find_bblock pos bbs = Some bb ->
+  unfold bbs = OK tbbs ->
+  pos + size bb <= list_length_z tbbs.
 Proof.
-  unfold make_epilogue; intros.
-  (* FIXME destruct is_leaf_function.
-  { TailNoLabel. } *)
-  eapply tail_nolabel_trans.
-  apply loadptr_label.
-  TailNoLabel.
+  induction bbs as [| bb ? IHbbs].
+  - intros tbbs ? ? FINDBB; inversion FINDBB.
+  - simpl; intros tbbs pos bb' FINDBB UNFOLD.
+    apply bind_inversion in UNFOLD; destruct UNFOLD as (tbb & UNFOLD_BBLOCK & H).
+    apply bind_inversion in H; destruct H as (tbbs' & UNFOLD & CONS).
+    inv CONS.
+    destruct (zlt pos 0). { discriminate FINDBB. }
+    destruct (zeq pos 0).
+    + inv FINDBB.
+      exploit bblock_size_preserved; eauto; intros SIZE; rewrite SIZE.
+      repeat (rewrite list_length_z_nat). rewrite app_length, Nat2Z.inj_add.
+      lia.
+    + generalize (IHbbs tbbs' (pos - size bb) bb' FINDBB UNFOLD). intros IH.
+      exploit bblock_size_preserved; eauto; intros SIZE.
+      repeat (rewrite list_length_z_nat); rewrite app_length.
+      rewrite Nat2Z.inj_add; repeat (rewrite <- list_length_z_nat).
+      lia.
 Qed.
 
-Lemma transl_instr_label:
-  forall f i ep k c,
-  transl_instr f i ep k = OK c ->
-  match i with Mlabel lbl => c = Plabel lbl :: k | _ => tail_nolabel k c end.
+Lemma size_of_blocks_max_pos pos f tf bi:
+  find_bblock pos (fn_blocks f) = Some bi ->
+  transf_function f = OK tf ->
+  pos + size bi <= max_pos tf.
 Proof.
-  unfold transl_instr; intros; destruct i; TailNoLabel.
-- eapply loadind_label; eauto.
-- eapply storeind_label; eauto.
-- destruct ep. eapply loadind_label; eauto.
-  eapply tail_nolabel_trans. apply loadptr_label. eapply loadind_label; eauto. 
-- eapply transl_op_label; eauto.
-- eapply transl_load_label; eauto. 
-- eapply transl_store_label; eauto.
-- destruct s0; monadInv H; TailNoLabel.
-- destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]).
-- eapply transl_cond_branch_label; eauto.
-- eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel].
+  unfold transf_function, max_pos.
+  intros FINDBB UNFOLD.
+  apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UNFOLD & H).
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x)). { discriminate H. }
+  inv H. simpl.
+  eapply size_of_blocks_max_pos_aux; eauto.
 Qed.
 
-Lemma transl_instr_label':
-  forall lbl f i ep k c,
-  transl_instr f i ep k = OK c ->
-  find_label lbl c = if Mach.is_label lbl i then Some k else find_label lbl k.
+Lemma unfold_bblock_not_nil bb:
+  unfold_bblock bb = OK nil -> False.
 Proof.
-  intros. exploit transl_instr_label; eauto.
-  destruct i; try (intros [A B]; apply B).
-  intros. subst c. simpl. auto.
+  intros.
+  exploit bblock_size_preserved; eauto. unfold list_length_z; simpl. intros SIZE.
+  generalize (bblock_size_pos bb). intros SIZE'. lia.
 Qed.
 
-Lemma transl_code_label:
-  forall lbl f c ep tc,
-  transl_code f c ep = OK tc ->
-  match Mach.find_label lbl c with
-  | None => find_label lbl tc = None
-  | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_code f c' false = OK tc'
-  end.
+(* same proof as list_nth_z_range (Coqlib) *)
+Lemma find_instr_range:
+  forall c n i,
+  Asm.find_instr n c = Some i -> 0 <= n < list_length_z c.
 Proof.
   induction c; simpl; intros.
-  inv H. auto.
-  monadInv H. rewrite (transl_instr_label' lbl _ _ _ _ _ EQ0).
-  generalize (Mach.is_label_correct lbl a).
-  destruct (Mach.is_label lbl a); intros.
-  subst a. simpl in EQ. exists x; auto.
-  eapply IHc; eauto.
+  discriminate.
+  rewrite list_length_z_cons. destruct (zeq n 0).
+  generalize (list_length_z_pos c); lia.
+  exploit IHc; eauto. lia.
 Qed.
 
-Lemma transl_find_label:
-  forall lbl f tf,
-  transf_function f = OK tf ->
-  match Mach.find_label lbl f.(Mach.fn_code) with
-  | None => find_label lbl tf.(fn_code) = None
-  | Some c => exists tc, find_label lbl tf.(fn_code) = Some tc /\ transl_code f c false = OK tc
-  end.
+Lemma find_instr_tail:
+  forall tbb pos c i,
+  Asm.find_instr pos c = Some i ->
+  Asm.find_instr (pos + list_length_z tbb) (tbb ++ c) = Some i.
 Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
-  monadInv EQ. rewrite transl_code'_transl_code in EQ0. unfold fn_code. 
-  simpl. destruct (storeptr_label X30 XSP (fn_retaddr_ofs f) x) as [A B]; rewrite B. 
-  eapply transl_code_label; eauto.
+  induction tbb as [| ? ? IHtbb].
+  - intros. unfold list_length_z; simpl. rewrite Z.add_0_r. assumption.
+  - intros. rewrite list_length_z_cons. simpl.
+    destruct (zeq (pos + (list_length_z tbb + 1)) 0).
+    + exploit find_instr_range; eauto. intros POS_RANGE.
+      generalize (list_length_z_pos tbb). lia.
+    + replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
+      eapply IHtbb; eauto.
 Qed.
 
-End TRANSL_LABEL.
+Lemma size_of_blocks_bounds fb pos f bi:
+      Genv.find_funct_ptr ge fb = Some (Internal f) ->
+      find_bblock pos (fn_blocks f) = Some bi ->
+      pos + size bi <= Ptrofs.max_unsigned.
+Proof.
+  intros; exploit internal_functions_translated; eauto.
+  intros (tf & _ & TRANSf).
+  assert (pos + size bi <= max_pos tf). { eapply size_of_blocks_max_pos; eauto. }
+  assert (max_pos tf <= Ptrofs.max_unsigned). { eapply functions_bound_max_pos; eauto. }
+  lia.
+Qed.
 
-(** A valid branch in a piece of Mach code translates to a valid ``go to''
-  transition in the generated Asm code. *)
+Lemma find_instr_bblock_tail:
+  forall tbb bb pos c i,
+  Asm.find_instr pos c = Some i ->
+  unfold_bblock bb = OK tbb ->
+  Asm.find_instr (pos + size bb ) (tbb ++ c) = Some i.
+Proof.
+  induction tbb.
+   - intros. exploit unfold_bblock_not_nil; eauto. intros. contradiction.
+   - intros. simpl.
+     destruct (zeq (pos + size bb) 0).
+     + (* absurd *)
+       exploit find_instr_range; eauto. intros POS_RANGE.
+       generalize (bblock_size_pos bb). intros SIZE. lia.
+     + erewrite bblock_size_preserved; eauto.
+       rewrite list_length_z_cons.
+       replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
+       apply find_instr_tail; auto.
+Qed.
 
-Lemma find_label_goto_label:
-  forall f tf lbl rs m c' b ofs,
-  Genv.find_funct_ptr ge b = Some (Internal f) ->
-  transf_function f = OK tf ->
-  rs PC = Vptr b ofs ->
-  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
-  exists tc', exists rs',
-     goto_label tf lbl rs m = Next rs' m
-  /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
-  /\ forall r, r <> PC -> rs'#r = rs#r.
-Proof.
-  intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
-  intros [tc [A B]].
-  exploit label_pos_code_tail; eauto. instantiate (1 := 0).
-  intros [pos' [P [Q R]]].
-  exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
-  split. unfold goto_label. rewrite P. rewrite H1. auto.
-  split. rewrite Pregmap.gss. constructor; auto.
-  rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
-  auto. omega.
-  generalize (transf_function_no_overflow _ _ H0). omega.
-  intros. apply Pregmap.gso; auto.
-Qed.
-
-(** Existence of return addresses *)
+Lemma list_nth_z_find_label:
+  forall (ll : list label) il n l,
+  list_nth_z ll n = Some l ->
+  Asm.find_instr n ((unfold_label ll) ++ il) = Some (Asm.Plabel l).
+Proof.
+  induction ll.
+  - intros. inversion H.
+  - intros. simpl.
+    destruct (zeq n 0) as [Z | NZ].
+    + inversion H as (H'). rewrite Z in H'. simpl in H'. inv H'. reflexivity.
+    + simpl in H. destruct (zeq n 0). { contradiction. }
+      apply IHll; auto.
+Qed.
 
-Lemma return_address_exists:
-  forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) ->
-  exists ra, return_address_offset f c ra.
+Lemma list_nth_z_find_bi:
+  forall lbi bi tlbi n bi' exit,
+  list_nth_z lbi n = Some bi ->
+  unfold_body lbi = OK tlbi ->
+  basic_to_instruction bi = OK bi' ->
+  Asm.find_instr n (tlbi ++ exit) = Some bi'.
 Proof.
-  intros. eapply Asmgenproof0.return_address_exists; eauto.
-- intros. exploit transl_instr_label; eauto.
-  destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor.
-- intros. monadInv H0.
-  destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. monadInv EQ.
-  rewrite transl_code'_transl_code in EQ0.
-  exists x; exists true; split; auto. unfold fn_code.
-  constructor. apply (storeptr_label X30 XSP (fn_retaddr_ofs f0) x).
-- exact transf_function_no_overflow.
-Qed.
-
-(** * Proof of semantic preservation *)
-
-(** Semantic preservation is proved using simulation diagrams
-  of the following form.
-<<
-           st1 --------------- st2
-            |                   |
-           t|                  *|t
-            |                   |
-            v                   v
-           st1'--------------- st2'
->>
-  The invariant is the [match_states] predicate below, which includes:
-- The Asm code pointed by the PC register is the translation of
-  the current Mach code sequence.
-- Mach register values and Asm register values agree.
-*)
+  induction lbi.
+  - intros. inversion H.
+  - simpl. intros.
+    apply bind_inversion in H0. destruct H0 as (? & ? & ?).
+    apply bind_inversion in H2. destruct H2 as (? & ? & ?).
+    destruct (zeq n 0) as [Z | NZ].
+    + destruct n.
+      * inversion H as (BI). rewrite BI in *.
+        inversion H3. simpl. congruence.
+      * (* absurd *) congruence.
+      * (* absurd *) congruence.
+    + inv H3. simpl. destruct (zeq n 0). { contradiction. }
+      eapply IHlbi; eauto.
+Qed.
 
-Inductive match_states: Mach.state -> Asm.state -> Prop :=
-  | match_states_intro:
-      forall s fb sp c ep ms m m' rs f tf tc
-        (STACKS: match_stack ge s)
-        (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
-        (MEXT: Mem.extends m m')
-        (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
-        (AG: agree ms sp rs)
-        (DXP: ep = true -> rs#X29 = parent_sp s)
-        (LEAF: is_leaf_function f = true -> rs#RA = parent_ra s),
-      match_states (Mach.State s fb sp c ms m)
-                   (Asm.State rs m')
-  | match_states_call:
-      forall s fb ms m m' rs
-        (STACKS: match_stack ge s)
-        (MEXT: Mem.extends m m')
-        (AG: agree ms (parent_sp s) rs)
-        (ATPC: rs PC = Vptr fb Ptrofs.zero)
-        (ATLR: rs RA = parent_ra s),
-      match_states (Mach.Callstate s fb ms m)
-                   (Asm.State rs m')
-  | match_states_return:
-      forall s ms m m' rs
-        (STACKS: match_stack ge s)
-        (MEXT: Mem.extends m m')
-        (AG: agree ms (parent_sp s) rs)
-        (ATPC: rs PC = parent_ra s),
-      match_states (Mach.Returnstate s ms m)
-                   (Asm.State rs m').
-
-Lemma exec_straight_steps:
-  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2,
-  match_stack ge s ->
-  Mem.extends m2 m2' ->
-  Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
-  (forall k c (TR: transl_instr f i ep k = OK c),
-   exists rs2,
-       exec_straight tge tf c rs1 m1' k rs2 m2'
-    /\ agree ms2 sp rs2
-    /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)
-    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
-  exists st',
-  plus step tge (State rs1 m1') E0 st' /\
-  match_states (Mach.State s fb sp c ms2 m2) st'.
-Proof.
-  intros. inversion H2. subst. monadInv H7.
-  exploit H3; eauto. intros [rs2 [A [B [C D]]]].
-  exists (State rs2 m2'); split.
-  - eapply exec_straight_exec; eauto.
-  - econstructor; eauto. eapply exec_straight_at; eauto.
-Qed.
-
-Lemma exec_straight_steps_goto:
-  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
-  match_stack ge s ->
-  Mem.extends m2 m2' ->
-  Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
-  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
-  it1_is_parent ep i = false ->
-  (forall k c (TR: transl_instr f i ep k = OK c),
-   exists jmp, exists k', exists rs2,
-       exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2'
-    /\ agree ms2 sp rs2
-    /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
-    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
-  exists st',
-  plus step tge (State rs1 m1') E0 st' /\
-  match_states (Mach.State s fb sp c' ms2 m2) st'.
-Proof.
-  intros. inversion H3. subst. monadInv H9.
-  exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
-  generalize (functions_transl _ _ _ H7 H8); intro FN.
-  generalize (transf_function_no_overflow _ _ H8); intro NOOV.
-  exploit exec_straight_steps_2; eauto.
-  intros [ofs' [PC2 CT2]].
-  exploit find_label_goto_label; eauto.
-  intros [tc' [rs3 [GOTO [AT' OTH]]]].
-  exists (State rs3 m2'); split.
-  eapply plus_right'.
-  eapply exec_straight_steps_1; eauto.
-  econstructor; eauto.
-  eapply find_instr_tail. eauto.
-  rewrite C. eexact GOTO.
-  traceEq.
-  econstructor; eauto.
-  apply agree_exten with rs2; auto with asmgen.
-  congruence.
-  rewrite OTH by congruence; auto.
-Qed.
-
-Lemma exec_straight_opt_steps_goto:
-  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
-  match_stack ge s ->
-  Mem.extends m2 m2' ->
-  Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
-  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
-  it1_is_parent ep i = false ->
-  (forall k c (TR: transl_instr f i ep k = OK c),
-   exists jmp, exists k', exists rs2,
-       exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2'
-    /\ agree ms2 sp rs2
-    /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
-    /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
-  exists st',
-  plus step tge (State rs1 m1') E0 st' /\
-  match_states (Mach.State s fb sp c' ms2 m2) st'.
-Proof.
-  intros. inversion H3. subst. monadInv H9.
-  exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
-  generalize (functions_transl _ _ _ H7 H8); intro FN.
-  generalize (transf_function_no_overflow _ _ H8); intro NOOV.
-  inv A.
-- exploit find_label_goto_label; eauto.
-  intros [tc' [rs3 [GOTO [AT' OTH]]]].
-  exists (State rs3 m2'); split.
-  apply plus_one. econstructor; eauto.
-  eapply find_instr_tail. eauto.
-  rewrite C. eexact GOTO.
-  econstructor; eauto.
-  apply agree_exten with rs2; auto with asmgen.
-  congruence.
-  rewrite OTH by congruence; auto.
-- exploit exec_straight_steps_2; eauto.
-  intros [ofs' [PC2 CT2]].
-  exploit find_label_goto_label; eauto.
-  intros [tc' [rs3 [GOTO [AT' OTH]]]].
-  exists (State rs3 m2'); split.
-  eapply plus_right'.
-  eapply exec_straight_steps_1; eauto.
-  econstructor; eauto.
-  eapply find_instr_tail. eauto.
-  rewrite C. eexact GOTO.
-  traceEq.
-  econstructor; eauto.
-  apply agree_exten with rs2; auto with asmgen.
-  congruence.
-  rewrite OTH by congruence; auto.
-Qed.
-
-(** We need to show that, in the simulation diagram, we cannot
-  take infinitely many Mach transitions that correspond to zero
-  transitions on the Asm side.  Actually, all Mach transitions
-  correspond to at least one Asm transition, except the
-  transition from [Machsem.Returnstate] to [Machsem.State].
-  So, the following integer measure will suffice to rule out
-  the unwanted behaviour. *)
-
-Definition measure (s: Mach.state) : nat :=
-  match s with
-  | Mach.State _ _ _ _ _ _ => 0%nat
-  | Mach.Callstate _ _ _ _ => 0%nat
-  | Mach.Returnstate _ _ _ => 1%nat
+Lemma list_nth_z_find_bi_with_header:
+  forall ll lbi bi tlbi n bi' (rest : list Asm.instruction),
+  list_nth_z lbi (n - list_length_z ll) = Some bi ->
+  unfold_body lbi = OK tlbi ->
+  basic_to_instruction bi = OK bi' ->
+  Asm.find_instr n ((unfold_label ll) ++ (tlbi) ++ (rest)) = Some bi'.
+Proof.
+  induction ll.
+  - unfold list_length_z. simpl. intros.
+    replace (n - 0) with n in H by lia. eapply list_nth_z_find_bi; eauto.
+  - intros. simpl. destruct (zeq n 0).
+    + rewrite list_length_z_cons in H. rewrite e in H.
+      replace (0 - (list_length_z ll + 1)) with (-1 - (list_length_z ll)) in H by lia.
+      generalize (list_length_z_pos ll). intros.
+      rewrite list_nth_z_neg in H; try lia. inversion H.
+    + rewrite list_length_z_cons in H.
+      replace (n - (list_length_z ll + 1)) with (n -1 - (list_length_z ll)) in H by lia.
+      eapply IHll; eauto.
+Qed.
+
+(* XXX unused *)
+Lemma range_list_nth_z:
+  forall (A: Type) (l: list A) n,
+  0 <= n < list_length_z l ->
+  exists x, list_nth_z l n = Some x.
+Proof.
+  induction l.
+  - intros. unfold list_length_z in H. simpl in H. lia.
+  - intros n. destruct (zeq n 0).
+    + intros. simpl. destruct (zeq n 0). { eauto. } contradiction.
+    + intros H. rewrite list_length_z_cons in H.
+      simpl. destruct (zeq n 0). { contradiction. }
+      replace (Z.pred n) with (n - 1) by lia.
+      eapply IHl; lia.
+Qed.
+
+Lemma list_nth_z_n_too_big:
+  forall (A: Type) (l: list A) n,
+  0 <= n ->
+  list_nth_z l n = None ->
+  n >= list_length_z l.
+Proof.
+  induction l.
+  - intros. unfold list_length_z. simpl. lia.
+  - intros. rewrite list_length_z_cons.
+    simpl in H0.
+    destruct (zeq n 0) as [N | N].
+    + inversion H0.
+    + (* XXX there must be a more elegant way to prove this simple fact *)
+      assert (n > 0). { lia. }
+      assert (0 <= n - 1). { lia. }
+      generalize (IHl (n - 1)). intros IH.
+      assert (n - 1 >= list_length_z l). { auto. }
+      assert (n > list_length_z l); lia.
+Qed.
+
+Lemma find_instr_past_header:
+  forall labels n rest,
+  list_nth_z labels n = None ->
+  Asm.find_instr n (unfold_label labels ++ rest) =
+  Asm.find_instr (n - list_length_z labels) rest.
+Proof.
+  induction labels as [| label labels' IH].
+  - unfold list_length_z; simpl; intros; rewrite Z.sub_0_r; reflexivity.
+  - intros. simpl. destruct (zeq n 0) as [N | N].
+    + rewrite N in H. inversion H.
+    + rewrite list_length_z_cons.
+      replace (n - (list_length_z labels' + 1)) with (n - 1 - list_length_z labels') by lia.
+      simpl in H. destruct (zeq n 0). { contradiction. }
+      replace (Z.pred n) with (n - 1) in H by lia.
+      apply IH; auto.
+Qed.
+
+(* very similar to find_instr_past_header *)
+Lemma find_instr_past_body:
+  forall lbi n tlbi rest,
+  list_nth_z lbi n = None ->
+  unfold_body lbi = OK tlbi ->
+  Asm.find_instr n (tlbi ++ rest) =
+  Asm.find_instr (n - list_length_z lbi) rest.
+Proof.
+  induction lbi.
+  - unfold list_length_z; simpl; intros ? ? ? ? H. inv H; rewrite Z.sub_0_r; reflexivity.
+  - intros n tlib ? NTH UNFOLD_BODY.
+    unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
+    destruct UNFOLD_BODY as (? & BI & H).
+    apply bind_inversion in H. destruct H as (? & UNFOLD_BODY' & CONS).
+    fold unfold_body in UNFOLD_BODY'. inv CONS.
+    simpl; destruct (zeq n 0) as [N|N].
+    + rewrite N in NTH; inversion NTH.
+    + rewrite list_length_z_cons.
+      replace (n - (list_length_z lbi + 1)) with (n - 1 - list_length_z lbi) by lia.
+      simpl in NTH. destruct (zeq n 0). { contradiction. }
+      replace (Z.pred n) with (n - 1)  in NTH by lia.
+      apply IHlbi; auto.
+Qed.
+
+Lemma n_beyond_body:
+  forall bb n,
+  0 <= n < size bb ->
+  list_nth_z (header bb) n = None ->
+  list_nth_z (body bb) (n - list_length_z (header bb)) = None ->
+  n >= Z.of_nat (length (header bb) + length (body bb)).
+Proof.
+  intros.
+  assert (0 <= n). { lia. }
+  generalize (list_nth_z_n_too_big label (header bb) n H2 H0). intros.
+  generalize (list_nth_z_n_too_big _ (body bb) (n - list_length_z (header bb))). intros.
+  unfold size in H.
+
+  assert (0 <= n - list_length_z (header bb)). { lia. }
+  assert (n - list_length_z (header bb) >= list_length_z (body bb)). { apply H4; auto. }
+
+  assert (n >= list_length_z (header bb) + list_length_z (body bb)). { lia. }
+  rewrite Nat2Z.inj_add.
+  repeat (rewrite <- list_length_z_nat). assumption.
+Qed.
+
+Lemma exec_arith_instr_dont_move_PC ai rs rs': forall
+  (BASIC: exec_arith_instr lk ai rs = rs'),
+  rs PC = rs' PC.
+Proof.
+  destruct ai; simpl; intros;
+  try (rewrite <- BASIC; rewrite Pregmap.gso; auto; discriminate).
+  - destruct i; simpl in BASIC;
+    try destruct (negb _); rewrite <- BASIC;
+    repeat rewrite Pregmap.gso; try discriminate; reflexivity.
+  - destruct i; simpl in BASIC. 
+    1,2: rewrite <- BASIC; repeat rewrite Pregmap.gso; try discriminate; reflexivity.
+    destruct sz;
+    try (unfold compare_single in BASIC || unfold compare_float in BASIC);
+    destruct (rs r1), (rs r2);
+    try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+  - destruct i; simpl in BASIC;
+    destruct is;
+    try (unfold compare_int in BASIC || unfold compare_long in BASIC);
+    try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+  - destruct i; simpl in BASIC; destruct sz;
+    try (unfold compare_single in BASIC || unfold compare_float in BASIC);
+    destruct (rs r1);
+    try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+  - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity).
+  - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity).
+Qed.
+
+Lemma exec_basic_dont_move_PC bi rs m rs' m': forall
+  (BASIC: exec_basic lk ge bi rs m = Next rs' m'),
+  rs PC = rs' PC.
+Proof.
+  destruct bi; simpl; intros.
+  - inv BASIC. exploit exec_arith_instr_dont_move_PC; eauto.
+  - unfold exec_load in BASIC.
+    destruct ld.
+    + unfold exec_load_rd_a in BASIC.
+      destruct Mem.loadv. 2: { discriminate BASIC. }
+      inv BASIC. rewrite Pregmap.gso; try discriminate; auto.
+    + unfold exec_load_double, is_pair_addressing_mode_correct in BASIC.
+      destruct a; try discriminate BASIC.
+      do 2 (destruct Mem.loadv; try discriminate BASIC).
+      inv BASIC. rewrite Pregmap.gso; try discriminate; auto.
+  - unfold exec_store in BASIC.
+    destruct st.
+    + unfold exec_store_rs_a in BASIC.
+      destruct Mem.storev. 2: { discriminate BASIC. }
+      inv BASIC; reflexivity.
+    + unfold exec_store_double in BASIC.
+      destruct a; try discriminate BASIC.
+      do 2 (destruct Mem.storev; try discriminate BASIC).
+      inv BASIC; reflexivity.
+  - destruct Mem.alloc, Mem.store. 2: { discriminate BASIC. }
+    inv BASIC. repeat (rewrite Pregmap.gso; try discriminate). reflexivity.
+  - destruct Mem.loadv. 2: { discriminate BASIC. }
+    destruct rs, Mem.free; try discriminate BASIC.
+    inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+  - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+  - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+  - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+  - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+  - inv BASIC; auto.
+Qed.
+
+Lemma exec_body_dont_move_PC_aux:
+  forall bis rs m rs' m'
+  (BODY: exec_body lk ge bis rs m = Next rs' m'),
+  rs PC = rs' PC.
+Proof.
+  induction bis.
+  - intros; inv BODY; reflexivity.
+  - simpl; intros.
+    remember (exec_basic lk ge a rs m) as bi eqn:BI; destruct bi. 2: { discriminate BODY. }
+    symmetry in BI; destruct s in BODY, BI; simpl in BODY, BI.
+    exploit exec_basic_dont_move_PC; eauto; intros AGPC; rewrite AGPC.
+    eapply IHbis; eauto.
+Qed.
+
+Lemma exec_body_dont_move_PC bb rs m rs' m': forall
+  (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+  rs PC = rs' PC.
+Proof. apply exec_body_dont_move_PC_aux. Qed.
+
+Lemma find_instr_bblock:
+  forall n lb pos bb tlb
+  (FINDBB: find_bblock pos lb = Some bb)
+  (UNFOLD: unfold lb = OK tlb)
+  (SIZE: 0 <= n < size bb),
+  exists i, is_nth_inst bb n i /\ Asm.find_instr (pos+n) tlb = Some i.
+Proof.
+  induction lb as [| b lb IHlb].
+  - intros. inversion FINDBB.
+  - intros pos bb tlb FINDBB UNFOLD SIZE.
+    destruct pos.
+    + inv FINDBB. simpl.
+      exploit unfold_car_cdr; eauto. intros (tbb & tlb' & UNFOLD_BBLOCK & UNFOLD' & UNFOLD_cons).
+      rewrite UNFOLD in UNFOLD_cons. inversion UNFOLD_cons.
+      unfold unfold_bblock in UNFOLD_BBLOCK.
+      destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
+      apply bind_inversion in UNFOLD_BBLOCK.
+      destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & H).
+      inversion H as (UNFOLD_BBLOCK).
+      remember (list_nth_z (header bb) n) as label_opt eqn:LBL. destruct label_opt.
+      * (* nth instruction is a label *)
+        eexists; split. { eapply is_nth_label; eauto. }
+        inversion UNFOLD_cons.
+        symmetry in LBL.
+        rewrite <- app_assoc.
+        apply list_nth_z_find_label; auto.
+      * remember (list_nth_z (body bb) (n - list_length_z (header bb))) as bi_opt eqn:BI.
+        destruct bi_opt.
+        -- (* nth instruction is a basic instruction *)
+           exploit list_nth_z_in; eauto. intros INBB.
+           exploit entire_body_translated; eauto. intros BI'.
+           destruct BI'.
+           eexists; split.
+            ++ eapply is_nth_basic; eauto.
+            ++ repeat (rewrite <- app_assoc). eapply list_nth_z_find_bi_with_header; eauto.
+        -- (* nth instruction is the exit instruction *)
+           generalize n_beyond_body. intros TEMP.
+           assert (n >= Z.of_nat (Datatypes.length (header bb)
+                        + Datatypes.length (body bb))) as NGE. { auto. } clear TEMP.
+           remember (exit bb) as exit_opt eqn:EXIT. destruct exit_opt.
+           ++ rewrite <- app_assoc. rewrite find_instr_past_header; auto.
+              rewrite <- app_assoc. erewrite find_instr_past_body; eauto.
+              assert (SIZE' := SIZE).
+              unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
+              destruct SIZE as (LOWER & UPPER).
+              repeat (rewrite Nat2Z.inj_add in UPPER).
+              repeat (rewrite <- list_length_z_nat in UPPER). repeat (rewrite Nat2Z.inj_add in NGE).
+              repeat (rewrite <- list_length_z_nat in NGE). simpl in UPPER.
+              assert (n = list_length_z (header bb) + list_length_z (body bb)). { lia. }
+              assert (n = size bb - 1). {
+                unfold size. rewrite <- EXIT. simpl.
+                repeat (rewrite Nat2Z.inj_add). repeat (rewrite <- list_length_z_nat). simpl. lia.
+              }
+              symmetry in EXIT.
+              eexists; split.
+              ** eapply is_nth_ctlflow; eauto.
+              ** simpl.
+                 destruct (zeq (n - list_length_z (header bb) - list_length_z (body bb)) 0). { reflexivity. }
+                 (* absurd *) lia.
+           ++ (* absurd *)
+              unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
+              destruct SIZE as (? & SIZE'). rewrite Nat.add_0_r in SIZE'. lia.
+    + unfold find_bblock in FINDBB; simpl in FINDBB; fold find_bblock in FINDBB.
+      inversion UNFOLD as (UNFOLD').
+      apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD_BBLOCK' & UNFOLD')).
+      apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD' & TLB)).
+      inversion TLB.
+      generalize (IHlb _ _ _ FINDBB UNFOLD'). intros IH.
+      destruct IH as (? & (IH_is_nth & IH_find_instr)); eauto.
+      eexists; split.
+      * apply IH_is_nth.
+      * replace (Z.pos p + n) with (Z.pos p + n - size b + size b) by lia.
+        eapply find_instr_bblock_tail; try assumption.
+        replace (Z.pos p + n - size b) with (Z.pos p - size b + n) by lia.
+        apply IH_find_instr.
+    + (* absurd *)
+      generalize (Pos2Z.neg_is_neg p). intros. exploit (find_bblock_neg (b :: lb)); eauto.
+      rewrite FINDBB. intros CONTRA. inversion CONTRA.
+Qed.
+
+Lemma exec_header_simulation b ofs f bb rs m: forall
+  (ATPC: rs PC = Vptr b ofs)
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb),
+  exists s', star Asm.step tge (State rs m) E0 s'
+             /\ match_internal (list_length_z (header bb)) (State rs m) s'.
+Proof.
+  intros.
+  exploit internal_functions_unfold; eauto.
+  intros (tc & FINDtf & TRANStf & _).
+  assert (BNDhead: list_length_z (header bb) <= 1). { eapply size_header; eauto. }
+  destruct (header bb) as [|l[|]] eqn: EQhead.
+  + (* header nil *)
+    eexists; split.
+    - eapply star_refl.
+    - split; eauto.
+      unfold list_length_z; rewrite !ATPC; simpl.
+      rewrite Ptrofs.add_zero; auto.
+  + (* header one *)
+    assert (Lhead: list_length_z (header bb) = 1). { rewrite EQhead; unfold list_length_z; simpl. auto. }
+    exploit (find_instr_bblock 0); eauto.
+    { generalize (bblock_size_pos bb). lia. }
+    intros (i & NTH & FIND_INSTR).
+    inv NTH.
+    * rewrite EQhead in H; simpl in H. inv H.
+      replace (Ptrofs.unsigned ofs + 0) with (Ptrofs.unsigned ofs) in FIND_INSTR by lia.
+      eexists. split.
+      - eapply star_one.
+        eapply Asm.exec_step_internal; eauto.
+        simpl; eauto.
+      - unfold list_length_z; simpl. split; eauto.
+        intros r; destruct r; simpl; congruence || auto.
+    * (* absurd case *)
+      erewrite list_nth_z_neg in * |-; [ congruence | rewrite Lhead; lia].
+    * (* absurd case *)
+      rewrite bblock_size_aux, Lhead in *. generalize (bblock_size_aux_pos bb). lia.
+  + (* absurd case *)
+    unfold list_length_z in BNDhead. simpl in *.
+    generalize (list_length_z_aux_increase _ l1 2); lia.
+Qed.
+
+Lemma eval_addressing_preserved a rs1 rs2:
+  (forall r : preg, r <> PC -> rs1 r = rs2 r) ->
+  eval_addressing lk a rs1 = Asm.eval_addressing tge a rs2.
+Proof.
+  intros EQ.
+  destruct a; simpl; try (rewrite !EQ; congruence).
+  auto.
+Qed.
+
+Ltac next_stuck_cong := try (unfold Next, Stuck in *; congruence).
+
+Ltac inv_ok_eq :=
+  repeat match goal with
+  | [EQ: OK ?x = OK ?y |- _ ]
+      => inversion EQ; clear EQ; subst
+  end.
+
+Ltac reg_rwrt :=
+  match goal with
+  | [e: DR _ = DR _ |- _ ]
+      => rewrite e in *
   end.
 
-Remark preg_of_not_X29: forall r, negb (mreg_eq r R29) = true -> IR X29 <> preg_of r.
-Proof.
-  intros. change (IR X29) with (preg_of R29). red; intros.
-  exploit preg_of_injective; eauto. intros; subst r; discriminate.
-Qed.
-
-Lemma sp_val': forall ms sp rs, agree ms sp rs -> sp = rs XSP.
-Proof.
-  intros. eapply sp_val; eauto. 
-Qed. 
-
-(** This is the simulation diagram.  We prove it by case analysis on the Mach transition. *)
-
-Theorem step_simulation:
-  forall S1 t S2, Mach.step return_address_offset ge S1 t S2 ->
-  forall S1' (MS: match_states S1 S1')  (WF: wf_state ge S1),
-  (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
-  \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
-Proof.
-  induction 1; intros; inv MS.
-
-- (* Mlabel *)
-  left; eapply exec_straight_steps; eauto; intros.
-  monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split. { apply agree_nextinstr; auto. }
-  split. { simpl; congruence. }
-  rewrite nextinstr_inv by congruence; assumption.
-
-- (* Mgetstack *)
-  unfold load_stack in H.
-  exploit Mem.loadv_extends; eauto. intros [v' [A B]].
-  rewrite (sp_val _ _ _ AG) in A.
-  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
-  exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q [R S]]]].
-  exists rs'; split. eauto.
-  split. { eapply agree_set_mreg; eauto with asmgen. congruence. }
-  split. { simpl; congruence. }
-  rewrite S. assumption.
-
-- (* Msetstack *)
-  unfold store_stack in H.
-  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
-  exploit Mem.storev_extends; eauto. intros [m2' [A B]].
-  left; eapply exec_straight_steps; eauto.
-  rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
-  exploit storeind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
-  exists rs'; split. eauto.
-  split. eapply agree_undef_regs; eauto with asmgen.
-  simpl; intros.
-  split. rewrite Q; auto with asmgen.
-  rewrite R. assumption.
-
-- (* Mgetparam *)
-  assert (f0 = f) by congruence; subst f0.
-  unfold load_stack in *.
-  exploit Mem.loadv_extends. eauto. eexact H0. auto.
-  intros [parent' [A B]]. rewrite (sp_val' _ _ _ AG) in A.
-  exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
-  exploit Mem.loadv_extends. eauto. eexact H1. auto.
-  intros [v' [C D]].
-Opaque loadind.
-  left; eapply exec_straight_steps; eauto; intros. monadInv TR. 
-  destruct ep.
-(* X30 contains parent *)
-  exploit loadind_correct. eexact EQ.
-  instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence.
-  intros [rs1 [P [Q [R S]]]].
-  exists rs1; split. eauto.
-  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
-  simpl; split; intros.
-  { rewrite R; auto with asmgen.
-    apply preg_of_not_X29; auto.
+Ltac destruct_reg_inv :=
+  repeat match goal with
+  | [ H : match ?reg with _ => _ end = _ |- _ ]
+      => simpl in *; destruct reg; try congruence; try inv_ok_eq; try reg_rwrt
+  end.
+
+Ltac destruct_ireg_inv :=
+  repeat match goal with
+  | [ H : match ?reg with _ => _ end = _ |- _ ]
+      => destruct reg as [[r|]|]; try congruence; try inv_ok_eq; subst
+  end.
+
+Ltac destruct_reg_size :=
+  simpl in *;
+  match goal with
+  | [ |- context [ match ?reg with _ => _ end ] ]
+      => destruct reg; try congruence
+  end.
+
+Ltac find_rwrt_ag :=
+  simpl in *;
+  match goal with
+  | [ AG: forall r, r <> ?PC -> _ r = _ r |- _ ]
+      => repeat rewrite <- AG; try congruence
+  end.
+
+Ltac inv_matchi :=
+  match goal with
+  | [ MATCHI : match_internal _ _ _ |- _ ]
+      => inversion MATCHI; subst; find_rwrt_ag
+  end.
+
+Ltac destruct_ir0_reg :=
+  match goal with
+  | [ |- context [ ir0 _ _ ?r ] ]
+      => unfold ir0 in *; destruct r; find_rwrt_ag; eauto
+  end.
+
+Ltac pc_not_sp :=
+  match goal with
+  | [ |- ?PC <> ?SP ]
+      => destruct (PregEq.eq SP PC); repeat congruence; discriminate
+  end.
+
+Ltac update_x_access_x :=
+  subst; rewrite !Pregmap.gss; auto.
+
+Ltac update_x_access_r :=
+  rewrite !Pregmap.gso; auto.
+
+Lemma nextinstr_agree_but_pc rs1 rs2: forall
+  (AG: forall r, r <> PC -> rs1 r = rs2 r),
+  forall r, r <> PC -> rs1 r = Asm.nextinstr rs2 r.
+Proof.
+  intros; unfold Asm.nextinstr in *; rewrite Pregmap.gso in *; eauto.
+Qed.
+
+Lemma ptrofs_nextinstr_agree rs1 rs2 n: forall
+  (BOUNDED : 0 <= n <= Ptrofs.max_unsigned)
+  (AGPC : Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC),
+  Val.offset_ptr (rs1 PC) (Ptrofs.repr (n + 1)) = Asm.nextinstr rs2 PC.
+Proof.
+  intros; unfold Asm.nextinstr; rewrite Pregmap.gss.
+  rewrite <- Ptrofs.unsigned_one; rewrite <- (Ptrofs.unsigned_repr n); eauto;
+  rewrite <- Ptrofs.add_unsigned; rewrite <- Val.offset_ptr_assoc; rewrite AGPC; eauto.
+Qed.
+
+Lemma load_rd_a_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk f a: forall
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (HLOAD: exec_load_rd_a lk chk f a rd rs1 m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem), Asm.exec_load tge chk f a rd rs2 m2 = Next rs2' m2'
+  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  unfold exec_load_rd_a, Asm.exec_load in *.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  rewrite <- (eval_addressing_preserved a rs1 rs2); auto.
+  destruct (Mem.loadv _ _ _).
+  + inversion HLOAD; auto. repeat (econstructor; eauto).
+    * eapply nextinstr_agree_but_pc; intros.
+      destruct (PregEq.eq r rd); try update_x_access_x; try update_x_access_r.
+    * eapply ptrofs_nextinstr_agree; eauto.
+  + next_stuck_cong.
+Qed.
+ 
+Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk1 chk2 f a: forall
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (HLOAD: exec_load_double lk chk1 chk2 f a rd1 rd2 rs1 m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk1 chk2 f a rd1 rd2 rs2 m2 = Next rs2' m2'
+  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  unfold exec_load_double, Asm.exec_load_double in *.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  erewrite <- !eval_addressing_preserved; eauto.
+  destruct (is_pair_addressing_mode_correct a); try discriminate.
+  destruct (Mem.loadv _ _ _);
+  destruct (Mem.loadv chk2 m2
+      (eval_addressing lk
+         (get_offset_addr a match chk1 with
+                            | Mint32 | Mfloat32| Many32 => 4
+                            | _ => 8
+                            end) rs1));
+  inversion HLOAD; auto.
+  repeat (econstructor; eauto).
+  * eapply nextinstr_agree_but_pc; intros.
+    destruct (PregEq.eq r rd2); destruct (PregEq.eq r rd1).
+    - try update_x_access_x. 
+    - try update_x_access_x.
+    - subst; repeat rewrite Pregmap.gso, Pregmap.gss; auto.
+    - try update_x_access_r.
+  * eapply ptrofs_nextinstr_agree; eauto.
+Qed.
+
+Lemma store_rs_a_preserved n rs1 m1 rs1' m1' rs2 m2 v chk a: forall
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (HSTORE: exec_store_rs_a lk chk a v rs1 m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem), Asm.exec_store tge chk a v rs2 m2 = Next rs2' m2'
+  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  unfold exec_store_rs_a, Asm.exec_store in *.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  rewrite <- (eval_addressing_preserved a rs1 rs2); auto.
+  destruct (Mem.storev _ _ _ _).
+  + inversion HSTORE; auto. repeat (econstructor; eauto).
+    * eapply nextinstr_agree_but_pc; intros.
+      subst. apply EQR. auto.
+    * eapply ptrofs_nextinstr_agree; subst; eauto.
+  + next_stuck_cong.
+Qed.
+
+Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (HSTORE: exec_store_double lk chk1 chk2 a v1 v2 rs1 m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2'
+  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  unfold exec_store_double, Asm.exec_store_double in *.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  erewrite <- !eval_addressing_preserved; eauto.
+  destruct (is_pair_addressing_mode_correct a); try discriminate.
+  destruct (Mem.storev _ _ _ _);
+  try destruct (Mem.storev chk2 m
+             (eval_addressing lk
+                (get_offset_addr a
+                   match chk1 with
+                   | Mint32 | Mfloat32 | Many32 => 4
+                   | _ => 8
+                   end) rs1) v2);
+  inversion HSTORE; auto.
+  repeat (econstructor; eauto).
+  * eapply nextinstr_agree_but_pc; intros.
+    subst. apply EQR. auto.
+  * eapply ptrofs_nextinstr_agree; subst; eauto.
+Qed.
+
+Lemma next_inst_preserved n rs1 m1 rs1' m1' rs2 m2 (x: dreg) v: forall
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (NEXTI: Next rs1 # x <- v m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem),
+  Next (Asm.nextinstr rs2 # x <- v) m2 = Next rs2' m2'
+  /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  inversion NEXTI. repeat (econstructor; eauto).
+  * eapply nextinstr_agree_but_pc; intros.
+    destruct (PregEq.eq r x); try update_x_access_x; try update_x_access_r.
+  * eapply ptrofs_nextinstr_agree; eauto.
+Qed.
+
+Lemma match_internal_nextinstr_switch:
+  forall n s rs2 m2 r v,
+  r <> PC ->
+  match_internal n s (State ((Asm.nextinstr rs2)#r <- v) m2) ->
+  match_internal n s (State (Asm.nextinstr (rs2#r <- v)) m2).
+Proof.
+  unfold Asm.nextinstr; intros n s rs2 m2 r v NOTPC1 MI.
+  inversion MI; subst; constructor; auto.
+  - eapply nextinstr_agree_but_pc; intros.
+    rewrite AG; try congruence.
+    destruct (PregEq.eq r r0); try update_x_access_x; try update_x_access_r.
+  - rewrite !Pregmap.gss, !Pregmap.gso; try congruence.
+    rewrite AGPC.
+    rewrite Pregmap.gso, Pregmap.gss; try congruence.
+Qed.
+
+Lemma match_internal_nextinstr_set_parallel:
+  forall n rs1 m1 rs2 m2 r v1 v2,
+  r <> PC ->
+  match_internal n (State rs1 m1) (State (Asm.nextinstr rs2) m2) ->
+  v1 = v2 ->
+  match_internal n (State (rs1#r <- v1) m1) (State (Asm.nextinstr (rs2#r <- v2)) m2).
+Proof.
+  intros; subst; eapply match_internal_nextinstr_switch; eauto.
+  intros; eapply match_internal_set_parallel; eauto.
+Qed.
+
+Lemma exec_basic_simulation:
+  forall tf n rs1 m1 rs1' m1' rs2 m2 bi tbi
+  (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+  (BASIC: exec_basic lk ge bi rs1 m1 = Next rs1' m1')
+  (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+  (TRANSBI: basic_to_instruction bi = OK tbi),
+  exists rs2' m2', Asm.exec_instr tge tf tbi
+                                  rs2 m2 = Next rs2' m2'
+                   /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  destruct bi.
+  { (* PArith *)
+    simpl in *; destruct i.
+    1: {
+      destruct i.
+      1,2,3: 
+        try (destruct sumbool_rec; try congruence);
+        try (monadInv TRANSBI);
+        try (destruct_reg_inv);
+        try (inv_matchi);
+        try (exploit next_inst_preserved; eauto);
+        try (repeat destruct_reg_size);
+        try (destruct_ir0_reg).
+      1,2: (* Special case for Pfmovimmd / Pfmovimms *)
+        try (monadInv TRANSBI);
+        try (destruct_reg_inv);
+        try (inv_matchi);
+        inversion BASIC; clear BASIC; subst;
+        try (destruct (is_immediate_float64 _));
+        try (destruct (is_immediate_float32 _));
+        eexists; eexists; split; eauto;
+        repeat (eapply match_internal_nextinstr_set_parallel; try congruence);
+        try (econstructor; eauto);
+        try (eapply nextinstr_agree_but_pc; eauto);
+        try (eapply ptrofs_nextinstr_agree; eauto).
+    }
+    1,2,3,4,5: (* PArithP, PArithPP, PArithPPP, PArithRR0R, PArithRR0, PArithARRRR0 *)
+      destruct i;
+      try (destruct sumbool_rec; try congruence);
+      try (monadInv TRANSBI);
+      try (destruct_reg_inv);
+      try (inv_matchi);
+      try (exploit next_inst_preserved; eauto);
+      try (repeat destruct_reg_size);
+      try (destruct_ir0_reg).
+    { (* PArithComparisonPP *)
+      destruct i;
+      try (monadInv TRANSBI);
+      try (inv_matchi);
+      try (destruct_reg_inv);
+      simpl in *.
+      1,2: (* compare_long *)
+        inversion BASIC; clear BASIC; subst;
+        eexists; eexists; split; eauto;
+        unfold compare_long;
+        repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+        try (econstructor; eauto);
+        try (eapply nextinstr_agree_but_pc; eauto);
+        try (eapply ptrofs_nextinstr_agree; eauto).
+
+      destruct sz.
+      - (* compare_single *)
+        unfold compare_single in BASIC.
+        destruct (rs1 x), (rs1 x0);
+        inversion BASIC;
+        eexists; eexists; split; eauto;
+        repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+        try (econstructor; eauto);
+        try (eapply nextinstr_agree_but_pc; eauto);
+        try (eapply ptrofs_nextinstr_agree; eauto).
+      - (* compare_float *)
+        unfold compare_float in BASIC.
+        destruct (rs1 x), (rs1 x0);
+        inversion BASIC;
+        eexists; eexists; split; eauto;
+        repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+        try (econstructor; eauto);
+        try (eapply nextinstr_agree_but_pc; eauto);
+        try (eapply ptrofs_nextinstr_agree; eauto). }
+    1,2: (* PArithComparisonR0R, PArithComparisonP *)
+      destruct i;
+      try (monadInv TRANSBI);
+      try (inv_matchi);
+      try (destruct_reg_inv);
+      try (destruct_reg_size);
+      simpl in *;
+      inversion BASIC; clear BASIC; subst;
+      eexists; eexists; split; eauto;
+      unfold compare_long, compare_int, compare_float, compare_single;
+      try (destruct_reg_size);
+      repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+      try (econstructor; eauto);
+      try (destruct_ir0_reg);
+      try (eapply nextinstr_agree_but_pc; eauto);
+      try (eapply ptrofs_nextinstr_agree; eauto).
+    { (* Pcset *)
+      try (monadInv TRANSBI);
+      try (inv_matchi).
+      try (exploit next_inst_preserved; eauto);
+      try (simpl in *; intros;
+      unfold if_opt_bool_val in *; unfold eval_testcond in *;
+      rewrite <- !AG; try congruence; eauto). }
+    { (* Pfmovi *)
+      try (monadInv TRANSBI);
+      try (inv_matchi);
+      try (destruct_reg_size);
+      try (destruct_ir0_reg);
+      try (exploit next_inst_preserved; eauto). }
+    { (* Pcsel *)
+      try (destruct_reg_inv);
+      try (monadInv TRANSBI);
+      try (destruct_reg_inv);
+      try (inv_matchi);
+      try (exploit next_inst_preserved; eauto);
+      simpl in *; intros;
+      unfold if_opt_bool_val in *; unfold eval_testcond in *;
+      rewrite <- !AG; try congruence; eauto. }
+    { (* Pfnmul *)
+      try (monadInv TRANSBI);
+      try (inv_matchi);
+      try (destruct_reg_size);
+      try (exploit next_inst_preserved; eauto);
+      try (find_rwrt_ag). } }
+  { (* PLoad *)
+    destruct ld.
+    - destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_rd_a_preserved; eauto;
+      intros; simpl in *; destruct sz; eauto.
+    - destruct ld; monadInv TRANSBI; destruct rd1 as [[rd1'|]|]; destruct rd2 as [[rd2'|]|];
+      inv EQ; inv EQ1; exploit load_double_preserved; eauto. }
+  { (* PStore *)
+    destruct st.
+    - destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_rs_a_preserved; eauto;
+      simpl in *; inv_matchi; find_rwrt_ag.
+    - destruct st; monadInv TRANSBI; destruct rs0 as [[rs0'|]|]; destruct rs3 as [[rs3'|]|];
+      inv EQ; inv EQ1; exploit store_double_preserved; eauto;
+      simpl in *; inv_matchi; find_rwrt_ag. }
+  { (* Pallocframe *)
+    monadInv TRANSBI;
+    inv_matchi; try pc_not_sp;
+    destruct sz eqn:EQSZ;
+    destruct Mem.alloc eqn:EQALLOC;
+    destruct Mem.store eqn:EQSTORE; inversion BASIC; try pc_not_sp;
+    eexists; eexists; split; eauto;
+    repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]);
+    try (econstructor; eauto);
+    try (eapply nextinstr_agree_but_pc; eauto);
+    try (eapply ptrofs_nextinstr_agree; eauto). }
+  { (* Pfreeframe *)
+    monadInv TRANSBI;
+    inv_matchi; try pc_not_sp;
+    destruct sz eqn:EQSZ;
+    destruct Mem.loadv eqn:EQLOAD;
+    destruct (rs1 SP) eqn:EQRS1SP;
+    try (destruct Mem.free eqn:EQFREE);
+    inversion BASIC; try pc_not_sp;
+    eexists; eexists; split; eauto;
+    repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]);
+    try (econstructor; eauto);
+    try (eapply nextinstr_agree_but_pc; eauto);
+    try (eapply ptrofs_nextinstr_agree; eauto). }
+  1,2,3,4: (* Ploadsymbol, Pcvtsw2x, Pcvtuw2x, Pcvtx2w *)
+    try (monadInv TRANSBI);
+    try (inv_matchi);
+    try (exploit next_inst_preserved; eauto);
+    rewrite symbol_addresses_preserved; eauto;
+    try (find_rwrt_ag).
+  { (* Pnop *)
+    monadInv TRANSBI; inv_matchi. 
+    inversion BASIC.
+    repeat (econstructor; eauto).
+    eapply nextinstr_agree_but_pc; intros;
+    try rewrite <- H0, AG; auto.
+    try eapply ptrofs_nextinstr_agree; auto; rewrite <- H0;
+    assumption. }
+Qed.
+
+Lemma find_basic_instructions b ofs f bb tc: forall
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (UNFOLD: unfold (fn_blocks f) = OK tc),
+  forall n,
+  (n < length (body bb))%nat ->
+  exists (i : Asm.instruction) (bi : basic),
+     list_nth_z (body bb) (Z.of_nat n) = Some bi
+  /\ basic_to_instruction bi = OK i
+  /\ Asm.find_instr (Ptrofs.unsigned ofs
+                     + (list_length_z (header bb))
+                     + Z.of_nat n) tc
+                     = Some i.
+Proof.
+  intros until n; intros NLT.
+  exploit internal_functions_unfold; eauto.
+  intros (tc' & FINDtf & TRANStf & _).
+  assert (tc' = tc) by congruence; subst.
+    exploit (find_instr_bblock (list_length_z (header bb) + Z.of_nat n)); eauto.
+    { unfold size; split.
+      - rewrite list_length_z_nat; lia.
+      - repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). lia. }
+    intros (i & NTH & FIND_INSTR).
+    exists i; intros.
+    inv NTH.
+    - (* absurd *) apply list_nth_z_range in H; lia.
+    - exists bi;
+      rewrite Z.add_simpl_l in H;
+      rewrite Z.add_assoc in FIND_INSTR;
+      intuition.
+    - (* absurd *) rewrite bblock_size_aux in H0;
+      rewrite H in H0; simpl in H0; repeat rewrite list_length_z_nat in H0; lia.
+Qed.
+
+(* TODO: remplacer find_basic_instructions directement par ce lemme ? *)
+Lemma find_basic_instructions_alt b ofs f bb tc n: forall
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (UNFOLD: unfold (fn_blocks f) = OK tc)
+  (BOUND: 0 <= n < list_length_z (body bb)),
+  exists (i : Asm.instruction) (bi : basic),
+     list_nth_z (body bb) n = Some bi
+  /\ basic_to_instruction bi = OK i
+  /\ Asm.find_instr (Ptrofs.unsigned ofs
+                     + (list_length_z (header bb))
+                     + n) tc
+                     = Some i.
+Proof.
+  intros; assert ((Z.to_nat n) < length (body bb))%nat.
+  { rewrite Nat2Z.inj_lt, <- list_length_z_nat, Z2Nat.id; try lia. }
+  exploit find_basic_instructions; eauto.
+  rewrite Z2Nat.id; try lia. intros (i & bi & X).
+  eexists; eexists; intuition eauto.
+Qed.
+
+Lemma header_body_tail_bound: forall (a: basic) (li: list basic) bb ofs
+  (BOUNDBB : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+  (BDYLENPOS : 0 <= list_length_z (body bb) - list_length_z (a :: li) <
+              list_length_z (body bb)),
+0 <= list_length_z (header bb) + list_length_z (body bb) - list_length_z (a :: li) <=
+Ptrofs.max_unsigned.
+Proof.
+  intros.
+  assert (HBBPOS: list_length_z (header bb) >= 0) by eapply list_length_z_pos.
+  assert (HBBSIZE: list_length_z (header bb) < size bb) by eapply header_size_lt_block_size.
+  assert (OFSBOUND: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2.
+  assert (BBSIZE: size bb <= Ptrofs.max_unsigned) by lia.
+  unfold size in BBSIZE.
+  rewrite !Nat2Z.inj_add in BBSIZE.
+  rewrite <- !list_length_z_nat in BBSIZE.
+  lia.
+Qed.
+
+(* A more general version of the exec_body_simulation_plus lemma below.
+   This generalization is necessary for the induction proof inside the body.
+*)
+Lemma exec_body_simulation_plus_gen li: forall b ofs f bb rs m s2 rs' m'
+  (BLI: is_tail li (body bb))
+  (ATPC: rs PC = Vptr b ofs)
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (NEMPTY_BODY: li <> nil)
+  (MATCHI: match_internal ((list_length_z (header bb)) + (list_length_z (body bb)) - (list_length_z li)) (State rs m) s2)
+  (BODY: exec_body lk ge li rs m = Next rs' m'),
+  exists s2', plus Asm.step tge s2 E0 s2'
+             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+  induction li as [|a li]; simpl; try congruence.
+  intros.
+  assert (BDYLENPOS: 0 <= (list_length_z (body bb) - list_length_z (a::li)) < list_length_z (body bb)). {
+    assert (Z.of_nat O < list_length_z (a::li) <= list_length_z (body bb)); try lia.
+    rewrite !list_length_z_nat; split.
+    - rewrite <- Nat2Z.inj_lt. simpl. lia.
+    - rewrite <- Nat2Z.inj_le; eapply is_tail_bound; eauto.
   }
-  { rewrite S; auto. }
-    
-(* X30 does not contain parent *)
-  exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]].
-  exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence.
-  intros [rs2 [S [T [U V]]]].
-  exists rs2; split. eapply exec_straight_trans; eauto.
-  split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
-  instantiate (1 := rs1#X29 <- (rs2#X29)). intros.
-  rewrite Pregmap.gso; auto with asmgen.
-  congruence.
-  intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
-  split; simpl; intros. rewrite U; auto with asmgen.
-  apply preg_of_not_X29; auto.
-  rewrite V. rewrite R by congruence. auto.
-  
-- (* Mop *)
-  assert (eval_operation tge sp op (map rs args) m = Some v).
-  { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. }
-  exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
-  intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
-  exploit transl_op_correct; eauto. intros [rs2 [P [Q [R S]]]].
-  exists rs2; split. eauto. split.
-  apply agree_set_undef_mreg with rs0; auto. 
-  apply Val.lessdef_trans with v'; auto.
-  split; simpl; intros. InvBooleans. 
-  rewrite R; auto. apply preg_of_not_X29; auto.
-Local Transparent destroyed_by_op.
-  destruct op; try exact I; simpl; congruence.
-  rewrite S.
-  auto.
-- (* Mload *)
-  destruct trap.
+  exploit internal_functions_unfold; eauto.
+  intros (tc & FINDtf & TRANStf & _).
+  exploit find_basic_instructions_alt; eauto.
+  intros (tbi & (bi & (NTHBI & TRANSBI & FIND_INSTR))).
+  exploit is_tail_list_nth_z; eauto.
+  rewrite NTHBI; simpl.
+  intros X; inversion X; subst; clear X NTHBI.
+  destruct (exec_basic _ _ _ _ _) eqn:EXEC_BASIC; next_stuck_cong.
+  destruct s as (rs1 & m1); simpl in *.
+  destruct s2 as (rs2 & m2); simpl in *.
+  assert (BOUNDBBMAX: Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+  by (eapply size_of_blocks_bounds; eauto).
+  exploit header_body_tail_bound; eauto. intros BDYTAIL.
+  exploit exec_basic_simulation; eauto.
+  intros (rs_next' & m_next' & EXEC_INSTR & MI_NEXT).
+  exploit exec_basic_dont_move_PC; eauto. intros AGPC.
+  inversion MI_NEXT as [A B C D E M_NEXT_AGREE RS_NEXT_AGREE ATPC_NEXT PC_OFS_NEXT RS RS'].
+  subst A. subst B. subst C. subst D. subst E.
+  rewrite ATPC in AGPC. symmetry in AGPC, ATPC_NEXT.
+
+  inv MATCHI. symmetry in AGPC0.
+  rewrite ATPC in AGPC0.
+  unfold Val.offset_ptr in AGPC0.
+
+  simpl in FIND_INSTR.
+  (* Execute internal step. *)
+  exploit (Asm.exec_step_internal tge b); eauto.
   {
-  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
-  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
-  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
-  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-  exploit Mem.loadv_extends; eauto. intros [v' [C D]].
-  left; eapply exec_straight_steps; eauto; intros. simpl in TR.
-  exploit transl_load_correct; eauto. intros [rs2 [P [Q [R S]]]].
-  exists rs2; split. eauto.
-  split. eapply agree_set_undef_mreg; eauto. congruence.
-  split. simpl; congruence.
-  rewrite S. assumption.
+    rewrite Ptrofs.add_unsigned.
+    repeat (rewrite Ptrofs.unsigned_repr); try lia.
+    2: {
+      assert (BOUNDOFS: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2.
+      assert (list_length_z (body bb) <= size bb) by eapply body_size_le_block_size.
+      assert (list_length_z (header bb) <= 1). { eapply size_header; eauto. }
+      lia. }
+    try rewrite list_length_z_nat; try split;
+    simpl; rewrite <- !list_length_z_nat;
+    replace (Ptrofs.unsigned ofs + (list_length_z (header bb) + list_length_z (body bb) -
+      list_length_z (a :: li))) with (Ptrofs.unsigned ofs + list_length_z (header bb) +
+      (list_length_z (body bb) - list_length_z (a :: li))) by lia;
+    try assumption; try lia. }
+
+  (* This is our STEP hypothesis. *)
+  intros STEP_NEXT.
+  destruct li as [|a' li]; simpl in *.
+  - (* case of a single instruction in li: this our base case in the induction *)
+    inversion BODY; subst.
+    eexists; split.
+    + apply plus_one. eauto.
+    + constructor; auto.
+      rewrite ATPC_NEXT.
+      apply f_equal.
+      apply f_equal.
+      rewrite bblock_size_aux, list_length_z_cons; simpl.
+      lia.
+  - exploit (IHli b ofs f bb rs1 m_next' (State rs_next' m_next')); congruence || eauto.
+    + exploit is_tail_app_def; eauto.
+      intros (l3 & EQ); rewrite EQ.
+      exploit (is_tail_app_right (l3 ++ a::nil)).
+      rewrite <- app_assoc; simpl; eauto.
+    + constructor; auto.
+      rewrite ATPC_NEXT.
+      apply f_equal.
+      apply f_equal.
+      rewrite! list_length_z_cons; simpl.
+      lia.
+    + intros (s2' & LAST_STEPS & LAST_MATCHS).
+      eexists. split; eauto.
+      eapply plus_left'; eauto.
+Qed.
+
+Lemma exec_body_simulation_plus b ofs f bb rs m s2 rs' m': forall
+  (ATPC: rs PC = Vptr b ofs)
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (NEMPTY_BODY: body bb <> nil)
+  (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
+  (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+  exists s2', plus Asm.step tge s2 E0 s2'
+             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+  intros.
+  exploit exec_body_simulation_plus_gen; eauto.
+  - constructor.
+  - replace (list_length_z (header bb) + list_length_z (body bb) - list_length_z (body bb)) with (list_length_z (header bb)); auto.
+    lia.
+Qed.
+
+Lemma exec_body_simulation_star b ofs f bb rs m s2 rs' m': forall
+  (ATPC: rs PC = Vptr b ofs)
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
+  (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+  exists s2', star Asm.step tge s2 E0 s2'
+             /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+  intros.
+  destruct (body bb) eqn: Hbb.
+  - simpl in BODY. inv BODY.
+    eexists. split.
+    eapply star_refl; eauto.
+    assert (EQ: (size bb - Z.of_nat (length_opt (exit bb))) = list_length_z (header bb)).
+    {  rewrite bblock_size_aux. rewrite Hbb; unfold list_length_z; simpl. lia. }
+    rewrite EQ; eauto.
+  - exploit exec_body_simulation_plus; congruence || eauto.
+    { rewrite Hbb; eauto. }
+    intros (s2' & PLUS & MATCHI').
+    eexists; split; eauto.
+    eapply plus_star; eauto.
+Qed.
+
+Lemma list_nth_z_range_exceeded A (l : list A) n:
+  n >= list_length_z l ->
+  list_nth_z l n = None.
+Proof.
+  intros N.
+  remember (list_nth_z l n) as opt eqn:H. symmetry in H.
+  destruct opt; auto.
+  exploit list_nth_z_range; eauto. lia.
+Qed.
+
+Lemma label_in_header_list lbl a:
+  is_label lbl a = true -> list_length_z (header a) <= 1 -> header a = lbl :: nil.
+Proof.
+  intros.
+  eapply is_label_correct_true in H.
+  destruct (header a).
+  - eapply in_nil in H. contradiction.
+  - rewrite list_length_z_cons in H0.
+    assert (list_length_z l0 >= 0) by eapply list_length_z_pos.
+    assert (list_length_z l0 = 0) by lia.
+    rewrite list_length_z_nat in H2.
+    assert (Datatypes.length l0 = 0%nat) by lia.
+    eapply length_zero_iff_nil in H3. subst.
+    unfold In in H. destruct H.
+    + subst; eauto.
+    + destruct H.
+Qed.
+
+Lemma no_label_in_basic_inst: forall a lbl x,
+  basic_to_instruction a = OK x -> Asm.is_label lbl x = false.
+Proof.
+  intros.
+  destruct a; simpl in *;
+  repeat destruct i;
+  repeat destruct ld; repeat destruct st;
+  simpl in *;
+  try (try destruct_reg_inv; monadInv H; simpl in *; reflexivity).
+Qed.
+
+Lemma label_pos_body bdy: forall c1 c2 z ex lbl
+  (HUNF : unfold_body bdy = OK c2),
+  Asm.label_pos lbl (z + Z.of_nat ((Datatypes.length bdy) + length_opt ex)) c1 = Asm.label_pos lbl (z) ((c2 ++ unfold_exit ex) ++ c1).
+Proof.
+  induction bdy.
+  - intros. inversion HUNF. simpl in *.
+    destruct ex eqn:EQEX.
+    + simpl in *. unfold Asm.is_label. destruct c; simpl; try congruence.
+      destruct i; simpl; try congruence.
+    + simpl in *. ring_simplify (z + 0). auto.
+  - intros. inversion HUNF; clear HUNF. monadInv H0. simpl in *.
+    erewrite no_label_in_basic_inst; eauto. rewrite <- IHbdy; eauto.
+    erewrite Zpos_P_of_succ_nat.
+    apply f_equal2; auto. lia.
+Qed.
+
+Lemma asm_label_pos_header: forall z a x0 x1 lbl
+  (HUNF: unfold_body (body a) = OK x1),
+  Asm.label_pos lbl (z + size a) x0 =
+  Asm.label_pos lbl (z + list_length_z (header a)) ((x1 ++ unfold_exit (exit a)) ++ x0).
+Proof.
+  intros.
+  unfold size.
+  rewrite <- plus_assoc. rewrite Nat2Z.inj_add.
+  rewrite list_length_z_nat.
+  replace (z + (Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a)))) with (z + Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a))) by lia.
+  eapply (label_pos_body (body a) x0 x1 (z + Z.of_nat (Datatypes.length (header a))) (exit a) lbl). auto.
+Qed.
+
+Lemma header_size_cons_nil: forall (l0: label) (l1: list label)
+  (HSIZE: list_length_z (l0 :: l1) <= 1),
+  l1 = nil.
+Proof.
+  intros.
+  destruct l1; try congruence. rewrite !list_length_z_cons in HSIZE.
+  assert (list_length_z l1 >= 0) by eapply list_length_z_pos.
+  assert (list_length_z l1 + 1 + 1 >= 2) by lia.
+  assert (2 <= 1) by lia. contradiction H1. lia.
+Qed.
+
+Lemma label_pos_preserved_gen bbs: forall lbl c z
+  (HUNF: unfold bbs = OK c),
+  label_pos lbl z bbs = Asm.label_pos lbl z c.
+Proof.
+  induction bbs.
+  - intros. simpl in *. inversion HUNF. simpl. reflexivity.
+  - intros. simpl in *. monadInv HUNF. unfold unfold_bblock in EQ.
+    destruct (zle _ _); try congruence. monadInv EQ.
+    destruct (is_label _ _) eqn:EQLBL.
+    + erewrite label_in_header_list; eauto.
+      simpl in *. destruct (peq lbl lbl); try congruence.
+    + erewrite IHbbs; eauto.
+      rewrite (asm_label_pos_header z a x0 x1 lbl); auto.
+      unfold is_label in *.
+      destruct (header a).
+      * replace (z + list_length_z (@nil label)) with (z); eauto.
+        unfold list_length_z. simpl. lia.
+      * eapply header_size_cons_nil in l as HL1.
+        subst. simpl in *. destruct (in_dec _ _); try congruence.
+        simpl in *.
+        destruct (peq _ _); try intuition congruence.
+Qed.
+
+Lemma label_pos_preserved f lbl z tf: forall
+  (FINDF: transf_function f = OK tf),
+  label_pos lbl z (fn_blocks f) = Asm.label_pos lbl z (Asm.fn_code tf).
+Proof.
+  intros.
+  eapply label_pos_preserved_gen.
+  unfold transf_function in FINDF. monadInv FINDF.
+  destruct zlt; try congruence. inversion EQ0. eauto.
+Qed.
+
+Lemma goto_label_preserved bb rs1 m1 rs1' m1' rs2 m2 lbl f tf v: forall
+  (FINDF: transf_function f = OK tf)
+  (BOUNDED: size bb <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
+  (HGOTO: goto_label f lbl (incrPC v rs1) m1 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem), Asm.goto_label tf lbl rs2 m2 = Next rs2' m2'
+  /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  unfold goto_label, Asm.goto_label in *.
+  rewrite <- (label_pos_preserved f); auto.
+  inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+  destruct label_pos; next_stuck_cong.
+  destruct (incrPC v rs1 PC) eqn:INCRPC; next_stuck_cong.
+  inversion HGOTO; auto. repeat (econstructor; eauto).
+  rewrite <- EQPC.
+  unfold incrPC in *.
+  rewrite !Pregmap.gss in *.
+  destruct (rs1 PC) eqn:EQRS1; simpl in *; try congruence.
+  replace (rs2 # PC <- (Vptr b0 (Ptrofs.repr z))) with ((rs1 # PC <- (Vptr b0 (Ptrofs.add i0 v))) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+  eapply functional_extensionality. intros.
+  destruct (PregEq.eq x PC); subst.
+  rewrite !Pregmap.gss. congruence.
+  rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma next_inst_incr_pc_preserved bb rs1 m1 rs1' m1' rs2 m2 f tf: forall
+  (FINDF: transf_function f = OK tf)
+  (BOUNDED: size bb <= Ptrofs.max_unsigned)
+  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
+  (NEXT: Next (incrPC (Ptrofs.repr (size bb)) rs1) m2 = Next rs1' m1'),
+  exists (rs2' : regset) (m2' : mem),
+  Next (Asm.nextinstr rs2) m2 = Next rs2' m2'
+  /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+  intros; simpl in *; unfold incrPC in NEXT;
+  inv_matchi;
+  assert (size bb >= 1) by eapply bblock_size_pos;
+  assert (0 <= size bb - 1 <= Ptrofs.max_unsigned) by lia;
+  inversion NEXT; subst;
+  eexists; eexists; split; eauto.
+  assert (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))) = Asm.nextinstr rs2). {
+    unfold Pregmap.set. apply functional_extensionality.
+    intros x. destruct (PregEq.eq x PC).
+    -- unfold Asm.nextinstr. rewrite <- AGPC.
+       rewrite Val.offset_ptr_assoc. rewrite Ptrofs.add_unsigned.
+       rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
+       rewrite Ptrofs.unsigned_one.
+       replace (size bb - 1 + 1) with (size bb) by lia.
+       rewrite e. rewrite Pregmap.gss.
+       reflexivity.
+    -- eapply nextinstr_agree_but_pc; eauto. }
+       rewrite H1. econstructor.
+Qed.
+
+Lemma pc_reg_overwrite: forall (r: ireg) rs1 m1 rs2 m2 bb
+  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
+  rs2 # PC <- (rs2 r) =
+  (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) # PC <-
+  (rs1 r).
+Proof.
+  intros.
+  unfold Pregmap.set; apply functional_extensionality.
+  intros x; destruct (PregEq.eq x PC) as [X | X]; try discriminate; inv_matchi.
+Qed.
+
+Lemma exec_cfi_simulation:
+  forall bb f tf rs1 m1 rs1' m1' rs2 m2 cfi
+  (SIZE: size bb <= Ptrofs.max_unsigned)
+  (FINDF: transf_function f = OK tf)
+  (* Warning: Asmblock's PC is assumed to be already pointing on the next instruction ! *)
+  (CFI: exec_cfi ge f cfi (incrPC (Ptrofs.repr (size bb)) rs1) m1 = Next rs1' m1')
+  (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
+  exists rs2' m2', Asm.exec_instr tge tf (cf_instruction_to_instruction cfi)
+                                  rs2 m2 = Next rs2' m2'
+                   /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+  intros.
+  assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
+  destruct cfi; inv CFI; simpl.
+  - (* Pb *)
+    exploit goto_label_preserved; eauto.
+  - (* Pbc *)
+    inv_matchi.
+    unfold eval_testcond in *. destruct c;
+    erewrite !incrPC_agree_but_pc in H0; try rewrite <- !AG; try congruence.
+    all:
+      destruct_reg_size;
+      try destruct b eqn:EQB.
+      1,4,7,10,13,16,19,22,25,28,31,34:
+        exploit goto_label_preserved; eauto.
+      1,3,5,7,9,11,13,15,17,19,21,23:
+        exploit next_inst_incr_pc_preserved; eauto.
+      all: repeat (econstructor; eauto).
+  - (* Pbl *)
+    eexists; eexists; split; eauto.
+    assert ( ((incrPC (Ptrofs.repr (size bb)) rs1) # X30 <- (incrPC (Ptrofs.repr (size bb)) rs1 PC))
+                                                   # PC <-  (Genv.symbol_address ge id Ptrofs.zero)
+           = (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one))
+                           # PC <- (Genv.symbol_address tge id Ptrofs.zero)
+           ) as EQRS. {
+      unfold incrPC. unfold Pregmap.set. simpl. apply functional_extensionality.
+      intros x. destruct (PregEq.eq x PC).
+      * rewrite symbol_addresses_preserved. reflexivity.
+      * destruct (PregEq.eq x X30).
+        -- inv MATCHI. rewrite <- AGPC. rewrite Val.offset_ptr_assoc.
+           unfold Ptrofs.add, Ptrofs.one. repeat (rewrite Ptrofs.unsigned_repr); try lia.
+           replace (size bb - 1 + 1) with (size bb) by lia. reflexivity.
+        -- inv MATCHI; rewrite AG; try assumption; reflexivity.
+    } rewrite EQRS; inv MATCHI; reflexivity.
+  - (* Pbs *)
+    eexists; eexists; split; eauto.
+    assert ( (incrPC (Ptrofs.repr (size bb)) rs1) # PC <-
+                           (Genv.symbol_address ge id Ptrofs.zero)
+           = rs2 # PC <- (Genv.symbol_address tge id Ptrofs.zero)
+           ) as EQRS. {
+      unfold incrPC, Pregmap.set. rewrite symbol_addresses_preserved. inv MATCHI.
+      apply functional_extensionality. intros x. destruct (PregEq.eq x PC); auto.
+    } rewrite EQRS; inv MATCHI; reflexivity.
+  - (* Pblr *)
+    eexists; eexists; split; eauto.
+    unfold incrPC. rewrite Pregmap.gss. rewrite Pregmap.gso; try discriminate.
+    assert ( (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one)) # PC <- (rs2 r)
+           = ((rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))))
+                   # X30 <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))))
+                   # PC <- (rs1 r)
+           ) as EQRS. {
+      unfold Pregmap.set. apply functional_extensionality.
+      intros x; destruct (PregEq.eq x PC) as [X | X].
+      - inv_matchi; rewrite AG; auto.
+      - destruct (PregEq.eq x X30) as [X' | X'].
+        + inversion MATCHI; subst. rewrite <- AGPC.
+          rewrite Val.offset_ptr_assoc. unfold Ptrofs.one.
+          rewrite Ptrofs.add_unsigned. rewrite Ptrofs.unsigned_repr; try lia. rewrite Ptrofs.unsigned_repr; try lia.
+          rewrite Z.sub_add; reflexivity.
+        + inv_matchi.
+    } rewrite EQRS. inv_matchi.
+  - (* Pbr *)
+    eexists; eexists; split; eauto.
+    unfold incrPC. rewrite Pregmap.gso; try discriminate.
+    rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto.
+    inv_matchi.
+  - (* Pret *)
+    eexists; eexists; split; eauto.
+    unfold incrPC. rewrite Pregmap.gso; try discriminate.
+    rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto.
+    inv_matchi.
+  - (* Pcbnz *)
+    inv_matchi.
+    unfold eval_neg_branch in *.
+    erewrite incrPC_agree_but_pc in H0; try congruence.
+    destruct eval_testzero; next_stuck_cong.
+    destruct b.
+    * exploit next_inst_incr_pc_preserved; eauto.
+    * exploit goto_label_preserved; eauto.
+  - (* Pcbz *)
+    inv_matchi.
+    unfold eval_branch in *.
+    erewrite incrPC_agree_but_pc in H0; try congruence.
+    destruct eval_testzero; next_stuck_cong.
+    destruct b.
+    * exploit goto_label_preserved; eauto.
+    * exploit next_inst_incr_pc_preserved; eauto.
+  - (* Ptbnbz *)
+    inv_matchi.
+    unfold eval_branch in *.
+    erewrite incrPC_agree_but_pc in H0; try congruence.
+    destruct eval_testbit; next_stuck_cong.
+    destruct b.
+    * exploit goto_label_preserved; eauto.
+    * exploit next_inst_incr_pc_preserved; eauto.
+  - (* Ptbz *)
+    inv_matchi.
+    unfold eval_neg_branch in *.
+    erewrite incrPC_agree_but_pc in H0; try congruence.
+    destruct eval_testbit; next_stuck_cong.
+    destruct b.
+    * exploit next_inst_incr_pc_preserved; eauto.
+    * exploit goto_label_preserved; eauto.
+  - (* Pbtbl *)
+    assert (rs2 # X16 <- Vundef r1 = (incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef r1)
+    as EQUNDEFX16. {
+      unfold incrPC, Pregmap.set.
+      destruct (PregEq.eq r1 X16) as [X16 | X16]; auto.
+      destruct (PregEq.eq r1 PC) as [PC' | PC']; try discriminate.
+      inv MATCHI; rewrite AG; auto.
+    } rewrite <- EQUNDEFX16 in H0.
+    destruct_reg_inv; next_stuck_cong.
+    unfold goto_label, Asm.goto_label in *.
+    rewrite <- (label_pos_preserved f); auto.
+    inversion MATCHI; subst.
+    destruct label_pos; next_stuck_cong.
+    destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
+    inversion H0; auto. repeat (econstructor; eauto).
+    rewrite !Pregmap.gso; try congruence.
+    rewrite <- AGPC.
+    unfold incrPC in *.
+    destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate.
+    replace ((rs2 # X16 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
+      (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
+      Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+    eapply functional_extensionality; intros x.
+    destruct (PregEq.eq x PC); subst.
+    + rewrite Pregmap.gso in INCRPC; try congruence.
+      rewrite Pregmap.gss in INCRPC.
+      rewrite !Pregmap.gss in *; congruence.
+    + rewrite Pregmap.gso; auto.
+      rewrite (Pregmap.gso (i := x) (j := PC)); auto.
+      destruct (PregEq.eq x X16); subst.
+      * rewrite !Pregmap.gss; auto.
+      * rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma last_instruction_cannot_be_label bb:
+  list_nth_z (header bb) (size bb - 1) = None.
+Proof.
+  assert (list_length_z (header bb) <= size bb - 1). {
+    rewrite bblock_size_aux. generalize (bblock_size_aux_pos bb). lia.
   }
-  
-  (* Mload notrap1 *)
-  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
-
-- (* Mload notrap *)
-  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
-
-- (* Mload notrap *)
-  inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
-
-- (* Mstore *)
-  assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
-  { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
-  exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
-  intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
-  assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
-  exploit Mem.storev_extends; eauto. intros [m2' [C D]].
-  left; eapply exec_straight_steps; eauto.
-  intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P [Q R]]].
-  exists rs2; split. eauto.
-  split. eapply agree_undef_regs; eauto with asmgen.
-  split. simpl; congruence.
-  rewrite R. assumption.
-
-- (* Mcall *)
-  assert (f0 = f) by congruence.  subst f0.
-  inv AT.
-  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
-  { eapply transf_function_no_overflow; eauto. }
-  destruct ros as [rf|fid]; simpl in H; monadInv H5.
-+ (* Indirect call *)
-  assert (rs rf = Vptr f' Ptrofs.zero).
-  { destruct (rs rf); try discriminate.
-    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
-  assert (rs0 x0 = Vptr f' Ptrofs.zero).
-  { exploit ireg_val; eauto. rewrite H5; intros LD; inv LD; auto. }
-  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
-  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
-  {  econstructor; eauto. }
-  exploit return_address_offset_correct; eauto. intros; subst ra.
-  left; econstructor; split.
-  apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. eauto.
-  econstructor; eauto.
-  econstructor; eauto.
-  eapply agree_sp_def; eauto.
-  simpl. eapply agree_exten; eauto. intros. Simpl.
-  Simpl. rewrite <- H2. auto.
-+ (* Direct call *)
-  generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
-  assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
-    econstructor; eauto.
-  exploit return_address_offset_correct; eauto. intros; subst ra.
-  left; econstructor; split.
-  apply plus_one. eapply exec_step_internal. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. eauto.
-  econstructor; eauto.
-  econstructor; eauto.
-  eapply agree_sp_def; eauto.
-  simpl. eapply agree_exten; eauto. intros. Simpl.
-  Simpl. rewrite <- H2. auto.
-
-- (* Mtailcall *)
-  assert (f0 = f) by congruence.  subst f0.
-  inversion AT; subst.
-  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
-  { eapply transf_function_no_overflow; eauto. }
-  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
-  destruct ros as [rf|fid]; simpl in H; monadInv H7.
-+ (* Indirect call *)
-  assert (rs rf = Vptr f' Ptrofs.zero).
-  { destruct (rs rf); try discriminate.
-    revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
-  assert (rs0 x0 = Vptr f' Ptrofs.zero).
-  { exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto. }
-  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
-  exploit exec_straight_steps_2; eauto using functions_transl.                      
-  intros (ofs' & P & Q).
-  left; econstructor; split.
-  (* execution *)
-  eapply plus_right'. eapply exec_straight_exec; eauto.
-  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
-  simpl. reflexivity.
-  traceEq.
-  (* match states *)
-  econstructor; eauto.
-  apply agree_set_other; auto with asmgen.
-  Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption. 
-+ (* Direct call *)
-  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
-  exploit exec_straight_steps_2; eauto using functions_transl.                      
-  intros (ofs' & P & Q).
-  left; econstructor; split.
-  (* execution *)
-  eapply plus_right'. eapply exec_straight_exec; eauto.
-  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
-  simpl. reflexivity.
-  traceEq.
-  (* match states *)
-  econstructor; eauto.
-  apply agree_set_other; auto with asmgen.
-  Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
-
-- (* Mbuiltin *)
-  inv AT. monadInv H4.
-  exploit functions_transl; eauto. intro FN.
-  generalize (transf_function_no_overflow _ _ H3); intro NOOV.
-  exploit builtin_args_match; eauto. intros [vargs' [P Q]].
-  exploit external_call_mem_extends; eauto.
-  intros [vres' [m2' [A [B [C D]]]]].
-  left. econstructor; split. apply plus_one.
-  eapply exec_step_builtin. eauto. eauto.
-  eapply find_instr_tail; eauto.
-  erewrite <- sp_val by eauto.
-  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  eauto.
-  econstructor; eauto.
-  instantiate (2 := tf); instantiate (1 := x).
-  unfold nextinstr. rewrite Pregmap.gss.
-  rewrite set_res_other. rewrite undef_regs_other_2.
-  rewrite <- H1. simpl. econstructor; eauto.
-  eapply code_tail_next_int; eauto.
-  rewrite preg_notin_charact. intros. auto with asmgen.
-  auto with asmgen.
-  apply agree_nextinstr. eapply agree_set_res; auto.
-  eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
-  congruence.
-
-  Simpl.
-  rewrite set_res_other by trivial.
-  rewrite undef_regs_other.
-  assumption.
-  intro.
-  rewrite in_map_iff.
-  intros (x0 & PREG & IN).
-  subst r'.
-  intro.
-  apply (preg_of_not_RA x0).
-  congruence.
-  
-- (* Mgoto *)
-  assert (f0 = f) by congruence. subst f0.
-  inv AT. monadInv H4.
-  exploit find_label_goto_label; eauto. intros [tc' [rs' [GOTO [AT2 INV]]]].
-  left; exists (State rs' m'); split.
-  apply plus_one. econstructor; eauto.
-  eapply functions_transl; eauto.
-  eapply find_instr_tail; eauto.
-  simpl; eauto.
-  econstructor; eauto.
-  eapply agree_exten; eauto with asmgen.
-  congruence.
-
-  rewrite INV by congruence.
-  assumption.
-  
-- (* Mcond true *)
-  assert (f0 = f) by congruence. subst f0.
-  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
-  left; eapply exec_straight_opt_steps_goto; eauto.
-  intros. simpl in TR.
-  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
-  exists jmp; exists k; exists rs'.
-  split. eexact A. 
-  split. apply agree_exten with rs0; auto with asmgen.
-  split.
-  exact B.
-  rewrite D. exact LEAF.
-
-- (* Mcond false *)
-  exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
-  left; eapply exec_straight_steps; eauto. intros. simpl in TR.
-  exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto.
-  split. apply agree_exten with rs0; auto. intros. Simpl.
-  split.
-  simpl; congruence.
-  Simpl. rewrite D.
-  exact LEAF.
-
-- (* Mjumptable *)
-  assert (f0 = f) by congruence. subst f0.
-  inv AT. monadInv H6.
-  exploit functions_transl; eauto. intro FN.
-  generalize (transf_function_no_overflow _ _ H5); intro NOOV.
-  exploit find_label_goto_label. eauto. eauto.
-  instantiate (2 := rs0#X16 <- Vundef #X17 <- Vundef).
-  Simpl. eauto.
-  eauto.
-  intros [tc' [rs' [A [B C]]]].
-  exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
-  left; econstructor; split.
-  apply plus_one. econstructor; eauto.
-  eapply find_instr_tail; eauto.
-  simpl. Simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
-  econstructor; eauto.
-  eapply agree_undef_regs; eauto.
-  simpl. intros. rewrite C; auto with asmgen. Simpl.
-  congruence.
-
-  rewrite C by congruence.
-  repeat rewrite Pregmap.gso by congruence.
-  assumption.
-  
-- (* Mreturn *)
-  assert (f0 = f) by congruence. subst f0.
-  inversion AT; subst. simpl in H6; monadInv H6.
-  assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
-    eapply transf_function_no_overflow; eauto.
-  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
-  exploit exec_straight_steps_2; eauto using functions_transl.                      
-  intros (ofs' & P & Q).
-  left; econstructor; split.
-  (* execution *)
-  eapply plus_right'. eapply exec_straight_exec; eauto.
-  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
-  simpl. reflexivity.
-  traceEq.
-  (* match states *)
-  econstructor; eauto.
-  apply agree_set_other; auto with asmgen.
-
-- (* internal function *)
-
-  exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
-  generalize EQ; intros EQ'. monadInv EQ'.
-  destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0.
-  unfold store_stack in *.
-  exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros [m1' [C D]].
-  exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
-  intros [m2' [F G]].
-  simpl chunk_of_type in F.
-  exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
-  intros [m3' [P Q]].
-  change (chunk_of_type Tptr) with Mint64 in *.
-  (* Execution of function prologue *)
-  monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
-  set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
-                 storeptr RA XSP (fn_retaddr_ofs f) x0) in *.
-  set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
-  set (rs2 := nextinstr (rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef)).
-  exploit (storeptr_correct tge tf XSP (fn_retaddr_ofs f) RA x0 m2' m3' rs2).
-    simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. 
-    change (rs2 X2) with sp. eexact P. 
-    simpl; congruence. congruence.
-  intros (rs3 & U & V & W).
-  assert (EXEC_PROLOGUE:
-            exec_straight tge tf
-              tf.(fn_code) rs0 m'
-              x0 rs3 m3').
-  { change (fn_code tf) with tfbody; unfold tfbody.
-    apply exec_straight_step with rs2 m2'.
-    unfold exec_instr. rewrite C. fold sp.
-    rewrite <- (sp_val _ _ _ AG). rewrite F. reflexivity.
-    reflexivity. 
-    eexact U. }
-  exploit exec_straight_steps_2; eauto using functions_transl. omega. constructor.
-  intros (ofs' & X & Y).                    
-  left; exists (State rs3 m3'); split.
-  eapply exec_straight_steps_1; eauto. omega. constructor.
-  econstructor; eauto.
-  rewrite X; econstructor; eauto. 
-  apply agree_exten with rs2; eauto with asmgen.
-  unfold rs2. 
-  apply agree_nextinstr. apply agree_set_other; auto with asmgen.
-  apply agree_change_sp with (parent_sp s). 
-  apply agree_undef_regs with rs0. auto.
-Local Transparent destroyed_at_function_entry. simpl.
-  simpl; intros; Simpl.
-  unfold sp; congruence.
-  intros. rewrite V by auto with asmgen. reflexivity.
-
-  rewrite W.
-  unfold rs2.
-  Simpl.
-  
-- (* external function *)
-  exploit functions_translated; eauto.
-  intros [tf [A B]]. simpl in B. inv B.
-  exploit extcall_arguments_match; eauto.
-  intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
-  intros [res' [m2' [P [Q [R S]]]]].
-  left; econstructor; split.
-  apply plus_one. eapply exec_step_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  econstructor; eauto.
-  unfold loc_external_result. apply agree_set_other; auto. apply agree_set_pair; auto.
-  apply agree_undef_caller_save_regs; auto. 
-
-- (* return *)
-  inv STACKS. simpl in *.
-  right. split. omega. split. auto.
-  rewrite <- ATPC in H5.
-  econstructor; eauto. congruence.
-  inv WF.
-  inv STACK.
-  inv H1.
-  congruence.
+  remember (list_nth_z (header bb) (size bb - 1)) as label_opt; destruct label_opt; auto;
+  exploit list_nth_z_range; eauto; lia.
 Qed.
 
-Lemma transf_initial_states:
-  forall st1, Mach.initial_state prog st1 ->
-  exists st2, Asm.initial_state tprog st2 /\ match_states st1 st2.
+Lemma pc_ptr_exec_step: forall ofs bb b rs m _rs _m
+  (ATPC : rs PC = Vptr b ofs)
+  (MATCHI : match_internal (size bb - 1)
+            {| _rs := rs; _m := m |}
+            {| _rs := _rs; _m := _m |}),
+  _rs PC = Vptr b (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))).
 Proof.
-  intros. inversion H. unfold ge0 in *.
-  econstructor; split.
-  econstructor.
-  eapply (Genv.init_mem_transf_partial TRANSF); eauto.
-  replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
-     with (Vptr fb Ptrofs.zero).
-  econstructor; eauto.
-  constructor.
-  apply Mem.extends_refl.
-  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
-  intros. rewrite Regmap.gi. auto.
-  unfold Genv.symbol_address.
-  rewrite (match_program_main TRANSF).
-  rewrite symbols_preserved.
-  unfold ge; rewrite H1. auto.
+  intros; inv MATCHI. rewrite <- AGPC; rewrite ATPC; unfold Val.offset_ptr; eauto.
 Qed.
 
-Lemma transf_final_states:
-  forall st1 st2 r,
-  match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
+Lemma find_instr_ofs_somei: forall ofs bb f tc asmi rs m _rs _m
+  (BOUNDOFS : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+  (FIND_INSTR : Asm.find_instr (Ptrofs.unsigned ofs + (size bb - 1)) tc =
+                Some (asmi))
+  (MATCHI : match_internal (size bb - 1)
+            {| _rs := rs; _m := m |}
+            {| _rs := _rs; _m := _m |}),
+  Asm.find_instr (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
+    (Asm.fn_code {| Asm.fn_sig := fn_sig f; Asm.fn_code := tc |}) =
+  Some (asmi).
 Proof.
-  intros. inv H0. inv H. constructor. assumption.
-  compute in H1. inv H1.
-  generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+  intros; simpl.
+  replace (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
+          with (Ptrofs.unsigned ofs + (size bb - 1)); try assumption.
+  generalize (bblock_size_pos bb); generalize (Ptrofs.unsigned_range_2 ofs); intros.
+  unfold Ptrofs.add.
+  rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; try lia.
+  rewrite Ptrofs.unsigned_repr; lia.
 Qed.
 
+Lemma eval_builtin_arg_match: forall rs _m _rs a1 b1
+  (AG : forall r : preg, r <> PC -> rs r = _rs r)
+  (EVAL : eval_builtin_arg tge (fun r : dreg => rs r) (rs SP) _m a1 b1),
+  eval_builtin_arg tge _rs (_rs SP) _m (map_builtin_arg DR a1) b1.
+Proof.
+  intros; induction EVAL; simpl in *; try rewrite AG; try rewrite AG in EVAL; try discriminate; try congruence; eauto with barg.
+  econstructor. rewrite <- AG; try discriminate; auto.
+Qed.
+
+Lemma eval_builtin_args_match: forall bb rs m _rs _m args vargs
+  (MATCHI : match_internal (size bb - 1)
+            {| _rs := rs; _m := m |}
+            {| _rs := _rs; _m := _m |})
+  (EVAL : eval_builtin_args tge (fun r : dreg => rs r) (rs SP) m args vargs),
+  eval_builtin_args tge _rs (_rs SP) _m (map (map_builtin_arg DR) args) vargs.
+Proof.
+  intros; inv MATCHI.
+  induction EVAL; subst.
+  - econstructor.
+  - econstructor.
+    + eapply eval_builtin_arg_match; eauto.
+    + eauto.
+Qed.
+
+Lemma pc_both_sides: forall (rs _rs: regset) v
+  (AG : forall r : preg, r <> PC -> rs r = _rs r),
+  rs # PC <- v = _rs # PC <- v.
+Proof.
+  intros; unfold Pregmap.set; apply functional_extensionality; intros y.
+  destruct (PregEq.eq y PC); try rewrite AG; eauto.
+Qed.
+
+Lemma set_buitin_res_sym res: forall vres rs _rs r
+  (NPC: r <> PC)
+  (AG : forall r : preg, r <> PC -> rs r = _rs r),
+  set_res res vres rs r = set_res res vres _rs r.
+Proof.
+  induction res; simpl; intros; unfold Pregmap.set; try rewrite AG; eauto.
+Qed.
+
+Lemma set_builtin_res_dont_move_pc_gen res: forall vres rs _rs v1 v2
+  (HV: v1 = v2)
+  (AG : forall r : preg, r <> PC -> rs r = _rs r),
+  (set_res res vres rs) # PC <- v1 =
+  (set_res res vres _rs) # PC <- v2.
+Proof.
+  intros. rewrite HV. generalize res vres rs _rs AG v2.
+  clear res vres rs _rs AG v1 v2 HV.
+  induction res.
+  - simpl; intros. apply pc_both_sides; intros.
+    unfold Pregmap.set; try rewrite AG; eauto.
+  - simpl; intros; apply pc_both_sides; eauto.
+  - simpl; intros.
+    erewrite IHres2; eauto; intros.
+    eapply set_buitin_res_sym; eauto.
+Qed.
+
+Lemma set_builtin_map_not_pc (res: builtin_res dreg): forall vres rs,
+  set_res (map_builtin_res DR res) vres rs PC = rs PC.
+Proof.
+  induction res.
+  - intros; simpl. unfold Pregmap.set. destruct (PregEq.eq PC x); try congruence.
+  - intros; simpl; congruence.
+  - intros; simpl in *. rewrite IHres2. rewrite IHres1. reflexivity.
+Qed.
+
+Lemma undef_reg_preserved (rl: list mreg): forall rs _rs r
+  (NPC: r <> PC)
+  (AG : forall r : preg, r <> PC -> rs r = _rs r),
+  undef_regs (map preg_of rl) rs r = undef_regs (map preg_of rl) _rs r.
+Proof.
+  induction rl.
+  - simpl; auto.
+  - simpl; intros. erewrite IHrl; eauto.
+    intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of a)); try rewrite AG; eauto.
+Qed.
+
+Lemma undef_regs_other:
+  forall r rl rs,
+  (forall r', In r' rl -> r <> r') ->
+  undef_regs rl rs r = rs r.
+Proof.
+  induction rl; simpl; intros. auto.
+  rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+  match rl with
+  | nil => True
+  | r1 :: nil => r <> preg_of r1
+  | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+  end.
+
+Remark preg_notin_charact:
+  forall r rl,
+  preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+  induction rl; simpl; intros.
+  tauto.
+  destruct rl.
+  simpl. split. intros. intuition congruence. auto.
+  rewrite IHrl. split.
+  intros [A B]. intros. destruct H. congruence. auto.
+  auto.
+Qed.
+
+Lemma undef_regs_other_2:
+  forall r rl rs,
+  preg_notin r rl ->
+  undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+  intros. apply undef_regs_other. intros.
+  exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+  rewrite preg_notin_charact in H. auto.
+Qed.
+
+Lemma exec_exit_simulation_plus b ofs f bb s2 t rs m rs' m': forall
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (NEMPTY_EXIT: exit bb <> None)
+  (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
+  (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
+  (ATPC: rs PC = Vptr b ofs),
+  plus Asm.step tge s2 t (State rs' m').
+Proof.
+  intros.
+  exploit internal_functions_unfold; eauto.
+  intros (tc & FINDtf & TRANStf & _).
+
+  exploit (find_instr_bblock (size bb - 1)); eauto.
+  { generalize (bblock_size_pos bb). lia. }
+  intros (i' & NTH & FIND_INSTR).
+
+  inv NTH.
+  + rewrite last_instruction_cannot_be_label in *. discriminate.
+  + destruct (exit bb) as [ctrl |] eqn:NEMPTY_EXIT'. 2: { contradiction. }
+    rewrite bblock_size_aux in *. rewrite NEMPTY_EXIT' in *. simpl in *.
+    (* XXX: Is there a better way to simplify this expression i.e. automatically? *)
+    replace (list_length_z (header bb) + list_length_z (body bb) + 1 - 1 -
+       list_length_z (header bb)) with (list_length_z (body bb)) in H by lia.
+    rewrite list_nth_z_range_exceeded in H; try lia. discriminate.
+  + assert (Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned). {
+      eapply size_of_blocks_bounds; eauto.
+    }
+    assert (size bb <= Ptrofs.max_unsigned). { generalize (Ptrofs.unsigned_range_2 ofs); lia. }
+    destruct cfi.
+    * (* control flow instruction *)
+      destruct s2.
+      rewrite H in EXIT. (* exit bb is a cfi *)
+      inv EXIT.
+      rewrite H in MATCHI. simpl in MATCHI.
+      exploit internal_functions_translated; eauto.
+      rewrite FINDtf.
+      intros (tf & FINDtf' & TRANSf). inversion FINDtf'; subst; clear FINDtf'.
+      exploit exec_cfi_simulation; eauto.
+      (* extract exec_cfi_simulation's conclusion as separate hypotheses *)
+      intros (rs2' & m2' & EXECI & MATCHS); rewrite MATCHS.
+      apply plus_one.
+      eapply Asm.exec_step_internal; eauto.
+      - eapply pc_ptr_exec_step; eauto.
+      - eapply find_instr_ofs_somei; eauto.
+    * (* builtin *)
+      destruct s2.
+      rewrite H in EXIT.
+      rewrite H in MATCHI. simpl in MATCHI.
+      simpl in FIND_INSTR.
+      inversion EXIT.
+      apply plus_one.
+      eapply external_call_symbols_preserved in H10; try (apply senv_preserved).
+      eapply eval_builtin_args_preserved in H6; try (apply symbols_preserved).
+      eapply Asm.exec_step_builtin; eauto.
+      - eapply pc_ptr_exec_step; eauto.
+      - eapply find_instr_ofs_somei; eauto.
+      - eapply eval_builtin_args_match; eauto.
+      - inv MATCHI; eauto.
+      - inv MATCHI.
+        unfold Asm.nextinstr, incrPC.
+        assert (HPC: Val.offset_ptr (rs PC) (Ptrofs.repr (size bb))
+                   = Val.offset_ptr (_rs PC) Ptrofs.one).
+        { rewrite <- AGPC. rewrite ATPC. unfold Val.offset_ptr.
+          rewrite Ptrofs.add_assoc. unfold Ptrofs.add.
+          assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
+          rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
+          rewrite Ptrofs.unsigned_one.
+          replace (size bb - 1 + 1) with (size bb) by lia.
+          reflexivity. }
+        apply set_builtin_res_dont_move_pc_gen.
+        -- erewrite !set_builtin_map_not_pc.
+           erewrite !undef_regs_other.
+           rewrite HPC; auto.
+           all: intros; simpl in *; destruct H3 as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+           exploit list_in_map_inv; eauto; intros [mr [A B]]; subst; discriminate.
+        -- intros. eapply undef_reg_preserved; eauto.
+           intros. destruct (PregEq.eq X16 r0); destruct (PregEq.eq X30 r0); subst.
+           rewrite Pregmap.gso, Pregmap.gss; try congruence.
+           do 2 (rewrite Pregmap.gso, Pregmap.gss; try discriminate; auto).
+           rewrite 2Pregmap.gss; auto.
+           rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
+  (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
+  (ATPC: rs PC = Vptr b ofs),
+  star Asm.step tge s2 t (State rs' m').
+Proof.
+  intros.
+  destruct (exit bb) eqn: Hex.
+  - eapply plus_star.
+    eapply exec_exit_simulation_plus; try rewrite Hex; congruence || eauto.
+  - inv MATCHI.
+    inv EXIT.
+    assert (X: rs2 = incrPC (Ptrofs.repr (size bb)) rs). {
+      unfold incrPC. unfold Pregmap.set.
+        apply functional_extensionality. intros x.
+        destruct (PregEq.eq x PC) as [X|].
+        - rewrite X. rewrite <- AGPC. simpl.
+          replace (size bb - 0) with (size bb) by lia. reflexivity.
+        - rewrite AG; try assumption. reflexivity.
+    }
+    destruct X.
+    subst; eapply star_refl; eauto.
+Qed.
+
+Lemma exec_bblock_simulation b ofs f bb t rs m rs' m': forall
+  (ATPC: rs PC = Vptr b ofs)
+  (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+  (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+  (EXECBB: exec_bblock lk ge f bb rs m t rs' m'),
+  plus Asm.step tge (State rs m) t (State rs' m').
+Proof.
+  intros; destruct EXECBB as (rs1 & m1 & BODY & CTL).
+  exploit exec_header_simulation; eauto.
+  intros (s0 & STAR & MATCH0).
+  eapply star_plus_trans; traceEq || eauto.
+  destruct (bblock_non_empty bb).
+  - (* body bb <> nil *)
+     exploit exec_body_simulation_plus; eauto.
+     intros (s1 & PLUS & MATCH1).
+     eapply plus_star_trans; traceEq || eauto.
+     eapply exec_exit_simulation_star; eauto.
+     erewrite <- exec_body_dont_move_PC; eauto.
+  - (* exit bb <> None *)
+     exploit exec_body_simulation_star; eauto.
+     intros (s1 & STAR1 & MATCH1).
+     eapply star_plus_trans; traceEq || eauto.
+     eapply exec_exit_simulation_plus; eauto.
+     erewrite <- exec_body_dont_move_PC; eauto.
+Qed.
+
+Lemma step_simulation s t s':
+  Asmblock.step lk ge s t s' -> plus Asm.step tge s t s'.
+Proof.
+  intros STEP.
+  inv STEP; simpl; exploit functions_translated; eauto;
+  intros (tf0 & FINDtf & TRANSf);
+  monadInv TRANSf.
+  - (* internal step *) eapply exec_bblock_simulation; eauto.
+  - (* external step *)
+    apply plus_one.
+    exploit external_call_symbols_preserved; eauto. apply senv_preserved.
+    intros ?.
+    eapply Asm.exec_step_external; eauto.
+Qed.
+
+Lemma transf_program_correct:
+  forward_simulation (Asmblock.semantics lk prog) (Asm.semantics tprog).
+Proof.
+  eapply forward_simulation_plus.
+  - apply senv_preserved.
+  - eexact transf_initial_states.
+  - eexact transf_final_states.
+  - unfold match_states.
+    simpl; intros; subst; eexists; split; eauto.
+    eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
+
+End Asmblock_PRESERVATION.
+
+
+Local Open Scope linking_scope.
+
+Definition block_passes :=
+      mkpass Machblockgenproof.match_prog
+  ::: mkpass Asmblockgenproof.match_prog
+  ::: mkpass PostpassSchedulingproof.match_prog
+  ::: mkpass Asmblock_PRESERVATION.match_prog
+  ::: pass_nil _.
+
+Definition match_prog := pass_match (compose_passes block_passes).
+
+Lemma transf_program_match:
+  forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros p tp H.
+  unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
+  inversion_clear H. apply bind_inversion in H1. destruct H1.
+  inversion_clear H.
+  unfold Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
+  unfold match_prog; simpl.
+  exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
+  exists x; split. apply Asmblockgenproof.transf_program_match; auto.
+  exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto.
+  exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto.
+Qed.
+
+(** Return Address Offset *)
+
+Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop :=
+  Machblockgenproof.Mach_return_address_offset (Asmblockgenproof.return_address_offset).
+
+Lemma return_address_exists:
+  forall f sg ros c, is_tail (Mach.Mcall sg ros :: c) f.(Mach.fn_code) ->
+  exists ra, return_address_offset f c ra.
+Proof.
+  intros; unfold return_address_offset; eapply Machblockgenproof.Mach_return_address_exists; eauto.
+  intros; eapply Asmblockgenproof.return_address_exists; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Mach.program.
+Variable tprog: Asm.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
 Theorem transf_program_correct:
   forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
 Proof.
-  eapply forward_simulation_star with (measure := measure)
-         (match_states := fun S1 S2 => match_states S1 S2 /\ wf_state ge S1).
-  - apply senv_preserved.
-  - simpl; intros. exploit transf_initial_states; eauto.
-    intros (s2 & A & B).
-    exists s2; intuition auto. apply wf_initial; auto.
-  - simpl; intros. destruct H as [MS WF]. eapply transf_final_states; eauto.
-  - simpl; intros. destruct H0 as [MS WF]. 
-    exploit step_simulation; eauto. intros [ (s2' & A & B) | (A & B & C) ].
-    + left; exists s2'; intuition auto. eapply wf_step; eauto. 
-    + right; intuition auto. eapply wf_step; eauto.
+  unfold match_prog in TRANSF. simpl in TRANSF.
+  inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
+  eapply compose_forward_simulations.
+  { exploit Machblockgenproof.transf_program_correct; eauto. }
+  
+  eapply compose_forward_simulations.
+  + apply Asmblockgenproof.transf_program_correct; eauto.
+    { intros.
+      unfold Genv.symbol_address.
+      erewrite <- PostpassSchedulingproof.symbols_preserved; eauto.
+      erewrite Asmblock_PRESERVATION.symbol_high_low; eauto.
+      reflexivity.
+    }
+  + eapply compose_forward_simulations.
+    - apply PostpassSchedulingproof.transf_program_correct; eauto.
+    - apply Asmblock_PRESERVATION.transf_program_correct; eauto.
 Qed.
 
 End PRESERVATION.
+
+Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes).
+
+(*******************************************)
+(* Stub actually needed by driver/Compiler *)
+
+Module Asmgenproof0.
+
+Definition return_address_offset := return_address_offset.
+
+End Asmgenproof0.
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
deleted file mode 100644
index 0e36bd05..00000000
--- a/aarch64/Asmgenproof1.v
+++ /dev/null
@@ -1,2138 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*         Xavier Leroy, Collège de France and INRIA Paris             *)
-(*                                                                     *)
-(*  Copyright Institut National de Recherche en Informatique et en     *)
-(*  Automatique.  All rights reserved.  This file is distributed       *)
-(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
-(*                                                                     *)
-(* *********************************************************************)
-
-(** Correctness proof for AArch64 code generation: auxiliary results. *)
-
-Require Import Recdef Coqlib Zwf Zbits.
-Require Import Maps Errors AST Integers Floats Values Memory Globalenvs.
-Require Import Op Locations Mach Asm Conventions.
-Require Import Asmgen.
-Require Import Asmgenproof0.
-
-Local Transparent Archi.ptr64.
-
-(** Properties of registers *)
-
-Lemma preg_of_not_RA:
-  forall r, (preg_of r) <> RA.
-Proof.
-  destruct r; discriminate.
-Qed.
-
-Lemma RA_not_written:
-  forall (rs : regset) dst v,
-    rs # (preg_of dst) <- v RA = rs RA.
-Proof.
-  intros.
-  apply Pregmap.gso.
-  intro.
-  symmetry in H.
-  exact (preg_of_not_RA dst H).
-Qed.
-
-Hint Resolve RA_not_written : asmgen.
-
-Lemma RA_not_written2:
-  forall (rs : regset) dst v i,
-    preg_of dst = i ->
-    rs # i <- v RA = rs RA.
-Proof.
-  intros.
-  subst i.
-  apply RA_not_written.
-Qed.
-
-Hint Resolve RA_not_written2 : asmgen.
-
-Lemma RA_not_written3:
-  forall (rs : regset) dst v i,
-    ireg_of dst = OK i ->
-    rs # i <- v RA = rs RA.
-Proof.
-  intros.
-  unfold ireg_of in H.
-  destruct preg_of eqn:PREG; try discriminate.
-  replace i0 with i in * by congruence.
-  eapply RA_not_written2; eassumption.
-Qed.
-
-Hint Resolve RA_not_written3 : asmgen.
-
-Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
-Proof.
-  destruct r; simpl; congruence.
-Qed.
-Hint Resolve preg_of_iregsp_not_PC: asmgen.
-
-Lemma preg_of_not_X16: forall r, preg_of r <> X16.
-Proof.
-  destruct r; simpl; congruence.
-Qed.
-
-Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16.
-Proof.
-  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
-  red; intros; subst x. elim (preg_of_not_X16 r); auto.
-Qed.
-
-Lemma ireg_of_not_RA: forall r x, ireg_of r = OK x -> x <> RA.
-Proof.
-  unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
-  red; intros; subst x. elim (preg_of_not_RA r); auto.
-Qed.
-
-Lemma ireg_of_not_RA': forall r x, ireg_of r = OK x -> RA <> x.
-Proof.
-  intros. intro.
-  apply (ireg_of_not_RA r x); auto.
-Qed.
-
-Lemma ireg_of_not_RA'': forall r x, ireg_of r = OK x -> IR RA <> IR x.
-Proof.
-  intros. intro.
-  apply (ireg_of_not_RA' r x); auto. congruence.
-Qed.
-
-Hint Resolve ireg_of_not_RA ireg_of_not_RA' ireg_of_not_RA'' : asmgen.
-
-Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
-Proof.
-  intros. apply ireg_of_not_X16 in H. congruence.
-Qed.
-
-Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16': asmgen.
-
-(** Useful simplification tactic *)
-
-
-Ltac Simplif :=
-  ((rewrite nextinstr_inv by eauto with asmgen)
-  || (rewrite nextinstr_inv1 by eauto with asmgen)
-  || (rewrite Pregmap.gss)
-  || (rewrite nextinstr_pc)
-  || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
-
-Ltac Simpl := repeat Simplif.
-
-(** * Correctness of ARM constructor functions *)
-
-Section CONSTRUCTORS.
-
-Variable ge: genv.
-Variable fn: function.
-
-(** Decomposition of integer literals *)
-
-Inductive wf_decomposition: list (Z * Z) -> Prop :=
-  | wf_decomp_nil:
-      wf_decomposition nil
-  | wf_decomp_cons: forall m n p l,
-      n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l ->
-      wf_decomposition ((n, p) :: l).
-
-Lemma decompose_int_wf:
-  forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p).
-Proof.
-Local Opaque Zzero_ext.
-  induction N as [ | N]; simpl; intros.
-- constructor.
-- set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0).
-+ apply IHN. omega.
-+ econstructor. reflexivity. omega. apply IHN; omega. 
-Qed.
-
-Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z :=
-  match l with
-  | nil => accu
-  | (n, p) :: l => recompose_int (Zinsert accu n p 16) l
-  end.
-
-Lemma decompose_int_correct:
-  forall N n p accu,
-  0 <= p ->
-  (forall i, p <= i -> Z.testbit accu i = false) ->
-  (forall i, 0 <= i < p + Z.of_nat N * 16 ->
-   Z.testbit (recompose_int accu (decompose_int N n p)) i =
-   if zlt i p then Z.testbit accu i else Z.testbit n i).
-Proof.
-  induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE.
-- simpl. rewrite zlt_true; auto. xomega.
-- rewrite inj_S in RANGE. simpl.
-  set (frag := Zzero_ext 16 (Z.shiftr n p)).
-  assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)).
-  { unfold frag; intros. rewrite Zzero_ext_spec by omega. rewrite zlt_true by omega.
-    rewrite Z.shiftr_spec by omega. f_equal; omega. }
-  destruct (Z.eqb_spec frag 0).
-+ rewrite IHN.
-* destruct (zlt i p). rewrite zlt_true by omega. auto.
-  destruct (zlt i (p + 16)); auto.
-  rewrite ABOVE by omega. rewrite FRAG by omega. rewrite e, Z.testbit_0_l. auto.
-* omega.
-* intros; apply ABOVE; omega.
-* xomega.
-+ simpl. rewrite IHN.
-* destruct (zlt i (p + 16)).
-** rewrite Zinsert_spec by omega. unfold proj_sumbool.
-   rewrite zlt_true by omega.
-   destruct (zlt i p).
-   rewrite zle_false by omega. auto.
-   rewrite zle_true by omega. simpl. symmetry; apply FRAG; omega.
-** rewrite Z.ldiff_spec, Z.shiftl_spec by omega.
-   change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by omega.
-   rewrite zlt_false by omega. rewrite zlt_false by omega. apply andb_true_r. 
-* omega.
-* intros. rewrite Zinsert_spec by omega. unfold proj_sumbool.
-  rewrite zle_true by omega. rewrite zlt_false by omega. simpl.
-  apply ABOVE. omega.
-* xomega.
-Qed.
-
-Corollary decompose_int_eqmod: forall N n,
-  eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n.
-Proof.
-  intros; apply eqmod_same_bits; intros.
-  rewrite decompose_int_correct. apply zlt_false; omega. 
-  omega. intros; apply Z.testbit_0_l. xomega.
-Qed.
-
-Corollary decompose_notint_eqmod: forall N n,
-  eqmod (two_power_nat (N * 16)%nat)
-        (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n.
-Proof.
-  intros; apply eqmod_same_bits; intros.
-  rewrite Z.lnot_spec, decompose_int_correct.
-  rewrite zlt_false by omega. rewrite Z.lnot_spec by omega. apply negb_involutive.
-  omega. intros; apply Z.testbit_0_l. xomega. omega.
-Qed.
-
-Lemma negate_decomposition_wf:
-  forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l).
-Proof.
-  induction 1; simpl; econstructor; auto.
-  instantiate (1 := (Z.lnot m)).
-  apply equal_same_bits; intros.
-  rewrite H. change 65535 with (two_p 16 - 1).
-  rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by omega.
-  destruct (zlt i 16).
-  apply xorb_true_r.
-  auto.
-Qed.
-
-Lemma Zinsert_eqmod:
-  forall n x1 x2 y p l, 0 <= p -> 0 <= l ->
-  eqmod (two_power_nat n) x1 x2 ->
-  eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l).
-Proof.
-  intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by omega.
-  destruct (zle p i && zlt i (p + l)); auto.
-  apply same_bits_eqmod with n; auto.
-Qed.
-
-Lemma Zinsert_0_l:
-  forall y p l,
-  0 <= p -> 0 <= l ->
-  Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l.
-Proof.
-  intros. apply equal_same_bits; intros.
-  rewrite Zinsert_spec by omega. unfold proj_sumbool.
-  destruct (zlt i p); [rewrite zle_false by omega|rewrite zle_true by omega]; simpl.
-- rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto.
-- rewrite Z.shiftl_spec by omega. 
-  destruct (zlt i (p + l)); auto.
-  rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by omega. auto.
-Qed.
-
-Lemma recompose_int_negated:
-  forall l, wf_decomposition l ->
-  forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l).
-Proof.
-  induction 1; intros accu; simpl.
-- auto.
-- rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. 
-  rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by omega.
-  unfold proj_sumbool.
-  destruct (zle p i); simpl; auto.
-  destruct (zlt i (p + 16)); simpl; auto.
-  change 65535 with (two_p 16 - 1).
-  rewrite Ztestbit_two_p_m1 by omega. rewrite zlt_true by omega.
-  apply xorb_true_r. 
-Qed.
-
-Lemma exec_loadimm_k_w:
-  forall (rd: ireg) k m l,
-  wf_decomposition l ->
-  rd <> RA ->
-  forall (rs: regset) accu,
-  rs#rd = Vint (Int.repr accu) ->
-  exists rs',
-     exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m
-  /\ rs'#rd = Vint (Int.repr (recompose_int accu l))
-  /\ (forall r, r <> PC -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  induction 1; intros RD_NOT_RA rs accu ACCU; simpl.                          
-- exists rs; split. apply exec_straight_opt_refl. auto.
-- destruct (IHwf_decomposition RD_NOT_RA
-                (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16)))
-                (Zinsert accu n p 16))
-  as (rs' & P & Q & R & S).
-  Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. 
-  apply Zinsert_eqmod. auto. omega. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
-  exists rs'; split.
-  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
-  split. exact Q.
-  split.
-  { intros; Simpl.
-    rewrite R by auto. Simpl. }
-  { rewrite S. Simpl. }
-Qed.
-
-Lemma exec_loadimm_z_w:
-  forall rd l k rs m,
-  wf_decomposition l ->
-  rd <> RA ->
-  exists rs',
-     exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m
-  /\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm_z; destruct 1; intro RD_NOT_RA.
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl.
-  intros; Simpl.
-- set (accu0 := Zinsert 0 n p 16).
-  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
-  destruct (exec_loadimm_k_w rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R & S); auto.
-  unfold rs1; Simpl.
-  exists rs2; split.
-  eapply exec_straight_opt_step; eauto.
-  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; omega.
-  reflexivity.
-  split. exact Q. 
-  intros. rewrite R by auto. unfold rs1; Simpl.
-Qed.
-
-Lemma exec_loadimm_n_w:
-  forall rd l k rs m,
-  wf_decomposition l ->
-  rd <> RA ->
-  exists rs',
-     exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m
-  /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm_n; destruct 1; intro RD_NOT_RA.
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. 
-  intros; Simpl.
-- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
-  set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
-  destruct (exec_loadimm_k_w rd k m (negate_decomposition l) 
-                                    (negate_decomposition_wf l H1)
-                                    RD_NOT_RA rs1 accu0)
-    as (rs2 & P & Q & R & S).
-  unfold rs1; Simpl.
-  exists rs2; split.
-  eapply exec_straight_opt_step; eauto.
-  simpl. unfold rs1. do 5 f_equal.
-  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; omega.
-  reflexivity.  
-  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
-  intros. rewrite R by auto. unfold rs1; Simpl.
-Qed.
-
-Lemma exec_loadimm32:
-  forall rd n k rs m
-  (RD_NOT_RA : rd <> RA),
-  exists rs',
-     exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
-  /\ rs'#rd = Vint n
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm32, loadimm; intros.
-  destruct (is_logical_imm32 n).
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto.
-  intros; Simpl.
-- set (dz := decompose_int 2%nat (Int.unsigned n) 0).
-  set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0).
-  assert (A: Int.repr (recompose_int 0 dz) = n).
-  { transitivity (Int.repr (Int.unsigned n)).
-    apply Int.eqm_samerepr. apply decompose_int_eqmod. 
-    apply Int.repr_unsigned. }
-  assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n).
-  { transitivity (Int.repr (Int.unsigned n)).
-    apply Int.eqm_samerepr. apply decompose_notint_eqmod. 
-    apply Int.repr_unsigned. }
-  destruct Nat.leb.
-+ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. trivial.
-+ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. trivial.
-Qed.
-
-Lemma exec_loadimm_k_x:
-  forall (rd: ireg) k m l,
-    wf_decomposition l ->
-    rd <> RA ->
-  forall (rs: regset) accu,
-  rs#rd = Vlong (Int64.repr accu) ->
-  exists rs',
-     exec_straight_opt ge fn (loadimm_k X rd l k) rs m k rs' m
-  /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  induction 1; intros RD_NOT_RA rs accu ACCU; simpl.
-- exists rs; split. apply exec_straight_opt_refl. auto.
-- destruct (IHwf_decomposition RD_NOT_RA
-                (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16)))
-                (Zinsert accu n p 16))
-  as (rs' & P & Q & R).
-  Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. 
-  apply Zinsert_eqmod. auto. omega. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
-  exists rs'; split.
-  eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
-  split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
-Qed.
-
-Lemma exec_loadimm_z_x:
-  forall rd l k rs m,
-  wf_decomposition l ->
-    rd <> RA ->
-  exists rs',
-     exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m
-  /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm_z; destruct 1; intro RD_NOT_RA.
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl.
-  intros; Simpl.
-- set (accu0 := Zinsert 0 n p 16).
-  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
-  destruct (exec_loadimm_k_x rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R); auto.
-  unfold rs1; Simpl.
-  exists rs2; split.
-  eapply exec_straight_opt_step; eauto.
-  simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; omega.
-  reflexivity.
-  split. exact Q. 
-  intros. rewrite R by auto. unfold rs1; Simpl.
-Qed.
-
-Lemma exec_loadimm_n_x:
-  forall rd l k rs m,
-  wf_decomposition l ->
-  rd <> RA ->
-  exists rs',
-     exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m
-  /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm_n; destruct 1; intro RD_NOT_RA.
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. 
-  intros; Simpl.
-- set (accu0 := Z.lnot (Zinsert 0 n p 16)).
-  set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
-  destruct (exec_loadimm_k_x rd k m (negate_decomposition l) 
-                                    (negate_decomposition_wf l H1)
-                                    RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R).
-  unfold rs1; Simpl.
-  exists rs2; split.
-  eapply exec_straight_opt_step; eauto.
-  simpl. unfold rs1. do 5 f_equal.
-  unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; omega.
-  reflexivity.  
-  split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
-  intros. rewrite R by auto. unfold rs1; Simpl.
-Qed.
-
-Lemma exec_loadimm64:
-  forall rd n k rs m,
-    rd <> RA ->
-  exists rs',
-     exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
-  /\ rs'#rd = Vlong n
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm64, loadimm; intros until m; intro RD_NOT_RA.
-  destruct (is_logical_imm64 n).
-- econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto.
-  intros; Simpl.
-- set (dz := decompose_int 4%nat (Int64.unsigned n) 0).
-  set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0).
-  assert (A: Int64.repr (recompose_int 0 dz) = n).
-  { transitivity (Int64.repr (Int64.unsigned n)).
-    apply Int64.eqm_samerepr. apply decompose_int_eqmod. 
-    apply Int64.repr_unsigned. }
-  assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n).
-  { transitivity (Int64.repr (Int64.unsigned n)).
-    apply Int64.eqm_samerepr. apply decompose_notint_eqmod. 
-    apply Int64.repr_unsigned. }
-  destruct Nat.leb.
-+ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. trivial.
-+ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. trivial.
-Qed.
-
-(** Add immediate *)
-
-Lemma exec_addimm_aux_32:
-  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
-  (forall rd r1 n rs m,
-    exec_instr ge fn (insn rd r1 n) rs m =
-      Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) ->
-  (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
-  forall rd r1 n k rs m,
-    (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
-  /\ rs'#rd = sem rs#r1 (Vint n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros insn sem SEM ASSOC; intros until m; intro RD_NOT_RA. unfold addimm_aux.
-  set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
-  assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; omega).
-  rewrite <- (Int.repr_unsigned n).
-  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
-- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
-  split. Simpl. do 3 f_equal; omega.
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
-  split. Simpl. do 3 f_equal; omega.
-  split; intros; Simpl.
-- econstructor; split. eapply exec_straight_two.
-  apply SEM. apply SEM. Simpl. Simpl.
-  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
-  rewrite E. auto with ints.
-  split; intros; Simpl.
-Qed.
-
-Lemma exec_addimm32:
-  forall rd r1 n k rs m,
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = Val.add rs#r1 (Vint n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros. unfold addimm32. set (nn := Int.neg n).
-  destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
-- apply exec_addimm_aux_32 with (sem := Val.add); auto. intros; apply Val.add_assoc. 
-- rewrite <- Val.sub_opp_add.
-  apply exec_addimm_aux_32 with (sem := Val.sub); auto.
-  intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
-- destruct (Int.lt n Int.zero).
-+ rewrite <- Val.sub_opp_add; fold nn.
-  edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. rewrite B, C; eauto with asmgen.
-  split; intros; Simpl.
-+ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. rewrite B, C; eauto with asmgen.
-  split; intros; Simpl.
-Qed.
-
-Lemma exec_addimm_aux_64:
-  forall (insn: iregsp -> iregsp -> Z -> instruction) (sem: val -> val -> val),
-  (forall rd r1 n rs m,
-    exec_instr ge fn (insn rd r1 n) rs m =
-      Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) ->
-  (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
-  forall rd r1 n k rs m,
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
-  /\ rs'#rd = sem rs#r1 (Vlong n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros insn sem SEM ASSOC; intros. unfold addimm_aux.
-  set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
-  assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; omega).
-  rewrite <- (Int64.repr_unsigned n).
-  destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
-- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
-  split. Simpl. do 3 f_equal; omega.
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one. apply SEM. Simpl. 
-  split. Simpl. do 3 f_equal; omega.
-  split; intros; Simpl.
-- econstructor; split. eapply exec_straight_two.
-  apply SEM. apply SEM. Simpl. Simpl.
-  split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
-  rewrite E. auto with ints.
-  split; intros; Simpl.
-Qed.
-
-Lemma exec_addimm64:
-  forall rd r1 n k rs m,
-  preg_of_iregsp r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = Val.addl rs#r1 (Vlong n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros. 
-  unfold addimm64. set (nn := Int64.neg n).
-  destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
-- apply exec_addimm_aux_64 with (sem := Val.addl); auto. intros; apply Val.addl_assoc. 
-- rewrite <- Val.subl_opp_addl.
-  apply exec_addimm_aux_64 with (sem := Val.subl); auto.
-  intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
-- destruct (Int64.lt n Int64.zero).
-+ rewrite <- Val.subl_opp_addl; fold nn.
-  edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
-  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
-  split; intros; Simpl.
-+ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. 
-  split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
-  split; intros; Simpl.
-Qed.
-
-(** Logical immediate *)
-
-Lemma exec_logicalimm32:
-  forall (insn1: ireg -> ireg0 -> Z -> instruction)
-         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
-         (sem: val -> val -> val),
-  (forall rd r1 n rs m,
-    exec_instr ge fn (insn1 rd r1 n) rs m =
-      Next (nextinstr (rs#rd <- (sem rs##r1 (Vint (Int.repr n))))) m) ->
-  (forall rd r1 r2 s rs m,
-    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
-      Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) ->
-  forall rd r1 n k rs m,
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = sem rs#r1 (Vint n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
-  destruct (is_logical_imm32 n).
-- econstructor; split. 
-  apply exec_straight_one. apply SEM1. reflexivity. 
-  split. Simpl. rewrite Int.repr_unsigned; auto.
-  split; intros; Simpl.
-- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.  
-  apply exec_straight_one. apply SEM2. reflexivity.
-  split. Simpl. f_equal; auto. apply C; auto with asmgen.
-  split; intros; Simpl. 
-Qed.
-
-Lemma exec_logicalimm64:
-  forall (insn1: ireg -> ireg0 -> Z -> instruction)
-         (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
-         (sem: val -> val -> val),
-  (forall rd r1 n rs m,
-    exec_instr ge fn (insn1 rd r1 n) rs m =
-      Next (nextinstr (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n))))) m) ->
-  (forall rd r1 r2 s rs m,
-    exec_instr ge fn (insn2 rd r1 r2 s) rs m =
-      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
-  forall rd r1 n k rs m,
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = sem rs#r1 (Vlong n)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
-  destruct (is_logical_imm64 n).
-- econstructor; split. 
-  apply exec_straight_one. apply SEM1. reflexivity. 
-  split. Simpl. rewrite Int64.repr_unsigned. auto.
-  split; intros; Simpl.
-- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.  
-  apply exec_straight_one. apply SEM2. reflexivity.
-  split. Simpl. f_equal; auto. apply C; auto with asmgen.
-  split; intros; Simpl. 
-Qed.
-
-(** Load address of symbol *)
-
-Lemma exec_loadsymbol: forall rd s ofs k rs m,
-    rd <> X16 \/ Archi.pic_code tt = false ->
-    (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m
-  /\ rs'#rd = Genv.symbol_address ge s ofs
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs'#RA = rs#RA.
-Proof.
-  unfold loadsymbol; intros. destruct (Archi.pic_code tt).
-- predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
-+ subst ofs. econstructor; split.
-  apply exec_straight_one; [simpl; eauto | reflexivity].
-  split. Simpl. split; intros; Simpl.
-  
-+ exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
-  instantiate (1 := rd). assumption.
-  intros (rs1 & A & B & C & D).
-  econstructor; split.
-  econstructor. simpl; eauto. auto. eexact A. 
-  split. simpl in B; rewrite B. Simpl. 
-  rewrite <- Genv.shift_symbol_address_64 by auto.
-  rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
-  split; intros. rewrite C by auto; Simpl.
-  rewrite D. Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
-  split. Simpl. rewrite symbol_high_low; auto. 
-  split; intros; Simpl.
-Qed.
-
-(** Shifted operands *)
-
-Remark transl_shift_not_none:
-  forall s a, transl_shift s a <> SOnone.
-Proof.
-  destruct s; intros; simpl; congruence.
-Qed.
-
-Remark or_zero_eval_shift_op_int:
-  forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s.
-Proof.
-  intros; destruct s; try congruence; destruct v; auto; simpl;
-  destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto.
-Qed.
-
-Remark or_zero_eval_shift_op_long:
-  forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
-Proof.
-  intros; destruct s; try congruence; destruct v; auto; simpl;
-  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto.
-Qed.
-
-Remark add_zero_eval_shift_op_long:
-  forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
-Proof.
-  intros; destruct s; try congruence; destruct v; auto; simpl;
-  destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto.
-Qed.
-
-Lemma transl_eval_shift: forall s v (a: amount32),
-  eval_shift_op_int v (transl_shift s a) = eval_shift s v a.
-Proof.
-  intros. destruct s; simpl; auto.
-Qed.
-
-Lemma transl_eval_shift': forall s v (a: amount32),
-  Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a.
-Proof.
-  intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none).
-  apply transl_eval_shift.
-Qed.
-
-Lemma transl_eval_shiftl: forall s v (a: amount64),
-  eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a.
-Proof.
-  intros. destruct s; simpl; auto.
-Qed.
-
-Lemma transl_eval_shiftl': forall s v (a: amount64),
-  Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
-Proof.
-  intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none).
-  apply transl_eval_shiftl.
-Qed.
-
-Lemma transl_eval_shiftl'': forall s v (a: amount64),
-  Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
-Proof.
-  intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none).
-  apply transl_eval_shiftl.
-Qed.
-
-(** Zero- and Sign- extensions *)
-
-Lemma exec_move_extended_base: forall rd r1 ex k rs m,
-  exists rs',
-     exec_straight ge fn (move_extended_base rd r1 ex k) rs m k rs' m
-  /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold move_extended_base; destruct ex; econstructor;
-  (split; [apply exec_straight_one; [simpl;eauto|auto] | split; [Simpl|intros;Simpl]]).
-Qed.
-
-Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m,
-  exists rs',
-     exec_straight ge fn (move_extended rd r1 ex a k) rs m k rs' m
-  /\ rs' rd = Op.eval_extend ex rs#r1 a
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero.
-- exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
-  exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B.
-  destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto.
-  auto.
-- Local Opaque Val.addl.
-  exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one.
-  unfold exec_instr. change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto.
-  split. Simpl. rewrite B. auto. 
-  intros; Simpl.
-Qed.
-
-Lemma exec_arith_extended:
-  forall (sem: val -> val -> val)
-         (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
-         (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction),
-  (forall rd r1 r2 x rs m,
-    exec_instr ge fn (insnX rd r1 r2 x) rs m =
-      Next (nextinstr (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x)))) m) ->
-  (forall rd r1 r2 s rs m,
-    exec_instr ge fn (insnS rd r1 r2 s) rs m =
-      Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
-  forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m,
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
-  /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
-- econstructor; split. 
-  apply exec_straight_one. rewrite EX; eauto. auto.
-  split. Simpl. f_equal. destruct ex; auto.
-  split; intros; Simpl.
-- exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
-  rewrite ES. eauto. auto.
-  split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. 
-  rewrite B. destruct ex; auto.
-  split; intros; Simpl.
-Qed. 
-
-(** Extended right shift *)
-
-Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
-  Val.shrx rs#r1 (Vint n) = Some v ->
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = v
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  unfold shrx32; intros. apply Val.shrx_shr_3 in H.
-  destruct (Int.eq n Int.zero) eqn:E.
-- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
-  split. Simpl. subst v; auto.
-  split; intros; Simpl.
-- generalize (Int.eq_spec n Int.one).
-  destruct (Int.eq n Int.one); intro ONE.
-  * subst n.
-    econstructor; split. eapply exec_straight_two.
-    all: simpl; auto.
-    split.
-    ** subst v; Simpl.
-       destruct (Val.add _ _); simpl; trivial.
-       change (Int.ltu Int.one Int.iwordsize) with true; simpl.
-       rewrite Int.or_zero_l.
-       reflexivity.
-    ** split; intros; Simpl.
-  * econstructor; split. eapply exec_straight_three.
-    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
-    simpl; eauto.
-    unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
-    auto. auto. auto.
-    split. subst v; Simpl.
-    split; intros; Simpl.
-Qed.
- 
-Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
-  Val.shrxl rs#r1 (Vint n) = Some v ->
-  r1 <> X16 ->
-  (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
-  exists rs',
-     exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
-  /\ rs'#rd = v
-  /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  unfold shrx64; intros. apply Val.shrxl_shrl_3 in H.
-  destruct (Int.eq n Int.zero) eqn:E.
-- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. 
-  split. Simpl. subst v; auto.
-  split; intros; Simpl.
-- generalize (Int.eq_spec n Int.one).
-  destruct (Int.eq n Int.one); intro ONE.
-  * subst n.
-    econstructor; split. eapply exec_straight_two.
-    all: simpl; auto.
-    split.
-    ** subst v; Simpl.
-       destruct (Val.addl _ _); simpl; trivial.
-       change (Int.ltu Int.one Int64.iwordsize') with true; simpl.
-       rewrite Int64.or_zero_l.
-       reflexivity.
-    ** split; intros; Simpl.
-  * econstructor; split. eapply exec_straight_three.
-    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
-    simpl; eauto.
-    unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
-    auto. auto. auto.
-    split. subst v; Simpl.
-    split; intros; Simpl.
-Qed.
-
-(** Condition bits *)
-
-Lemma compare_int_spec: forall rs v1 v2 m,
-  let rs' := compare_int rs v1 v2 m in
-     rs'#CN = (Val.negative (Val.sub v1 v2))
-  /\ rs'#CZ = (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
-  /\ rs'#CC = (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
-  /\ rs'#CV = (Val.sub_overflow v1 v2).
-Proof.
-  intros; unfold rs'; auto.
-Qed.
-
-Lemma eval_testcond_compare_sint: forall c v1 v2 b rs m,
-  Val.cmp_bool c v1 v2 = Some b ->
-  eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2 m) = Some b.
-Proof.
-  intros. generalize (compare_int_spec rs v1 v2 m). 
-  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-  destruct v1; try discriminate; destruct v2; try discriminate.
-  simpl in H; inv H.
-  unfold Val.cmpu; simpl. destruct c; simpl.
-- destruct (Int.eq i i0); auto.
-- destruct (Int.eq i i0); auto.
-- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
-- rewrite Int.lt_sub_overflow, Int.not_lt.
-  destruct (Int.eq i i0), (Int.lt i i0); auto.
-- rewrite Int.lt_sub_overflow, (Int.lt_not i). 
-  destruct (Int.eq i i0), (Int.lt i i0); auto.
-- rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
-Qed.
-
-Lemma eval_testcond_compare_uint: forall c v1 v2 b rs m,
-  Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
-  eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2 m) = Some b.
-Proof.
-  intros. generalize (compare_int_spec rs v1 v2 m). 
-  set (rs' := compare_int rs v1 v2 m). intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-  destruct v1; try discriminate; destruct v2; try discriminate.
-  simpl in H; inv H.
-  unfold Val.cmpu; simpl. destruct c; simpl.
-- destruct (Int.eq i i0); auto.
-- destruct (Int.eq i i0); auto.
-- destruct (Int.ltu i i0); auto.
-- rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
-- rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
-- destruct (Int.ltu i i0); auto.
-Qed.
-
-Lemma compare_long_spec: forall rs v1 v2 m,
-  let rs' := compare_long rs v1 v2 m in
-     rs'#CN = (Val.negativel (Val.subl v1 v2))
-  /\ rs'#CZ = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
-  /\ rs'#CC = (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
-  /\ rs'#CV = (Val.subl_overflow v1 v2).
-Proof.
-  intros; unfold rs'; auto.
-Qed.
-
-Remark int64_sub_overflow:
-  forall x y,
-  Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
-          (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
-  (if Int64.lt x y then Int.one else Int.zero).
-Proof.
-  intros.
-  transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
-  rewrite <- (Int64.lt_sub_overflow x y).
-  unfold Int64.sub_overflow, Int64.negative.
-  set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
-  destruct (zle Int64.min_signed s && zle s Int64.max_signed);
-  destruct (Int64.lt (Int64.sub x y) Int64.zero);
-  auto.
-  destruct (Int64.lt x y); auto.
-Qed.
-
-Lemma eval_testcond_compare_slong: forall c v1 v2 b rs m,
-  Val.cmpl_bool c v1 v2 = Some b ->
-  eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2 m) = Some b.
-Proof.
-  intros. generalize (compare_long_spec rs v1 v2 m). 
-  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-  destruct v1; try discriminate; destruct v2; try discriminate.
-  simpl in H; inv H.
-  unfold Val.cmplu; simpl. destruct c; simpl.
-- destruct (Int64.eq i i0); auto.
-- destruct (Int64.eq i i0); auto.
-- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
-- rewrite int64_sub_overflow, Int64.not_lt.
-  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
-- rewrite int64_sub_overflow, (Int64.lt_not i). 
-  destruct (Int64.eq i i0), (Int64.lt i i0); auto.
-- rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
-Qed.
-
-Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs m,
-  Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
-  eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2 m) = Some b.
-Proof.
-  intros. generalize (compare_long_spec rs v1 v2 m). 
-  set (rs' := compare_long rs v1 v2 m). intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E; unfold Val.cmplu.
-  destruct v1; try discriminate; destruct v2; try discriminate; simpl in H.
-- (* int-int *)
-  inv H. destruct c; simpl.
-+ destruct (Int64.eq i i0); auto.
-+ destruct (Int64.eq i i0); auto.
-+ destruct (Int64.ltu i i0); auto.
-+ rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
-+ rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
-+ destruct (Int64.ltu i i0); auto.
-- (* int-ptr *)
-  simpl.
-  destruct (Int64.eq i Int64.zero &&
-            (Mem.valid_pointer m b0 (Ptrofs.unsigned i0)
-              || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))); try discriminate.
-  destruct c; simpl in H; inv H; reflexivity.
-- (* ptr-int *)
-  simpl.
-  destruct (Int64.eq i0 Int64.zero &&
-            (Mem.valid_pointer m b0 (Ptrofs.unsigned i)
-              || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); try discriminate.
-  destruct c; simpl in H; inv H; reflexivity.
-- (* ptr-ptr *)
-  simpl. 
-  destruct (eq_block b0 b1).
-+ destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i)
-             || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1)) &&
-            (Mem.valid_pointer m b1 (Ptrofs.unsigned i0)
-             || Mem.valid_pointer m b1 (Ptrofs.unsigned i0 - 1)));
-  inv H.
-  destruct c; simpl.
-* destruct (Ptrofs.eq i i0); auto.
-* destruct (Ptrofs.eq i i0); auto.
-* destruct (Ptrofs.ltu i i0); auto.
-* rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
-* rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
-* destruct (Ptrofs.ltu i i0); auto.
-+ destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) &&
-            Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate.
-  destruct c; simpl in H; inv H; reflexivity.
-Qed.
-
-Lemma compare_float_spec: forall rs f1 f2,
-  let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in
-     rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2))
-  /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2))
-  /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2)))
-  /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))).
-Proof.
-  intros; auto.
-Qed.
-
-Lemma eval_testcond_compare_float: forall c v1 v2 b rs,
-  Val.cmpf_bool c v1 v2 = Some b ->
-  eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b.
-Proof.
-  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
-  generalize (compare_float_spec rs f f0). 
-  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
-  intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-Local Transparent Float.cmp Float.ordered.
-  unfold Float.cmp, Float.ordered;
-  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
-Qed.
-
-Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs,
-  option_map negb (Val.cmpf_bool c v1 v2) = Some b ->
-  eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b.
-Proof.
-  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
-  generalize (compare_float_spec rs f f0). 
-  set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
-  intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-Local Transparent Float.cmp Float.ordered.
-  unfold Float.cmp, Float.ordered;
-  destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
-Qed.
-
-Lemma compare_single_spec: forall rs f1 f2,
-  let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in
-     rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2))
-  /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2))
-  /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
-  /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))).
-Proof.
-  intros; auto.
-Qed.
-
-Lemma eval_testcond_compare_single: forall c v1 v2 b rs,
-  Val.cmpfs_bool c v1 v2 = Some b ->
-  eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b.
-Proof.
-  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. 
-  generalize (compare_single_spec rs f f0). 
-  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
-  intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-Local Transparent Float32.cmp Float32.ordered.
-  unfold Float32.cmp, Float32.ordered;
-  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
-Qed.
-
-Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs,
-  option_map negb (Val.cmpfs_bool c v1 v2) = Some b ->
-  eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b.
-Proof.
-  intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
-  generalize (compare_single_spec rs f f0). 
-  set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
-  intros (B & C & D & E).
-  unfold eval_testcond; rewrite B, C, D, E.
-Local Transparent Float32.cmp Float32.ordered.
-  unfold Float32.cmp, Float32.ordered;
-  destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
-Qed.
-
-Remark compare_float_inv: forall rs v1 v2 r,
-  match r with CR _ => False | _ => True end ->
-  (nextinstr (compare_float rs v1 v2))#r = (nextinstr rs)#r.
-Proof.
-  intros; unfold compare_float.
-  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
-Qed.
-
-Remark compare_single_inv: forall rs v1 v2 r,
-  match r with CR _ => False | _ => True end ->
-  (nextinstr (compare_single rs v1 v2))#r = (nextinstr rs)#r.
-Proof.
-  intros; unfold compare_single.
-  destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
-Qed.
-
-(** Translation of conditionals *)
-
-Ltac ArgsInv :=
-  repeat (match goal with
-  | [ H: Error _ = OK _ |- _ ] => discriminate
-  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
-  | [ H: bind _ _ = OK _ |- _ ] => monadInv H
-  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
-  end);
-  subst;
-  repeat (match goal with
-  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
-  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
-  end).
-
-Lemma compare_int_RA:
-  forall rs a b m,
-    compare_int rs a b m X30 = rs X30.
-Proof.
-  unfold compare_int.
-  intros.
-  repeat rewrite Pregmap.gso by congruence.
-  trivial.
-Qed.
-
-Hint Resolve compare_int_RA : asmgen.
-
-Lemma compare_long_RA:
-  forall rs a b m,
-    compare_long rs a b m X30 = rs X30.
-Proof.
-  unfold compare_long.
-  intros.
-  repeat rewrite Pregmap.gso by congruence.
-  trivial.
-Qed.
-
-Hint Resolve compare_long_RA : asmgen.
-
-Lemma compare_float_RA:
-  forall rs a b,
-    compare_float rs a b X30 = rs X30.
-Proof.
-  unfold compare_float.
-  intros.
-  destruct a; destruct b.
-  all: repeat rewrite Pregmap.gso by congruence; trivial.
-Qed.
-
-Hint Resolve compare_float_RA : asmgen.
-  
-
-Lemma compare_single_RA:
-  forall rs a b,
-    compare_single rs a b X30 = rs X30.
-Proof.
-  unfold compare_single.
-  intros.
-  destruct a; destruct b.
-  all: repeat rewrite Pregmap.gso by congruence; trivial.
-Qed.
-
-Hint Resolve compare_single_RA : asmgen.
-  
-                                         
-Lemma transl_cond_correct:
-  forall cond args k c rs m,
-  transl_cond cond args k = OK c ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ (forall b,
-      eval_condition cond (map rs (map preg_of args)) m = Some b ->
-      eval_testcond (cond_for_cond cond) rs' = Some b)
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
-- (* Ccomp *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_sint; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccompu *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_uint; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccompimm *)
-  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. 
-  destruct r; reflexivity || discriminate.
-+ econstructor; split.
-  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_sint; auto. 
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one.
-  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_sint; auto. 
-  transitivity (rs' r). destruct r; reflexivity || discriminate.
-  auto with asmgen.
-  Simpl. rewrite compare_int_RA.
-  apply C; congruence.
-- (* Ccompuimm *)
-  destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. 
-  destruct r; reflexivity || discriminate.
-+ econstructor; split.
-  apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_uint; auto. 
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one.
-  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_uint; auto. 
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_int_RA.
-  apply C; congruence.  
-- (* Ccompshift *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccompushift *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Cmaskzero *)
-  destruct (is_logical_imm32 n).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply (eval_testcond_compare_sint Ceq); auto.
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_int_RA.
-  apply C; congruence.
-
-- (* Cmasknotzero *)
-  destruct (is_logical_imm32 n).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
-  destruct r; reflexivity || discriminate.
-
-+ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply (eval_testcond_compare_sint Cne); auto.
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_int_RA.
-  apply C; congruence.
-  
-- (* Ccompl *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_slong; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccomplu *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccomplimm *)
-  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. 
-  destruct r; reflexivity || discriminate.
-+ econstructor; split.
-  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_slong; auto. 
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one.
-  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_slong; auto. 
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_long_RA.
-  apply C; congruence.
-
-- (* Ccompluimm *)
-  destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. 
-  destruct r; reflexivity || discriminate.
-+ econstructor; split.
-  apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one.
-  simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply eval_testcond_compare_ulong; auto. 
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_long_RA.
-  apply C; congruence.
-  
-- (* Ccomplshift *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Ccomplushift *)
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. 
-  destruct r; reflexivity || discriminate.
-- (* Cmasklzero *)
-  destruct (is_logical_imm64 n).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply (eval_testcond_compare_slong Ceq); auto.
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-  Simpl. rewrite compare_long_RA.
-  apply C; congruence.
-
-- (* Cmasknotzero *)
-  destruct (is_logical_imm64 n).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
-  destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
-  repeat split; intros. apply (eval_testcond_compare_slong Cne); auto.
-  transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
-    Simpl. rewrite compare_long_RA.
-  apply C; congruence.
-
-- (* Ccompf *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_float_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_float; auto.
-  destruct r; discriminate || rewrite compare_float_inv; auto.
-  Simpl. 
-- (* Cnotcompf *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_float_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_not_float; auto.
-  destruct r; discriminate || rewrite compare_float_inv; auto.
-  Simpl. 
-- (* Ccompfzero *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_float_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_float; auto.
-  destruct r; discriminate || rewrite compare_float_inv; auto.
-  Simpl.
-- (* Cnotcompfzero *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_float_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_not_float; auto.
-  destruct r; discriminate || rewrite compare_float_inv; auto.
-  Simpl.
-- (* Ccompfs *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_single_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_single; auto.
-  destruct r; discriminate || rewrite compare_single_inv; auto.
-  Simpl.
-- (* Cnotcompfs *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_single_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_not_single; auto.
-  destruct r; discriminate || rewrite compare_single_inv; auto.
-  Simpl.
-- (* Ccompfszero *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_single_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_single; auto.
-  destruct r; discriminate || rewrite compare_single_inv; auto.
-  Simpl.
-- (* Cnotcompfszero *)
-  econstructor; split. apply exec_straight_one. simpl; eauto.
-  rewrite compare_single_inv; auto.
-  repeat split; intros. apply eval_testcond_compare_not_single; auto.
-  destruct r; discriminate || rewrite compare_single_inv; auto.
-  Simpl.
-Qed.
-
-(** Translation of conditional branches *)
-
-Lemma transl_cond_branch_correct:
-  forall cond args lbl k c rs m b,
-  transl_cond_branch cond args lbl k = OK c ->
-  eval_condition cond (map rs (map preg_of args)) m = Some b ->
-  exists rs' insn,
-     exec_straight_opt ge fn c rs m (insn :: k) rs' m
-  /\ exec_instr ge fn insn rs' m =
-         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros until b; intros TR EV.
-  assert (DFL:
-    transl_cond_branch_default cond args lbl k = OK c ->
-    exists rs' insn,
-       exec_straight_opt ge fn c rs m (insn :: k) rs' m
-    /\ exec_instr ge fn insn rs' m =
-         (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
-    /\ (forall r, data_preg r = true -> rs'#r = rs#r)
-     /\ rs' # RA = rs # RA ).
-  {
-    unfold transl_cond_branch_default; intros.
-    exploit transl_cond_correct; eauto. intros (rs' & A & B & C & D).
-    exists rs', (Pbc (cond_for_cond cond) lbl); split.
-    apply exec_straight_opt_intro. eexact A.
-    repeat split; auto. simpl. rewrite (B b) by auto. auto.
-  }
-Local Opaque transl_cond transl_cond_branch_default.
-  destruct args as [ | a1 args]; simpl in TR; auto.
-  destruct args as [ | a2 args]; simpl in TR; auto.
-  destruct cond; simpl in TR; auto.
-- (* Ccompimm *)
-  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; 
-  apply Int.same_if_eq in N0; subst n; ArgsInv.
-+ (* Ccompimm Cne 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
-+ (* Ccompimm Ceq 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int.eq i Int.zero); auto.
-- (* Ccompuimm *)
-  destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto;
-  apply Int.same_if_eq in N0; subst n; ArgsInv.
-+ (* Ccompuimm Cne 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. rewrite EV. auto.
-+ (* Ccompuimm Ceq 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
-- (* Cmaskzero *)
-  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl.
-  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
-  rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto.
-- (* Cmasknotzero *)
-  destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl.
-  erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
-  rewrite EV. auto.
-- (* Ccomplimm *)
-  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; 
-  apply Int64.same_if_eq in N0; subst n; ArgsInv.
-+ (* Ccomplimm Cne 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. auto.
-+ (* Ccomplimm Ceq 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. destruct (rs x); simpl in EV; inv EV. simpl. destruct (Int64.eq i Int64.zero); auto.
-- (* Ccompluimm *)
-  destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
-  apply Int64.same_if_eq in N0; subst n; ArgsInv.
-+ (* Ccompluimm Cne 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. rewrite EV. auto.
-+ (* Ccompluimm Ceq 0 *)
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cne), EV. destruct b; auto.
-- (* Cmasklzero *)
-  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl.
-  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
-  rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto.
-- (* Cmasklnotzero *)
-  destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
-  do 2 econstructor; split.
-  apply exec_straight_opt_refl.
-  split; auto. simpl.
-  erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
-  rewrite EV. auto.
-Qed.
-
-(** Translation of arithmetic operations *)
-
-Ltac SimplEval H :=
-  match type of H with
-  | Some _ = None _ => discriminate
-  | Some _ = Some _ => inv H
-  | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
-end.
-
-Ltac TranslOpSimpl :=
-  econstructor; split;
-  [ apply exec_straight_one; [simpl; eauto | reflexivity]
-  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl;
-             apply Val.lessdef_same; Simpl; fail
-           | split; [ intros; Simpl; fail
-                    | intros; Simpl; eauto with asmgen; fail] ]].
-
-Ltac TranslOpBase :=
-  econstructor; split;
-  [ apply exec_straight_one; [simpl; eauto | reflexivity]
-  | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
-           | split; [ intros; Simpl; fail
-                    | intros; Simpl; eapply RA_not_written2; eauto] ]].
-
-Lemma transl_op_correct:
-  forall op args res k (rs: regset) m v c,
-  transl_op op args res k = OK c ->
-  eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef v rs'#(preg_of res)
-  /\ (forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r)
-  /\ rs' RA = rs RA.
-Proof.
-Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
-  intros until c; intros TR EV.
-  unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
-- (* move *)
-  destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR.
-  all: TranslOpSimpl.
-- (* intconst *)
-  exploit exec_loadimm32. apply (ireg_of_not_RA res); eassumption.
-  intros (rs' & A & B & C).
-  exists rs'; split. eexact A. split. rewrite B; auto.
-  split. intros; auto with asmgen.
-  apply C. congruence.
-  eapply ireg_of_not_RA''; eauto.
-- (* longconst *)
-  exploit exec_loadimm64. apply (ireg_of_not_RA res); eassumption.
-  intros (rs' & A & B & C).
-  exists rs'; split. eexact A. split. rewrite B; auto.
-  split. intros; auto with asmgen.
-  apply C. congruence.
-  eapply ireg_of_not_RA''; eauto.
-- (* floatconst *)
-  destruct (Float.eq_dec n Float.zero).
-+ subst n. TranslOpSimpl. 
-+ TranslOpSimpl.
-- (* singleconst *)
-  destruct (Float32.eq_dec n Float32.zero).
-+ subst n. TranslOpSimpl. 
-+ TranslOpSimpl.
-- (* loadsymbol *)
-  exploit (exec_loadsymbol x id ofs). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption. 
-  intros (rs' & A & B & C & D).
-  exists rs'; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* addrstack *)
-  exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption. 
-  intros (rs' & A & B & C & D).
-  exists rs'; split. eexact A. split. simpl in B; rewrite B.
-Local Transparent Val.addl.
-  destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
-  auto.
-- (* shift *)
-  rewrite <- transl_eval_shift'. TranslOpSimpl.
-- (* addimm *)
-  exploit (exec_addimm32 x x0 n). eauto with asmgen. eapply ireg_of_not_RA''; eassumption.
-  intros (rs' & A & B & C & D).
-  exists rs'; split. eexact A. split. rewrite B; auto. auto.
-- (* mul *)
-  TranslOpBase.
-Local Transparent Val.add.
-  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
-- (* andimm *)
-  exploit (exec_logicalimm32 (Pandimm W) (Pand W)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* orimm *)
-  exploit (exec_logicalimm32 (Porrimm W) (Porr W)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* xorimm *)
-  exploit (exec_logicalimm32 (Peorimm W) (Peor W)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto. auto.
-- (* not *)
-  TranslOpBase.
-  destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto.
-- (* notshift *)
-  TranslOpBase.
-  destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
-- (* shrx *)
-  exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D).
-  econstructor; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* zero-ext *)
-  TranslOpBase.
-  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
-- (* sign-ext *)
-  TranslOpBase.
-  destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
-- (* shlzext *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range.
-- (* shlsext *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range.
-- (* zextshr *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range.
-- (* sextshr *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range.
-- (* shiftl *)
-  rewrite <- transl_eval_shiftl'. TranslOpSimpl.
-- (* extend *)
-  exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
-  econstructor; split. eexact A. 
-  split. rewrite B; auto.
-  split; eauto with asmgen.
-- (* addext *)
-  exploit (exec_arith_extended Val.addl Paddext (Padd X)).
-  auto. auto. instantiate (1 := x1). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D).
-  econstructor; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* addlimm *)
-  exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D).
-  exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
-- (* subext *)
-  exploit (exec_arith_extended Val.subl Psubext (Psub X)).
-  auto. auto. instantiate (1 := x1). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D).
-  econstructor; split. eexact A. split. rewrite B; auto.
-  split; auto.
-- (* mull *)
-  TranslOpBase.
-  destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
-- (* andlimm *)
-  exploit (exec_logicalimm64 (Pandimm X) (Pand X)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto. auto.
-- (* orlimm *)
-  exploit (exec_logicalimm64 (Porrimm X) (Porr X)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto. auto.
-- (* xorlimm *)
-  exploit (exec_logicalimm64 (Peorimm X) (Peor X)). 
-  intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption.
-  intros (rs' & A & B & C & D). 
-  exists rs'; split. eexact A. split. rewrite B; auto. auto.
-- (* notl *)
-  TranslOpBase.
-  destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto.
-- (* notlshift *)
-  TranslOpBase.
-  destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
-- (* shrx *)
-  exploit (exec_shrx64 x x0 n); eauto with asmgen.
-  apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
-  econstructor; split. eexact A. split. rewrite B; auto. auto.
-- (* zero-ext-l *)
-  TranslOpBase.
-  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
-- (* sign-ext-l *)
-  TranslOpBase.
-  destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
-- (* shllzext *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range.
-- (* shllsext *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range.
-- (* zextshrl *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range.
-- (* sextshrl *)
-  TranslOpBase.
-  destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
-- (* condition *)
-  exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
-  rewrite (B b) by auto. auto. 
-  auto.
-  split; intros; Simpl.
-- (* select *)
-  destruct (preg_of res) eqn:RES; monadInv TR.
-  + (* integer *)
-    generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
-    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
-    econstructor; split.
-    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
-    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
-    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
-    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
-    auto.
-    split; intros; Simpl.
-    rewrite <- D.
-    eapply RA_not_written2; eassumption.
-  + (* FP *)
-    generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
-    exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
-    econstructor; split.
-    eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
-    split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
-    rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
-    rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
-    auto.
-    split; intros; Simpl.
-Qed.
-
-(** Translation of addressing modes, loads, stores *)
-
-Lemma transl_addressing_correct:
-  forall sz addr args (insn: Asm.addressing -> instruction) k (rs: regset) m c b o,
-  transl_addressing sz addr args insn k = OK c ->
-  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) ->
-  exists ad rs',
-     exec_straight_opt ge fn c rs m (insn ad :: k) rs' m
-  /\ Asm.eval_addressing ge ad rs' = Vptr b o
-  /\ (forall r, data_preg r = true -> rs' r = rs r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros until o; intros TR EV.
-  unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
-- (* Aindexed *)
-  destruct (offset_representable sz ofs); inv EQ0.
-+ econstructor; econstructor; split. apply exec_straight_opt_refl.
-  auto.
-+ exploit (exec_loadimm64 X16 ofs). congruence. intros (rs' & A & B & C).
-  econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
-  split. simpl. rewrite B, C by eauto with asmgen. auto.
-  split; eauto with asmgen.
-- (* Aindexed2 *)
-  econstructor; econstructor; split. apply exec_straight_opt_refl.
-  auto.
-- (* Aindexed2shift *)
-  destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2.
-+ apply Int.same_if_eq in E. rewrite E.
-  econstructor; econstructor; split. apply exec_straight_opt_refl.
-  split; auto. simpl.
-  rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate.
-  unfold Val.shll. rewrite Int64.shl'_zero. auto.
-+ econstructor; econstructor; split. apply exec_straight_opt_refl.
-  auto. 
-+ econstructor; econstructor; split.
-  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
-  split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
-  split; intros; Simpl.
-- (* Aindexed2ext *)
-  destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
-+ econstructor; econstructor; split. apply exec_straight_opt_refl.
-  split; auto. destruct x; auto.
-+ exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
-  instantiate (1 := x0). eauto with asmgen.
-  instantiate (1 := X16). simpl. congruence.
-  intros (rs' & A & B & C & D).
-  econstructor; exists rs'; split.
-  apply exec_straight_opt_intro. eexact A. 
-  split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
-  unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
-  simpl; rewrite Int64.add_zero; auto.
-  split; intros.
-  apply C; eauto with asmgen.
-  trivial.
-- (* Aglobal *)
-  destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
-+ econstructor; econstructor; split.
-  apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
-  split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
-  split; intros; Simpl.
-+ exploit (exec_loadsymbol X16 id ofs). auto.
-  simpl. congruence.
-  intros (rs' & A & B & C & D).
-  econstructor; exists rs'; split.
-  apply exec_straight_opt_intro. eexact A.
-  split. simpl. 
-  rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. 
-  simpl in EV. congruence. 
-  split; auto with asmgen.
-- (* Ainstrack *)
-  assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
-  { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. 
-    rewrite Ptrofs.of_int64_to_int64 by auto. auto. }   
-  destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
-+ econstructor; econstructor; split. apply exec_straight_opt_refl.
-  auto.
-+ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)).
-  simpl. congruence.
-  intros (rs' & A & B & C).
-  econstructor; exists rs'; split.
-  apply exec_straight_opt_intro. eexact A.
-  split. simpl. rewrite B, C by eauto with asmgen. auto.
-  auto with asmgen.
-Qed.
-
-Lemma transl_load_correct:
-  forall chunk addr args dst k c (rs: regset) m vaddr v,
-  transl_load TRAP chunk addr args dst k = OK c ->
-  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
-  Mem.loadv chunk m vaddr = Some v ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ rs'#(preg_of dst) = v
-  /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
-  /\ rs' # RA = rs # RA.
-Proof.
-  intros. destruct vaddr; try discriminate. 
-  assert (A: exists sz insn,
-                transl_addressing sz addr args insn k = OK c
-             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
-                              exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m)).
-  {
-    destruct chunk; monadInv H;
-    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
-    do 2 econstructor; (split; [eassumption|auto]).
-  }
-  destruct A as (sz & insn & B & C).
-  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).
-  assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m =
-             Next (nextinstr (rs'#(preg_of dst) <- v)) m).
-  { unfold exec_load. rewrite Q, H1. auto. }
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact P.
-  apply exec_straight_one. rewrite C, X; eauto. Simpl. 
-  split. Simpl.
-  split; intros; Simpl.
-  rewrite <- S.
-  apply RA_not_written.
-Qed.
-
-Lemma transl_store_correct:
-  forall chunk addr args src k c (rs: regset) m vaddr m',
-  transl_store chunk addr args src k = OK c ->
-  Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
-  Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m'
-     /\ (forall r, data_preg r = true -> rs' r = rs r)
-     /\ rs' # RA = rs # RA.
-Proof.
-  intros. destruct vaddr; try discriminate. 
-  set (chunk' := match chunk with Mint8signed => Mint8unsigned
-                                | Mint16signed => Mint16unsigned
-                                | _ => chunk end).
-  assert (A: exists sz insn,
-                transl_addressing sz addr args insn k = OK c
-             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
-                              exec_store ge chunk' ad rs'#(preg_of src) rs' m)).
-  {
-    unfold chunk'; destruct chunk; monadInv H;
-    try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
-    do 2 econstructor; (split; [eassumption|auto]).
-  }
-  destruct A as (sz & insn & B & C).
-  exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).                                                  
-  assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
-  { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
-    apply Mem.store_signed_unsigned_8.
-    apply Mem.store_signed_unsigned_16. }
-  assert (Y: exec_store ge chunk' ad rs'#(preg_of src) rs' m =
-             Next (nextinstr rs') m').
-  { unfold exec_store. rewrite Q, R, X by auto with asmgen. auto. }
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact P.
-  apply exec_straight_one. rewrite C, Y; eauto. Simpl. 
-  split; intros; Simpl.
-Qed.
-
-(** Translation of indexed memory accesses *)
-
-Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i,
-  preg_of_iregsp base <> IR X16 ->
-  Val.offset_ptr rs#base ofs = Vptr b i ->
-  exists ad rs',
-     exec_straight_opt ge fn (indexed_memory_access insn sz base ofs k) rs m (insn ad :: k) rs' m
-  /\ Asm.eval_addressing ge ad rs' = Vptr b i
-  /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
-Proof.
-  unfold indexed_memory_access; intros.
-  assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i).
-  { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
-  destruct offset_representable.
-- econstructor; econstructor; split. apply exec_straight_opt_refl. auto. 
-- exploit (exec_loadimm64 X16); eauto.
-  simpl. congruence.
-  intros (rs' & A & B & C).
-  econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
-  split. simpl. rewrite B, C by eauto with asmgen. auto. auto.
-Qed.
-
-Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
-  Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v ->
-  preg_of_iregsp base <> IR X16 ->
-  exists rs',
-     exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m
-  /\ rs'#dst = v
-  /\ (forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r).
-Proof.
-  intros. 
-  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
-  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact A.
-  apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto.
-  split. Simpl.
-  intros; Simpl.
-Qed.
-
-Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
-  Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
-  preg_of_iregsp base <> IR X16 ->
-  src <> X16 ->
-  exists rs',
-     exec_straight ge fn (storeptr src base ofs k) rs m k rs' m'
-  /\ (forall r, r <> PC -> r <> X16 -> rs' r = rs r)
-  /\ rs' RA = rs RA.
-Proof.
-  intros. 
-  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
-  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact A.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto.
-  split; intros; Simpl.
-Qed.
-
-Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
-  loadind base ofs ty dst k = OK c ->
-  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
-  preg_of_iregsp base <> IR X16 ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ rs'#(preg_of dst) = v
-  /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
-  /\ rs' RA = rs RA.
-Proof.
-  intros. 
-  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
-  assert (X: exists sz insn,
-                c = indexed_memory_access insn sz base ofs k
-             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
-                              exec_load ge (chunk_of_type ty) (fun v => v) ad (preg_of dst) rs' m)).
-  {
-    unfold loadind in H; destruct ty; destruct (preg_of dst); inv H; do 2 econstructor; eauto.
-  }
-  destruct X as (sz & insn & EQ & SEM). subst c.
-  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact A.
-  apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl.
-  split. Simpl.
-  split. intros; Simpl.
-  Simpl. rewrite RA_not_written.
-  apply C; congruence.
-Qed.
-
-Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
-  storeind src base ofs ty k = OK c ->
-  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
-  preg_of_iregsp base <> IR X16 ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m'
-     /\ (forall r, data_preg r = true -> rs' r = rs r)
-     /\ rs' RA = rs RA.
-Proof.
-  intros. 
-  destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
-  assert (X: exists sz insn,
-                c = indexed_memory_access insn sz base ofs k
-             /\ (forall ad rs', exec_instr ge fn (insn ad) rs' m =
-                              exec_store ge (chunk_of_type ty) ad rs'#(preg_of src) rs' m)).
-  {
-    unfold storeind in H; destruct ty; destruct (preg_of src); inv H; do 2 econstructor; eauto.
-  }
-  destruct X as (sz & insn & EQ & SEM). subst c.
-  exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). 
-  econstructor; split.
-  eapply exec_straight_opt_right. eexact A.
-  apply exec_straight_one. rewrite SEM.
-  unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto.
-  Simpl.
-  split. intros; Simpl.
-  Simpl. 
-Qed.
-
-Lemma make_epilogue_correct:
-  forall ge0 f m stk soff cs m' ms rs k tm,
-  (is_leaf_function f = true -> rs # (IR RA) = parent_ra cs) ->
-  load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
-  ((* FIXME is_leaf_function f = false -> *) load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs)) ->
-  Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
-  agree ms (Vptr stk soff) rs ->
-  Mem.extends m tm ->
-  match_stack ge0 cs ->
-  exists rs', exists tm',
-     exec_straight ge fn (make_epilogue f k) rs tm k rs' tm'
-  /\ agree ms (parent_sp cs) rs'
-  /\ Mem.extends m' tm'
-  /\ rs'#RA = parent_ra cs
-  /\ rs'#SP = parent_sp cs
-  /\ (forall r, r <> PC -> r <> SP -> r <> RA -> r <> X16 -> rs'#r = rs#r).
-Proof.
-  intros until tm; intros LEAF_RA LP LRA FREE AG MEXT MCS.
-
-  (* FIXME
-  Cannot be used at this point
-  destruct (is_leaf_function f) eqn:IS_LEAF.
-  {
-  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
-  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
-  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
-  unfold make_epilogue.
-  rewrite IS_LEAF.
-
-  econstructor; econstructor; split.
-  apply exec_straight_one. simpl.
-  rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
-    rewrite FREE'. eauto. auto. 
-  split. apply agree_nextinstr. apply agree_set_other; auto.
-  apply agree_change_sp with (Vptr stk soff).
-  apply agree_exten with rs; auto.
-  eapply parent_sp_def; eauto.
-  split. auto.
-  split. Simpl.
-  split. Simpl.
-  intros. Simpl.
-  }
-  lapply LRA. 2: reflexivity.
-  clear LRA. intro LRA. *)
-  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
-  exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
-  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
-  exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
-  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
-  unfold make_epilogue.
-  (* FIXME rewrite IS_LEAF. *)
-  exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
-    instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence.
-  intros (rs1 & A1 & B1 & C1).
-
-  econstructor; econstructor; split.
-  eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. 
-    simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
-    rewrite FREE'. eauto. auto. 
-  split. apply agree_nextinstr. apply agree_set_other; auto.
-  apply agree_change_sp with (Vptr stk soff).
-  apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
-  eapply parent_sp_def; eauto.
-  split. auto.
-  split. Simpl. 
-  split. Simpl. 
-  intros. Simpl.
-Qed.
-
-End CONSTRUCTORS.
diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml
index fdc1372d..e2a9c87a 100644
--- a/aarch64/CBuiltins.ml
+++ b/aarch64/CBuiltins.ml
@@ -6,6 +6,9 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
@@ -29,14 +32,6 @@ let builtins = {
     "__builtin_fence",
       (TVoid [], [], false);
     (* Integer arithmetic *)
-    "__builtin_bswap64",
-      (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
-    "__builtin_clz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_clzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_clzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     "__builtin_cls",
       (TInt(IInt, []), [TInt(IInt, [])], false);
     "__builtin_clsl",
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
index a23e41a8..d5c7ee0f 100644
--- a/aarch64/CSE2deps.v
+++ b/aarch64/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
index dbd46142..653c88f4 100644
--- a/aarch64/CSE2depsproof.v
+++ b/aarch64/CSE2depsproof.v
@@ -104,9 +104,71 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
-End SOUNDNESS.
 
 
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
+End SOUNDNESS.
+
 Section SOUNDNESS.
   Variable F V : Type.
   Variable genv: Genv.t F V.
@@ -124,7 +186,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+ - (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -134,7 +196,14 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
+
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
+
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  {
+    rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  apply make_divimm_correct; auto. congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divuimm_correct; auto.
+  apply make_divuimm_correct; auto. congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divlimm_correct; auto.
+  apply make_divlimm_correct; auto. congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divluimm_correct; auto.
+  apply make_divluimm_correct; auto. congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/aarch64/ExpansionOracle.ml b/aarch64/ExpansionOracle.ml
new file mode 100644
index 00000000..3b63b80d
--- /dev/null
+++ b/aarch64/ExpansionOracle.ml
@@ -0,0 +1,17 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v
index 3d27f48f..bfe23e83 100644
--- a/aarch64/Machregs.v
+++ b/aarch64/Machregs.v
@@ -158,7 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_memcpy sz al => R15 :: R17 :: R29 :: nil
   | EF_inline_asm txt sg clob => destroyed_by_clobber clob
-  | EF_profiling _ _ => R15 :: R17 :: nil
+  | EF_profiling _ _ => R15 :: R17 :: R29 :: nil
   | _ => nil
   end.
 
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index f13a9ff5..41db3bd4 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -12,28 +12,9 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register s =
   s = "X16" || s = "x16" || s = "X30" || s = "x30"
 
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
-                           
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
   | AST.Tfloat | AST.Tsingle -> 1
diff --git a/aarch64/Op.v b/aarch64/Op.v
index afc25aa6..40f6ebf0 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
   | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
   | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
   | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
   | Osext s, v1 :: nil => Some (Val.sign_ext s v1)
   | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
   | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
-  | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+  | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
   | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
   | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
   | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
   | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
   | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
   | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
-  | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
   | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
   | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
 
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
 
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
   | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0... destruct v1...
   - apply type_add.
   - apply type_sub.
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (_  || _); trivial...
+  - destruct v0; destruct v1; cbn in *; trivial.
+    destruct (Int.eq i0 Int.zero); constructor.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shift s v1 a)...
   - destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... 
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_subl.
   - destruct v0... destruct v1...
   - destruct v0... destruct v1...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   - destruct v0... destruct v1...
   - destruct v0... destruct (eval_shiftl s v1 a)...
   - destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   - destruct v0...
   - destruct v0...
   - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... 
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m) as [[]|]...
   - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
 
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivu | Odivl | Odivlu
-  | Oshrximm _  | Oshrlximm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Ofloatofint | Ofloatofintu
-  | Osingleofint | Osingleofintu
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Ofloatoflong | Ofloatoflongu
-  | Osingleoflong | Osingleoflongu => true
+  | Omove => false
   | _ => false
   end.
                 
@@ -1209,6 +1202,28 @@ Proof.
   rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
 Qed.
 
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+    
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. intro MEM. destruct op eqn:OP; simpl; try congruence.
+  - f_equal; f_equal; auto using cond_valid_pointer_eq.
+  - destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
@@ -1409,12 +1424,12 @@ Proof.
   - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
   - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (_ || _); cbn;
+    constructor.
+  - inv H4; inv H2; trivial. cbn.
+    destruct (Int.eq i0 Int.zero); cbn;
+    constructor.
   (* and*)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1446,8 +1461,8 @@ Proof.
   (* shru *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1482,12 +1497,10 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (_ || _); cbn; trivial.
+  - inv H4; inv H2; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl *)
   - inv H4; inv H2; simpl; auto. 
   - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1519,8 +1532,8 @@ Proof.
   (* shrlu *)
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   (* shrlx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
   (* shift-ext *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1551,37 +1564,29 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+  - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; trivial.
+  - inv H4; cbn; trivial.
   (* cmp, sel *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+  struct
+    let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+    let nr_non_pipelined_units = 1;;
+
+    let latency_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _ -> 1
+      | Oshift _ -> 2
+      | Oadd -> 1
+      | Oaddshift _ -> 2
+      | Oaddimm _
+        | Oneg -> 1                   
+      | Onegshift _ -> 2
+      | Osub -> 1
+      | Osubshift _ -> 2
+      | Omul
+        | Omuladd  
+        | Omulsub -> 4
+      | Odiv
+        | Odivu -> 29
+      | Oand -> 1
+      | Oandshift _ -> 2
+      | Oandimm _ -> 1
+      | Oor -> 1
+      | Oorshift _ -> 2
+      | Oorimm _ -> 1
+      | Oxor -> 1
+      | Oxorshift _ -> 2
+      | Oxorimm _ -> 1
+      | Onot -> 1
+      | Onotshift _ -> 2
+      | Obic -> 1
+      | Obicshift _ -> 2
+      | Oorn -> 1
+      | Oornshift _ -> 2
+      | Oeqv -> 1
+      | Oeqvshift _ -> 2
+      | Oshl 
+        | Oshr
+        | Oshru -> 2
+      | Oshrximm _ -> 6
+      | Ozext _
+        | Osext _ -> 1
+      | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _ -> 2
+                      
+      (* 64-bit integer arithmetic *)
+      | Oshiftl _ -> 2
+      | Oextend _ -> 1
+      | Omakelong
+        | Olowlong 
+        | Ohighlong 
+        | Oaddl -> 1
+      | Oaddlshift _
+        | Oaddlext _ -> 2
+      | Oaddlimm _
+        | Onegl -> 1         
+      | Oneglshift _ -> 2
+      | Osubl -> 1
+      | Osublshift _
+        | Osublext _ -> 2
+      | Omull
+        | Omulladd 
+        | Omullsub 
+        | Omullhs 
+        | Omullhu -> 4
+      | Odivl -> 50
+      | Odivlu -> 50
+      | Oandl -> 1
+      | Oandlshift _ -> 2
+      | Oandlimm _
+        | Oorl -> 1 
+      | Oorlshift _ -> 2
+      | Oorlimm _
+        | Oxorl -> 1
+      | Oxorlshift _ -> 2
+      | Oxorlimm _
+        | Onotl -> 1
+      | Onotlshift _ -> 2
+      | Obicl -> 1
+      | Obiclshift _ -> 2
+      | Oornl -> 1
+      | Oornlshift _ -> 2
+      | Oeqvl -> 1
+      | Oeqvlshift _ -> 2
+      | Oshll 
+        | Oshrl
+        | Oshrlu -> 2
+      | Oshrlximm _ -> 6
+      | Ozextl _
+        | Osextl _ -> 1
+      | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> 2
+                       
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> 6
+      | Odivf -> 50                                         (* r [rd = r1 / r2] *)
+      | Odivfs -> 20
+  (*  Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) -> 6
+          | _ -> 1);;
+    
+    let resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Omove 
+        | Ointconst _
+        | Olongconst _
+        | Ofloatconst _
+        | Osingleconst _
+        | Oaddrsymbol _
+        | Oaddrstack _
+        (* 32-bit integer arithmetic *)
+        | Oshift _
+        | Oadd 
+        | Oaddshift _
+        | Oaddimm _
+        | Oneg         
+        | Onegshift _
+        | Osub
+        | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+      | Omul 
+        | Omuladd
+        | Omulsub -> [| 1; 1; 1; 0 |]
+      | Odiv 
+        | Odivu -> [| 1; 0; 0; 0 |]
+      | Oand 
+        | Oandshift _
+        | Oandimm _
+        | Oor
+        | Oorshift _
+        | Oorimm _
+        | Oxor
+        | Oxorshift _
+        | Oxorimm _
+        | Onot
+        | Onotshift _
+        | Obic
+        | Obicshift _
+        | Oorn
+        | Oornshift _
+        | Oeqv
+        | Oeqvshift _
+        | Oshl
+        | Oshr
+        | Oshru
+        | Oshrximm _
+        | Ozext _
+        | Osext  _
+        | Oshlzext _
+        | Oshlsext _
+        | Ozextshr _
+        | Osextshr _
+  
+(* 64-bit integer arithmetic *)
+        | Oshiftl _
+        | Oextend _
+        | Omakelong
+        | Olowlong 
+        | Ohighlong
+        | Oaddl 
+        | Oaddlshift _
+        | Oaddlext _
+        | Oaddlimm _
+        | Onegl        
+        | Oneglshift _
+        | Osubl
+        | Osublshift _
+        | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+      | Omull 
+        | Omulladd 
+        | Omullsub
+        | Omullhs 
+        | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+      | Odivl 
+        | Odivlu -> [| 1; 0; 0; 0 |]
+      | Oandl 
+        | Oandlshift _
+        | Oandlimm _
+        | Oorl
+        | Oorlshift _
+        | Oorlimm _
+        | Oxorl
+        | Oxorlshift _
+        | Oxorlimm _
+        | Onotl
+        | Onotlshift _
+        | Obicl
+        | Obiclshift _
+        | Oornl
+        | Oornlshift _
+        | Oeqvl
+        | Oeqvlshift _
+        | Oshll
+        | Oshrl
+        | Oshrlu 
+        | Oshrlximm _
+        | Ozextl _
+        | Osextl _
+        | Oshllzext _
+        | Oshllsext _
+        | Ozextshrl _
+        | Osextshrl _ -> [| 1; 1; 0; 0 |]
+      (* 64-bit floating-point arithmetic *)
+      | Onegf                                               (* r [rd = - r1] *)
+        | Oabsf                                               (* r [rd = abs(r1)] *)
+        | Oaddf                                               (* r [rd = r1 + r2] *)
+        | Osubf                                               (* r [rd = r1 - r2] *)
+        | Omulf                                               (* r [rd = r1 * r2] *)
+        | Odivf
+        (* 32-bit floating-point arithmetic *)
+        | Onegfs                                              (* r [rd = - r1] *)
+        | Oabsfs                                              (* r [rd = abs(r1)] *)
+        | Oaddfs                                              (* r [rd = r1 + r2] *)
+        | Osubfs                                              (* r [rd = r1 - r2] *)
+        | Omulfs                                              (* r [rd = r1 * r2] *)
+        | Odivfs                                              (* r [rd = r1 / r2] *)
+        | Osingleoffloat                                      (* r [rd] is [r1] truncated to single-precision float *)
+        | Ofloatofsingle                                      (* r [rd] is [r1] extended to double-precision float *)
+(*  Conversions between int and float *)
+        | Ointoffloat                                         (* r [rd = signed_int_of_float64(r1)] *)
+        | Ointuoffloat                                        (* r [rd = unsigned_int_of_float64(r1)] *)
+        | Ofloatofint                                         (* r [rd = float64_of_signed_int(r1)] *)
+        | Ofloatofintu                                        (* r [rd = float64_of_unsigned_int(r1)] *)
+        | Ointofsingle                                        (* r [rd = signed_int_of_float32(r1)] *)
+        | Ointuofsingle                                       (* r [rd = unsigned_int_of_float32(r1)] *)
+        | Osingleofint                                        (* r [rd = float32_of_signed_int(r1)] *)
+        | Osingleofintu                                       (* r [rd = float32_of_unsigned_int(r1)] *)
+        | Olongoffloat                                        (* r [rd = signed_long_of_float64(r1)] *)
+        | Olonguoffloat                                       (* r [rd = unsigned_long_of_float64(r1)] *)
+        | Ofloatoflong                                        (* r [rd = float64_of_signed_long(r1)] *)
+        | Ofloatoflongu                                       (* r [rd = float64_of_unsigned_long(r1)] *)
+        | Olongofsingle                                       (* r [rd = signed_long_of_float32(r1)] *)
+        | Olonguofsingle                                      (* r [rd = unsigned_long_of_float32(r1)] *)
+        | Osingleoflong                                       (* r [rd = float32_of_signed_long(r1)] *)
+        | Osingleoflongu                                      (* r [rd = float32_of_unsigned_int(r1)] *)
+        -> [| 1 ; 1; 1; 0 |]
+         
+(* Boolean tests *)
+      | Ocmp cmp | Osel (cmp, _) ->
+         (match cmp with
+          | Ccompf _                              (* r FP comparison *)
+            | Cnotcompf _                           (* r negation of an FP comparison *)
+            | Ccompfzero _                          (* r comparison with 0.0 *)
+            | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+            | Ccompfs _                             (* r FP comparison *)
+            | Cnotcompfs _                          (* r negation of an FP comparison *)
+            | Ccompfszero _                         (* r equal to 0.0 *)
+            | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+             [| 1; 1; 1; 0 |]
+          | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+    
+    let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] );;
+    
+    let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+    let latency_of_call _ _ = 6;;
+    
+    let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+    
+    let resources_of_call _ _ = resource_bounds;;
+    let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "cortex-a53" | "cortex-a35" | "" ->
+     {
+       pipelined_resource_bounds = Cortex_A53.resource_bounds;
+       nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+       latency_of_op = Cortex_A53.latency_of_op;
+       resources_of_op = Cortex_A53.resources_of_op;
+       non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+       latency_of_load = Cortex_A53.latency_of_load;
+       resources_of_load = Cortex_A53.resources_of_load;
+       resources_of_store = Cortex_A53.resources_of_store;
+       resources_of_cond = Cortex_A53.resources_of_cond;
+       latency_of_call = Cortex_A53.latency_of_call;
+       resources_of_call = Cortex_A53.resources_of_call;
+       resources_of_builtin = Cortex_A53.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml
new file mode 100644
index 00000000..44c6f16b
--- /dev/null
+++ b/aarch64/OpWeightsAsm.ml
@@ -0,0 +1,165 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open Asmblock
+
+(*type called_function = (Registers.reg, AST.ident) Datatypes.sum*)
+
+type instruction = PBasic of Asmblock.basic | PControl of Asmblock.control
+
+type opweights = {
+  pipelined_resource_bounds : int array;
+  nr_non_pipelined_units : int;
+  latency_of_op : instruction -> int -> int;
+  resources_of_op : instruction -> int -> int array;
+      (*non_pipelined_resources_of_op : Op.operation -> int -> int array;*)
+      (*latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;*)
+      (*resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;*)
+      (*resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;*)
+      (*resources_of_cond : Op.condition -> int -> int array;*)
+      (*latency_of_call : AST.signature -> called_function -> int;*)
+      (*resources_of_call : AST.signature -> called_function -> int array;*)
+      (*resources_of_builtin : AST.external_function -> int array*)
+}
+
+module Cortex_A53 = struct
+  let resource_bounds = [| 2; 2; 1; 1 |]
+
+  (* instr ; ALU ; MAC; LSU *)
+  let nr_non_pipelined_units = 1
+
+  let latency_of_op (i : instruction) (nargs : int) =
+    match i with
+    | PBasic (PArith (PArithP (_, _))) -> 1
+    | PBasic (PArith (PArithPP (i', _, _))) -> (
+        match i' with
+        | Psbfiz (_, _, _) | Psbfx (_, _, _) | Pubfiz (_, _, _) | Pubfx (_, _, _)
+          ->
+            2
+        | Pfcvtds | Pfcvtsd
+        | Pfcvtzs (_, _)
+        | Pfcvtzu (_, _)
+        | Pfabs _ | Pfneg _
+        | Pscvtf (_, _)
+        | Pucvtf (_, _) ->
+            6
+        | _ -> 1)
+    | PBasic (PArith (PArithPPP (i', _, _, _))) -> (
+        match i' with
+        | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> 2
+        | Psmulh | Pumulh -> 4
+        | Pfdiv _ | Psdiv _ | Pudiv _ -> 50
+        | _ -> 6)
+    | PBasic (PArith (PArithRR0R (_, _, _, _))) -> 2
+    | PBasic (PArith (PArithRR0 (_, _, _))) -> 1
+    | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> 4
+    | PBasic (PArith (PArithComparisonPP (_, _, _))) -> 6
+    | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> 1
+    | PBasic (PArith (PArithComparisonP (i', _))) -> (
+        match i' with Pfcmp0 _ -> 6 | _ -> 1)
+    | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> 6
+    | PBasic (PArith _) -> 1
+    | PBasic (PLoad _) -> 3
+    | PBasic (PStore _) -> 3
+    | PBasic (Pallocframe (_, _)) -> 3
+    | PBasic (Pfreeframe (_, _)) -> 1
+    | PBasic (Ploadsymbol (_, _)) -> 1
+    | PBasic (Pcvtsw2x (_, _)) -> 2
+    | PBasic (Pcvtuw2x (_, _)) -> 2
+    | PBasic (Pcvtx2w _) -> 1
+    | PBasic (Pnop) -> 0
+    | PControl _ -> 6
+
+  let resources_of_op (i : instruction) (nargs : int) =
+    match i with
+    | PBasic (PArith (PArithP (i', _))) -> (
+        match i' with
+        | Pfmovimmd _ | Pfmovimms _ -> [| 1; 1; 0; 1 |]
+        | _ -> [| 1; 1; 0; 0 |])
+    | PBasic (PArith (PArithPP (i', _, _))) -> (
+        match i' with
+        | Pfcvtds | Pfcvtsd
+        | Pfcvtzs (_, _)
+        | Pfcvtzu (_, _)
+        | Pfabs _ | Pfneg _
+        | Pscvtf (_, _)
+        | Pucvtf (_, _) ->
+            [| 1 ; 1; 1; 0 |]
+        | _ -> [| 1; 1; 0; 0 |])
+    | PBasic (PArith (PArithPPP (i', _, _, _))) -> (
+        match i' with
+        | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> [| 1; 1; 0; 0 |]
+        | Pfdiv _ | Psdiv _ | Pudiv _ -> [| 1; 0; 0; 0 |]
+        | _ -> [| 1; 1; 1; 0 |])
+    | PBasic (PArith (PArithRR0R (_, _, _, _))) -> [| 1; 1; 0; 0 |]
+    | PBasic (PArith (PArithRR0 (_, _, _))) -> [| 1; 1; 0; 0 |]
+    | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> [| 1; 1; 1; 0 |]
+    | PBasic (PArith (PArithComparisonPP (_, _, _))) -> [| 1; 1; 1; 0 |]
+    | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> [| 1; 1; 0; 0 |]
+    | PBasic (PArith (PArithComparisonP (i', _))) -> (
+      match i' with Pfcmp0 _ -> [| 1; 1; 1; 0 |] | _ -> [| 1; 1; 0; 0 |])
+    | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> [| 1; 1; 1; 0 |]
+    | PBasic (PArith _) -> [| 1; 1; 0; 0 |]
+    | PBasic (PLoad _) -> [| 1; 0; 0; 1 |]
+    | PBasic (PStore _) -> [| 1; 0; 0; 1 |]
+    | PBasic (Pallocframe (_, _)) -> [| 1; 0; 0; 1 |]
+    | PBasic (Pfreeframe (_, _)) -> [| 1; 1; 0; 0 |]
+    | PBasic (Ploadsymbol (_, _)) -> [| 1; 1; 0; 0 |]
+    | PBasic (Pcvtsw2x (_, _)) -> [| 1; 1; 0; 0 |]
+    | PBasic (Pcvtuw2x (_, _)) -> [| 1; 1; 0; 0 |]
+    | PBasic (Pcvtx2w _) -> [| 1; 1; 0; 0 |]
+    | PBasic (Pnop) -> [| 0; 0; 0; 0 |]
+    | PControl _ -> resource_bounds
+
+  (*let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+      match op with
+      | Odiv | Odivu -> [| 29 |]
+      | Odivfs -> [| 20 |]
+      | Odivl | Odivlu | Odivf -> [| 50 |]
+      | _ -> [| -1 |];;
+
+    let resources_of_cond (cmp : condition) (nargs : int) =
+      (match cmp with
+       | Ccompf _                              (* r FP comparison *)
+         | Cnotcompf _                           (* r negation of an FP comparison *)
+         | Ccompfzero _                          (* r comparison with 0.0 *)
+         | Cnotcompfzero _                       (* r negation of comparison with 0.0 *)
+         | Ccompfs _                             (* r FP comparison *)
+         | Cnotcompfs _                          (* r negation of an FP comparison *)
+         | Ccompfszero _                         (* r equal to 0.0 *)
+         | Cnotcompfszero _                     (* r not equal to 0.0 *) ->
+          [| 1; 1; 1; 0 |]
+       | _ -> [| 1; 1; 0; 0 |] )*)
+
+end
+
+let get_opweights () : opweights =
+  (*match !Clflags.option_mtune with*)
+  (*| "cortex-a53" | "cortex-a35" | "" ->*)
+  {
+    pipelined_resource_bounds = Cortex_A53.resource_bounds;
+    nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+    latency_of_op = Cortex_A53.latency_of_op;
+    resources_of_op =
+      Cortex_A53.resources_of_op
+      (*non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;*)
+      (*latency_of_load = Cortex_A53.latency_of_load;*)
+      (*resources_of_load = Cortex_A53.resources_of_load;*)
+      (*resources_of_store = Cortex_A53.resources_of_store;*)
+      (*resources_of_cond = Cortex_A53.resources_of_cond;*)
+      (*latency_of_call = Cortex_A53.latency_of_call;*)
+      (*resources_of_call = Cortex_A53.resources_of_call;*)
+      (*resources_of_builtin = Cortex_A53.resources_of_builtin*);
+  }
+
+(*| xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;*)
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
new file mode 100644
index 00000000..18f41fed
--- /dev/null
+++ b/aarch64/PeepholeOracle.ml
@@ -0,0 +1,623 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open Camlcoq
+open Asmblock
+open Asm
+open Int64
+open Printf
+
+(* If true, the oracle will print a msg for each applied peephole *)
+let debug = false
+
+(* Functions to verify the immediate offset range for ldp/stp *)
+let is_valid_immofs_32 z =
+  if z <= 252 && z >= -256 && z mod 4 = 0 then true else false
+
+let is_valid_immofs_64 z =
+  if z <= 504 && z >= -512 && z mod 8 = 0 then true else false
+
+(* Functions to check if a ldp/stp replacement is valid according to args *)
+let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 =
+  let z1 = to_int (camlint64_of_coqint n1) in
+  let z2 = to_int (camlint64_of_coqint n2) in
+  if
+    (not (dreg_eq rd1 rd2))
+    && iregsp_eq b1 b2
+    && (not (dreg_eq rd1 (IR b2)))
+    && (z2 = z1 + 4 || z2 = z1 - 4)
+    && is_valid_immofs_32 z1
+  then true
+  else false
+
+let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 =
+  let z1 = to_int (camlint64_of_coqint n1) in
+  let z2 = to_int (camlint64_of_coqint n2) in
+  if
+    (not (dreg_eq rd1 rd2))
+    && iregsp_eq b1 b2
+    && (not (dreg_eq rd1 (IR b2)))
+    && (z2 = z1 + 8 || z2 = z1 - 8)
+    && is_valid_immofs_64 z1
+  then true
+  else false
+
+let is_valid_str32 b1 b2 n1 n2 =
+  let z1 = to_int (camlint64_of_coqint n1) in
+  let z2 = to_int (camlint64_of_coqint n2) in
+  if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true
+  else false
+
+let is_valid_str64 b1 b2 n1 n2 =
+  let z1 = to_int (camlint64_of_coqint n1) in
+  let z2 = to_int (camlint64_of_coqint n2) in
+  if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true
+  else false
+
+let dreg_of_ireg r = IR (RR1 r)
+
+let dreg_of_freg r = FR r
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_load_affect reg rd b rev =
+  let b = IR b in
+  if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
+
+(* Return true if an intermediate
+ * read eliminates the potential
+ * candidate *)
+let verify_load_read reg rd b rev = dreg_eq reg rd
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_store_affect reg rs b rev =
+  let b = IR b in
+  dreg_eq reg b || dreg_eq reg rs
+
+type ph_type = P32 | P32f | P64 | P64f
+
+type inst_type = Ldr of ph_type | Str of ph_type
+
+let ph_ty_to_string = function
+  | Ldr P32 -> "ldr32"
+  | Ldr P32f -> "ldr32f"
+  | Ldr P64 -> "ldr64"
+  | Ldr P64f -> "ldr64f"
+  | Str P32 -> "str32"
+  | Str P32f -> "str32f"
+  | Str P64 -> "str64"
+  | Str P64f -> "str64f"
+
+let print_ph_ty chan v = output_string chan (ph_ty_to_string v)
+
+let symb_mem = Hashtbl.create 9
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given write in reg *)
+let rec affect_symb_mem reg insta pot_rep stype rev =
+  match pot_rep with
+  | [] -> []
+  | h0 :: t0 -> (
+      match (insta.(h0), stype) with
+      | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+          if verify_load_affect reg rd b rev then
+            affect_symb_mem reg insta t0 stype rev
+          else h0 :: affect_symb_mem reg insta t0 stype rev
+      | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+          if verify_store_affect reg rs b rev then
+            affect_symb_mem reg insta t0 stype rev
+          else h0 :: affect_symb_mem reg insta t0 stype rev
+      | _, _ ->
+          failwith "affect_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given read in reg *)
+let rec read_symb_mem reg insta pot_rep stype rev =
+  match pot_rep with
+  | [] -> []
+  | h0 :: t0 -> (
+      match (insta.(h0), stype) with
+      | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+          if verify_load_read reg rd b rev then
+            read_symb_mem reg insta t0 stype rev
+          else h0 :: read_symb_mem reg insta t0 stype rev
+      | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+          h0 :: read_symb_mem reg insta t0 stype rev
+      | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Update a symbolic memory list of potential replacements
+ * for any addressing *)
+let update_pot_rep_addressing a insta pot_rep stype rev =
+  match a with
+  | ADimm (base, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+  | ADreg (base, r) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+  | ADlsl (base, r, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+  | ADsxt (base, r, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+  | ADuxt (base, r, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+  | ADadr (base, _, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+  | ADpostincr (base, _) ->
+      pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+
+(* Update a symbolic memory list of potential replacements
+ * for any basic instruction *)
+let update_pot_rep_basic inst insta stype rev =
+  let pot_rep = Hashtbl.find symb_mem stype in
+  (match inst with
+  | PArith i -> (
+      match i with
+      | PArithP (_, rd) ->
+          pot_rep := affect_symb_mem rd insta !pot_rep stype rev
+      | PArithPP (_, rd, rs) ->
+          pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs insta !pot_rep stype rev
+      | PArithPPP (_, rd, rs1, rs2) ->
+          pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+      | PArithRR0R (_, rd, rs1, rs2) ->
+          pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+          (match rs1 with
+          | RR0 rs1 ->
+              pot_rep :=
+                read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+          | _ -> ());
+          pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+      | PArithRR0 (_, rd, rs) -> (
+          pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+          match rs with
+          | RR0 rs1 ->
+              pot_rep :=
+                read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+          | _ -> ())
+      | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> (
+          pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+          pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev;
+          pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev;
+          match rs3 with
+          | RR0 rs1 ->
+              pot_rep :=
+                read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+          | _ -> ())
+      | PArithComparisonPP (_, rs1, rs2) ->
+          pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+      | PArithComparisonR0R (_, rs1, rs2) ->
+          (match rs1 with
+          | RR0 rs1 ->
+              pot_rep :=
+                read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+          | _ -> ());
+          pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+      | PArithComparisonP (_, rs1) ->
+          pot_rep := read_symb_mem rs1 insta !pot_rep stype rev
+      | Pcset (rd, _) ->
+          pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+      | Pfmovi (_, rd, rs) -> (
+          pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+          match rs with
+          | RR0 rs ->
+              pot_rep :=
+                read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+          | _ -> ())
+      | Pcsel (rd, rs1, rs2, _) ->
+          pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+          pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+      | Pfnmul (_, rd, rs1, rs2) ->
+          pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+          pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev;
+          pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev)
+  | PLoad i -> (
+      (* Here, we consider a different behavior for load and store potential candidates:
+         * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *)
+      match stype with
+      | Ldr _ -> (
+          match i with
+          | PLd_rd_a (_, rd, a) ->
+              pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+              update_pot_rep_addressing a insta pot_rep stype rev
+          | Pldp (_, rd1, rd2, _, _, a) ->
+              pot_rep := affect_symb_mem rd1 insta !pot_rep stype rev;
+              pot_rep := affect_symb_mem rd2 insta !pot_rep stype rev;
+              update_pot_rep_addressing a insta pot_rep stype rev)
+      | _ -> pot_rep := [])
+  | PStore _ -> (
+      (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones :
+         * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole.
+         * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *)
+      match stype with Ldr _ -> pot_rep := [] | _ -> ())
+  | Pallocframe (_, _) -> pot_rep := []
+  | Pfreeframe (_, _) -> pot_rep := []
+  | Ploadsymbol (rd, _) ->
+      pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+  | Pcvtsw2x (rd, rs) ->
+      pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+  | Pcvtuw2x (rd, rs) ->
+      pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+  | Pcvtx2w rd ->
+      pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+      pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+  | Pnop -> ());
+  Hashtbl.replace symb_mem stype pot_rep
+
+(* This is useful to manage the case were the immofs
+ * of the first ldr/str is greater than the second one *)
+let min_is_rev n1 n2 =
+  let z1 = to_int (camlint64_of_coqint n1) in
+  let z2 = to_int (camlint64_of_coqint n2) in
+  if z1 < z2 then true else false
+
+(* Below functions were added to merge pattern matching cases in peephole,
+ * thanks to this, we can make the chunk difference (int/any) compatible. *)
+let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
+  match ldi with
+  | Pldrw | Pldrw_a -> Pldpw
+  | Pldrx | Pldrx_a -> Pldpx
+  | Pldrs -> Pldps
+  | Pldrd | Pldrd_a -> Pldpd
+  | _ -> failwith "trans_ldi: Found a non compatible load to translate"
+
+let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
+  match sti with
+  | Pstrw | Pstrw_a -> Pstpw
+  | Pstrx | Pstrx_a -> Pstpx
+  | Pstrs -> Pstps
+  | Pstrd | Pstrd_a -> Pstpd
+  | _ -> failwith "trans_sti: Found a non compatible store to translate"
+
+let is_compat_load (ldi : load_rd_a) =
+  match ldi with
+  | Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a -> true
+  | _ -> false
+
+let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
+  match ldi1 with
+  | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
+  | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+  | Pldrs -> ( match ldi2 with Pldrs -> true | _ -> false)
+  | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false)
+  | _ -> false
+
+let is_compat_store (sti : store_rs_a) =
+  match sti with
+  | Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true
+  | _ -> false
+
+let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
+  match sti1 with
+  | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false)
+  | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false)
+  | Pstrs -> ( match sti2 with Pstrs -> true | _ -> false)
+  | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false)
+  | _ -> false
+
+let get_load_pht (ldi : load_rd_a) =
+  match ldi with
+  | Pldrw | Pldrw_a -> Ldr P32
+  | Pldrs -> Ldr P32f
+  | Pldrx | Pldrx_a -> Ldr P64
+  | Pldrd | Pldrd_a -> Ldr P64f
+  | _ -> failwith "get_load_string: Found a non compatible load to translate"
+
+let get_store_pht (sti : store_rs_a) =
+  match sti with
+  | Pstrw | Pstrw_a -> Str P32
+  | Pstrs -> Str P32f
+  | Pstrx | Pstrx_a -> Str P64
+  | Pstrd | Pstrd_a -> Str P64f
+  | _ -> failwith "get_store_string: Found a non compatible store to translate"
+
+let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
+  match stype with
+  | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2
+  | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2
+
+let is_valid_str b1 b2 n1 n2 stype =
+  match stype with
+  | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2
+  | _ -> is_valid_str64 b1 b2 n1 n2
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory *)
+let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
+  match pot_rep with
+  | [] -> None
+  | h0 :: t0 -> (
+      match insta.(h0) with
+      | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+          if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then
+            Some (h0, chunk_load ld1, rd1, b1, n1)
+          else search_compat_rep r2 b2 n2 insta t0 stype
+      | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+          if is_valid_str b1 b2 n1 n2 stype then
+            Some (h0, chunk_store st1, rs1, b1, n1)
+          else search_compat_rep r2 b2 n2 insta t0 stype
+      | _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep"
+      )
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory (when iterating in the reversed list) *)
+let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
+  match pot_rep with
+  | [] -> None
+  | h0 :: t0 -> (
+      match insta.(h0) with
+      | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+          if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then
+            Some (h0, chunk_load ld1, rd1, b1, n1)
+          else search_compat_rep_inv r2 b2 n2 insta t0 stype
+      | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+          if is_valid_str b2 b1 n2 n1 stype then
+            Some (h0, chunk_store st1, rs1, b1, n1)
+          else search_compat_rep_inv r2 b2 n2 insta t0 stype
+      | _ ->
+          failwith
+            "search_compat_rep_ldst_inv: Found an inconsistent inst in pot_rep")
+
+let init_symb_mem () =
+  Hashtbl.clear symb_mem;
+  Hashtbl.add symb_mem (Ldr P32) (ref []);
+  Hashtbl.add symb_mem (Ldr P64) (ref []);
+  Hashtbl.add symb_mem (Ldr P32f) (ref []);
+  Hashtbl.add symb_mem (Ldr P64f) (ref []);
+  Hashtbl.add symb_mem (Str P32) (ref []);
+  Hashtbl.add symb_mem (Str P64) (ref []);
+  Hashtbl.add symb_mem (Str P32f) (ref []);
+  Hashtbl.add symb_mem (Str P64f) (ref [])
+
+let reset_str_symb_mem () =
+  Hashtbl.replace symb_mem (Str P32) (ref []);
+  Hashtbl.replace symb_mem (Str P64) (ref []);
+  Hashtbl.replace symb_mem (Str P32f) (ref []);
+  Hashtbl.replace symb_mem (Str P64f) (ref [])
+
+(* Main peephole function in backward style *)
+let pair_rep_inv insta =
+  (* Each list below is a symbolic mem representation
+   * for one type of inst. Lists contains integers which
+   * are the indices of insts in the main array "insta". *)
+  init_symb_mem ();
+  for i = Array.length insta - 1 downto 1 do
+    let h0 = insta.(i) in
+    let h1 = insta.(i - 1) in
+    (* Here we need to update every symbolic memory according to the matched inst *)
+    update_pot_rep_basic h0 insta (Ldr P32) true;
+    update_pot_rep_basic h0 insta (Ldr P64) true;
+    update_pot_rep_basic h0 insta (Ldr P32f) true;
+    update_pot_rep_basic h0 insta (Ldr P64f) true;
+    update_pot_rep_basic h0 insta (Str P32) true;
+    update_pot_rep_basic h0 insta (Str P64) true;
+    update_pot_rep_basic h0 insta (Str P32f) true;
+    update_pot_rep_basic h0 insta (Str P64f) true;
+    match (h0, h1) with
+    (* Non-consecutive ldr *)
+    | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ ->
+        if is_compat_load ldi then (
+          (* Search a previous compatible load *)
+          let ld_t = get_load_pht ldi in
+          let pot_rep = Hashtbl.find symb_mem ld_t in
+          (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t with
+          (* If we can't find a candidate, add the current load as a potential future one *)
+          | None -> pot_rep := i :: !pot_rep
+          (* Else, perform the peephole *)
+          | Some (rep, c, r, b, n) ->
+              (* The two lines below are used to filter the elected candidate *)
+              let filt x = x != rep in
+              pot_rep := List.filter filt !pot_rep;
+              insta.(rep) <- Pnop;
+              if min_is_rev n n1 then (
+                if debug then
+                  eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+                insta.(i) <-
+                  PLoad
+                    (Pldp
+                       (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+              else (
+                if debug then
+                  eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+                insta.(i) <-
+                  PLoad
+                    (Pldp
+                       (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+          Hashtbl.replace symb_mem ld_t pot_rep)
+    (* Non-consecutive str *)
+    | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ ->
+        if is_compat_store sti then (
+          (* Search a previous compatible store *)
+          let st_t = get_store_pht sti in
+          let pot_rep = Hashtbl.find symb_mem st_t in
+          (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t with
+          (* If we can't find a candidate, clean and add the current store as a potential future one *)
+          | None ->
+              reset_str_symb_mem ();
+              pot_rep := [ i ]
+          (* Else, perform the peephole *)
+          | Some (rep, c, r, b, n) ->
+              (* The two lines below are used to filter the elected candidate *)
+              let filt x = x != rep in
+              pot_rep := List.filter filt !pot_rep;
+              insta.(rep) <- Pnop;
+              if debug then
+                eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+              insta.(i) <-
+                PStore
+                  (Pstp
+                     (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1))));
+          Hashtbl.replace symb_mem st_t pot_rep
+          (* Any other inst *))
+    | i, _ -> (
+        (* Clear list of candidates if there is a non supported store *)
+        match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+  done
+
+(* Main peephole function in forward style *)
+let pair_rep insta =
+  (* Each list below is a symbolic mem representation
+   * for one type of inst. Lists contains integers which
+   * are the indices of insts in the main array "insta". *)
+  init_symb_mem ();
+  for i = 0 to Array.length insta - 2 do
+    let h0 = insta.(i) in
+    let h1 = insta.(i + 1) in
+    (* Here we need to update every symbolic memory according to the matched inst *)
+    update_pot_rep_basic h0 insta (Ldr P32) false;
+    update_pot_rep_basic h0 insta (Ldr P64) false;
+    update_pot_rep_basic h0 insta (Ldr P32f) false;
+    update_pot_rep_basic h0 insta (Ldr P64f) false;
+    update_pot_rep_basic h0 insta (Str P32) false;
+    update_pot_rep_basic h0 insta (Str P64) false;
+    update_pot_rep_basic h0 insta (Str P32f) false;
+    update_pot_rep_basic h0 insta (Str P64f) false;
+    match (h0, h1) with
+    (* Consecutive ldr *)
+    | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))),
+        PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) ->
+        if are_compat_load ldi1 ldi2 then
+          let ld_t = get_load_pht ldi1 in
+          if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then (
+            if min_is_rev n1 n2 then (
+              if debug then
+                eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+              insta.(i) <-
+                PLoad
+                  (Pldp
+                     ( trans_ldi ldi1,
+                       rd1,
+                       rd2,
+                       chunk_load ldi1,
+                       chunk_load ldi2,
+                       ADimm (b1, n1) )))
+            else (
+              if debug then
+                eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+              insta.(i) <-
+                PLoad
+                  (Pldp
+                     ( trans_ldi ldi1,
+                       rd2,
+                       rd1,
+                       chunk_load ldi2,
+                       chunk_load ldi1,
+                       ADimm (b1, n2) )));
+            insta.(i + 1) <- Pnop)
+    (* Non-consecutive ldr *)
+    | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ ->
+        if is_compat_load ldi then (
+          (* Search a previous compatible load *)
+          let ld_t = get_load_pht ldi in
+          let pot_rep = Hashtbl.find symb_mem ld_t in
+          (match search_compat_rep rd1 b1 n1 insta !pot_rep ld_t with
+          (* If we can't find a candidate, add the current load as a potential future one *)
+          | None -> pot_rep := i :: !pot_rep
+          (* Else, perform the peephole *)
+          | Some (rep, c, r, b, n) ->
+              (* The two lines below are used to filter the elected candidate *)
+              let filt x = x != rep in
+              pot_rep := List.filter filt !pot_rep;
+              insta.(rep) <- Pnop;
+              if min_is_rev n n1 then (
+                if debug then
+                  eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+                insta.(i) <-
+                  PLoad
+                    (Pldp
+                       (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+              else (
+                if debug then
+                  eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+                insta.(i) <-
+                  PLoad
+                    (Pldp
+                       (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+          Hashtbl.replace symb_mem ld_t pot_rep)
+    (* Consecutive str *)
+    | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))),
+        PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) ->
+        (* Regardless of whether we can perform the peephole or not,
+         * we have to clean the potential candidates for stp now as we are
+         * looking at two new store instructions. *)
+        reset_str_symb_mem ();
+        if are_compat_store sti1 sti2 then
+          let st_t = get_store_pht sti1 in
+          if is_valid_str b1 b2 n1 n2 st_t then (
+            if debug then
+              eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+            insta.(i) <-
+              PStore
+                (Pstp
+                   ( trans_sti sti1,
+                     rd1,
+                     rd2,
+                     chunk_store sti1,
+                     chunk_store sti2,
+                     ADimm (b1, n1) ));
+            insta.(i + 1) <- Pnop)
+    (* Non-consecutive str *)
+    | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ ->
+        if is_compat_store sti then (
+          (* Search a previous compatible store *)
+          let st_t = get_store_pht sti in
+          let pot_rep = Hashtbl.find symb_mem st_t in
+          (match search_compat_rep rd1 b1 n1 insta !pot_rep st_t with
+          (* If we can't find a candidate, clean and add the current store as a potential future one *)
+          | None ->
+              reset_str_symb_mem ();
+              pot_rep := [ i ]
+          (* Else, perform the peephole *)
+          | Some (rep, c, r, b, n) ->
+              (* The two lines below are used to filter the elected candidate *)
+              let filt x = x != rep in
+              pot_rep := List.filter filt !pot_rep;
+              insta.(rep) <- Pnop;
+              if debug then
+                eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+              insta.(i) <-
+                PStore
+                  (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n))));
+          Hashtbl.replace symb_mem st_t pot_rep)
+    (* Any other inst *)
+    | i, _ -> (
+        (* Clear list of candidates if there is a non supported store *)
+        match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+  done
+
+(* Calling peephole if flag is set *)
+let optimize_bdy (bdy : basic list) : basic list =
+  if !Clflags.option_fcoalesce_mem then (
+    let insta = Array.of_list bdy in
+    pair_rep insta;
+    pair_rep_inv insta;
+    Array.to_list insta)
+  else bdy
+
+(* Called peephole function from Coq *)
+let peephole_opt bdy =
+  Timing.time_coq
+    [
+      'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e';
+    ]
+    optimize_bdy bdy
diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v
new file mode 100644
index 00000000..f826632b
--- /dev/null
+++ b/aarch64/PostpassScheduling.v
@@ -0,0 +1,146 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(** Implementation (and basic properties) of the verified postpass scheduler *)
+
+Require Import Coqlib Errors AST Integers.
+Require Import Asmblock Axioms Memory Globalenvs.
+Require Import Asmblockdeps Asmblockprops.
+Require Import IterList.
+
+Local Open Scope error_monad_scope.
+
+(** * Oracle taking as input a basic block,
+    returns a scheduled basic block *)
+Axiom schedule: bblock -> (list basic) * option control.
+
+Axiom peephole_opt: (list basic) -> list basic.
+
+Extract Constant schedule => "PostpassSchedulingOracle.schedule".
+
+Extract Constant peephole_opt => "PeepholeOracle.peephole_opt".
+
+Section verify_schedule.
+
+Variable lk: aarch64_linker.
+
+Definition verify_schedule (bb bb' : bblock) : res unit :=
+  match bblock_simub bb bb' with
+  | true => OK tt
+  | false => Error (msg "PostpassScheduling.verify_schedule")
+  end.
+
+Definition verify_size bb bb' := if (Z.eqb (size bb) (size bb')) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size").
+
+Lemma verify_size_size:
+  forall bb bb', verify_size bb bb' = OK tt -> size bb = size bb'.
+Proof.
+  intros. unfold verify_size in H. destruct (size bb =? size bb') eqn:SIZE; try discriminate.
+  apply Z.eqb_eq. assumption.
+Qed.
+
+Program Definition make_bblock_from_basics lb :=
+  match lb with
+  | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+  | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+  end.
+
+Program Definition schedule_to_bblock (lb: list basic) (oc: option control) : res bblock :=
+  match oc with
+  | None => make_bblock_from_basics lb
+  | Some c => OK {| header := nil; body := lb; exit := Some c |}
+  end.
+Next Obligation.
+  unfold Is_true, non_empty_bblockb.
+  unfold non_empty_exit. rewrite orb_true_r. reflexivity.
+Qed.
+
+Definition do_schedule (bb: bblock) : res bblock :=
+  if (Z.eqb (size bb) 1) then OK (bb) 
+  else match (schedule bb) with (lb, oc) => schedule_to_bblock lb oc end.
+
+(*Definition do_peephole (bb: bblock) : bblock :=*)
+  (*let res := Peephole.optimize_bblock bb in*)
+  (*if (size res =? size bb) then res else bb.*)
+
+Program Definition do_peephole (bb : bblock) :=
+  let optimized := peephole_opt (body bb) in
+  let wf_ok := non_empty_bblockb optimized (exit bb) in
+  {| header := header bb;
+     body := if wf_ok then optimized else (body bb);
+     exit := exit bb |}.
+Next Obligation.
+  destruct (non_empty_bblockb (peephole_opt (body bb))) eqn:Rwf.
+  - rewrite Rwf. cbn. trivial.
+  - exact (correct bb).
+Qed.
+
+Definition verified_schedule (bb : bblock) : res bblock :=
+  let nhbb  := no_header bb in
+  let phbb := do_peephole nhbb in
+  do schbb <- do_schedule phbb;
+  let bb' := stick_header (header bb) schbb in
+  do sizecheck <- verify_size bb bb';
+  do schedcheck <- verify_schedule bb bb';
+  OK (bb').
+
+Lemma verified_schedule_size:
+  forall bb bb', verified_schedule bb = OK bb' -> size bb = size bb'.
+Proof.
+  intros. unfold verified_schedule in H.
+  monadInv H.
+  unfold verify_size in EQ1.
+  destruct (size _ =? size _) eqn:ESIZE_H in EQ1; try discriminate.
+  rewrite Z.eqb_eq in ESIZE_H; rewrite ESIZE_H; reflexivity.
+Qed.
+
+Theorem verified_schedule_correct:
+  forall ge f bb bb',
+  verified_schedule bb = OK bb' ->
+  bblock_simu lk ge f bb bb'.
+Proof.
+  intros.
+  monadInv H.
+  eapply bblock_simub_correct.
+  unfold verify_schedule in EQ0.
+  destruct (bblock_simub _ _) in *; try discriminate; auto.
+Qed.
+
+End verify_schedule.
+
+Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) :=
+  match lbb with
+  | nil => OK nil
+  | bb :: lbb =>
+      do tlbb <- transf_blocks lbb;
+      do tbb <- verified_schedule bb;
+      OK (tbb :: tlbb)
+  end.
+
+Definition transl_function (f: function) : res function :=
+  do lb <- transf_blocks (fn_blocks f); 
+  OK (mkfunction (fn_sig f) lb).
+
+Definition transf_function (f: function) : res function :=
+  do tf <- transl_function f;
+  if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+  then Error (msg "code size exceeded")
+  else OK tf.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
new file mode 100644
index 00000000..cde3e7a7
--- /dev/null
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -0,0 +1,671 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open Asmblock
+open OpWeightsAsm
+open InstructionScheduler
+
+let debug = false
+
+let stats = false
+
+(**
+ * Extracting infos from Asm instructions
+ *)
+
+type location = Reg of Asm.preg | Mem | IREG0_XZR
+
+type ab_inst_rec = {
+  inst : instruction;
+  write_locs : location list;
+  read_locs : location list;
+  is_control : bool;
+}
+
+(** Asm constructor to real instructions *)
+
+exception OpaqueInstruction
+
+let is_XZR = function IREG0_XZR -> true | _ -> false
+
+let reg_of_pc = Reg Asm.PC
+
+let reg_of_dreg r = Reg (Asm.DR r)
+
+let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r)))
+
+let reg_of_iregsp r = Reg (Asm.DR (Asm.IR r))
+
+let reg_of_ireg0 r =
+  match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR
+
+let reg_of_freg r = Reg (Asm.DR (Asm.FR r))
+
+let reg_of_cr r = Reg (Asm.CR r)
+
+let regXSP = Reg (Asm.DR (Asm.IR Asm.XSP))
+
+let flags_wlocs =
+  [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CC; reg_of_cr Asm.CV ]
+
+let get_arith_p_wlocs = function
+  | Pfmovimms _ -> [ reg_of_ireg Asm.X16 ]
+  | Pfmovimmd _ -> [ reg_of_ireg Asm.X16 ]
+  | _ -> []
+
+let arith_p_rec i i' rd =
+  {
+    inst = i;
+    write_locs = [ rd ] @ get_arith_p_wlocs i';
+    read_locs = [];
+    is_control = false;
+  }
+
+let arith_pp_rec i rd rs =
+  { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false }
+
+let arith_ppp_rec i rd r1 r2 =
+  { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
+
+let arith_rr0r_rec i rd r1 r2 =
+  let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
+  { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_rr0_rec i rd r1 =
+  let rlocs = if is_XZR r1 then [] else [ r1 ] in
+  { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_arrrr0_rec i rd r1 r2 r3 =
+  let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in
+  { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_comparison_pp_rec i r1 r2 =
+  {
+    inst = i;
+    write_locs = flags_wlocs;
+    read_locs = [ r1; r2 ];
+    is_control = false;
+  }
+
+let arith_comparison_r0r_rec i r1 r2 =
+  let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
+  { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false }
+
+let arith_comparison_p_rec i r1 =
+  { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false }
+
+let get_eval_addressing_rlocs a =
+  match a with
+  | Asm.ADimm (base, _) -> [ reg_of_iregsp base ]
+  | Asm.ADreg (base, r) -> [ reg_of_iregsp base; reg_of_ireg r ]
+  | Asm.ADlsl (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+  | Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+  | Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+  | Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ]
+  | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ]
+
+let load_rd_a_rec ld rd a =
+  {
+    inst = ld;
+    write_locs = [ rd ];
+    read_locs = [ Mem ] @ get_eval_addressing_rlocs a;
+    is_control = false;
+  }
+
+let load_rd1_rd2_a_rec ld rd1 rd2 a =
+  {
+    inst = ld;
+    write_locs = [ rd1; rd2 ];
+    read_locs = [ Mem ] @ get_eval_addressing_rlocs a;
+    is_control = false;
+  }
+
+let load_rec ldi =
+  match ldi with
+  | PLd_rd_a (ld, rd, a) ->
+      load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
+  | Pldp (ld, rd1, rd2, _, _, a) ->
+      load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1)
+        (reg_of_dreg rd2) a
+
+let store_rs_a_rec st rs a =
+  {
+    inst = st;
+    write_locs = [ Mem ];
+    read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a;
+    is_control = false;
+  }
+
+let store_rs1_rs2_a_rec st rs1 rs2 a =
+  {
+    inst = st;
+    write_locs = [ Mem ];
+    read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a;
+    is_control = false;
+  }
+
+let store_rec sti =
+  match sti with
+  | PSt_rs_a (st, rs, a) ->
+      store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
+  | Pstp (st, rs1, rs2, _, _, a) ->
+      store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1)
+        (reg_of_dreg rs2) a
+
+let loadsymbol_rec i rd id =
+  { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false }
+
+let cvtsw2x_rec i rd r1 =
+  { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
+
+let cvtuw2x_rec i rd r1 =
+  { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
+
+let cvtx2w_rec i rd =
+  { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false }
+
+let get_testcond_rlocs c =
+  match c with
+  | Asm.TCeq -> [ reg_of_cr Asm.CZ ]
+  | Asm.TCne -> [ reg_of_cr Asm.CZ ]
+  | Asm.TChs -> [ reg_of_cr Asm.CC ]
+  | Asm.TClo -> [ reg_of_cr Asm.CC ]
+  | Asm.TCmi -> [ reg_of_cr Asm.CN ]
+  | Asm.TCpl -> [ reg_of_cr Asm.CN ]
+  | Asm.TChi -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ]
+  | Asm.TCls -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ]
+  | Asm.TCge -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ]
+  | Asm.TClt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ]
+  | Asm.TCgt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ]
+  | Asm.TCle -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ]
+
+let cset_rec i rd c =
+  {
+    inst = i;
+    write_locs = [ rd ];
+    read_locs = get_testcond_rlocs c;
+    is_control = false;
+  }
+
+let csel_rec i rd r1 r2 c =
+  {
+    inst = i;
+    write_locs = [ rd ];
+    read_locs = [ r1; r2 ] @ get_testcond_rlocs c;
+    is_control = false;
+  }
+
+let fmovi_rec i fsz rd r1 =
+  let rlocs = if is_XZR r1 then [] else [ r1 ] in
+  { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let fnmul_rec i fsz rd r1 r2 =
+  { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
+
+let allocframe_rec i sz linkofs =
+  {
+    inst = i;
+    write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16; reg_of_ireg Asm.X29 ];
+    read_locs = [ regXSP; Mem ];
+    is_control = false;
+  }
+
+let freeframe_rec i sz linkofs =
+  {
+    inst = i;
+    write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16 ];
+    read_locs = [ regXSP; Mem ];
+    is_control = false;
+  }
+
+let nop_rec i =
+  { inst = i; write_locs = []; read_locs = []; is_control = false }
+
+let arith_rec i =
+  match i with
+  | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd)
+  | PArithPP (i', rd, rs) ->
+      arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs)
+  | PArithPPP (i', rd, r1, r2) ->
+      arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+        (reg_of_dreg r2)
+  | PArithRR0R (i', rd, r1, r2) ->
+      arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
+        (reg_of_ireg r2)
+  | PArithRR0 (i', rd, r1) ->
+      arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
+  | PArithARRRR0 (i', rd, r1, r2, r3) ->
+      arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1)
+        (reg_of_ireg r2) (reg_of_ireg0 r3)
+  | PArithComparisonPP (i', r1, r2) ->
+      arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1)
+        (reg_of_dreg r2)
+  | PArithComparisonR0R (i', r1, r2) ->
+      arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1)
+        (reg_of_ireg r2)
+  | PArithComparisonP (i', r1) ->
+      arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1)
+  | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c
+  | Pfmovi (fsz, rd, r1) ->
+      fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1)
+  | Pcsel (rd, r1, r2, c) ->
+      csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+        (reg_of_dreg r2) c
+  | Pfnmul (fsz, rd, r1, r2) ->
+      fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1)
+        (reg_of_freg r2)
+
+let basic_rec i =
+  match i with
+  | PArith i' -> arith_rec i'
+  | PLoad ld -> load_rec ld
+  | PStore st -> store_rec st
+  | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs
+  | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs
+  | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id
+  | Pcvtsw2x (rd, r1) ->
+      cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+  | Pcvtuw2x (rd, r1) ->
+      cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+  | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd)
+  | Pnop -> nop_rec (PBasic i)
+
+let builtin_rec i ef args res =
+  { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true }
+
+let ctl_flow_rec i =
+  match i with
+  | Pb lbl ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_pc ];
+        is_control = true;
+      }
+  | Pbc (c, lbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_pc ];
+        is_control = true;
+      }
+  | Pbl (id, sg) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
+        read_locs = [ reg_of_pc ];
+        is_control = true;
+      }
+  | Pbs (id, sg) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [];
+        is_control = true;
+      }
+  | Pblr (r, sg) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
+        read_locs = [ reg_of_ireg r ];
+        is_control = true;
+      }
+  | Pbr (r, sg) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r ];
+        is_control = true;
+      }
+  | Pret r ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r ];
+        is_control = true;
+      }
+  | Pcbnz (sz, r, lbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r; reg_of_pc ];
+        is_control = true;
+      }
+  | Pcbz (sz, r, lbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r; reg_of_pc ];
+        is_control = true;
+      }
+  | Ptbnz (sz, r, n, lbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r; reg_of_pc ];
+        is_control = true;
+      }
+  | Ptbz (sz, r, n, lbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_pc ];
+        read_locs = [ reg_of_ireg r; reg_of_pc ];
+        is_control = true;
+      }
+  | Pbtbl (r1, tbl) ->
+      {
+        inst = PControl (PCtlFlow i);
+        write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ];
+        read_locs = [ reg_of_ireg r1; reg_of_pc ];
+        is_control = true;
+      }
+
+let control_rec i =
+  match i with
+  | Pbuiltin (ef, args, res) -> builtin_rec (PControl i) ef args res
+  | PCtlFlow i' -> ctl_flow_rec i'
+
+let rec basic_recs body =
+  match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body
+
+let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ]
+
+let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit
+
+(**
+ * Providing informations relative to the real instructions
+ *)
+
+type inst_info = {
+  write_locs : location list;
+  read_locs : location list;
+  is_control : bool;
+  (* resources consumed by the instruction *)
+  usage : int array;
+  latency : int;
+}
+
+(** Abstraction providing all the necessary informations for solving the scheduling problem *)
+
+let rec_to_info (r : ab_inst_rec) : inst_info =
+  let opweights = OpWeightsAsm.get_opweights () in
+  let usage = opweights.resources_of_op r.inst 0
+  and latency = opweights.latency_of_op r.inst 0 in
+  {
+    write_locs = r.write_locs;
+    read_locs = r.read_locs;
+    usage;
+    latency;
+    is_control = r.is_control;
+  }
+
+let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
+
+let instruction_usages bb =
+  let usages = List.map (fun info -> info.usage) (instruction_infos bb) in
+  Array.of_list usages
+
+(**
+ * Latency constraints building
+ *)
+
+module LocHash = Hashtbl
+
+(* Hash table : location => list of instruction ids *)
+
+let rec intlist n =
+  if n < 0 then failwith "intlist: n < 0"
+  else if n = 0 then []
+  else (n - 1) :: intlist (n - 1)
+
+let find_in_hash hashloc loc =
+  match LocHash.find_opt hashloc loc with Some idl -> idl | None -> []
+
+(* Returns a list of instruction ids *)
+let rec get_accesses hashloc (ll : location list) =
+  match ll with
+  | [] -> []
+  | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs
+
+let compute_latency (ifrom : inst_info) = ifrom.latency
+
+let latency_constraints bb =
+  let written = LocHash.create 70
+  and read = LocHash.create 70
+  and count = ref 0
+  and constraints = ref []
+  and instr_infos = instruction_infos bb in
+  let step (i : inst_info) =
+    let raw = get_accesses written i.read_locs
+    and waw = get_accesses written i.write_locs
+    and war = get_accesses read i.write_locs in
+    List.iter
+      (fun i ->
+        constraints :=
+          {
+            instr_from = i;
+            instr_to = !count;
+            latency = compute_latency (List.nth instr_infos i);
+          }
+          :: !constraints)
+      raw;
+    List.iter
+      (fun i ->
+        constraints :=
+          {
+            instr_from = i;
+            instr_to = !count;
+            latency = compute_latency (List.nth instr_infos i);
+          }
+          :: !constraints)
+      waw;
+    List.iter
+      (fun i ->
+        constraints :=
+          { instr_from = i; instr_to = !count; latency = 0 } :: !constraints)
+      war;
+    if i.is_control then
+      List.iter
+        (fun n ->
+          constraints :=
+            { instr_from = n; instr_to = !count; latency = 0 } :: !constraints)
+        (intlist !count);
+    (* Updating "read" and "written" hashmaps *)
+    List.iter
+      (fun loc ->
+        LocHash.replace written loc [ !count ];
+        LocHash.replace read loc []
+        (* Clearing all the entries of "read" hashmap when a register is written *))
+      i.write_locs;
+    List.iter
+      (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc))
+      i.read_locs;
+    count := !count + 1
+  in
+  List.iter step instr_infos;
+  !constraints
+
+(**
+ * Using the InstructionScheduler
+ *)
+
+let opweights = OpWeightsAsm.get_opweights ()
+
+let build_problem bb =
+  {
+    max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = instruction_usages bb;
+    latency_constraints = latency_constraints bb;
+  }
+
+let get_from_indexes indexes l = List.map (List.nth l) indexes
+
+(*let is_basic = function PBasic _ -> true | _ -> false*)
+let is_control = function PControl _ -> true | _ -> false
+
+let to_control = function
+  | PControl i -> i
+  | _ -> failwith "to_control: basic instruction found"
+
+let rec body_to_instrs bdy =
+  match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l'
+
+let rec instrs_to_bdy instrs =
+  match instrs with
+  | [] -> []
+  | PBasic i :: l' -> i :: instrs_to_bdy l'
+  | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found"
+
+let repack li hd =
+  let last = List.nth li (List.length li - 1) in
+  if is_control last then
+    let cut_li =
+      Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1)
+    in
+    { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) }
+  else { header = hd; body = instrs_to_bdy li; exit = None }
+
+module TimeHash = Hashtbl
+
+(*Hash table : time => list of instruction ids *)
+
+(* Flattening the minpack result *)
+let hashtbl2flatarray h maxint =
+  let rec f i =
+    match TimeHash.find_opt h i with
+    | None -> if i > maxint then [] else f (i + 1)
+    | Some bund -> bund @ f (i + 1)
+  in
+  f 0
+
+let find_max l =
+  let rec f = function
+    | [] -> None
+    | e :: l -> (
+        match f l with
+        | None -> Some e
+        | Some m -> if e > m then Some e else Some m)
+  in
+  match f l with None -> raise Not_found | Some m -> m
+
+(* We still use the minpack algorithm even without bundles, but the result is then flattened *)
+(*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*)
+let minpack_list (l : int list) =
+  let timehash = TimeHash.create (List.length l) in
+  let rec f i = function
+    | [] -> ()
+    | t :: l ->
+        (match TimeHash.find_opt timehash t with
+        | None -> TimeHash.add timehash t [ i ]
+        | Some bund -> TimeHash.replace timehash t (bund @ [ i ]));
+        f (i + 1) l
+  in
+  f 0 l;
+  hashtbl2flatarray timehash (find_max l)
+
+let bb_to_instrs bb =
+  body_to_instrs bb.body
+  @ match bb.exit with None -> [] | Some e -> [ PControl e ]
+
+let build_solution bb sol =
+  (* Remove last element - the total *)
+  let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in
+  let pack = minpack_list tmp and instrs = bb_to_instrs bb in
+  repack (get_from_indexes pack instrs) bb.header
+
+let print_schedule sched =
+  print_string "[ ";
+  Array.iter (fun x -> Printf.printf "%d; " x) sched;
+  print_endline "]"
+
+let do_schedule bb =
+  let problem = build_problem bb in
+  if debug then print_problem stdout problem;
+  let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in
+  match solution with
+  | None -> failwith "Could not find a valid schedule"
+  | Some sol ->
+      if debug then print_schedule sol;
+      build_solution bb sol
+
+(**
+ * Dumb schedule if the above doesn't work
+ *)
+
+(* Pack result *)
+let pack_result (bb : bblock) = (bb.body, bb.exit)
+
+let smart_schedule bb =
+  let bb' =
+    try do_schedule bb with
+    | OpaqueInstruction ->
+        if debug then
+          Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n";
+        bb (* Identity in case of failure *)
+    | e ->
+        let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in
+        Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg
+          stack;
+        failwith "Invalid schedule"
+  in
+  pack_result bb'
+
+let bblock_schedule bb =
+  let identity_mode = not !Clflags.option_fpostpass in
+  if debug && not identity_mode then (
+    Printf.eprintf "###############################\n";
+    Printf.eprintf "SCHEDULING\n");
+  if stats then (
+    let oc =
+      open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv"
+    in
+    Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb));
+    close_out oc);
+  if identity_mode then pack_result bb else smart_schedule bb
+
+(** Called schedule function from Coq *)
+
+(*let schedule_notime bb = let toto = bblock_schedule in toto*)
+let schedule bb =
+  Timing.time_coq
+    [
+      'P';
+      'o';
+      's';
+      't';
+      'p';
+      'a';
+      's';
+      's';
+      'S';
+      'c';
+      'h';
+      'e';
+      'd';
+      'u';
+      'l';
+      'i';
+      'n';
+      'g';
+      ' ';
+      'o';
+      'r';
+      'a';
+      'c';
+      'l';
+      'e';
+    ]
+    bblock_schedule bb
diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v
new file mode 100644
index 00000000..48840602
--- /dev/null
+++ b/aarch64/PostpassSchedulingproof.v
@@ -0,0 +1,481 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé     Grenoble-INP, VERIMAG          *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*           Cyril Six          Kalray                         *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright Kalray. Copyright VERIMAG. All rights reserved.  *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockprops.
+Require Import PostpassScheduling.
+Require Import Asmblockgenproof.
+Require Import Axioms.
+
+Local Open Scope error_monad_scope.
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+  intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION_ASMBLOCK.
+
+Variables prog tprog: program.
+Variable lk: aarch64_linker.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_match TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSL).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro:
+      forall s1 s2, s1 = s2 -> match_states s1 s2.
+
+Lemma prog_main_preserved:
+  prog_main tprog = prog_main prog.
+Proof (match_program_main TRANSL).
+
+Lemma prog_main_address_preserved:
+  (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) =
+  (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero).
+Proof.
+  unfold Genv.symbol_address. rewrite symbols_preserved.
+  rewrite prog_main_preserved. auto.
+Qed.
+
+Lemma transf_initial_states:
+  forall st1, initial_state prog st1 ->
+  exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+  intros. inv H.
+  econstructor; split.
+  - eapply initial_state_intro.
+    eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+  - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto.
+Qed.
+
+Lemma transf_final_states:
+  forall st1 st2 r,
+  match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+  intros. inv H0. inv H. econstructor; eauto.
+Qed.
+
+Lemma transf_find_bblock:
+  forall ofs f bb tf,
+  find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb ->
+  transf_function f = OK tf ->
+  exists tbb,
+     verified_schedule bb = OK tbb
+     /\ find_bblock (Ptrofs.unsigned ofs) (fn_blocks tf) = Some tbb.
+Proof.
+  intros.
+  monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0.
+  monadInv EQ. simpl in *.
+  generalize (Ptrofs.unsigned ofs) H x EQ0; clear ofs H x g EQ0.
+  induction (fn_blocks f).
+  - intros. simpl in *. discriminate.
+  - intros. simpl in *.
+    monadInv EQ0. simpl.
+    destruct (zlt z 0); try discriminate.
+    destruct (zeq z 0).
+    + inv H. eauto.
+    + monadInv EQ0.
+      exploit IHb; eauto.
+      intros (tbb & SCH & FIND).
+      eexists; split; eauto.
+      inv FIND.
+      unfold verify_size in EQ0.
+      destruct (size a =? size (stick_header (header a) x)) eqn:EQSIZE; try discriminate.
+      rewrite Z.eqb_eq in EQSIZE; rewrite EQSIZE.
+      reflexivity.
+Qed.
+
+Lemma stick_header_neutral: forall a,
+  a = (stick_header (header a) (no_header a)).
+Proof.
+  intros.
+  unfold stick_header. unfold Asmblock.stick_header_obligation_1. simpl. destruct a.
+  simpl. reflexivity.
+Qed.
+
+Lemma symbol_address_preserved:
+  forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs.
+Proof.
+  intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity.
+Qed.
+
+Lemma verified_schedule_label:
+  forall bb tbb l,
+  verified_schedule bb = OK (tbb) ->
+     is_label l bb = is_label l tbb.
+Proof.
+  intros.
+  unfold is_label.
+  monadInv H. simpl. auto.
+Qed.
+
+Remark label_pos_pvar_none_add:
+  forall tc l p p' k,
+  label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None.
+Proof.
+  induction tc.
+  - intros. simpl. auto.
+  - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+    + discriminate.
+    + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_none:
+  forall tc l p p',
+  label_pos l p tc = None -> label_pos l p' tc = None.
+Proof.
+  intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+  eapply label_pos_pvar_none_add; eauto.
+Qed.
+
+Remark label_pos_pvar_some_add_add:
+  forall tc l p p' k k',
+  label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k).
+Proof.
+  induction tc.
+  - intros. simpl in H. discriminate.
+  - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+    + inv H. assert (k = k') by omega. subst. reflexivity.
+    + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_some_add:
+  forall tc l p p' k,
+  label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k).
+Proof.
+  intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+  eapply label_pos_pvar_some_add_add; eauto.
+Qed.
+
+Remark label_pos_pvar_add:
+  forall c tc l p p' k,
+  label_pos l (p+k) c = label_pos l p tc ->
+  label_pos l (p'+k) c = label_pos l p' tc.
+Proof.
+  induction c.
+  - intros. simpl in *.
+    exploit label_pos_pvar_none; eauto.
+  - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+    + exploit label_pos_pvar_some_add; eauto.
+    + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar:
+  forall c tc l p p',
+  label_pos l p c = label_pos l p tc ->
+  label_pos l p' c = label_pos l p' tc.
+Proof.
+  intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+  eapply label_pos_pvar_add; eauto.
+Qed.
+
+Lemma label_pos_head_cons:
+  forall c bb tbb l tc p,
+  verified_schedule bb = OK tbb ->
+  label_pos l p c = label_pos l p tc ->
+  label_pos l p (bb :: c) = label_pos l p (tbb :: tc).
+Proof.
+  intros. simpl.
+  exploit verified_schedule_label; eauto. intros ISLBL.
+  rewrite ISLBL.
+  destruct (is_label l tbb) eqn:ISLBL'; simpl; auto.
+  eapply label_pos_pvar in H0. erewrite H0.
+  erewrite verified_schedule_size; eauto.
+Qed.
+
+Lemma label_pos_preserved:
+  forall c tc l,
+  transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc.
+Proof.
+  induction c.
+  - intros. simpl in *. inv H. reflexivity.
+  - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H.
+    eapply IHc in EQ. eapply label_pos_head_cons; eauto.
+Qed.
+
+Lemma label_pos_preserved_blocks:
+  forall l f tf,
+  transf_function f = OK tf ->
+  label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf).
+Proof.
+  intros. monadInv H. monadInv EQ.
+  destruct (zlt Ptrofs.max_unsigned _); try discriminate.
+  monadInv EQ0. simpl. eapply label_pos_preserved; eauto.
+Qed.
+
+Lemma transf_exec_basic:
+  forall i rs m, exec_basic lk ge i rs m = exec_basic lk tge i rs m.
+Proof.
+  intros. pose symbol_address_preserved.
+  unfold exec_basic. 
+  destruct i; simpl; auto; try congruence.
+Qed.
+
+Lemma transf_exec_body:
+  forall bdy rs m, exec_body lk ge bdy rs m = exec_body lk tge bdy rs m.
+Proof.
+  induction bdy; intros.
+  - simpl. reflexivity.
+  - simpl. rewrite transf_exec_basic.
+    destruct (exec_basic _ _ _); auto.
+Qed.
+
+Lemma transf_exec_cfi: forall f tf cfi rs m,
+  transf_function f = OK tf ->
+  exec_cfi ge f cfi rs m = exec_cfi tge tf cfi rs m.
+Proof.
+  intros. destruct cfi; simpl; auto;
+  assert (ge = Genv.globalenv prog); auto;
+  assert (tge = Genv.globalenv tprog); auto;
+  pose symbol_address_preserved.
+  all: try unfold eval_branch; try unfold eval_neg_branch; try unfold goto_label;
+       try erewrite label_pos_preserved_blocks; try rewrite e; eauto.
+  destruct (rs # X16 <- Vundef r1); auto.
+  destruct (list_nth_z tbl (Int.unsigned i)); auto.
+  erewrite label_pos_preserved_blocks; eauto.
+Qed.
+
+Lemma transf_exec_exit:
+  forall f tf sz ex t rs m rs' m',
+  transf_function f = OK tf ->
+  exec_exit ge f sz rs m ex t rs' m' ->
+  exec_exit tge tf sz rs m ex t rs' m'.
+Proof.
+  intros. induction H0.
+  - econstructor.
+  - econstructor. erewrite <- transf_exec_cfi; eauto.
+  - econstructor; eauto.
+    eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Lemma transf_exec_bblock:
+  forall f tf bb t rs m rs' m',
+  transf_function f = OK tf ->
+  exec_bblock lk ge f bb rs m t rs' m' ->
+  exec_bblock lk tge tf bb rs m t rs' m'.
+Proof.
+  intros.
+  destruct H0 as [rs1[m1[BDY EXIT]]].
+  unfold exec_bblock.
+  eexists; eexists; split.
+  rewrite <- transf_exec_body; eauto.
+  eapply transf_exec_exit; eauto.
+Qed.
+
+Theorem transf_step_correct:
+  forall s1 t s2, step lk ge s1 t s2 ->
+  forall s1' (MS: match_states s1 s1'),
+  (exists s2', step lk tge s1' t s2' /\ match_states s2 s2').
+Proof.
+  induction 1; intros; inv MS.
+  - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+    exploit transf_find_bblock; eauto. intros (tbb & VES & FIND).
+    exploit verified_schedule_correct; eauto. intros EBB.
+    eapply transf_exec_bblock in EBB; eauto. 
+    exists (State rs' m').
+    split; try (econstructor; eauto).
+  - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+    remember (State _ m') as s'. exists s'. split; try constructor; auto.
+    subst s'. eapply exec_step_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Theorem transf_program_correct_Asmblock: 
+  forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog).
+Proof.
+  eapply forward_simulation_step.
+  - apply senv_preserved.
+  - apply transf_initial_states.
+  - apply transf_final_states.
+  - apply transf_step_correct.
+Qed.
+
+End PRESERVATION_ASMBLOCK.
+
+(*
+Require Import Asm.
+
+Lemma verified_par_checks_alls_bundles lb x: forall bundle,
+  verify_par lb = OK x ->
+  List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+  induction lb; simpl; try tauto.
+  intros bundle H; monadInv H.
+  destruct 1; subst; eauto.
+  destruct x0; auto.
+Qed.
+
+Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
+  verified_schedule_nob bb = OK lb ->
+  List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+  unfold verified_schedule_nob. intros H;
+  monadInv H. destruct x4.
+  intros; eapply verified_par_checks_alls_bundles; eauto.
+Qed.
+
+Lemma verify_par_bblock_PExpand bb i:
+  exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt.
+Proof.
+  destruct bb as [h bdy ext H]; simpl.
+  intros; subst. destruct i.
+  generalize H.
+  rewrite <- wf_bblock_refl in H.
+  destruct H as [H H0].
+  unfold builtin_alone in H0. erewrite H0; eauto.
+Qed.
+
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
+
+Lemma verified_schedule_checks_alls_bundles bb lb bundle:
+  verified_schedule bb = OK lb ->
+  List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+  unfold verified_schedule. remember (exit bb) as exb.
+  destruct exb as [c|]; eauto.
+  destruct c as [i|]; eauto.
+  destruct i; intros H. inversion_clear H; simpl.
+  intuition subst.
+  intros; eapply verify_par_bblock_PExpand; eauto.
+Qed.
+
+Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle,
+  transf_blocks lbb = OK lb ->
+  List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+  induction lbb; simpl.
+  - intros lb bundle H; inversion_clear H. simpl; try tauto.
+  - intros lb bundle H0.
+    monadInv H0.
+    rewrite in_app. destruct 1; eauto.
+    eapply verified_schedule_checks_alls_bundles; eauto.
+Qed.
+
+Lemma find_bblock_Some_in lb: 
+  forall ofs b, find_bblock ofs lb = Some b -> List.In b lb.
+Proof.
+  induction lb; simpl; try congruence.
+  intros ofs b.
+  destruct (zlt ofs 0); try congruence.
+  destruct (zeq ofs 0); eauto.
+  intros X; inversion X; eauto.
+Qed.*)
+
+(*
+Section PRESERVATION_ASMVLIW.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma all_bundles_are_checked b ofs f bundle:
+  Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+  find_bblock ofs (fn_blocks f) = Some bundle ->
+  verify_par_bblock bundle = OK tt. 
+Proof.
+  unfold match_prog, match_program in TRANSL.
+  unfold Genv.find_funct_ptr; simpl; intros X.
+  destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+  destruct y as [tf0|]; try congruence.
+  inversion X as [H1]. subst. clear X.
+  remember (@Gfun fundef unit (Internal f)) as f2.
+  destruct H as [ctx' f1 f2 H0|]; try congruence.
+  inversion Heqf2 as [H2]. subst; clear Heqf2.
+  unfold transf_fundef, transf_partial_fundef in H.
+  destruct f1 as [f1|f1]; try congruence.
+  unfold transf_function, transl_function in H.
+  monadInv H. monadInv EQ.
+  destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence.
+  injection EQ1; intros; subst.
+  monadInv EQ0. simpl in * |-.
+  intros; exploit transf_blocks_checks_all_bundles; eauto.
+  intros; eapply find_bblock_Some_in; eauto.
+Qed.
+
+Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m':
+  exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+  verify_par_bblock bundle = OK tt ->
+  det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+  intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0.
+  simpl in H.
+  eapply Asmblockdeps.bblock_para_check_correct; eauto.
+Qed.
+
+Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m':
+  Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+  find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+  exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+  det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+  intros; eapply checked_bundles_are_parexec_equiv; eauto.
+  eapply all_bundles_are_checked; eauto.
+Qed.
+
+Theorem transf_program_correct_Asmvliw: 
+  forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog).
+Proof.
+  eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto.
+  - intros; subst; auto.
+  - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto.
+    + eapply exec_step_internal; eauto.
+      intros; eapply seqexec_parexec_equiv; eauto.
+    + eapply exec_step_builtin; eauto.
+    + eapply exec_step_external; eauto.
+Qed.
+
+End PRESERVATION_ASMVLIW.*)
+
+Section PRESERVATION.
+
+Variable lk: aarch64_linker.
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct: 
+  forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog).
+Proof.
+  eapply transf_program_correct_Asmblock; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/aarch64/RTLpathSE_simplify.v b/aarch64/RTLpathSE_simplify.v
new file mode 100644
index 00000000..1ee7dac5
--- /dev/null
+++ b/aarch64/RTLpathSE_simplify.v
@@ -0,0 +1,42 @@
+Require Import Coqlib Floats Values Memory.
+Require Import Integers.
+Require Import Op Registers.
+Require Import RTLpathSE_theory.
+Require Import RTLpathSE_simu_specs.
+
+(** Target op simplifications using "fake" values *)
+
+Definition target_op_simplify (op: operation) (lr: list reg) (hst: hsistate_local): option hsval :=
+  None.
+
+Definition target_cbranch_expanse (prev: hsistate_local) (cond: condition) (args: list reg) : option (condition * list_hsval) :=
+  None.
+
+(* Main proof of simplification *)
+
+Lemma target_op_simplify_correct op lr hst fsv ge sp rs0 m0 st args m: forall
+   (H: target_op_simplify op lr hst = Some fsv)
+   (REF: hsilocal_refines ge sp rs0 m0 hst st)
+   (OK0: hsok_local ge sp rs0 m0 hst)
+   (OK1: seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args)
+   (OK2: seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+   seval_sval ge sp (hsval_proj fsv) rs0 m0 = eval_operation ge sp op args m.
+Proof.
+  unfold target_op_simplify; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ? ? ?.
+  congruence.
+Qed.
+
+Lemma target_cbranch_expanse_correct hst c l ge sp rs0 m0 st c' l': forall
+  (TARGET: target_cbranch_expanse hst c l = Some (c', l'))
+  (LREF : hsilocal_refines ge sp rs0 m0 hst st)
+  (OK: hsok_local ge sp rs0 m0 hst),
+  seval_condition ge sp c' (hsval_list_proj l') (si_smem st) rs0 m0 =
+  seval_condition ge sp c (list_sval_inj (map (si_sreg st) l)) (si_smem st) rs0 m0.
+Proof.
+  unfold target_cbranch_expanse, seval_condition; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ?.
+  congruence.
+Qed.
+Global Opaque target_op_simplify.
+Global Opaque target_cbranch_expanse.
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   red; intros; unfold divls_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
 Proof.
   red; intros; unfold modls_base, modl_aux.
   exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
 Proof.
   red; intros; unfold divlu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
 Proof.
   red; intros; unfold modlu_base, modl_aux.
   exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
   rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -726,42 +730,42 @@ Qed.
 
 Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists.  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
 Proof.
-  red; intros; TrivialExists.
+  red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
   Val.divs x y = Some z ->
   exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold divs_base; TrivialExists.
+  intros; unfold divs_base; TrivialExists; cbn.
+  rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
 Proof.
   intros; unfold mods_base, mod_aux.
   exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  cbn. rewrite A. reflexivity.
 Qed.
 
 Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
   exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
   intros; unfold divu_base; TrivialExists.
+  cbn. rewrite H1. reflexivity.
 Qed.
 
 Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
 Proof.
   intros; unfold modu_base, mod_aux.
   exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
-  TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+  TrivialExists. repeat (econstructor; eauto with evalexpr).
+  rewrite A. reflexivity.
 Qed.
 
 Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
   destruct x; simpl in H0; try discriminate.
   change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
   rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 (** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
   Val.intoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
 Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
   Val.intofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
 Proof.
   intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros; TrivialExists.
+  intros; TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
 Proof.
   intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
 - TrivialExists.
-- TrivialExists.
+- TrivialExists.  cbn.  rewrite H0. reflexivity.
 Qed.
 
 (** Selection *)
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index 8d74daf4..53959152 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -21,24 +21,9 @@ open AisAnnot
 open PrintAsmaux
 open Fileinfo
 
-(* Recognition of FP numbers that are supported by the fmov #imm instructions:
-   "a normalized binary floating point encoding with 1 sign bit,
-    4 bits of fraction and a 3-bit exponent"
-*)
-
-let is_immediate_float64 bits =
-  let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
-  let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
-  exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
-
-let is_immediate_float32 bits =
-  let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in
-  let mant = Int32.logand bits 0x7F_FFFFl in
-  exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant
-
 (* Module containing the printing functions *)
 
-module Target : TARGET =
+module Target (*: TARGET*) =
   struct
 
 (* Basic printing functions *)
@@ -120,13 +105,13 @@ module Target : TARGET =
       output_string oc (match sz with D -> dreg_name r | S -> sreg_name r)
 
     let preg_asm oc ty = function
-      | IR r -> if ty = Tint then wreg oc r else xreg oc r
-      | FR r -> if ty = Tsingle then sreg oc r else dreg oc r
+      | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r
+      | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r
       | _    -> assert false
 
     let preg_annot = function
-      | IR r -> xreg_name r
-      | FR r -> dreg_name r
+      | DR (IR (RR1 r)) -> xreg_name r
+      | DR (FR r) -> dreg_name r
       | _ -> assert false
 
 (* Names of sections *)
@@ -246,8 +231,8 @@ module Target : TARGET =
       fprintf oc "%s:\n" lbl;
       fprintf oc "	ldaxr	x17, [x15]\n";
       fprintf oc "	add	x17, x17, 1\n";
-      fprintf oc "	stlxr	w17, x17, [x15]\n";
-      fprintf oc "	cbnz	w17, %s\n" lbl;
+      fprintf oc "	stlxr	w29, x17, [x15]\n";
+      fprintf oc "	cbnz	w29, %s\n" lbl;
       fprintf oc "%s end profiling %a %d\n" comment
         Profilingaux.pp_id id kind;;
 
@@ -294,7 +279,9 @@ module Target : TARGET =
         (* the upper 32 bits of Xrd are set to 0, performing zero-extension *)
     | Pldrsw(rd, a) ->
         fprintf oc "	ldrsw	%a, %a\n" xreg rd addressing a
-    | Pldp(rd1, rd2, a) ->
+    | Pldpw(rd1, rd2, _, _, a) ->
+        fprintf oc "	ldp	%a, %a, %a\n" wreg rd1 wreg rd2 addressing a
+    | Pldpx(rd1, rd2, _, _, a) ->
         fprintf oc "	ldp	%a, %a, %a\n" xreg rd1 xreg rd2 addressing a
     | Pstrw(rs, a) | Pstrw_a(rs, a) ->
         fprintf oc "	str	%a, %a\n" wreg rs addressing a
@@ -304,7 +291,9 @@ module Target : TARGET =
         fprintf oc "	strb	%a, %a\n" wreg rs addressing a
     | Pstrh(rs, a) ->
         fprintf oc "	strh	%a, %a\n" wreg rs addressing a
-    | Pstp(rs1, rs2, a) ->
+    | Pstpw(rs1, rs2, _, _, a) ->
+        fprintf oc "	stp	%a, %a, %a\n" wreg rs1 wreg rs2 addressing a
+    | Pstpx(rs1, rs2, _, _, a) ->
         fprintf oc "	stp	%a, %a, %a\n" xreg rs1 xreg rs2 addressing a
     (* Integer arithmetic, immediate *)
     | Paddimm(sz, rd, r1, n) ->
@@ -399,6 +388,8 @@ module Target : TARGET =
         fprintf oc "	rev	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     | Prev16(sz, rd, r1) ->
         fprintf oc "	rev16	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Prbit(sz, rd, r1) ->
+        fprintf oc "	rbit	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     (* Conditional data processing *)
     | Pcsel(rd, r1, r2, c) ->
         fprintf oc "	csel	%a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c)
@@ -426,12 +417,20 @@ module Target : TARGET =
         fprintf oc "	str	%a, %a\n" sreg rd addressing a
     | Pstrd(rd, a) | Pstrd_a(rd, a) ->
         fprintf oc "	str	%a, %a\n" dreg rd addressing a
+    | Pldps(rd1, rd2, _, _, a) ->
+        fprintf oc "	ldp	%a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+    | Pldpd(rd1, rd2, _, _, a) ->
+        fprintf oc "	ldp	%a, %a, %a\n" dreg rd1 dreg rd2 addressing a
+    | Pstps(rd1, rd2, _, _, a) ->
+        fprintf oc "	stp	%a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+    | Pstpd(rd1, rd2, _, _, a) ->
+        fprintf oc "	stp	%a, %a, %a\n" dreg rd1 dreg rd2 addressing a
     (* Floating-point move *)
     | Pfmov(rd, r1) ->
         fprintf oc "	fmov	%a, %a\n" dreg rd dreg r1
     | Pfmovimmd(rd, f) ->
         let d = camlint64_of_coqint (Floats.Float.to_bits f) in
-        if is_immediate_float64 d then
+        if is_immediate_float64 f then
           fprintf oc "	fmov	%a, #%.7f\n" dreg rd (Int64.float_of_bits d)
         else begin
           let lbl = label_literal64 d in
@@ -440,7 +439,7 @@ module Target : TARGET =
         end
     | Pfmovimms(rd, f) ->
         let d = camlint_of_coqint (Floats.Float32.to_bits f) in
-        if is_immediate_float32 d then
+        if is_immediate_float32 f then
           fprintf oc "	fmov	%a, #%.7f\n" sreg rd (Int32.float_of_bits d)
         else begin
           let lbl = label_literal32 d in
@@ -489,6 +488,10 @@ module Target : TARGET =
         fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
     | Pfnmsub(sz, rd, r1, r2, r3) ->
         fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+    | Pfmax (sz, rd, r1, r2) ->
+        fprintf oc "	fmax	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+    | Pfmin (sz, rd, r1, r2) ->
+        fprintf oc "	fmin	%a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
     (* Floating-point comparison *)
     | Pfcmp(sz, r1, r2) ->
         fprintf oc "	fcmp	%a, %a\n" freg (sz, r1) freg (sz, r2)
@@ -498,8 +501,8 @@ module Target : TARGET =
     | Pfsel(rd, r1, r2, c) ->
         fprintf oc "	fcsel	%a, %a, %a, %s\n" dreg rd dreg r1 dreg r2 (condition_name c)
     (* No-op *)
-    | Pnop ->
-        fprintf oc "	nop\n"
+    | Pnop -> ()
+        (*fprintf oc "	nop\n"*)
     (* Pseudo-instructions expanded in Asmexpand *)
     | Pallocframe(sz, linkofs) -> assert false
     | Pfreeframe(sz, linkofs) -> assert false
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
   | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
   | Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
   | Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
 
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
 
diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v
index e82056e2..69edeb55 100644
--- a/aarch64/extractionMachdep.v
+++ b/aarch64/extractionMachdep.v
@@ -6,6 +6,9 @@
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
 (* *********************************************************************)
@@ -21,4 +24,4 @@ Extract Constant Archi.pic_code => "fun () -> false".  (* for the time being *)
 (* Asm *)
 Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false".
 Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false".
-Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned".
+Extract Constant Asmblockgen.symbol_is_aligned => "C2C.atom_is_aligned".
diff --git a/arm/Archi.v b/arm/Archi.v
index 738341cc..c334c2a7 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -16,9 +16,8 @@
 
 (** Architecture-dependent parameters for ARM *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := false.
 
diff --git a/arm/Asm.v b/arm/Asm.v
index 194074ac..293df274 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -696,7 +696,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pfsubd r1 r2 r3 =>
       Next (nextinstr (rs#r1 <- (Val.subf rs#r2 rs#r3))) m
   | Pflid r1 f =>
-      Next (nextinstr (rs#r1 <- (Vfloat f))) m
+      Next (nextinstr (rs#IR14 <- Vundef #r1 <- (Vfloat f))) m
   | Pfcmpd r1 r2 =>
       Next (nextinstr (compare_float rs rs#r1 rs#r2)) m
   | Pfcmpzd r1 =>
@@ -923,7 +923,7 @@ Inductive step: state -> trace -> state -> Prop :=
       external_call ef ge vargs m t vres m' ->
       rs' = nextinstr
               (set_res res vres
-                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+                (undef_regs (IR IR14 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
       step (State rs m) t (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml
index 6996c9bb..104bfc94 100644
--- a/arm/Asmexpand.ml
+++ b/arm/Asmexpand.ml
@@ -349,9 +349,7 @@ let expand_builtin_inline name args res =
      emit (Prsb(res, res, SOimm _32));
      emit (Plabel lbl2)
   (* Float arithmetic *)
-  | "__builtin_fabs",  [BA(FR a1)], BR(FR res) ->
-     emit (Pfabsd (res,a1))
-  | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
      emit (Pfsqrt (res,a1))
   (* 64-bit integer arithmetic *)
   | "__builtin_negl", [BA_splitlong(BA(IR ah), BA(IR al))],
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 92ae524f..fd70c9ad 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -225,7 +225,7 @@ Proof.
   TailNoLabel.
   eapply tail_nolabel_trans; TailNoLabel.
 Qed.
-Hint Resolve indexed_memory_access_label.
+Hint Resolve indexed_memory_access_label: labels.
 
 Remark loadind_label:
   forall base ofs ty dst k c, loadind base ofs ty dst k = OK c -> tail_nolabel k c.
@@ -761,13 +761,15 @@ Opaque loadind.
   econstructor; eauto.
   instantiate (2 := tf); instantiate (1 := x).
   unfold nextinstr. rewrite Pregmap.gss.
-  rewrite set_res_other. rewrite undef_regs_other_2.
+  rewrite set_res_other. simpl. rewrite undef_regs_other_2.
+  rewrite Pregmap.gso by auto with asmgen.
   rewrite <- H1. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
   rewrite preg_notin_charact. intros. auto with asmgen.
   auto with asmgen.
   apply agree_nextinstr. eapply agree_set_res; auto.
-  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto.
+  eapply agree_undef_regs; eauto.
+  intros. simpl. rewrite undef_regs_other_2; auto. apply Pregmap.gso. auto with asmgen.
   congruence.
 
 - (* Mgoto *)
diff --git a/arm/CBuiltins.ml b/arm/CBuiltins.ml
index d6a1ea35..6462a8c5 100644
--- a/arm/CBuiltins.ml
+++ b/arm/CBuiltins.ml
@@ -22,19 +22,6 @@ let builtins = {
     "__builtin_va_list", TPtr(TVoid [], [])
   ];
   builtin_functions = [
-    (* Integer arithmetic *)
-    "__builtin_clz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_clzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_clzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
-    "__builtin_ctz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_ctzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_ctzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     (* Memory accesses *)
     "__builtin_read16_reversed",
       (TInt(IUShort, []), [TPtr(TInt(IUShort, [AConst]), [])], false);
diff --git a/arm/CSE2deps.v b/arm/CSE2deps.v
index d48dabf3..4592f408 100644
--- a/arm/CSE2deps.v
+++ b/arm/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/arm/CSE2depsproof.v b/arm/CSE2depsproof.v
index 28ef41ca..7dd0914e 100644
--- a/arm/CSE2depsproof.v
+++ b/arm/CSE2depsproof.v
@@ -105,6 +105,68 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
+
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    rewrite ptrofs_modulus in *.
+    simpl in *.
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    
+    all: try rewrite ptrofs_modulus in *.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
 End SOUNDNESS.
 
 
@@ -125,7 +187,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -135,7 +197,15 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml
new file mode 120000
index 00000000..ee2674bf
--- /dev/null
+++ b/arm/ExpansionOracle.ml
@@ -0,0 +1 @@
+../aarch64/ExpansionOracle.ml
+\ No newline at end of file
diff --git a/arm/Machregsaux.ml b/arm/Machregsaux.ml
index 14c75155..24a33e9e 100644
--- a/arm/Machregsaux.ml
+++ b/arm/Machregsaux.ml
@@ -12,27 +12,7 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register s =  s = "R14"  || s = "r14"
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r =
-  List.mem r Conventions1.int_callee_save_regs
-  || List.mem r Conventions1.float_callee_save_regs
                                               
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
diff --git a/arm/Machregsaux.mli b/arm/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/arm/Machregsaux.mli
+++ b/arm/Machregsaux.mli
@@ -12,9 +12,6 @@
 
 (** Auxiliary functions on machine registers *)
 
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
 val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
 
 val class_of_type: AST.typ -> int
diff --git a/arm/Op.v b/arm/Op.v
index 25e48ce1..ff5fe815 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -718,7 +718,7 @@ Definition is_trivial_op (op: operation) : bool :=
 
 (** Operations that depend on the memory state. *)
 
-Definition condition_depends_on_memory (c: condition) : bool :=
+Definition cond_depends_on_memory (c: condition) : bool :=
   match c with
   | Ccompu _ | Ccompushift _ _| Ccompuimm _ _ => true
   | _ => false
@@ -726,14 +726,14 @@ Definition condition_depends_on_memory (c: condition) : bool :=
 
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp c => condition_depends_on_memory c
-  | Osel c ty => condition_depends_on_memory c
+  | Ocmp c => cond_depends_on_memory c
+  | Osel c ty => cond_depends_on_memory c
   | _ => false
   end.
 
-Lemma condition_depends_on_memory_correct:
+Lemma cond_depends_on_memory_correct:
   forall c args m1 m2,
-  condition_depends_on_memory c = false ->
+  cond_depends_on_memory c = false ->
   eval_condition c args m1 = eval_condition c args m2.
 Proof.
   intros. destruct c; simpl; auto; discriminate.
@@ -745,12 +745,36 @@ Lemma op_depends_on_memory_correct:
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
   intros until m2. destruct op; simpl; try congruence; intros C.
-- f_equal; f_equal; apply condition_depends_on_memory_correct; auto.
+- f_equal; f_equal; apply cond_depends_on_memory_correct; auto.
 - destruct args; auto. destruct args; auto.
-  rewrite (condition_depends_on_memory_correct c args m1 m2 C).
+  rewrite (cond_depends_on_memory_correct c args m1 m2 C).
   auto.
 Qed.
 
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct c; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_operation (op: operation) : list ident :=
diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..9885fd52
--- /dev/null
+++ b/arm/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../x86/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/arm/RTLpathSE_simplify.v b/arm/RTLpathSE_simplify.v
new file mode 120000
index 00000000..55bf0e52
--- /dev/null
+++ b/arm/RTLpathSE_simplify.v
@@ -0,0 +1 @@
+../aarch64/RTLpathSE_simplify.v
+\ No newline at end of file
diff --git a/backend/CSE3.v b/backend/CSE3.v
index df1c2bfc..746ba399 100644
--- a/backend/CSE3.v
+++ b/backend/CSE3.v
@@ -53,12 +53,34 @@ Definition forward_move_l_b (rb : RB.t) (xl : list reg) :=
 Definition subst_args fmap pc xl :=
   forward_move_l_b (PMap.get pc fmap) xl.
 
+Definition find_cond_in_fmap fmap pc cond args :=
+  if Compopts.optim_CSE3_conditions tt
+  then
+    match PMap.get pc fmap with
+    | Some rel =>
+      if is_condition_present (ctx:=ctx) pc rel cond args
+      then Some true
+      else
+        let ncond := negate_condition cond in
+        if is_condition_present (ctx:=ctx) pc rel ncond args
+        then Some false
+        else let args' := subst_args fmap pc args in
+             if is_condition_present (ctx:=ctx) pc rel cond args'
+             then Some true
+             else if is_condition_present (ctx:=ctx) pc rel ncond args'
+                  then Some false
+                  else None
+    | None => None
+    end
+  else None.
+
 Definition transf_instr (fmap : PMap.t RB.t)
            (pc: node) (instr: instruction) :=
   match instr with
   | Iop op args dst s =>
     let args' := subst_args fmap pc args in
-    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    match (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
+           then None else find_op_in_fmap fmap pc op args') with
     | None => Iop op args' dst s
     | Some src => Iop Omove (src::nil) dst s
     end
@@ -75,7 +97,11 @@ Definition transf_instr (fmap : PMap.t RB.t)
   | Itailcall sig ros args =>
     Itailcall sig ros (subst_args fmap pc args)
   | Icond cond args s1 s2 expected =>
-    Icond cond (subst_args fmap pc args) s1 s2 expected
+    let args' := subst_args fmap pc args in
+    match find_cond_in_fmap fmap pc cond args with
+    | None => Icond cond args' s1 s2 expected
+    | Some b => Inop (if b then s1 else s2)
+    end
   | Ijumptable arg tbl =>
     Ijumptable (subst_arg fmap pc arg) tbl
   | Ireturn (Some arg) =>
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
index ade79c28..75e00f67 100644
--- a/backend/CSE3analysis.v
+++ b/backend/CSE3analysis.v
@@ -145,18 +145,17 @@ Proof.
   exact peq.
 Defined.
 
-Record equation :=
-  mkequation
-    { eq_lhs : reg;
-      eq_op : sym_op;
-      eq_args : list reg }.
+Inductive equation_or_condition :=
+| Equ : reg -> sym_op -> list reg -> equation_or_condition
+| Cond : condition -> list reg -> equation_or_condition.
 
 Definition eq_dec_equation :
-  forall eq eq' : equation, {eq = eq'} + {eq <> eq'}.
+  forall eq eq' : equation_or_condition, {eq = eq'} + {eq <> eq'}.
 Proof.
   generalize peq.
   generalize eq_dec_sym_op.
   generalize eq_dec_args.
+  generalize eq_condition.
   decide equality.
 Defined.
 
@@ -168,22 +167,42 @@ Definition add_i_j (i : reg) (j : eq_id) (m : Regmap.t PSet.t) :=
 Definition add_ilist_j (ilist : list reg) (j : eq_id) (m : Regmap.t PSet.t) :=
   List.fold_left (fun already i => add_i_j i j already) ilist m.
 
-Definition get_reg_kills (eqs : PTree.t equation) :
+Definition get_reg_kills (eqs : PTree.t equation_or_condition) :
   Regmap.t PSet.t :=
-  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
-                add_i_j (eq_lhs eq) eqno
-                        (add_ilist_j (eq_args eq) eqno already)) eqs
+  PTree.fold (fun already (eqno : eq_id) (eq_cond : equation_or_condition) =>
+                match eq_cond with
+                | Equ lhs sop args =>
+                  add_i_j lhs eqno
+                          (add_ilist_j args eqno already)
+                | Cond cond args => add_ilist_j args eqno already
+                end) eqs
              (PMap.init PSet.empty).
 
-Definition eq_depends_on_mem eq :=
-  match eq_op eq with
-  | SLoad _ _ => true
-  | SOp op => op_depends_on_memory op
+Definition eq_cond_depends_on_mem eq_cond :=
+  match eq_cond with
+  | Equ lhs sop args =>
+    match sop with
+    | SLoad _ _ => true
+    | SOp op => op_depends_on_memory op
+    end
+  | Cond cond args => cond_depends_on_memory cond
+  end.
+
+Definition eq_cond_depends_on_store eq_cond :=
+  match eq_cond with
+  | Equ _ (SLoad _ _) _ => true
+  | _ => false
   end.
 
-Definition get_mem_kills (eqs : PTree.t equation) : PSet.t :=
-  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
-                if eq_depends_on_mem eq
+Definition get_mem_kills (eqs : PTree.t equation_or_condition) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                if eq_cond_depends_on_mem eq
+                then PSet.add eqno already
+                else already) eqs PSet.empty.
+
+Definition get_store_kills (eqs : PTree.t equation_or_condition) : PSet.t :=
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                if eq_cond_depends_on_store eq
                 then PSet.add eqno already
                 else already) eqs PSet.empty.
 
@@ -203,20 +222,25 @@ Proof.
   - right; congruence.
 Qed.
 
-Definition get_moves (eqs : PTree.t equation) :
+Definition get_moves (eqs : PTree.t equation_or_condition) :
   Regmap.t PSet.t :=
-  PTree.fold (fun already (eqno : eq_id) (eq : equation) =>
-                if is_smove (eq_op eq)
-                then add_i_j (eq_lhs eq) eqno already
-                else already) eqs (PMap.init PSet.empty).
+  PTree.fold (fun already (eqno : eq_id) (eq : equation_or_condition) =>
+                match eq with
+                | Equ lhs sop args =>
+                  if is_smove sop
+                  then add_i_j lhs eqno already
+                  else already
+                | _ => already
+                end) eqs (PMap.init PSet.empty).
   
 Record eq_context := mkeqcontext
-                       { eq_catalog : eq_id -> option equation;
-                         eq_find_oracle : node -> equation -> option eq_id;
-                         eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t;
-                         eq_kill_reg : reg -> PSet.t;
-                         eq_kill_mem : unit -> PSet.t;
-                         eq_moves : reg -> PSet.t }.
+              { eq_catalog : eq_id -> option equation_or_condition;
+                eq_find_oracle : node -> equation_or_condition -> option eq_id;
+                eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t;
+                eq_kill_reg : reg -> PSet.t;
+                eq_kill_mem : unit -> PSet.t;
+                eq_kill_store : unit -> PSet.t;
+                eq_moves : reg -> PSet.t }.
 
 Section OPERATIONS.
   Context {ctx : eq_context}.
@@ -238,10 +262,10 @@ Section OPERATIONS.
     | None => x
     | Some eqno =>
       match eq_catalog ctx eqno with
-      | Some eq =>
-        if is_smove (eq_op eq) && peq x (eq_lhs eq)
+      | Some (Equ lhs sop args) =>
+        if is_smove sop && peq x lhs
         then
-          match eq_args eq with
+          match args with
           | src::nil => src
           | _ => x
           end
@@ -256,7 +280,7 @@ Section OPERATIONS.
   Section PER_NODE.
     Variable no : node.
     
-  Definition eq_find  (eq : equation) :=
+  Definition eq_find  (eq : equation_or_condition) :=
     match eq_find_oracle ctx no eq with
     | Some id =>
       match eq_catalog ctx id with
@@ -266,28 +290,34 @@ Section OPERATIONS.
     | None => None
     end.
 
+  Definition is_condition_present
+           (rel : RELATION.t) (cond : condition) (args : list reg) :=
+    match eq_find (Cond cond args) with
+    | Some id => PSet.contains rel id
+    | None => false
+    end.
 
   Definition rhs_find (sop : sym_op) (args : list reg) (rel : RELATION.t) : option reg :=
     match pick_source (PSet.elements (PSet.inter (eq_rhs_oracle ctx no sop args) rel)) with
     | None => None
     | Some src =>
       match eq_catalog ctx src with
-      | None => None
-      | Some eq =>
-        if eq_dec_sym_op sop (eq_op eq) && eq_dec_args args (eq_args eq)
-        then Some (eq_lhs eq)
+      | Some (Equ eq_lhs eq_sop eq_args) =>
+        if eq_dec_sym_op sop eq_sop && eq_dec_args args eq_args
+        then Some eq_lhs
         else None
+      | _ => None
       end
     end.
 
   Definition oper2 (dst : reg) (op: sym_op)(args : list reg)
            (rel : RELATION.t) : RELATION.t :=
-    let rel' := kill_reg dst rel in
-    match eq_find {| eq_lhs := dst;
-                     eq_op  := op;
-                     eq_args:= args |} with
-    | Some id => PSet.add id rel'
-    | None => rel'
+    match eq_find (Equ dst op args) with
+    | Some id =>
+      if PSet.contains rel id
+      then rel
+      else PSet.add id (kill_reg dst rel)
+    | None => kill_reg dst rel
     end.
 
   Definition oper1 (dst : reg) (op: sym_op) (args : list reg)
@@ -298,13 +328,20 @@ Section OPERATIONS.
 
   
   Definition move (src dst : reg) (rel : RELATION.t) : RELATION.t :=
-    match eq_find {| eq_lhs := dst;
-                     eq_op  := SOp Omove;
-                     eq_args:= src::nil |} with
-    | Some eq_id => PSet.add eq_id (kill_reg dst rel)
-    | None => kill_reg dst rel
-    end.
+    if peq src dst
+    then rel
+    else
+      match eq_find (Equ dst (SOp Omove) (src::nil)) with
+      | Some eq_id => PSet.add eq_id (kill_reg dst rel)
+      | None => kill_reg dst rel
+      end.
 
+  Definition is_trivial_sym_op sop :=
+    match sop with
+    | SOp op => is_trivial_op op
+    | SLoad _ _ => false
+    end.
+  
   Definition oper (dst : reg) (op: sym_op) (args : list reg)
              (rel : RELATION.t) : RELATION.t :=
     if is_smove op
@@ -316,15 +353,24 @@ Section OPERATIONS.
       end
     else
       let args' := forward_move_l rel args in
-      match rhs_find op args' rel with
+      match rhs_find op args rel with
       | Some r =>
         if Compopts.optim_CSE3_glb tt
         then RELATION.glb (move r dst rel)
-                          (oper1 dst op args' rel)
-        else oper1 dst op args' rel
-      | None => oper1 dst op args' rel
+                          (RELATION.glb
+                             (oper1 dst op args rel)
+                             (oper1 dst op args' rel))
+        else RELATION.glb
+               (oper1 dst op args rel)
+               (oper1 dst op args' rel)
+      | None => RELATION.glb
+                  (oper1 dst op args rel)
+                  (oper1 dst op args' rel)
       end.
   
+  Definition kill_store (rel : RELATION.t) : RELATION.t :=
+    PSet.subtract rel (eq_kill_store ctx tt).
+
   Definition clever_kill_store
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
              (src : reg)
@@ -333,15 +379,15 @@ Section OPERATIONS.
       (PSet.filter
          (fun eqno =>
             match eq_catalog ctx eqno with
-            | None => false
-            | Some eq =>
-              match eq_op eq with
+            | Some (Equ eq_lhs eq_sop eq_args) =>
+              match eq_sop with
               | SOp op => true
               | SLoad chunk' addr' =>
-                may_overlap chunk addr args chunk' addr' (eq_args eq)
+                may_overlap chunk addr args chunk' addr' eq_args
               end
+            | _ => false
             end)
-         (PSet.inter rel (eq_kill_mem ctx tt))).
+         (PSet.inter rel (eq_kill_store ctx tt))).
 
   Definition store2
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -349,7 +395,7 @@ Section OPERATIONS.
              (rel : RELATION.t) : RELATION.t :=
     if Compopts.optim_CSE3_alias_analysis tt
     then clever_kill_store chunk addr args src rel
-    else kill_mem rel.
+    else kill_store rel.
 
   Definition store1
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -358,19 +404,27 @@ Section OPERATIONS.
     let rel' := store2 chunk addr args src rel in
     if loadv_storev_compatible_type chunk ty
     then
-      match eq_find {| eq_lhs := src;
-                       eq_op  := SLoad chunk addr;
-                       eq_args:= args |} with
+      match eq_find (Equ src (SLoad chunk addr) args) with
       | Some id => PSet.add id rel'
       | None => rel'
       end
     else rel'.
     
-  Definition store
+  Definition store (tenv : typing_env)
              (chunk : memory_chunk) (addr: addressing) (args : list reg)
-             (src : reg) (ty: typ)
+             (src : reg)
              (rel : RELATION.t) : RELATION.t :=
-    store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel.
+    let args' := forward_move_l rel args in
+    let src' := forward_move rel src in
+    let tsrc := tenv src in
+    let tsrc' := tenv src' in
+    RELATION.glb
+      (RELATION.glb
+         (store1 chunk addr args src tsrc rel)
+         (store1 chunk addr args' src tsrc rel))
+      (RELATION.glb
+         (store1 chunk addr args src' tsrc' rel)
+         (store1 chunk addr args' src' tsrc' rel)).
 
   Definition kill_builtin_res res rel :=
     match res with
@@ -407,28 +461,55 @@ Section OPERATIONS.
     | _ => rel
     end.
 
-  Definition apply_instr (tenv : typing_env) (instr : RTL.instruction) (rel : RELATION.t) : RB.t :=
+  Definition apply_cond1 cond args (rel : RELATION.t) : RB.t :=
+    match eq_find (Cond (negate_condition cond) args) with
+    | Some eq_id =>
+      if PSet.contains rel eq_id
+      then RB.bot
+      else Some rel
+    | None => Some rel
+    end.
+
+  Definition apply_cond0 cond args (rel : RELATION.t) : RELATION.t :=
+    match eq_find (Cond cond args) with
+    | Some eq_id => PSet.add eq_id rel
+    | None => rel
+    end.
+
+  Definition apply_cond cond args (rel : RELATION.t) : RB.t :=
+    if Compopts.optim_CSE3_conditions tt
+    then 
+      match apply_cond1 cond args rel with
+      | Some rel => Some (apply_cond0 cond args rel)
+      | None => RB.bot
+      end
+    else Some rel.
+      
+  Definition apply_instr (tenv : typing_env) (instr : RTL.instruction) (rel : RELATION.t) : list (node * RB.t) :=
   match instr with
-  | Inop _
-  | Icond _ _ _ _ _
-  | Ijumptable _ _ => Some rel
-  | Istore chunk addr args src _ =>
-    Some (store chunk addr args src (tenv (forward_move rel src)) rel)
-  | Iop op args dst _ => Some (oper dst (SOp op) args rel)
-  | Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel)
-  | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
-  | Ibuiltin ef _ res _ => Some (kill_builtin_res res (apply_external_call ef rel))
-  | Itailcall _ _ _ | Ireturn _ => RB.bot
+  | Inop pc' => (pc', (Some rel))::nil
+  | Icond cond args ifso ifnot _ =>
+    (ifso,  (apply_cond cond args rel))::
+    (ifnot, (apply_cond (negate_condition cond) args rel))::nil
+  | Ijumptable _ targets => List.map (fun pc' => (pc', (Some rel))) targets
+  | Istore chunk addr args src pc' =>
+    (pc', (Some (store tenv chunk addr args src rel)))::nil
+  | Iop op args dst pc' => (pc', (Some (oper dst (SOp op) args rel)))::nil
+  | Iload trap chunk addr args dst pc' => (pc', (Some (oper dst (SLoad chunk addr) args rel)))::nil
+  | Icall _ _ _ dst pc' => (pc', (Some (kill_reg dst (kill_mem rel))))::nil
+  | Ibuiltin ef _ res pc' => (pc', (Some (kill_builtin_res res (apply_external_call ef rel))))::nil
+  | Itailcall _ _ _ | Ireturn _ => nil
   end.
   End PER_NODE.
 
-Definition apply_instr' (tenv : typing_env) code (pc : node) (ro : RB.t) : RB.t :=
-  match ro with
-  | None => None
-  | Some x =>
-    match code ! pc with
-    | None => RB.bot
-    | Some instr => apply_instr pc tenv instr x
+Definition apply_instr' (tenv : typing_env) code (pc : node) (ro : RB.t) :
+  list (node * RB.t)  :=
+  match code ! pc with
+  | None => nil
+  | Some instr => 
+    match ro with
+    | None => List.map (fun pc' => (pc', RB.bot)) (successors_instr instr)
+    | Some x => apply_instr pc tenv instr x
     end
   end.
 
@@ -450,30 +531,32 @@ Definition check_inductiveness (fn : RTL.function) (tenv: typing_env) (inv: inva
          match PMap.get pc inv with
          | None => true
          | Some rel =>
-           let rel' := apply_instr pc tenv instr rel in
            List.forallb
-             (fun pc' => relb_leb rel' (PMap.get pc' inv))
-             (RTL.successors_instr instr)
+             (fun szz =>
+                relb_leb (snd szz) (PMap.get (fst szz) inv))
+             (apply_instr pc tenv instr rel)
          end).
+(* No longer used. Incompatible with transfer functions that yield a different result depending on the successor.
 
 Definition internal_analysis
   (tenv : typing_env)
   (f : RTL.function) : option invariants := DS.fixpoint
   (RTL.fn_code f) RTL.successors_instr
   (apply_instr' tenv (RTL.fn_code f)) (RTL.fn_entrypoint f) (Some RELATION.top).
-
+*)
 End OPERATIONS.
 
 Record analysis_hints :=
   mkanalysis_hints
-    { hint_eq_catalog :  PTree.t equation;
-      hint_eq_find_oracle : node -> equation -> option eq_id;
+    { hint_eq_catalog :  PTree.t equation_or_condition;
+      hint_eq_find_oracle : node -> equation_or_condition -> option eq_id;
       hint_eq_rhs_oracle : node -> sym_op -> list reg -> PSet.t }.
 
 Definition context_from_hints (hints : analysis_hints) :=
   let eqs := hint_eq_catalog hints in
   let reg_kills := get_reg_kills eqs in 
   let mem_kills := get_mem_kills eqs in
+  let store_kills := get_store_kills eqs in
   let moves := get_moves eqs in
   {|
     eq_catalog := fun eq_id => PTree.get eq_id eqs;
@@ -481,5 +564,6 @@ Definition context_from_hints (hints : analysis_hints) :=
     eq_rhs_oracle  := hint_eq_rhs_oracle hints;
     eq_kill_reg := fun reg => PMap.get reg reg_kills;
     eq_kill_mem := fun _ => mem_kills;
+    eq_kill_store := fun _ => store_kills;
     eq_moves    := fun reg => PMap.get reg moves
   |}.
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
index 3e4a6b9e..efe6b600 100644
--- a/backend/CSE3analysisaux.ml
+++ b/backend/CSE3analysisaux.ml
@@ -14,9 +14,17 @@ open CSE3analysis
 open Maps
 open HashedSet
 open Camlcoq
+open Coqlib
+   
+type flattened_equation_or_condition =
+  | Flat_equ of int * sym_op * int list
+  | Flat_cond of Op.condition * int list;;
 
-let flatten_eq eq =
-  ((P.to_int eq.eq_lhs), eq.eq_op, List.map P.to_int eq.eq_args);;
+let flatten_eq = function
+  | Equ(lhs, sop, args) ->
+     Flat_equ((P.to_int lhs), sop, (List.map P.to_int args))
+  | Cond(cond, args) ->
+     Flat_cond(cond, (List.map P.to_int args));;
 
 let imp_add_i_j s i j =
   s := PMap.set i (PSet.add j (PMap.get i !s)) !s;;
@@ -39,12 +47,15 @@ let print_reg channel i =
 let print_eq channel (lhs, sop, args) =
   match sop with
   | SOp op ->
-     Printf.printf "%a = %a\n" print_reg lhs (PrintOp.print_operation print_reg) (op, args)
+     Printf.printf "%a = %a" print_reg lhs (PrintOp.print_operation print_reg) (op, args)
   | SLoad(chunk, addr) ->
-     Printf.printf "%a = %s @ %a\n" print_reg lhs (string_of_chunk chunk)
+     Printf.printf "%a = %s @ %a" print_reg lhs (string_of_chunk chunk)
        (PrintOp.print_addressing print_reg) (addr, args);;
 
-let pp_set oc s =
+let print_cond channel (cond, args) =
+  Printf.printf "cond %a" (PrintOp.print_condition print_reg) (cond, args);;
+
+let pp_intset oc s =
   Printf.fprintf oc "{ ";
   List.iter (fun i -> Printf.fprintf oc "%d; " (P.to_int i)) (PSet.elements s);
   Printf.fprintf oc "}";;
@@ -57,9 +68,14 @@ let pp_rhs oc (sop, args) =
        (PrintAST.name_of_chunk chunk)
          (PrintOp.print_addressing PrintRTL.reg) (addr, args);;
 
-let pp_eq oc eq =
-  Printf.fprintf oc "x%d = %a" (P.to_int eq.eq_lhs)
-    pp_rhs (eq.eq_op, eq.eq_args);;
+let pp_eq oc eq_cond =
+  match eq_cond with
+  | Equ(lhs, sop, args) ->
+     Printf.fprintf oc "x%d = %a" (P.to_int lhs)
+       pp_rhs (sop, args)
+  | Cond(cond, args) ->
+     Printf.fprintf oc "cond %a"
+       (PrintOp.print_condition PrintRTL.reg) (cond, args);;
 
 let pp_P oc x = Printf.fprintf oc "%d" (P.to_int x)
               
@@ -67,6 +83,151 @@ let pp_option pp oc = function
   | None -> output_string oc "none"
   | Some x -> pp oc x;;
 
+let is_trivial = function
+  | Equ(lhs, (SOp Op.Omove), [lhs']) -> lhs=lhs'
+  | _ -> false;;
+
+let rec pp_list separator pp_item chan = function
+  | [] -> ()
+  | [h] -> pp_item chan h
+  | h::t ->
+     pp_item chan h;
+     output_string chan separator;
+     pp_list separator pp_item chan t;;
+
+let pp_set separator pp_item chan s =
+  pp_list separator pp_item chan (PSet.elements s);;
+
+let pp_equation hints chan x =
+  match PTree.get x hints.hint_eq_catalog with
+  | None -> output_string chan "???"
+  | Some eq ->
+     match eq with
+     | Equ(lhs, sop, args) ->
+        print_eq chan  (P.to_int lhs, sop, List.map P.to_int args)
+     | Cond(cond, args) ->
+        print_cond chan (cond, List.map P.to_int args);;
+
+let pp_relation hints chan rel =
+  pp_set "; " (pp_equation hints) chan rel;;
+
+let pp_relation_b hints chan = function
+  | None -> output_string chan "bot"
+  | Some rel -> pp_relation hints chan rel;;
+
+let pp_results f (invariants : RB.t PMap.t) hints chan =
+  let max_pc = P.to_int (RTL.max_pc_function f) in
+  for pc=max_pc downto 1
+  do
+    Printf.fprintf chan "%d: %a\n\n" pc
+      (pp_relation_b hints) (PMap.get (P.of_int pc) invariants)
+  done
+
+module IntSet=Set.Make(struct type t=int let compare = ( - ) end);;
+
+let rec union_list prev = function
+  | [] -> prev
+  | h::t -> union_list (RB.lub prev h) t;;
+
+let rb_glb (x : RB.t) (y : RB.t) : RB.t =
+  match x, y with
+  | None, _ | _, None -> None
+  | (Some x'), (Some y') -> Some (RELATION.glb x' y');;
+
+let compute_invariants
+      (nodes : RTL.node list)
+      (entrypoint : RTL.node)
+      (tfr : RTL.node -> RB.t -> (RTL.node * RB.t) list) =
+  let todo = ref IntSet.empty
+  and invariants = ref (PMap.set entrypoint (Some RELATION.top) (PMap.init RB.bot)) in  
+  let add_todo (pc : RTL.node) =
+    todo := IntSet.add (P.to_int pc) !todo in 
+  let update_node (pc : RTL.node) =
+    (if !Clflags.option_debug_compcert > 9
+     then Printf.printf "UP updating node %d\n" (P.to_int pc));
+    let cur = PMap.get pc !invariants in
+    List.iter (fun (next_pc, next_contrib) ->
+        let previous = PMap.get next_pc !invariants in
+        let next = RB.lub previous next_contrib in
+        if not (RB.beq previous next)
+        then (
+          invariants := PMap.set next_pc next !invariants;
+          add_todo next_pc)) (tfr pc cur) in
+  add_todo entrypoint;
+  while not (IntSet.is_empty !todo) do
+    let nxt = IntSet.max_elt !todo in
+    todo := IntSet.remove nxt !todo;
+    update_node (P.of_int nxt)
+  done;
+  !invariants;;
+
+let refine_invariants
+      (nodes : RTL.node list)
+      (entrypoint : RTL.node)
+      (successors : RTL.node -> RTL.node list)
+      (predecessors : RTL.node -> RTL.node list)
+      (tfr : RTL.node -> RB.t -> (RTL.node * RB.t) list)
+      (invariants0 : RB.t PMap.t) =
+  let todo = ref IntSet.empty
+  and invariants = ref invariants0 in  
+  let add_todo (pc : RTL.node) =
+    todo := IntSet.add (P.to_int pc) !todo in 
+  let update_node (pc : RTL.node) =
+    (if !Clflags.option_debug_compcert > 9
+     then Printf.printf "DOWN updating node %d\n" (P.to_int pc));
+    if not (peq pc entrypoint)
+    then
+      let cur = PMap.get pc !invariants in
+      let nxt = union_list RB.bot
+                  (List.map
+                     (fun pred_pc->
+                       rb_glb cur
+                         (List.assoc pc (tfr pred_pc (PMap.get pred_pc !invariants))))
+                     (predecessors pc)) in
+      if not (RB.beq cur nxt)
+      then
+        begin
+          (if !Clflags.option_debug_compcert > 4
+           then Printf.printf "refining CSE3 node %d\n" (P.to_int pc));
+          List.iter add_todo (successors pc)
+        end in
+  (List.iter add_todo nodes);
+  while not (IntSet.is_empty !todo) do
+    let nxt = IntSet.max_elt !todo in
+    todo := IntSet.remove nxt !todo;
+    update_node (P.of_int nxt)
+  done;
+  !invariants;;
+
+let get_default default x ptree =
+  match PTree.get x ptree with
+  | None -> default
+  | Some y -> y;;
+
+let initial_analysis ctx tenv (f : RTL.coq_function) =
+  let tfr = apply_instr' ctx tenv f.RTL.fn_code in
+  compute_invariants
+    (List.map fst (PTree.elements f.RTL.fn_code))
+    f.RTL.fn_entrypoint tfr;;
+
+let refine_analysis ctx tenv
+      (f : RTL.coq_function) (invariants0 : RB.t PMap.t) =
+  let succ_map = RTL.successors_map f in
+  let succ_f x = get_default [] x succ_map in
+  let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in
+  let pred_f x = get_default [] x pred_map in
+  let tfr = apply_instr' ctx tenv f.RTL.fn_code in
+  refine_invariants
+    (List.map fst (PTree.elements f.RTL.fn_code))
+    f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;;
+
+let add_to_set_in_table table key item =
+  Hashtbl.add table key
+    (PSet.add item
+       (match Hashtbl.find_opt table key with
+        | None -> PSet.empty
+        | Some s -> s));;
+  
 let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
   let cur_eq_id = ref 0
   and cur_catalog = ref PTree.empty
@@ -74,10 +235,13 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
   and rhs_table = Hashtbl.create 100
   and cur_kill_reg = ref (PMap.init PSet.empty)
   and cur_kill_mem = ref PSet.empty
+  and cur_kill_store = ref PSet.empty
   and cur_moves = ref (PMap.init PSet.empty) in
   let eq_find_oracle node eq =
+    assert (not (is_trivial eq));
     let o = Hashtbl.find_opt eq_table (flatten_eq eq) in
-    (if !Clflags.option_debug_compcert > 1
+    (* FIXME (if o = None then failwith "eq_find_oracle"); *)
+    (if !Clflags.option_debug_compcert > 5
      then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node)
             pp_eq eq (pp_option pp_P) o);
     o
@@ -86,12 +250,12 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
       match Hashtbl.find_opt rhs_table (sop, List.map P.to_int args) with
       | None -> PSet.empty
       | Some s -> s in
-    (if !Clflags.option_debug_compcert > 1
+    (if !Clflags.option_debug_compcert > 5
      then Printf.printf "@%d: rhs_find %a = %a\n"
-            (P.to_int node) pp_rhs (sop, args) pp_set o);
+            (P.to_int node) pp_rhs (sop, args) pp_intset o);
     o in
   let mutating_eq_find_oracle node eq : P.t option =
-    let (flat_eq_lhs, flat_eq_op, flat_eq_args) as flat_eq = flatten_eq eq in
+    let flat_eq = flatten_eq eq in
     let o =
     match Hashtbl.find_opt eq_table flat_eq with
     | Some x ->
@@ -104,39 +268,52 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
        begin
          Hashtbl.add eq_table flat_eq coq_id;
          (cur_catalog := PTree.set coq_id eq !cur_catalog);
-         Hashtbl.add rhs_table (flat_eq_op, flat_eq_args)
-           (PSet.add coq_id
-              (match Hashtbl.find_opt rhs_table (flat_eq_op, flat_eq_args) with
-               | None -> PSet.empty
-               | Some s -> s));
-         List.iter
-           (fun reg -> imp_add_i_j cur_kill_reg reg coq_id)
-           (eq.eq_lhs :: eq.eq_args);
-         (if eq_depends_on_mem eq
+         (match flat_eq with
+          | Flat_equ(flat_eq_lhs, flat_eq_op, flat_eq_args) ->
+             add_to_set_in_table rhs_table
+               (flat_eq_op, flat_eq_args) coq_id
+          | Flat_cond(flat_eq_cond, flat_eq_args) -> ());
+         (match eq with
+          | Equ(lhs, sop, args) ->
+             List.iter
+               (fun reg -> imp_add_i_j cur_kill_reg reg coq_id)
+               (lhs :: args);
+             (match sop, args with
+              | (SOp Op.Omove), [rhs] -> imp_add_i_j cur_moves lhs coq_id
+              | _, _ -> ())
+          | Cond(cond, args) ->
+             List.iter
+               (fun reg -> imp_add_i_j cur_kill_reg reg coq_id) args
+         );
+         (if eq_cond_depends_on_mem eq
           then cur_kill_mem := PSet.add coq_id !cur_kill_mem);
-         (match eq.eq_op, eq.eq_args with
-          | (SOp Op.Omove), [rhs] -> imp_add_i_j cur_moves eq.eq_lhs coq_id
-          | _, _ -> ());
+         (if eq_cond_depends_on_store eq
+          then cur_kill_store := PSet.add coq_id !cur_kill_store);
          Some coq_id
        end
     in
-    (if !Clflags.option_debug_compcert > 1
+    (if !Clflags.option_debug_compcert > 5
      then Printf.printf "@%d: mutating_eq_find %a -> %a\n" (P.to_int node)
       pp_eq eq (pp_option pp_P) o);    
     o
   in
-  match
-    internal_analysis
-      { eq_catalog     = (fun eq_id -> PTree.get eq_id !cur_catalog);
-        eq_find_oracle = mutating_eq_find_oracle;
-        eq_rhs_oracle  = rhs_find_oracle ;
-        eq_kill_reg    = (fun reg -> PMap.get reg !cur_kill_reg);
-        eq_kill_mem    = (fun () -> !cur_kill_mem);
-        eq_moves       = (fun reg -> PMap.get reg !cur_moves)
-      } tenv f
-  with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3"
-     | Some invariants ->
-        invariants,
-        { hint_eq_catalog    = !cur_catalog;
-          hint_eq_find_oracle= eq_find_oracle;
-          hint_eq_rhs_oracle = rhs_find_oracle };;
+  let ctx = { eq_catalog     = (fun eq_id -> PTree.get eq_id !cur_catalog);
+              eq_find_oracle = mutating_eq_find_oracle;
+              eq_rhs_oracle  = rhs_find_oracle ;
+              eq_kill_reg    = (fun reg -> PMap.get reg !cur_kill_reg);
+              eq_kill_mem    = (fun () -> !cur_kill_mem);
+              eq_kill_store  = (fun () -> !cur_kill_store);
+              eq_moves       = (fun reg -> PMap.get reg !cur_moves)
+            } in
+  let invariants = initial_analysis ctx tenv f in
+  let invariants' =
+    if ! Clflags.option_fcse3_refine
+    then refine_analysis ctx tenv f invariants
+    else invariants
+  and hints = { hint_eq_catalog    = !cur_catalog;
+                hint_eq_find_oracle= eq_find_oracle;
+                hint_eq_rhs_oracle = rhs_find_oracle } in
+  (if !Clflags.option_debug_compcert > 1
+   then pp_results f invariants' hints stdout);
+  invariants', hints
+;;
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
index f4e3672d..d53cf604 100644
--- a/backend/CSE3analysisproof.v
+++ b/backend/CSE3analysisproof.v
@@ -127,17 +127,35 @@ Proof.
 Qed.
 Hint Resolve add_ilist_j_adds: cse3.
 
-Definition xlget_kills (eqs : list (eq_id * equation)) (m :  Regmap.t PSet.t) :
+Definition xlget_kills (eqs : list (eq_id * equation_or_condition))
+                       (m :  Regmap.t PSet.t) :
   Regmap.t PSet.t :=
-  List.fold_left (fun already (item : eq_id * equation) =>
-    add_i_j (eq_lhs (snd item)) (fst item)
-            (add_ilist_j (eq_args (snd item)) (fst item) already)) eqs m.
-
+  List.fold_left (fun already (item : eq_id * equation_or_condition) =>
+    match snd item with
+    | Equ lhs sop args =>
+      add_i_j lhs (fst item)
+              (add_ilist_j args (fst item) already)
+    | Cond cond args => add_ilist_j args (fst item) already
+    end) eqs m.
+
+Definition xlget_mem_kills (eqs : list (positive * equation_or_condition))
+           (m : PSet.t) : PSet.t :=
+(fold_left
+       (fun (a : PSet.t) (item : positive * equation_or_condition) =>
+          if eq_cond_depends_on_mem (snd item)
+          then PSet.add (fst item) a
+          else a
+       )
+       eqs m).
 
-Definition xlget_mem_kills (eqs : list (positive * equation)) (m : PSet.t) : PSet.t :=
+Definition xlget_store_kills (eqs : list (positive * equation_or_condition))
+           (m : PSet.t) : PSet.t :=
 (fold_left
-       (fun (a : PSet.t) (p : positive * equation) =>
-        if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a)
+       (fun (a : PSet.t) (item : positive * equation_or_condition) =>
+          if eq_cond_depends_on_store (snd item)
+          then PSet.add (fst item) a
+          else a
+       )
        eqs m).
 
 Lemma xlget_kills_monotone :
@@ -147,7 +165,8 @@ Lemma xlget_kills_monotone :
 Proof.
   induction eqs; simpl; trivial.
   intros.
-  auto with cse3.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond as [eq_lhs eq_sop eq_args | eq_cond eq_args]; auto with cse3.
 Qed.
 
 Hint Resolve xlget_kills_monotone : cse3.
@@ -159,9 +178,10 @@ Lemma xlget_mem_kills_monotone :
 Proof.
   induction eqs; simpl; trivial.
   intros.
-  destruct eq_depends_on_mem.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond_depends_on_mem.
   - apply IHeqs.
-    destruct (peq (fst a) j).
+    destruct (peq id j).
     + subst j. apply PSet.gadds.
     + rewrite PSet.gaddo by congruence.
       trivial.
@@ -170,11 +190,28 @@ Qed.
 
 Hint Resolve xlget_mem_kills_monotone : cse3.
 
+Lemma xlget_store_kills_monotone :
+  forall eqs m j,
+    PSet.contains m j = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl; trivial.
+  intros.
+  destruct a as [id eq_cond]; cbn.
+  destruct eq_cond_depends_on_store.
+  - apply IHeqs.
+    destruct (peq id j).
+    + subst j. apply PSet.gadds.
+    + rewrite PSet.gaddo by congruence.
+      trivial.
+  - auto.
+Qed.
+
+Hint Resolve xlget_store_kills_monotone : cse3.
+
 Lemma xlget_kills_has_lhs :
   forall eqs m lhs sop args j,
-    In (j, {| eq_lhs := lhs;
-              eq_op  := sop;
-              eq_args:= args |}) eqs ->
+    In (j, (Equ lhs sop args))  eqs ->
     PSet.contains (Regmap.get lhs (xlget_kills eqs m)) j = true.
 Proof.
   induction eqs; simpl.
@@ -189,9 +226,7 @@ Hint Resolve xlget_kills_has_lhs : cse3.
 
 Lemma xlget_kills_has_arg :
   forall eqs m lhs sop arg args j,
-    In (j, {| eq_lhs := lhs;
-              eq_op  := sop;
-              eq_args:= args |}) eqs ->
+    In (j, (Equ lhs sop args)) eqs ->
     In arg args ->
     PSet.contains (Regmap.get arg (xlget_kills eqs m)) j = true.
 Proof.
@@ -206,20 +241,38 @@ Qed.
 
 Hint Resolve xlget_kills_has_arg : cse3.
 
+Lemma xlget_cond_kills_has_arg :
+  forall eqs m cond arg args j,
+    In (j, (Cond cond args)) eqs ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (xlget_kills eqs m)) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros until j.
+  intros HEAD_TAIL ARG.
+  destruct HEAD_TAIL as [HEAD | TAIL]; subst; simpl.
+  - auto with cse3.
+  - eapply IHeqs; eassumption.
+Qed.
+
+Hint Resolve xlget_cond_kills_has_arg : cse3.
+
 Lemma get_kills_has_lhs :
   forall eqs lhs sop args j,
-    PTree.get j eqs = Some {| eq_lhs := lhs;
-                              eq_op  := sop;
-                              eq_args:= args |} ->
+    PTree.get j eqs = Some (Equ lhs sop args) ->
     PSet.contains (Regmap.get lhs (get_reg_kills eqs)) j = true.
 Proof.
   unfold get_reg_kills.
   intros.
   rewrite PTree.fold_spec.
   change (fold_left
-       (fun (a : Regmap.t PSet.t) (p : positive * equation) =>
-        add_i_j (eq_lhs (snd p)) (fst p)
-          (add_ilist_j (eq_args (snd p)) (fst p) a))) with xlget_kills.
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end))  with xlget_kills.
   eapply xlget_kills_has_lhs.
   apply PTree.elements_correct.
   eassumption.
@@ -229,9 +282,7 @@ Hint Resolve get_kills_has_lhs : cse3.
 
 Lemma context_from_hints_get_kills_has_lhs :
   forall hints lhs sop args j,
-    PTree.get j (hint_eq_catalog hints) = Some {| eq_lhs := lhs;
-                              eq_op  := sop;
-                              eq_args:= args |} ->
+    PTree.get j (hint_eq_catalog hints) = Some (Equ lhs sop args) ->
     PSet.contains  (eq_kill_reg (context_from_hints hints) lhs) j = true.
 Proof.
   intros; simpl.
@@ -243,9 +294,7 @@ Hint Resolve context_from_hints_get_kills_has_lhs : cse3.
 
 Lemma get_kills_has_arg :
   forall eqs lhs sop arg args j,
-    PTree.get j eqs = Some {| eq_lhs := lhs;
-                              eq_op  := sop;
-                              eq_args:= args |} ->
+    PTree.get j eqs = Some (Equ lhs sop args) ->
     In arg args ->
     PSet.contains (Regmap.get arg (get_reg_kills eqs)) j = true.
 Proof.
@@ -253,9 +302,12 @@ Proof.
   intros.
   rewrite PTree.fold_spec.
   change (fold_left
-       (fun (a : Regmap.t PSet.t) (p : positive * equation) =>
-        add_i_j (eq_lhs (snd p)) (fst p)
-          (add_ilist_j (eq_args (snd p)) (fst p) a))) with xlget_kills.
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end)) with xlget_kills.
   eapply xlget_kills_has_arg.
   - apply PTree.elements_correct.
     eassumption.
@@ -266,9 +318,7 @@ Hint Resolve get_kills_has_arg : cse3.
 
 Lemma context_from_hints_get_kills_has_arg :
   forall hints lhs sop arg args j,
-    PTree.get j (hint_eq_catalog hints) = Some {| eq_lhs := lhs;
-                              eq_op  := sop;
-                              eq_args:= args |} ->
+    PTree.get j (hint_eq_catalog hints) = Some (Equ lhs sop args) ->
     In arg args ->
     PSet.contains (eq_kill_reg (context_from_hints hints) arg) j = true.
 Proof.
@@ -279,10 +329,47 @@ Qed.
 
 Hint Resolve context_from_hints_get_kills_has_arg : cse3.
 
+Lemma get_cond_kills_has_arg :
+  forall eqs cond arg args j,
+    PTree.get j eqs = Some (Cond cond args) ->
+    In arg args ->
+    PSet.contains (Regmap.get arg (get_reg_kills eqs)) j = true.
+Proof.
+  unfold get_reg_kills.
+  intros.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : Regmap.t PSet.t) (p : positive * equation_or_condition) =>
+        match snd p with
+        | Equ lhs0 _ args0 =>
+          add_i_j lhs0 (fst p) (add_ilist_j args0 (fst p) a)
+        | Cond _ args0 => add_ilist_j args0 (fst p) a
+        end)) with xlget_kills.
+  eapply xlget_cond_kills_has_arg.
+  - apply PTree.elements_correct.
+    eassumption.
+  - assumption.
+Qed.
+
+Hint Resolve get_cond_kills_has_arg : cse3.
+
+Lemma context_from_hints_get_cond_kills_has_arg :
+  forall hints cond arg args j,
+    PTree.get j (hint_eq_catalog hints) = Some (Cond cond args) ->
+    In arg args ->
+    PSet.contains (eq_kill_reg (context_from_hints hints) arg) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_cond_kills_has_arg; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_cond_kills_has_arg : cse3.
+
 Lemma xlget_kills_has_eq_depends_on_mem :
   forall eqs eq j m,
     In (j, eq) eqs ->
-    eq_depends_on_mem eq = true ->
+    eq_cond_depends_on_mem eq = true ->
     PSet.contains (xlget_mem_kills eqs m) j = true.
 Proof.
   induction eqs; simpl.
@@ -303,17 +390,16 @@ Hint Resolve xlget_kills_has_eq_depends_on_mem : cse3.
 Lemma get_kills_has_eq_depends_on_mem :
   forall eqs eq j,
     PTree.get j eqs = Some eq ->
-    eq_depends_on_mem eq = true ->
+    eq_cond_depends_on_mem eq = true ->
     PSet.contains (get_mem_kills eqs) j = true.
 Proof.
   intros.
   unfold get_mem_kills.
   rewrite PTree.fold_spec.
   change (fold_left
-       (fun (a : PSet.t) (p : positive * equation) =>
-        if eq_depends_on_mem (snd p) then PSet.add (fst p) a else a)
-       (PTree.elements eqs) PSet.empty)
-    with (xlget_mem_kills (PTree.elements eqs) PSet.empty).
+       (fun (a : PSet.t) (p : positive * equation_or_condition) =>
+          if eq_cond_depends_on_mem (snd p) then PSet.add (fst p) a else a))
+       with xlget_mem_kills.
   eapply xlget_kills_has_eq_depends_on_mem.
   apply PTree.elements_correct.
   eassumption.
@@ -323,7 +409,7 @@ Qed.
 Lemma context_from_hints_get_kills_has_eq_depends_on_mem :
   forall hints eq j,
     PTree.get j (hint_eq_catalog hints) = Some eq ->
-    eq_depends_on_mem eq = true ->
+    eq_cond_depends_on_mem eq = true ->
     PSet.contains (eq_kill_mem (context_from_hints hints) tt) j = true.
 Proof.
   intros.
@@ -333,8 +419,65 @@ Qed.
 
 Hint Resolve context_from_hints_get_kills_has_eq_depends_on_mem : cse3.
 
-Definition eq_involves (eq : equation) (i : reg) :=
-  i = (eq_lhs eq) \/ In i (eq_args eq).
+Lemma xlget_kills_has_eq_depends_on_store :
+  forall eqs eq j m,
+    In (j, eq) eqs ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (xlget_store_kills eqs m) j = true.
+Proof.
+  induction eqs; simpl.
+  contradiction.
+  intros.
+  destruct H.
+  { subst a.
+    simpl.
+    rewrite H0.
+    apply xlget_store_kills_monotone.
+    apply PSet.gadds.
+  }
+  eauto.
+Qed.
+
+Hint Resolve xlget_kills_has_eq_depends_on_store : cse3.
+
+Lemma get_kills_has_eq_depends_on_store :
+  forall eqs eq j,
+    PTree.get j eqs = Some eq ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (get_store_kills eqs) j = true.
+Proof.
+  intros.
+  unfold get_store_kills.
+  rewrite PTree.fold_spec.
+  change (fold_left
+       (fun (a : PSet.t) (p : positive * equation_or_condition) =>
+          if eq_cond_depends_on_store (snd p) then PSet.add (fst p) a else a))
+       with xlget_store_kills.
+  eapply xlget_kills_has_eq_depends_on_store.
+  apply PTree.elements_correct.
+  eassumption.
+  trivial.
+Qed.
+  
+Lemma context_from_hints_get_kills_has_eq_depends_on_store :
+  forall hints eq j,
+    PTree.get j (hint_eq_catalog hints) = Some eq ->
+    eq_cond_depends_on_store eq = true ->
+    PSet.contains (eq_kill_store (context_from_hints hints) tt) j = true.
+Proof.
+  intros.
+  simpl.
+  eapply get_kills_has_eq_depends_on_store; eassumption.
+Qed.
+
+Hint Resolve context_from_hints_get_kills_has_eq_depends_on_store : cse3.
+
+Definition eq_involves (eq : equation_or_condition) (i : reg) :=
+  match eq with
+  | Equ lhs sop args =>
+    i = lhs \/ In i args
+  | Cond cond args =>  In i args
+  end.
 
 Section SOUNDNESS.
   Context {F V : Type}.
@@ -363,8 +506,11 @@ Section SOUNDNESS.
       end
     end.
     
-  Definition sem_eq (eq : equation) (rs : regset) (m : mem) :=
-    sem_rhs (eq_op eq) (eq_args eq) rs m (rs # (eq_lhs eq)).
+  Definition sem_eq (eq : equation_or_condition) (rs : regset) (m : mem) :=
+    match eq with
+    | Equ lhs sop args => sem_rhs sop args rs m (rs # lhs)
+    | Cond cond args => eval_condition cond (rs ## args) m = Some true
+    end.
 
   Definition sem_rel (rel : RELATION.t) (rs : regset) (m : mem) :=
     forall i eq,
@@ -398,16 +544,19 @@ Section SOUNDNESS.
 
   Hypothesis ctx_kill_reg_has_lhs :
     forall lhs sop args j,
-      eq_catalog ctx j = Some {| eq_lhs := lhs;
-                                 eq_op  := sop;
-                                 eq_args:= args |} ->
+      eq_catalog ctx j = Some (Equ lhs sop args) ->
       PSet.contains (eq_kill_reg ctx lhs) j = true.
 
   Hypothesis ctx_kill_reg_has_arg :
     forall lhs sop args j,
-      eq_catalog ctx j = Some {| eq_lhs := lhs;
-                                 eq_op  := sop;
-                                 eq_args:= args |} ->
+      eq_catalog ctx j = Some (Equ lhs sop args) ->
+      forall arg,
+      In arg args ->
+      PSet.contains (eq_kill_reg ctx arg) j = true.
+
+  Hypothesis ctx_cond_kill_reg_has_arg :
+    forall cond args j,
+      eq_catalog ctx j = Some (Cond cond args) ->
       forall arg,
       In arg args ->
       PSet.contains (eq_kill_reg ctx arg) j = true.
@@ -415,9 +564,15 @@ Section SOUNDNESS.
   Hypothesis ctx_kill_mem_has_depends_on_mem :
     forall eq j,
       eq_catalog ctx j = Some eq ->
-      eq_depends_on_mem eq = true ->
+      eq_cond_depends_on_mem eq = true ->
       PSet.contains (eq_kill_mem ctx tt) j = true.
 
+  Hypothesis ctx_kill_store_has_depends_on_store :
+    forall eq j,
+      eq_catalog ctx j = Some eq ->
+      eq_cond_depends_on_store eq = true ->
+      PSet.contains (eq_kill_store ctx tt) j = true.
+
   Theorem kill_reg_sound :
     forall rel rs m dst v,
       (sem_rel rel rs m) ->
@@ -427,8 +582,8 @@ Section SOUNDNESS.
     intros until v.
     intros REL i eq.
     specialize REL with (i := i) (eq0 := eq).
-    destruct eq as [lhs sop args]; simpl.
-    specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    destruct eq as [lhs sop args | cond args]; simpl.
+  * specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
     specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
     intuition.
     rewrite PSet.gsubtract in H.
@@ -456,6 +611,24 @@ Section SOUNDNESS.
       assumption.
     - rewrite Regmap.gso by congruence.
       assumption.
+  * specialize ctx_cond_kill_reg_has_arg with (cond := cond) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
+    simpl in *.
+    assert ({In dst args} + {~In dst args}) as IN_ARGS.
+    {
+      apply List.in_dec.
+      apply peq.
+    }
+    destruct IN_ARGS as [IN_ARGS | NOTIN_ARGS].
+    { intuition.
+      congruence.
+    }
+    rewrite subst_args_notin by assumption.
+    assumption.
   Qed.
 
   Hint Resolve kill_reg_sound : cse3.
@@ -469,14 +642,20 @@ Section SOUNDNESS.
     intros until dst.
     intros REL i eq.
     specialize REL with (i := i) (eq0 := eq).
-    destruct eq as [lhs sop args]; simpl.
-    specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
+    destruct eq as [lhs sop args | cond args]; simpl.
+  * specialize ctx_kill_reg_has_lhs with (lhs := lhs) (sop := sop) (args := args) (j := i).
     specialize ctx_kill_reg_has_arg with (lhs := lhs) (sop := sop) (args := args) (j := i) (arg := dst).
     intuition.
     rewrite PSet.gsubtract in H.
     rewrite andb_true_iff in H.
     rewrite negb_true_iff in H.
     intuition.
+  * specialize ctx_cond_kill_reg_has_arg with (cond := cond) (args := args) (j := i) (arg := dst).
+    intuition.
+    rewrite PSet.gsubtract in H.
+    rewrite andb_true_iff in H.
+    rewrite negb_true_iff in H.
+    intuition.
   Qed.
     
   Lemma pick_source_sound :
@@ -507,9 +686,11 @@ Section SOUNDNESS.
     destruct (eq_catalog ctx r) as [eq | ] eqn:CATALOG.
     2: reflexivity.
     specialize REL with (i := r) (eq0 := eq).
-    destruct (is_smove (eq_op eq)) as [MOVE | ].
+    destruct eq as [lhs sop args | cond args]; cbn in *; trivial.
+    destruct (is_smove sop) as [MOVE | ].
     2: reflexivity.
-    destruct (peq x (eq_lhs eq)).
+    rewrite MOVE in *; cbn in *.
+    destruct (peq x lhs).
     2: reflexivity.
     simpl.
     subst x.
@@ -517,9 +698,8 @@ Section SOUNDNESS.
     rewrite PSet.ginter in ELEMENT.
     rewrite andb_true_iff in ELEMENT.
     unfold sem_eq in REL.
-    rewrite MOVE in REL.
     simpl in REL.
-    destruct (eq_args eq) as [ | h t].
+    destruct args as [ | h t].
     reflexivity.
     destruct t.
     2: reflexivity.
@@ -554,26 +734,85 @@ Section SOUNDNESS.
     rewrite PSet.gsubtract in SUBTRACT.
     rewrite andb_true_iff in SUBTRACT.
     intuition.
-    destruct (eq_op eq) as [op | chunk addr] eqn:OP.
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * destruct sop as [op | chunk addr] eqn:OP.
     - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
-      unfold eq_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
-      rewrite OP in ctx_kill_mem_has_depends_on_mem.
+      rewrite EQ in ctx_kill_mem_has_depends_on_mem.
+      unfold eq_cond_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
       rewrite (op_depends_on_memory_correct genv sp op) with (m2 := m).
       assumption.
       destruct (op_depends_on_memory op) in *; trivial.
       rewrite ctx_kill_mem_has_depends_on_mem in H0; trivial.
       discriminate H0.
     - specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
-      destruct eq as [lhs op args]; simpl in *.
-      rewrite OP in ctx_kill_mem_has_depends_on_mem.
+      rewrite EQ in ctx_kill_mem_has_depends_on_mem.
       rewrite negb_true_iff in H0.
-      rewrite OP in CATALOG.
       intuition.
       congruence.
+  * specialize ctx_kill_mem_has_depends_on_mem with (eq0 := eq) (j := i).
+    rewrite EQ in ctx_kill_mem_has_depends_on_mem.
+    unfold eq_cond_depends_on_mem in ctx_kill_mem_has_depends_on_mem.
+    rewrite (cond_depends_on_memory_correct cond) with (m2 := m).
+    assumption.
+    destruct (cond_depends_on_memory cond) in *; trivial.
+    rewrite negb_true_iff in H0.
+    intuition.
+    congruence.
   Qed.
 
   Hint Resolve kill_mem_sound : cse3.
 
+  (* TODO: shouldn't this already be proved somewhere else? *)
+  Lemma store_preserves_validity:
+    forall m m' wchunk a v
+      (STORE : Mem.storev wchunk m a v = Some m')
+      (b : block) (z : Z),
+      Mem.valid_pointer m' b z = Mem.valid_pointer m b z.
+  Proof.
+    unfold Mem.storev.
+    intros.
+    destruct a; try discriminate.
+    Local Transparent Mem.store.
+    unfold Mem.store in STORE.
+    destruct Mem.valid_access_dec in STORE.
+    2: discriminate.
+    inv STORE.
+    reflexivity.
+  Qed.
+
+  Hint Resolve store_preserves_validity : cse3.
+  
+  Theorem kill_store_sound :
+    forall rel rs m m' wchunk a v,
+      (sem_rel rel rs m) ->
+      (Mem.storev wchunk m a v = Some m') ->
+      (sem_rel (kill_store (ctx:=ctx) rel) rs m').
+  Proof.
+    unfold sem_rel, sem_eq, sem_rhs, kill_store.
+    intros until v.
+    intros REL STORE i eq.
+    specialize REL with (i := i) (eq0 := eq).
+    intros SUBTRACT CATALOG.
+    rewrite PSet.gsubtract in SUBTRACT.
+    rewrite andb_true_iff in SUBTRACT.
+    intuition.
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * destruct sop as [op | chunk addr] eqn:OP.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      assumption.
+      eapply store_preserves_validity; eauto.
+    - specialize ctx_kill_store_has_depends_on_store with (eq0 := eq) (j := i).
+      rewrite EQ in ctx_kill_store_has_depends_on_store.
+      rewrite negb_true_iff in H0.
+      intuition.
+      congruence.
+  * rewrite cond_valid_pointer_eq with (m2 := m).
+    assumption.
+    eapply store_preserves_validity; eauto.
+  Qed.
+ 
+  Hint Resolve kill_store_sound : cse3.
+ 
   Theorem eq_find_sound:
     forall no eq id,
       eq_find (ctx := ctx) no eq = Some id ->
@@ -592,6 +831,22 @@ Section SOUNDNESS.
 
   Hint Resolve eq_find_sound : cse3.
 
+  Theorem is_condition_present_sound :
+    forall node rel cond args rs m
+      (REL : sem_rel rel rs m)
+      (COND : (is_condition_present (ctx := ctx) node rel cond args) = true),
+      (eval_condition cond (rs ## args) m) = Some true.
+  Proof.
+    unfold sem_rel, is_condition_present.
+    intros.
+    destruct eq_find as [i |] eqn:FIND.
+    2: discriminate.
+    pose proof (eq_find_sound node (Cond cond args) i FIND) as CATALOG.
+    exact (REL i (Cond cond args) COND CATALOG).
+  Qed.
+
+  Hint Resolve is_condition_present_sound : cse3.
+  
   Theorem rhs_find_sound:
     forall no sop args rel src rs m,
       sem_rel rel rs m ->
@@ -610,9 +865,11 @@ Section SOUNDNESS.
     destruct (eq_catalog ctx src') as [eq | ] eqn:CATALOG.
     2: discriminate.
     specialize REL with (i := src') (eq0 := eq).
-    destruct (eq_dec_sym_op sop (eq_op eq)).
+    destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args] eqn:EQ.
+    2: discriminate.
+    destruct (eq_dec_sym_op sop eq_sop).
     2: discriminate.
-    destruct (eq_dec_args args (eq_args eq)).
+    destruct (eq_dec_args args eq_args).
     2: discriminate.
     simpl in FIND.
     intuition congruence.
@@ -662,17 +919,14 @@ Section SOUNDNESS.
     sem_rel rel rs m ->
     sem_rhs sop args rs m  v ->
     ~ In dst args ->
-    eq_find (ctx := ctx) no
-            {| eq_lhs := dst;
-               eq_op  := sop;
-               eq_args:= args |} = Some eqno ->
+    eq_find (ctx := ctx) no (Equ dst sop args) = Some eqno ->
     sem_rel (PSet.add eqno (kill_reg (ctx := ctx) dst rel)) (rs # dst <- v) m.
   Proof.
     intros until v.
     intros REL RHS NOTIN FIND i eq CONTAINS CATALOG.
     destruct (peq i eqno).
     - subst i.
-      rewrite eq_find_sound with (no := no) (eq0 := {| eq_lhs := dst; eq_op := sop; eq_args := args |}) in CATALOG by exact FIND.
+      rewrite eq_find_sound with (no := no) (eq0 := Equ dst sop args) in CATALOG by exact FIND.
       clear FIND.
       inv CATALOG.
       unfold sem_eq.
@@ -699,6 +953,27 @@ Section SOUNDNESS.
       + congruence.
   Qed.
 
+  Lemma arglist_idem_write:
+    forall { A : Type} args (rs : Regmap.t A) dst,
+      (rs # dst <- (rs # dst)) ## args = rs ## args.
+  Proof.
+    induction args; trivial.
+    intros. cbn.
+    f_equal; trivial.
+    apply Regmap.gsident.
+  Qed.
+  
+  Lemma sem_rhs_idem_write:
+    forall sop args rs dst m v,
+      sem_rhs sop args rs m v ->
+      sem_rhs sop args (rs # dst <- (rs # dst)) m v.
+  Proof.
+    intros.
+    unfold sem_rhs in *.
+    rewrite arglist_idem_write.
+    assumption.
+  Qed.
+  
   Theorem oper2_sound:
     forall no dst sop args rel rs m v,
       sem_rel rel rs m ->
@@ -709,7 +984,7 @@ Section SOUNDNESS.
     unfold oper2.
     intros until v.
     intros REL NOTIN RHS.    
-    pose proof (eq_find_sound no {| eq_lhs := dst; eq_op := sop; eq_args := args |}) as EQ_FIND_SOUND.
+    pose proof (eq_find_sound no (Equ dst sop args)) as EQ_FIND_SOUND.
     destruct eq_find.
     2: auto with cse3; fail.
     specialize EQ_FIND_SOUND with (id := e).
@@ -726,6 +1001,19 @@ Section SOUNDNESS.
       rewrite Regmap.gss.
       apply sem_rhs_depends_on_args_only; auto.
     }
+    intros INi.
+    destruct (PSet.contains rel e) eqn:CONTAINSe.
+    { pose proof (REL e (Equ dst sop args) CONTAINSe H) as RELe.
+      pose proof (REL i eq CONTAINS INi) as RELi.
+      destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args]; cbn in *.
+      - replace v with (rs # dst) by (eapply sem_rhs_det; eassumption).
+        rewrite Regmap.gsident.
+        apply sem_rhs_idem_write.
+        assumption.
+      - replace v with (rs # dst) by (eapply sem_rhs_det; eassumption).
+        rewrite arglist_idem_write.
+        assumption.
+    }
     rewrite PSet.gaddo in CONTAINS by congruence.
     apply (kill_reg_sound rel rs m dst v REL i eq); auto.
   Qed.
@@ -745,6 +1033,29 @@ Section SOUNDNESS.
 
   Hint Resolve oper1_sound : cse3.
 
+  Lemma rel_idem_replace:
+    forall rel rs r m,
+      sem_rel rel rs m ->
+      sem_rel rel rs # r <- (rs # r) m.
+  Proof.
+    intros until m.
+    intro REL.
+    unfold sem_rel, sem_eq, sem_rhs in *.
+    intros.
+    specialize REL with (i:=i) (eq0:=eq).
+    destruct eq as [lhs sop args | cond args] eqn:EQ.
+  * rewrite Regmap.gsident.
+    replace ((rs # r <- (rs # r)) ## args) with
+        (rs ## args).
+    { apply REL; auto. }
+    apply list_map_exten.
+    intros.
+    apply Regmap.gsident.
+    (* TODO simplify? *)
+  * rewrite arglist_idem_write.
+    auto.
+  Qed.
+      
   Lemma move_sound :
     forall no : node,
     forall rel : RELATION.t,
@@ -756,7 +1067,11 @@ Section SOUNDNESS.
     unfold move.
     intros until m.
     intro REL.
-    pose proof (eq_find_sound no  {| eq_lhs := dst; eq_op := SOp Omove; eq_args := src :: nil |}) as EQ_FIND_SOUND.
+    destruct (peq src dst).
+    { subst dst.
+      apply rel_idem_replace; auto.
+    }
+    pose proof (eq_find_sound no (Equ dst (SOp Omove) (src::nil))) as EQ_FIND_SOUND.
     destruct eq_find.
     - intros i eq CONTAINS.
       destruct (peq i e).
@@ -801,17 +1116,21 @@ Section SOUNDNESS.
     - destruct rhs_find as [src |] eqn:RHS_FIND.
       + destruct (Compopts.optim_CSE3_glb tt).
         * apply sem_rel_glb; split.
-          ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND.
-             eapply forward_move_rhs_sound in RHS.
-             2: eassumption.
+          ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND.                                                                    
              rewrite <- (sem_rhs_det SOUND RHS).
              apply move_sound; auto.
+          ** apply sem_rel_glb; split.
+             *** apply oper1_sound; auto.
+             *** apply oper1_sound; auto.
+                 apply forward_move_rhs_sound; auto.
+        * apply sem_rel_glb; split.
+          ** apply oper1_sound; auto.
           ** apply oper1_sound; auto.
              apply forward_move_rhs_sound; auto.
-        * ** apply oper1_sound; auto.
-             apply forward_move_rhs_sound; auto.
-      + apply oper1_sound; auto.
-        apply forward_move_rhs_sound; auto.
+      + apply sem_rel_glb; split.
+        * apply oper1_sound; auto.
+        * apply oper1_sound; auto.
+          apply forward_move_rhs_sound; auto.
   Qed.
 
   Hint Resolve oper_sound : cse3.
@@ -832,23 +1151,25 @@ Section SOUNDNESS.
     rewrite CATALOG in CONTAINS.
     unfold sem_rel in REL.
     specialize REL with (i := i) (eq0 := eq).
-    destruct eq; simpl in *.
-    unfold sem_eq in *.
+    destruct eq as [eq_lhs eq_sop eq_args | eq_cond eq_args]; simpl in *.
+  * unfold sem_eq in *.
     simpl in *.
-    destruct eq_op as [op' | chunk' addr']; simpl.
-    - destruct (op_depends_on_memory op') eqn:DEPENDS.
-      + erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto.
-        discriminate.
-      + rewrite op_depends_on_memory_correct with (m2:=m); trivial.
+    destruct eq_sop as [op' | chunk' addr']; simpl.
+    - rewrite op_valid_pointer_eq with (m2 := m).
+      + cbn in *.
         apply REL; auto.
+      + eapply store_preserves_validity; eauto.
     - simpl in REL.
-      erewrite ctx_kill_mem_has_depends_on_mem in CONTAINS by eauto.
+      erewrite ctx_kill_store_has_depends_on_store in CONTAINS by eauto.
       simpl in CONTAINS.
       rewrite negb_true_iff in CONTAINS.
       destruct (eval_addressing genv sp addr' rs ## eq_args) as [a'|] eqn:ADDR'.
       + erewrite may_overlap_sound with (chunk:=chunk) (addr:=addr) (args:=args) (chunk':=chunk') (addr':=addr') (args':=eq_args); try eassumption.
         apply REL; auto.
       + apply REL; auto.
+  * rewrite cond_valid_pointer_eq with (m2 := m).
+    auto.
+    eapply store_preserves_validity; eauto.
   Qed.
 
   Hint Resolve clever_kill_store_sound : cse3.
@@ -886,7 +1207,7 @@ Section SOUNDNESS.
     intros i eq CONTAINS CATALOG.
     destruct (peq i eq_id).
     { subst i.
-      rewrite eq_find_sound with (no:=no) (eq0:={| eq_lhs := src; eq_op := SLoad chunk addr; eq_args := args |}) in CATALOG; trivial.
+      rewrite eq_find_sound with (no:=no) (eq0:=Equ src (SLoad chunk addr) args) in CATALOG; trivial.
       inv CATALOG.
       unfold sem_eq.
       simpl.
@@ -900,22 +1221,28 @@ Section SOUNDNESS.
   
   Hint Resolve store1_sound : cse3.
     
+    
   Theorem store_sound:
     forall no chunk addr args a src rel tenv rs m m',
       sem_rel rel rs m ->
       wt_regset tenv rs ->
       eval_addressing genv sp addr (rs ## args) = Some a ->
       Mem.storev chunk m a (rs#src) = Some m' ->
-      sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'.
+      sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'.
   Proof.
     unfold store.
     intros until m'.
     intros REL WT ADDR STORE.
-    rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
-    rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
-    apply store1_sound with (a := a) (m := m); trivial.
-    (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
-    assumption. *)
+    apply sem_rel_glb; split.
+    - apply sem_rel_glb; split.
+      * apply store1_sound with (a := a) (m := m); trivial.
+      * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+        apply store1_sound with (a := a) (m := m); trivial.
+    - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
+      apply sem_rel_glb; split.
+      * apply store1_sound with (a := a) (m := m); trivial.
+      * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+        apply store1_sound with (a := a) (m := m); trivial.
   Qed.
 
   Hint Resolve store_sound : cse3.
@@ -961,6 +1288,79 @@ Section SOUNDNESS.
 
   Hint Resolve external_call_sound : cse3.
 
+
+  Definition sem_rel_b (rel : RB.t) (rs : regset) (m : mem) :=
+    match rel with
+    | None => False
+    | Some rel => sem_rel rel rs m
+    end.
+  
+  Lemma apply_cond1_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel_b (apply_cond1 (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    intros.
+    unfold apply_cond1.
+    destruct eq_find as [eq_id | ] eqn:FIND; cbn.
+    2: assumption.
+    destruct PSet.contains eqn:CONTAINS.
+    {
+      pose proof (eq_find_sound pc (Cond (negate_condition cond) args) eq_id FIND) as FIND_SOUND.
+      unfold sem_rel in REL.
+      pose proof (REL eq_id (Cond (negate_condition cond) args) CONTAINS FIND_SOUND) as REL_id.
+      cbn in REL_id.
+      rewrite eval_negate_condition in REL_id.
+      rewrite COND in REL_id.
+      discriminate.
+    }
+    exact REL.
+  Qed.
+  
+  Lemma apply_cond0_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel (apply_cond0 (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    intros.
+    unfold apply_cond0.
+    destruct eq_find as [eq_id | ] eqn:FIND; cbn.
+    2: assumption.
+    pose proof (eq_find_sound pc (Cond cond args) eq_id FIND) as FIND_SOUND.
+    intros eq_id' eq' CONTAINS CATALOG.
+    destruct (peq eq_id eq_id').
+    { subst eq_id'.
+      unfold sem_eq.
+      rewrite FIND_SOUND in CATALOG.
+      inv CATALOG.
+      assumption.
+    }
+    rewrite PSet.gaddo in CONTAINS by assumption.
+    unfold sem_rel in REL.
+    eapply REL; eassumption.
+  Qed.
+
+  Lemma apply_cond_sound :
+    forall pc cond args rel rs m
+      (COND : (eval_condition cond (rs ## args) m) = Some true)
+      (REL : (sem_rel rel rs m)),
+      (sem_rel_b (apply_cond (ctx:=ctx) pc cond args rel) rs m).
+  Proof.
+    unfold apply_cond.
+    intros.
+    destruct (Compopts.optim_CSE3_conditions tt).
+    {
+      pose proof (apply_cond1_sound pc cond args rel rs m COND REL) as SOUND1.
+      destruct apply_cond1 eqn:COND1.
+      { apply apply_cond0_sound; auto. }
+      exact SOUND1.
+    }
+    exact REL.
+  Qed.
+  
+  (*
   Section INDUCTIVENESS.
     Variable fn : RTL.function.
     Variable tenv : typing_env.
@@ -971,7 +1371,10 @@ Section SOUNDNESS.
         PTree.get pc (fn_code fn) = Some instr ->
         In pc' (successors_instr instr) ->
         RB.ge (PMap.get pc' inv)
-              (apply_instr' (ctx:=ctx) tenv (fn_code fn) pc (PMap.get pc inv)).
+              (match apply_instr' (ctx:=ctx) tenv (fn_code fn) pc
+                                  (PMap.get pc inv) with
+               | Abst_same rel' => rel'
+              end).
 
     Definition is_inductive_allstep :=
       forall pc pc', is_inductive_step pc pc'.
@@ -990,11 +1393,14 @@ Section SOUNDNESS.
       pose proof (ALL INSTR) as AT_PC.
       destruct (inv # pc).
       2: apply RB.ge_bot.
-      rewrite List.forallb_forall in AT_PC.
       unfold apply_instr'.
       rewrite INSTR.
-      apply relb_leb_correct.
-      auto.
+      destruct apply_instr.
+      { (* same *)
+        rewrite List.forallb_forall in AT_PC.
+        apply relb_leb_correct.
+        auto.
+      }
     Qed.
     
     Lemma checked_is_inductive_entry:
@@ -1015,4 +1421,5 @@ Section SOUNDNESS.
   End INDUCTIVENESS.
 
   Hint Resolve checked_is_inductive_allstep checked_is_inductive_entry : cse3.
+  *)
 End SOUNDNESS.
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
index 6e489066..0722f904 100644
--- a/backend/CSE3proof.v
+++ b/backend/CSE3proof.v
@@ -352,6 +352,23 @@ Qed.
 
 Hint Resolve rel_ge : cse3.
 
+Lemma relb_ge:
+  forall inv inv'
+         (GE : RB.ge inv' inv)
+         ctx sp rs m
+         (REL: sem_rel_b sp ctx inv rs m),
+  sem_rel_b sp ctx inv' rs m.
+Proof.
+  intros.
+  destruct inv; cbn in *.
+  2: contradiction.
+  destruct inv'; cbn in *.
+  2: assumption.
+  eapply rel_ge; eassumption.
+Qed.
+
+Hint Resolve relb_ge : cse3.
+
 Lemma sem_rhs_sop :
   forall sp op rs args m v,
   eval_operation ge sp op rs ## args m = Some v ->
@@ -422,6 +439,7 @@ Qed.
 
 Hint Resolve sem_rel_b_top : cse3.
 
+(*
 Ltac IND_STEP :=
         match goal with
         REW: (fn_code ?fn) ! ?mpc = Some ?minstr
@@ -442,25 +460,42 @@ Ltac IND_STEP :=
         eapply rel_ge; eauto with cse3 (* ; for printing
         idtac mpc mpc' fn minstr *)
       end.
-
-Lemma if_same : forall {T : Type} (b : bool) (x : T),
-    (if b then x else x) = x.
-Proof.
-  destruct b; trivial.
-Qed.
-
+ *)
+  
 Lemma step_simulation:
   forall S1 t S2, RTL.step ge S1 t S2 -> 
   forall S1', match_states S1 S1' ->
               exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
 Proof.
   induction 1; intros S1' MS; inv MS.
+  all: try set (ctx := (context_from_hints (snd (preanalysis tenv f)))) in *.
+  all: try set (invs := (fst (preanalysis tenv f))) in *.
   - (* Inop *)
     exists (State ts tf sp pc' rs m). split.
     + apply exec_Inop; auto.
       TR_AT. reflexivity.
     + econstructor; eauto.
-      IND_STEP.
+      
+      (* BEGIN INVARIANT *)
+      fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      unfold sem_rel_b.
+      apply (rel_ge inv_pc inv_pc'); auto.
+      (* END INVARIANT *)
+      
   - (* Iop *)
     exists (State ts tf sp pc' (rs # res <- v) m). split.
     + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iop op args res pc')) as instr'.
@@ -469,12 +504,12 @@ Proof.
       destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
       pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SOp op)
                 (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
-      * destruct (if is_trivial_op op
+      * destruct (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
                then None
                else
                 rhs_find pc (SOp op)
                   (subst_args (fst (preanalysis tenv f)) pc args) t) eqn:FIND.
-        ** destruct (is_trivial_op op). discriminate.
+        ** destruct ((negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)). discriminate.
            apply exec_Iop with (op := Omove) (args := r :: nil).
            TR_AT.
            subst instr'.
@@ -522,9 +557,28 @@ Proof.
     + econstructor; eauto.
       * eapply wt_exec_Iop with (f:=f); try eassumption.
         eauto with wt.
-      * IND_STEP.
-        apply oper_sound; eauto with cse3.
-
+      *
+        (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
   - (* Iload *)
     exists (State ts tf sp pc' (rs # dst <- v) m). split.
     + pose (transf_instr (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc (Iload trap chunk addr args dst pc')) as instr'.
@@ -581,8 +635,27 @@ Proof.
     + econstructor; eauto.
       * eapply wt_exec_Iload with (f:=f); try eassumption.
         eauto with wt.
-      * IND_STEP.
-        apply oper_sound; eauto with cse3.
+      *         (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
         
   - (* Iload notrap1 *)
     exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
@@ -638,8 +711,27 @@ Proof.
            assumption.
     + econstructor; eauto.
       * apply wt_undef; assumption.
-      * IND_STEP.
-        apply oper_sound; eauto with cse3.
+      *          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
         
   - (* Iload notrap2 *)
     exists (State ts tf sp pc' (rs # dst <- Vundef) m). split.
@@ -696,8 +788,27 @@ Proof.
            assumption.
     + econstructor; eauto.
       * apply wt_undef; assumption.
-      * IND_STEP.
-        apply oper_sound; eauto with cse3.
+      *  (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply oper_sound; unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
 
   - (* Istore *)
     exists (State ts tf sp pc' rs m'). split.
@@ -710,8 +821,27 @@ Proof.
       * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
         assumption.
     + econstructor; eauto.
-      IND_STEP.
-      apply store_sound with (a0:=a) (m0:=m); eauto with cse3.
+  (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      apply store_sound with (a0:=a) (m0:=m); unfold ctx; eauto with cse3.
+      (* END INVARIANT *)
       
   - (* Icall *)
     destruct (find_function_translated ros rs fd H0) as [tfd [HTFD1 HTFD2]].
@@ -726,9 +856,29 @@ Proof.
       * econstructor; eauto.
         ** rewrite sig_preserved with (f:=fd); assumption.
         ** intros.
-           IND_STEP.
-           apply kill_reg_sound; eauto with cse3.
-           eapply kill_mem_sound; eauto with cse3.
+
+            (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      (* END INVARIANT *)
+      { apply kill_reg_sound; unfold ctx; eauto with cse3.
+           eapply kill_mem_sound; unfold ctx; eauto with cse3. }
       * rewrite sig_preserved with (f:=fd) by trivial.
         rewrite <- H7.
         apply wt_regset_list; auto.
@@ -766,19 +916,159 @@ Proof.
       * eapply external_call_symbols_preserved; eauto. apply senv_preserved.
     + econstructor; eauto.
       * eapply wt_exec_Ibuiltin with (f:=f); eauto with wt.
-      * IND_STEP.
-        apply kill_builtin_res_sound; eauto with cse3.
-        eapply external_call_sound; eauto with cse3.
+      *   (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      destruct (invs # pc') as [inv_pc' | ] eqn:INV_pc'; cbn in *.
+      2: discriminate.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me.
+      rewrite rel_leb_correct in *.
+      eapply rel_ge.
+      eassumption.
+      (* END INVARIANT *)
+
+        apply kill_builtin_res_sound; unfold ctx; eauto with cse3.
+        eapply external_call_sound; unfold ctx; eauto with cse3.
         
   - (* Icond *)
-    econstructor. split.
-    + eapply exec_Icond with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); try eassumption.
-      * TR_AT. reflexivity.
-      * rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
-        eassumption.
-      * reflexivity.
-    + econstructor; eauto.
-      destruct b; IND_STEP.
+    destruct (find_cond_in_fmap (ctx := ctx) invs pc cond args) as [bfound | ] eqn:FIND_COND.                                                                    
+    + econstructor; split.
+      * eapply exec_Inop; try eassumption.
+        TR_AT. unfold transf_instr. fold invs. fold ctx. rewrite FIND_COND. reflexivity.
+      * replace bfound with b.
+        { econstructor; eauto.
+          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me as [IND_so [IND_not ZOT]].
+      clear ZOT.
+      rewrite relb_leb_correct in IND_so.
+      rewrite relb_leb_correct in IND_not.
+      
+      destruct b.
+      { eapply relb_ge. eassumption. apply apply_cond_sound; auto. }
+      eapply relb_ge. eassumption. apply apply_cond_sound; trivial.
+      rewrite eval_negate_condition.
+      rewrite H0.
+      reflexivity.
+      (* END INVARIANT *)
+        }
+        unfold sem_rel_b in REL.
+        destruct (invs # pc) as [rel | ] eqn:FIND_REL.
+        2: contradiction.
+        pose proof (is_condition_present_sound pc rel cond args rs m REL) as COND_PRESENT_TRUE.
+        pose proof (is_condition_present_sound pc rel (negate_condition cond) args rs m REL) as COND_PRESENT_FALSE.
+        rewrite eval_negate_condition in COND_PRESENT_FALSE.
+        unfold find_cond_in_fmap in FIND_COND.
+        change (@PMap.get (option RELATION.t)) with (@Regmap.get RB.t) in FIND_COND.
+        rewrite FIND_REL in FIND_COND.
+        destruct (Compopts.optim_CSE3_conditions tt).
+        2: discriminate.
+        destruct (is_condition_present pc rel cond args).
+        { rewrite COND_PRESENT_TRUE in H0 by trivial.
+          congruence.
+        }
+        destruct (is_condition_present pc rel (negate_condition cond) args).
+        { destruct (eval_condition cond rs ## args m) as [b0 | ].
+          2: discriminate.
+          inv H0.
+          cbn in COND_PRESENT_FALSE.
+          intuition.
+          inv H0.
+          inv FIND_COND.
+          destruct b; trivial; cbn in H2; discriminate.
+        }
+        clear COND_PRESENT_TRUE COND_PRESENT_FALSE.
+        pose proof (is_condition_present_sound pc rel cond (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) rs m REL) as COND_PRESENT_TRUE.
+        pose proof (is_condition_present_sound pc rel (negate_condition cond) (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) rs m REL) as COND_PRESENT_FALSE.
+        rewrite eval_negate_condition in COND_PRESENT_FALSE.
+        
+        destruct is_condition_present.
+        { rewrite subst_args_ok with (sp:=sp) (m:=m) in COND_PRESENT_TRUE.
+          { rewrite COND_PRESENT_TRUE in H0 by trivial.
+            congruence.
+          }
+          unfold fmap_sem.
+          unfold sem_rel_b.
+          fold invs.
+          rewrite FIND_REL.
+          exact REL.
+        }
+        destruct is_condition_present.
+        { rewrite subst_args_ok with (sp:=sp) (m:=m) in COND_PRESENT_FALSE.
+          { destruct (eval_condition cond rs ## args m) as [b0 | ].
+            2: discriminate.
+            inv H0.
+            cbn in COND_PRESENT_FALSE.
+            intuition.
+            inv H0.
+            inv FIND_COND.
+            destruct b; trivial; cbn in H2; discriminate.
+          }
+          unfold fmap_sem.
+          unfold sem_rel_b.
+          fold invs.
+          rewrite FIND_REL.
+          exact REL.          
+        }
+        discriminate.
+   + econstructor; split.        
+      * eapply exec_Icond with (args := (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args)); try eassumption.
+        ** TR_AT. unfold transf_instr. fold invs. fold ctx.
+           rewrite FIND_COND.
+           reflexivity.
+        ** rewrite subst_args_ok with (sp:=sp) (m:=m) by trivial.
+           eassumption.
+        ** reflexivity.
+     * econstructor; eauto.
+
+          (* BEGIN INVARIANT *)
+        fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      rewrite andb_true_iff in IND_step_me.
+      destruct IND_step_me as [IND_so [IND_not ZOT]].
+      clear ZOT.
+      rewrite relb_leb_correct in IND_so.
+      rewrite relb_leb_correct in IND_not.
+      
+      destruct b.
+      { eapply relb_ge. eassumption. apply apply_cond_sound; auto. }
+      eapply relb_ge. eassumption. apply apply_cond_sound; trivial.
+      rewrite eval_negate_condition.
+      rewrite H0.
+      reflexivity.
+      (* END INVARIANT *)
       
   - (* Ijumptable *)
     econstructor. split.
@@ -787,8 +1077,33 @@ Proof.
       * rewrite subst_arg_ok with (sp:=sp) (m:=m) by trivial.
         assumption.
     + econstructor; eauto.
-      assert (In pc' tbl) as IN_LIST by (eapply list_nth_z_in; eassumption).
-      IND_STEP.
+      
+      (* BEGIN INVARIANT *)
+      fold ctx. fold invs.
+      assert ((check_inductiveness f tenv invs)=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+      unfold check_inductiveness in IND.
+      rewrite andb_true_iff in IND.
+      destruct IND as [IND_entry IND_step].
+      rewrite PTree_Properties.for_all_correct in IND_step.
+      pose proof (IND_step pc _ H) as IND_step_me.
+      clear IND_entry IND_step.
+      destruct (invs # pc) as [inv_pc | ] eqn:INV_pc; cbn in REL.
+      2: contradiction.
+      cbn in IND_step_me.
+      rewrite forallb_forall in IND_step_me.
+      assert (RB.ge (invs # pc') (Some inv_pc)) as GE.
+      {
+        apply relb_leb_correct.
+        specialize IND_step_me with (pc', Some inv_pc).
+        apply IND_step_me.
+        apply (in_map (fun pc'0 : node => (pc'0, Some inv_pc))).
+        eapply list_nth_z_in.
+        eassumption.
+      }
+      destruct (invs # pc'); cbn in *.
+      2: contradiction.
+      eapply rel_ge; eauto.
+      (* END INVARIANT *)
 
   - (* Ireturn *)
     destruct or as [arg | ].
@@ -830,9 +1145,18 @@ Proof.
         apply wt_init_regs.
         rewrite <- wt_params in WTARGS.
         assumption.
-      * rewrite @checked_is_inductive_entry with (tenv:=tenv) (ctx:=(context_from_hints (snd (preanalysis tenv f)))).
-        ** apply sem_rel_b_top.
-        ** apply transf_function_invariants_inductive with (tf:=tf); auto.
+      * assert ((check_inductiveness f tenv (fst (preanalysis tenv f)))=true) as IND by (eapply transf_function_invariants_inductive; eauto).
+        unfold check_inductiveness in IND.
+        rewrite andb_true_iff in IND.
+        destruct IND as [IND_entry IND_step].
+        clear IND_step.
+        apply RB.beq_correct in IND_entry.
+        unfold RB.eq in *.
+        destruct ((fst (preanalysis tenv f)) # (fn_entrypoint f)).
+        2: contradiction.
+        cbn.
+        rewrite <- IND_entry.
+        apply sem_rel_top.
            
   - (* external *)
     simpl in FUN.
diff --git a/backend/Debugvar.v b/backend/Debugvar.v
index 56908855..7806984a 100644
--- a/backend/Debugvar.v
+++ b/backend/Debugvar.v
@@ -92,7 +92,7 @@ Fixpoint remove_state (v: ident) (s: avail) : avail :=
       end
   end.
 
-Fixpoint set_debug_info (v: ident) (info: list (builtin_arg loc)) (s: avail) :=
+Definition set_debug_info (v: ident) (info: list (builtin_arg loc)) (s: avail) :=
   match normalize_debug info with
   | Some a => set_state v a s
   | None   => remove_state v s
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 0e04b07d..3fd86728 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -18,14 +18,28 @@
 Require Import AST RTL Maps Globalenvs.
 Require Import Coqlib Errors Op.
 
-Local Open Scope error_monad_scope.
-Local Open Scope positive_scope.
 
-(** External oracle returning the new RTL code (entry point unchanged),
+
+Module Type DuplicateOracle.
+
+  (** External oracle returning the new RTL code (entry point unchanged),
     along with the new entrypoint, and a mapping of new nodes to old nodes *)
-Axiom duplicate_aux: function -> code * node * (PTree.t node).
+  Parameter duplicate_aux: function -> code * node * (PTree.t node).
+
+End DuplicateOracle.
+
+
 
-Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux".
+Module Duplicate (D: DuplicateOracle).
+
+Export D.
+
+Definition duplicate_aux := duplicate_aux.
+
+(* Extract Constant duplicate_aux => "Duplicateaux.duplicate_aux". *)
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
 
 (** * Verification of node duplications *)
 
@@ -72,7 +86,7 @@ Global Opaque builtin_res_eq_pos.
 
 Definition verify_match_inst dupmap inst tinst :=
   match inst with
-  | Inop n => match tinst with Inop n' => do u <- verify_is_copy dupmap n n'; OK tt | _ => Error(msg "verify_match_inst Inop") end
+  | Inop n => match tinst with Inop n' => verify_is_copy dupmap n n' | _ => Error(msg "verify_match_inst Inop") end
 
   | Iop op lr r n => match tinst with
       Iop op' lr' r' n' =>
@@ -153,10 +167,10 @@ Definition verify_match_inst dupmap inst tinst :=
           if (list_eq_dec Pos.eq_dec lr lr') then
             if (eq_condition cond cond') then
               do u1 <- verify_is_copy dupmap n1 n1';
-              do u2 <- verify_is_copy dupmap n2 n2'; OK tt
+              verify_is_copy dupmap n2 n2'
             else if (eq_condition (negate_condition cond) cond') then
               do u1 <- verify_is_copy dupmap n1 n2';
-              do u2 <- verify_is_copy dupmap n2 n1'; OK tt
+              verify_is_copy dupmap n2 n1'
             else Error (msg "Incompatible conditions in Icond")
           else Error (msg "Different lr in Icond")
       | _ => Error (msg "verify_match_inst Icond") end
@@ -189,8 +203,7 @@ Fixpoint verify_mapping_mn_rec dupmap f f' lm :=
   match lm with
   | nil => OK tt
   | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
-               do u2 <- verify_mapping_mn_rec dupmap f f' lm;
-               OK tt
+               verify_mapping_mn_rec dupmap f f' lm
   end.
 
 Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
@@ -199,7 +212,7 @@ Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
 (** Verifies that the [dupmap] of the translated function [f'] is giving correct information in regards to [f] *)
 Definition verify_mapping dupmap (f f': function) : res unit :=
   do u <- verify_mapping_entrypoint dupmap f f';
-  do v <- verify_mapping_match_nodes dupmap f f'; OK tt.
+  verify_mapping_match_nodes dupmap f f'.
 
 (** * Entry points *)
 
@@ -215,3 +228,5 @@ Definition transf_fundef (f: fundef) : res fundef :=
 
 Definition transf_program (p: program) : res program :=
   transform_partial_program transf_fundef p.
+
+End Duplicate.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 00819834..d55da64a 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -15,31 +15,93 @@
 (* Oracle for Duplicate pass.
  * - Add static prediction information to Icond nodes
  * - Performs tail duplication on interesting traces to form superblocks
+ * - Unrolls a single iteration of innermost loops
  * - (TODO: perform partial loop unrolling inside innermost loops)
  *)
 
 open RTL
 open Maps
 open Camlcoq
+open DebugPrint
 
-let debug_flag = ref false
+let stats_oc = ref None
 
-let debug fmt =
-  if !debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
+let set_stats_oc () =
+  try
+    let name = Sys.getenv "COMPCERT_PREDICT_STATS" in
+    let oc = open_out_gen [Open_append; Open_creat; Open_text] 0o666 name in
+    stats_oc := Some oc
+  with Not_found -> ()
 
-let get_some = function
-| None -> failwith "Did not get some"
-| Some thing -> thing
+(* number of total CBs *)
+let stats_nb_total = ref 0
+(* we predicted the same thing as the profiling *)
+let stats_nb_correct_predicts = ref 0
+(* we predicted something (say Some true), but the profiling predicted the opposite (say Some false) *)
+let stats_nb_mispredicts = ref 0
+(* we did not predict anything (None) even though the profiling did predict something *)
+let stats_nb_missed_opportunities = ref 0
+(* we predicted something (say Some true) but the profiling preferred not to predict anything (None) *)
+let stats_nb_overpredict = ref 0
 
-let rtl_successors = function
-| Itailcall _ | Ireturn _ -> []
-| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
-| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
-| Icond (_,_,n1,n2,_) -> [n1; n2]
-| Ijumptable (_,ln) -> ln
+(* heuristic specific counters *)
+let wrong_opcode = ref 0
+let wrong_return = ref 0
+let wrong_loop2 = ref 0
+let wrong_loop = ref 0
+let wrong_call = ref 0
+
+let right_opcode = ref 0
+let right_return = ref 0
+let right_loop2 = ref 0
+let right_loop = ref 0
+let right_call = ref 0
+
+let reset_stats () = begin
+  stats_nb_total := 0;
+  stats_nb_correct_predicts := 0;
+  stats_nb_mispredicts := 0;
+  stats_nb_missed_opportunities := 0;
+  stats_nb_overpredict := 0;
+  wrong_opcode := 0;
+  wrong_return := 0;
+  wrong_loop2 := 0;
+  wrong_loop := 0;
+  wrong_call := 0;
+  right_opcode := 0;
+  right_return := 0;
+  right_loop2 := 0;
+  right_loop := 0;
+  right_call := 0;
+end
+
+let incr theref = theref := !theref + 1
+
+let has_some o = match o with Some _ -> true | None -> false
+
+let stats_oc_recording () = has_some !stats_oc
 
-let bfs code entrypoint = begin
+let write_stats_oc () =
+  match !stats_oc with
+  | None -> ()
+  | Some oc -> begin
+      Printf.fprintf oc "%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n" !stats_nb_total
+        !stats_nb_correct_predicts !stats_nb_mispredicts !stats_nb_missed_opportunities
+        !stats_nb_overpredict
+        !wrong_opcode !wrong_return !wrong_loop2 !wrong_loop !wrong_call
+        !right_opcode !right_return !right_loop2 !right_loop !right_call
+        ;
+      close_out oc
+    end
+
+let get_loop_headers = LICMaux.get_loop_headers
+let get_some = LICMaux.get_some
+let rtl_successors = LICMaux.rtl_successors
+
+(* Get list of nodes following a BFS of the code *)
+(* Stops when predicate is reached
+ * Excludes any node given in excluded function *)
+let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
   debug "bfs\n";
   let visited = ref (PTree.map (fun n i -> false) code)
   and bfs_list = ref []
@@ -49,24 +111,29 @@ let bfs code entrypoint = begin
     Queue.add entrypoint to_visit;
     while not (Queue.is_empty to_visit) do
       node := Queue.pop to_visit;
-      if not (get_some @@ PTree.get !node !visited) then begin
+      if (not (get_some @@ PTree.get !node !visited)) then begin
         visited := PTree.set !node true !visited;
-        match PTree.get !node code with
-        | None -> failwith "No such node"
-        | Some i ->
-            bfs_list := !node :: !bfs_list;
-            let succ = rtl_successors i in
-            List.iter (fun n -> Queue.add n to_visit) succ
+        if not (excluded !node) then begin
+          match PTree.get !node code with
+          | None -> failwith "No such node"
+          | Some i ->
+              bfs_list := !node :: !bfs_list;
+              if not (predicate !node) then
+                let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+        end
       end
     done;
     List.rev !bfs_list
   end
 end
 
+let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false)
+
 let optbool o = match o with Some _ -> true | None -> false
 
 let ptree_get_some n ptree = get_some @@ PTree.get n ptree
 
+(* Returns a PTree: node -> list of the predecessors of that node *)
 let get_predecessors_rtl code = begin
   debug "get_predecessors_rtl\n";
   let preds = ref (PTree.map (fun n i -> []) code) in
@@ -89,18 +156,6 @@ end
 
 module PSet = Set.Make(PInt)
 
-let print_intlist l =
-  let rec f = function
-  | [] -> ()
-  | n::ln -> (Printf.printf "%d " (P.to_int n); f ln)
-  in begin
-    if !debug_flag then begin
-      Printf.printf "[";
-      f l;
-      Printf.printf "]"
-    end
-  end
-
 let print_intset s =
   let seq = PSet.to_seq s
   in begin
@@ -113,67 +168,24 @@ let print_intset s =
     end
   end
 
-type vstate = Unvisited | Processed | Visited
-
-(** Getting loop branches with a DFS visit :
-  * Each node is either Unvisited, Visited, or Processed
-  * pre-order: node becomes Processed
-  * post-order: node becomes Visited
-  *
-  * If we come accross an edge to a Processed node, it's a loop!
-  *)
-let get_loop_headers code entrypoint = begin
-  debug "get_loop_headers\n";
-  let visited = ref (PTree.map (fun n i -> Unvisited) code)
-  and is_loop_header = ref (PTree.map (fun n i -> false) code)
-  in let rec dfs_visit code = function
-  | [] -> ()
-  | node :: ln ->
-      match (get_some @@ PTree.get node !visited) with
-      | Visited -> ()
-      | Processed -> begin
-          debug "Node %d is a loop header\n" (P.to_int node);
-          is_loop_header := PTree.set node true !is_loop_header;
-          visited := PTree.set node Visited !visited
-        end
-      | Unvisited -> begin
-          visited := PTree.set node Processed !visited;
-          match PTree.get node code with
-          | None -> failwith "No such node"
-          | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits;
-          visited := PTree.set node Visited !visited;
-          dfs_visit code ln
-        end
-  in begin
-    dfs_visit code [entrypoint];
-    !is_loop_header
-  end
-end
-
-let ptree_printbool pt =
-  let elements = PTree.elements pt
-  in begin
-    if !debug_flag then begin
-      Printf.printf "[";
-      List.iter (fun (n, b) ->
-        if b then Printf.printf "%d, " (P.to_int n) else ()
-      ) elements;
-      Printf.printf "]"
-    end
-  end
-
 (* Looks ahead (until a branch) to see if a node further down verifies
  * the given predicate *)
-let rec look_ahead code node is_loop_header predicate =
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
   if (predicate node) then true
-  else match (rtl_successors @@ get_some @@ PTree.get node code) with
+  else match (successors @@ get_some @@ PTree.get node code) with
     | [n] -> if (predicate n) then true
         else (
           if (get_some @@ PTree.get n is_loop_header) then false
-          else look_ahead code n is_loop_header predicate
+          else look_ahead_gen successors code n is_loop_header predicate
         )
     | _ -> false
 
+let look_ahead = look_ahead_gen rtl_successors
+
+(** 
+ * Heuristics mostly based on the paper Branch Prediction for Free 
+ *)
+
 let do_call_heuristic code cond ifso ifnot is_loop_header =
   begin
     debug "\tCall heuristic..\n";
@@ -228,7 +240,7 @@ let do_loop_heuristic code cond ifso ifnot is_loop_header =
     let predicate n = get_some @@ PTree.get n is_loop_header in
     let ifso_loop = look_ahead code ifso is_loop_header predicate in
     let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
-    if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *)
+    if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *)
     else if ifso_loop then Some true
     else if ifnot_loop then Some false
     else None
@@ -242,97 +254,218 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
     | Some b -> Some b
   end
 
-(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
-(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
-let get_loop_info is_loop_header bfs_order code =
-  let loop_info = ref (PTree.map (fun n i -> None) code) in
-  let mark_path s n =
-    let visited = ref (PTree.map (fun n i -> false) code) in
-    let rec explore src dest =
-      if (get_some @@ PTree.get src !visited) then false
-      else if src == dest then true
-      else begin
-        visited := PTree.set src true !visited;
-        match rtl_successors @@ get_some @@ PTree.get src code with
-        | [] -> false
-        | [s] -> explore s dest
-        | [s1; s2] -> (explore s1 dest) || (explore s2 dest)
-        | _ -> false
-      end
-    in let rec advance_to_cb src =
-      if (get_some @@ PTree.get src !visited) then None
-      else begin
-        visited := PTree.set src true !visited;
-        match get_some @@ PTree.get src code with
-        | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
-        | Ibuiltin (_,_,_,s) -> advance_to_cb s
-        | Icond _ -> Some src
-        | Ijumptable _ | Itailcall _ | Ireturn _ -> None
-      end
-    in begin
-      debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
-      match advance_to_cb s with
-      | None -> (debug "Nothing found\n")
-      | Some s -> ( debug "Advancing to %d\n" (P.to_int s);
-          match get_some @@ PTree.get s !loop_info with
-          | None | Some _ -> begin
-              match get_some @@ PTree.get s code with
-              | Icond (_, _, n1, n2, _) ->
-                  let b1 = explore n1 n in
-                  let b2 = explore n2 n in
-                  if (b1 && b2) then (debug "both true\n")
-                  else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
-                  else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
-                  else (debug "none true\n")
-              | _ -> ( debug "not an icond\n" )
-            end
-          (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
+(** Innermost loop detection *)
+
+type innerLoop = {
+  preds: P.t list;
+  body: P.t list;
+  head: P.t; (* head of the loop *)
+  finals: P.t list; (* the final instructions, which loops back to the head *)
+  (* There may be more than one ; for instance if there is an if inside the loop with both
+   * branches leading to a goto backedge
+   * Such cases usually happen after a tail-duplication *)
+  sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+    (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+  | Some true -> [n1]
+  | Some false -> [n2]
+  | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+  let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+  let headers = List.filter (fun n -> 
+    get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in 
+  match headers with
+  | [] -> (
+      let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+      match next_nodes with
+      | [n] -> (
+          (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+          if HashedSet.PSet.contains trace n then
+            find_last_node_before_loop code n trace is_loop_header
+          else None
         )
+      | _ -> None (* May happen when we predict that a loop is not taken *)
+    )
+  | [h] -> Some node
+  | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+  let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; 
+    fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+  let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+  begin
+    debug "PREDMAP: "; print_ptree print_intlist predmap;
+    debug "LOOPMAP: "; print_ptree print_pset loopmap;
+    List.map (fun (n, body) ->
+      let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) 
+        @@ get_some @@ PTree.get n predmap in
+      let head = (* the instruction from body which is a loop header *)
+        let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter 
+          (fun n -> ptree_get_some n is_loop_header) body in
+        begin
+          assert (List.length heads == 1);
+          List.hd heads
+        end in
+      let finals = (* the predecessors from head that are in the body *)
+        let head_preds = ptree_get_some head predmap in
+        let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+        begin
+          debug "HEAD: %d\n" (P.to_int head);
+          debug "BODY: %a\n" print_pset body;
+          debug "HEADPREDS: %a\n" print_intlist head_preds;
+          filtered
+        end in
+      let sb_final = find_last_node_before_loop code head body is_loop_header in
+      let body = HashedSet.PSet.elements body in
+      { preds = preds; body = body; head = head; finals = finals;
+        sb_final = sb_final; }
+    ) 
+    (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+     * We remove those to get just the inner loops *)
+    @@ List.filter (fun (n, body) ->
+      let count = List.length @@ HashedSet.PSet.elements body in count != 1
+    ) (PTree.elements loopmap)
+  end
+
+let get_loop_bodies code entrypoint =
+  let predecessors = get_predecessors_rtl code in
+  (* Algorithm from Muchnik, Compiler Design & Implementation, Figure 7.21 page 192 *)
+  let natural_loop n m =
+    debug "Natural Loop from %d to %d\n" (P.to_int n) (P.to_int m);
+    let in_body = ref (PTree.map (fun n b -> false) code) in
+    let body = ref [] in
+    let add_to_body n = begin
+      in_body := PTree.set n true !in_body;
+      body := n :: !body
     end
-  in begin
+    in let rec process_node p =
+      debug "    Processing node %d\n" (P.to_int p);
+      List.iter (fun pred ->
+        debug "        Looking at predecessor of %d: %d\n" (P.to_int p) (P.to_int pred);
+        let is_in_body = get_some @@ PTree.get pred !in_body in
+        if (not @@ is_in_body) then begin
+          debug "        --> adding to body\n";
+          add_to_body pred;
+          process_node pred
+        end
+      ) (get_some @@ PTree.get p predecessors)
+    in begin
+      add_to_body m;
+      add_to_body n;
+      (if (m != n) then process_node m);
+      !body
+    end
+  in let option_natural_loop n = function
+    | None -> None
+    | Some m -> Some (natural_loop n m)
+  in PTree.map option_natural_loop (LICMaux.get_loop_backedges code entrypoint)
+
+(* Returns a PTree of either None or Some b where b determines the node in the loop body, for a cb instruction *)
+let get_loop_info f is_loop_header bfs_order code =
+  let loop_info = ref (PTree.map (fun n i -> None) code) in
+  let mark_body body =
     List.iter (fun n ->
       match get_some @@ PTree.get n code with
-      | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
-      | Ibuiltin (_, _, _, s) ->
-          if get_some @@ PTree.get s is_loop_header then mark_path s n
-      | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *)
-      | Ijumptable _ -> ()
-      | Itailcall _ | Ireturn _ -> ()
-    ) bfs_order;
-    !loop_info
-  end
+      | Icond (_, _, ifso, ifnot, _) -> begin
+          match PTree.get n !loop_info with
+          | None -> ()
+          | Some _ ->
+              let b1 = List.mem ifso body in
+              let b2 = List.mem ifnot body in
+              if (b1 && b2) then ()
+              else if (b1 || b2) then begin
+                if b1 then loop_info := PTree.set n (Some true) !loop_info
+                else if b2 then loop_info := PTree.set n (Some false) !loop_info
+              end
+        end
+      | _ -> ()
+    ) body
+  in let bodymap = get_loop_bodies code f.fn_entrypoint in
+  List.iter (fun (_,obody) ->
+    match obody with
+    | None -> ()
+    | Some body -> mark_body body
+    ) (PTree.elements bodymap);
+  !loop_info
 
-(* Remark - compared to the original paper, we don't use the store heuristic *)
-let get_directions code entrypoint = begin
+(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
+let get_directions f code entrypoint = begin
   debug "get_directions\n";
   let bfs_order = bfs code entrypoint in
   let is_loop_header = get_loop_headers code entrypoint in
-  let loop_info = get_loop_info is_loop_header bfs_order code in
+  let loop_info = get_loop_info f is_loop_header bfs_order code in
   let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
   begin
     (* ptree_printbool is_loop_header; *)
     (* debug "\n"; *)
     List.iter (fun n ->
       match (get_some @@ PTree.get n code) with
-      | Icond (cond, lr, ifso, ifnot, _) ->
-          (* debug "Analyzing %d.." (P.to_int n); *)
-          let heuristics = [ do_opcode_heuristic;
-            do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic;
-             (* do_store_heuristic *) ] in
-          let preferred = ref None in
-          begin
-            debug "Deciding condition for RTL node %d\n" (P.to_int n);
-            List.iter (fun do_heur ->
-              match !preferred with
-              | None -> preferred := do_heur code cond ifso ifnot is_loop_header
-              | Some _ -> ()
-            ) heuristics;
-            directions := PTree.set n !preferred !directions;
-            (match !preferred with | Some false -> debug "\tFALLTHROUGH\n"
-                                   | Some true -> debug "\tBRANCH\n"
-                                   | None -> debug "\tUNSURE\n");
-            debug "---------------------------------------\n"
-          end
+      | Icond (cond, lr, ifso, ifnot, pred) -> begin
+          if stats_oc_recording () || not @@ has_some pred then
+            (* debug "Analyzing %d.." (P.to_int n); *)
+            let heuristics = [ do_opcode_heuristic;
+              do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic;
+               (* do_store_heuristic *) ] in
+            let preferred = ref None in
+            let current_heuristic = ref 0 in
+            begin
+              debug "Deciding condition for RTL node %d\n" (P.to_int n);
+              List.iter (fun do_heur ->
+                match !preferred with
+                | None -> begin
+                  preferred := do_heur code cond ifso ifnot is_loop_header;
+                  if stats_oc_recording () then begin
+                    (* Getting stats about mispredictions from each heuristic *)
+                    (match !preferred, pred with
+                      | Some false, Some true
+                      | Some true, Some false
+                      (* | Some _, None  *) (* Uncomment for overpredicts *)
+                          -> begin
+                          match !current_heuristic with
+                          | 0 -> incr wrong_opcode
+                          | 1 -> incr wrong_return
+                          | 2 -> incr wrong_loop2
+                          | 3 -> incr wrong_loop
+                          | 4 -> incr wrong_call
+                          | _ -> failwith "Shouldn't happen"
+                          end
+                      | Some false, Some false
+                      | Some true, Some true -> begin
+                          match !current_heuristic with
+                          | 0 -> incr right_opcode
+                          | 1 -> incr right_return
+                          | 2 -> incr right_loop2
+                          | 3 -> incr right_loop
+                          | 4 -> incr right_call
+                          | _ -> failwith "Shouldn't happen"
+                          end
+                      | _ -> ()
+                    );
+                    incr current_heuristic
+                    end
+                  end 
+                | Some _ -> ()
+              ) heuristics;
+              directions := PTree.set n !preferred !directions;
+              (match !preferred with | Some false -> debug "\tFALLTHROUGH\n"
+                                     | Some true -> debug "\tBRANCH\n"
+                                     | None -> debug "\tUNSURE\n");
+              debug "---------------------------------------\n"
+            end
+        end
       | _ -> ()
     ) bfs_order;
     !directions
@@ -340,24 +473,45 @@ let get_directions code entrypoint = begin
 end
 
 let update_direction direction = function
-| Icond (cond, lr, n, n', _) -> Icond (cond, lr, n, n', direction)
-| i -> i
+| Icond (cond, lr, n, n', pred) -> begin
+    (* Counting stats from profiling *)
+    if stats_oc_recording () then begin
+      incr stats_nb_total;
+      match pred, direction with
+      | None, None -> incr stats_nb_correct_predicts
+      | None, Some _ -> incr stats_nb_overpredict
+      | Some _, None -> incr stats_nb_missed_opportunities
+      | Some false, Some false -> incr stats_nb_correct_predicts
+      | Some false, Some true -> incr stats_nb_mispredicts
+      | Some true, Some false -> incr stats_nb_mispredicts
+      | Some true, Some true -> incr stats_nb_correct_predicts
+    end;
 
-let rec update_direction_rec directions = function
-| [] -> PTree.empty
-| m::lm -> let (n, i) = m
-    in let direction = get_some @@ PTree.get n directions
-    in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
+    (* only update if there is no prior existing branch prediction *)
+    (match pred with
+    | None -> Icond (cond, lr, n, n', direction)
+    | Some _ -> begin
+        Icond (cond, lr, n, n', pred) 
+      end
+    )
+    end
+| i -> i
 
 (* Uses branch prediction to write prediction annotations in Icond *)
-let update_directions code entrypoint = begin
+let update_directions f code entrypoint = begin
   debug "Update_directions\n";
-  let directions = get_directions code entrypoint
-  in begin
+  let directions = get_directions f code entrypoint in
+  let code' = ref code in
+  begin
+    debug "Get Directions done, now proceeding to update all direction information..\n";
     (* debug "Ifso directions: ";
     ptree_printbool directions;
     debug "\n"; *)
-    update_direction_rec directions (PTree.elements code)
+    List.iter (fun (n, i) ->
+      let direction = get_some @@ PTree.get n directions in
+      code' := PTree.set n (update_direction direction i) !code'
+    ) (PTree.elements code);
+    !code'
   end
 end
 
@@ -428,18 +582,15 @@ let best_predecessor_of node predecessors code order is_visited =
         ) order)
       with Not_found -> None
 
-let print_trace t = print_intlist t
+let print_trace = print_intlist
 
-let print_traces traces =
-  let rec f = function
+let print_traces oc traces =
+  let rec f oc = function
   | [] -> ()
-  | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
+  | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt
   in begin
-    if !debug_flag then begin
-      Printf.printf "Traces: {";
-      f traces;
-      Printf.printf "}\n";
-    end
+    if !debug_flag then
+      Printf.fprintf oc "Traces: {%a}\n" f traces
   end
 
 (* Dumb (but linear) trace selection *)
@@ -514,7 +665,7 @@ let select_traces_chang code entrypoint = begin
       end
     done;
     (* debug "DFS: \t"; print_intlist order; debug "\n"; *)
-    debug "Traces: "; print_traces !traces;
+    debug "Traces: %a" print_traces !traces;
     !traces
   end
 end
@@ -530,26 +681,26 @@ let rec make_identity_ptree_rec = function
 
 let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
 
-(* Change the pointers of preds nodes to point to n' instead of n *)
+(* Change the pointers of nodes to point to n' instead of n *)
 let rec change_pointers code n n' = function
   | [] -> code
-  | pred :: preds ->
-      let new_pred_inst = match ptree_get_some pred code with
-        | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n')
-        | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
-        | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
-                               Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
-        | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n);
-                                 let n1' = if (n1 == n) then n' else n1
-                                 in let n2' = if (n2 == n) then n' else n2
+  | node :: nodes ->
+      let new_pred_inst = match ptree_get_some node code with
+        | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+        | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+        | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln);
+                               Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+        | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+                                 let n1' = if (n1 = n) then n' else n1
+                                 in let n2' = if (n2 = n) then n' else n2
                                  in Icond(a, b, n1', n2', i)
-        | Inop n0 -> assert (n0 == n); Inop n'
-        | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
-        | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
-        | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n')
+        | Inop n0 -> assert (n0 = n); Inop n'
+        | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+        | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+        | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
         | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
-      in let new_code = PTree.set pred new_pred_inst code
-      in change_pointers new_code n n' preds
+      in let new_code = PTree.set node new_pred_inst code
+      in change_pointers new_code n n' nodes
 
 (* parent: parent of n to keep as parent
  * preds: all the other parents of n
@@ -573,13 +724,21 @@ let is_empty = function
   | [] -> true
   | _ -> false
 
+let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1
+
+let is_a_nop code n =
+  match get_some @@ PTree.get n code with
+  | Inop _ -> true
+  | _ -> false
+
 (* code: RTL code
  * preds: mapping node -> predecessors
  * ptree: the revmap
  * trace: the trace to follow tail duplication on *)
-let tail_duplicate code preds ptree trace =
+let tail_duplicate code preds is_loop_header ptree trace =
+  debug "Tail_duplicate on that trace: %a\n" print_trace trace;
   (* next_int: unused integer that can be used for the next duplication *)
-  let next_int = ref (maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1)
+  let next_int = ref (next_free_pc code)
   (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
   in let last_node = ref None
   in let last_duplicate = ref None
@@ -592,7 +751,12 @@ let tail_duplicate code preds ptree trace =
           if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
           else
             let node_preds = ptree_get_some n preds
-            in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+            in let node_preds_nolast = 
+              (* We traverse loop headers without initiating tail duplication 
+               * (see case of two imbricated loops) *)
+              if (get_some @@ PTree.get n is_loop_header) then []
+              else List.filter (fun e -> e <> get_some !last_node) node_preds
+            (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
             in let final_node_preds = match !last_duplicate with
               | None -> node_preds_nolast
               | Some n' -> n' :: node_preds_nolast
@@ -601,7 +765,7 @@ let tail_duplicate code preds ptree trace =
               in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
               in begin
                 next_int := !next_int + 1;
-                nb_duplicated := !nb_duplicated + 1;
+                (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1);
                 last_duplicate := Some (P.of_int n');
                 (newc, newp)
               end
@@ -613,50 +777,363 @@ let tail_duplicate code preds ptree trace =
   in let new_code, new_ptree = f code ptree true trace
   in (new_code, new_ptree, !nb_duplicated)
 
-let superblockify_traces code preds traces =
-  let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
-  in let ptree = make_identity_ptree code
+let superblockify_traces code preds is_loop_header traces ptree =
+  let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
   in let rec f code ptree = function
     | [] -> (code, ptree, 0)
     | trace :: traces ->
-        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
         in if (nb_duplicated < max_nb_duplicated)
           then (debug "End duplication\n"; f new_code new_ptree traces)
           else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
   in let new_code, new_ptree, _ = f code ptree traces
   in (new_code, new_ptree)
 
-let rec invert_iconds_trace code = function
-  | [] -> code
+let invert_iconds code =
+  PTree.map1 (fun i -> match i with
+    | Icond (c, lr, ifso, ifnot, info) -> (match info with
+        | Some true -> begin
+            (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+            Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)
+          end
+        | _ -> i)
+    | _ -> i
+  ) code
+
+(** Partial loop unrolling
+ *
+ * The following code seeks innermost loops, and unfolds the first iteration
+ * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve
+ * cyclic dependencies between LICMaux and Duplicateaux
+ *)
+
+let print_inner_loop iloop =
+  debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+    print_intlist iloop.preds
+    print_intlist iloop.body
+    (P.to_int iloop.head)
+    print_intlist iloop.finals
+    print_option_pint iloop.sb_final
+
+let rec print_inner_loops = function
+| [] -> ()
+| iloop :: iloops -> begin
+    print_inner_loop iloop;
+    debug "\n";
+    print_inner_loops iloops
+  end
+
+let cb_exit_node = function
+  | Icond (_,_,n1,n2,p) -> begin match p with
+      | Some true -> Some n2
+      | Some false -> Some n1
+      | None -> None
+    end
+  | _ -> None
+
+      (*
+(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *)
+let get_natural_loop code predmap n =
+  let is_final_node m =
+    let successors = rtl_successors @@ get_some @@ PTree.get m code in
+    List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors
+  in 
+  let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in
+  let is_excluded m = match excluded_node with
+    | None -> false
+    | Some ex -> P.to_int ex == P.to_int m
+  in
+  debug "get_natural_loop for %d\n" (P.to_int n);
+  let body = bfs_until code n is_final_node is_excluded in
+  debug "BODY: %a\n" print_intlist body;
+  let final = List.find is_final_node body in
+  debug "FINAL: %d\n" (P.to_int final);
+  let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in
+  debug "PREDS: %a\n" print_intlist preds;
+  { preds = preds; body = body; head = n; final = final }
+
+let rec count_loop_headers is_loop_header = function
+  | [] -> 0
   | n :: ln ->
-      let code' = match ptree_get_some n code with
-        | Icond (c, lr, ifso, ifnot, info) -> (match info with
-            | Some true -> begin
-                (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
-                PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code
-              end
-            | _ -> code)
-        | _ -> code
-      in invert_iconds_trace code' ln
+      let rem = count_loop_headers is_loop_header ln in
+      if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem
 
-let rec invert_iconds code = function
-  | [] -> code
-  | t :: ts ->
-      let code' = if !Clflags.option_finvertcond then invert_iconds_trace code t
-                  else code
-      in invert_iconds code' ts
+let get_inner_loops f code is_loop_header =
+  let predmap = get_predecessors_rtl code in
+  let iloops = ref [] in
+  List.iter (fun (n, ilh) -> if ilh then begin
+    let iloop = get_natural_loop code predmap n in
+    let nb_headers = count_loop_headers is_loop_header iloop.body in
+    if nb_headers == 1 then (* innermost loop *)
+      iloops := iloop :: !iloops end
+  ) (PTree.elements is_loop_header);
+  !iloops
+  *)
 
-let duplicate_aux f =
-  let entrypoint = f.fn_entrypoint in
-  if !Clflags.option_fduplicate < 0 then
-    ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+let rec generate_fwmap ln ln' fwmap =
+  match ln with
+  | [] -> begin
+      match ln' with
+      | [] -> fwmap
+      | _ -> failwith "ln and ln' have different lengths"
+    end
+  | n :: ln -> begin
+      match ln' with
+      | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap)
+      | _ -> failwith "ln and ln' have different lengths"
+    end
+
+let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
+
+let apply_map fw n = P.of_int @@ ptree_get_some n fw
+
+let apply_map_list fw ln = List.map (apply_map fw) ln
+
+let apply_map_opt fw n =
+  match PTree.get n fw with
+  | Some n' -> P.of_int n'
+  | None -> n
+
+let change_nexts fwmap = function
+  | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n)
+  | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n)
+  | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln)
+  | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i)
+  | Inop n -> Inop (apply_map fwmap n)
+  | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n)
+  | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n)
+  | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n)
+  | Itailcall (a, b, c) -> Itailcall (a, b, c)
+  | Ireturn o -> Ireturn o
+
+(** Clone a list of instructions into free pc indexes
+ *
+ * The list of instructions should be contiguous, and not include any loop.
+ * It is assumed that the first instruction of the list is the head.
+ * Also, the last instruction of the list should be the loop backedge.
+ *
+ * Returns: (code', revmap', ln', fwmap)
+ *  code' is the updated code, after cloning
+ *  revmap' is the updated revmap
+ *  ln' is the list of the new indexes used to reference the cloned instructions
+ *  fwmap is a map from ln to ln'
+ *)
+let clone code revmap ln = begin
+  assert (List.length ln > 0);
+  let head' = next_free_pc code in
+  (* +head' to ensure we never overlap with the existing code *)
+  let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in
+  let fwmap = generate_fwmap ln ln' PTree.empty in
+  let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in
+  let code' = ref code in
+  List.iter (fun n ->
+    let instr = get_some @@ PTree.get n code in
+    let instr' = change_nexts fwmap instr in
+    code' := PTree.set (apply_map fwmap n) instr' !code'
+  ) ln;
+  (!code', revmap', ln', fwmap)
+end
+
+let rec count_ignore_nops code = function
+  | [] -> 0
+  | n::ln ->
+      let inst = get_some @@ PTree.get n code in
+      match inst with
+      | Inop _ -> count_ignore_nops code ln
+      | _ -> 1 + count_ignore_nops code ln
+
+(* Unrolls a single interation of the inner loop
+ * 1) Clones the body into body'
+ * 2) Links the preds to the first instruction of body'
+ * 3) Links the last instruction of body' into the first instruction of body
+ *)
+let unroll_inner_loop_single code revmap iloop =
+  let body = iloop.body in
+  if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
+    debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
+    (code, revmap)
+  end else
+    let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+    let code' = ref code2 in
+    let head' = apply_map fwmap (iloop.head) in
+    let finals' = apply_map_list fwmap (iloop.finals) in
+    begin
+      debug "PREDS: %a\n" print_intlist iloop.preds;
+      debug "IHEAD: %d\n" (P.to_int iloop.head);
+      code' := change_pointers !code' (iloop.head) head' (iloop.preds);
+      code' := change_pointers !code' head' (iloop.head) finals';
+      (!code', revmap2)
+    end
+
+let unroll_inner_loops_single f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let is_some o = match o with Some _ -> true | None -> false
+
+let rec go_through_predicted code start final =
+  if start == final then
+    Some [start]
   else
-    let code = update_directions (f.fn_code) entrypoint in
-    let traces = select_traces code entrypoint in
-    let icond_code = invert_iconds code traces in
-    let preds = get_predecessors_rtl icond_code in
-    if !Clflags.option_fduplicate >= 1 then
-      let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
-      ((new_code, f.fn_entrypoint), pTreeId)
-    else
-      ((icond_code, entrypoint), make_identity_ptree code)
+    match rtl_successors_pref @@ get_some @@ PTree.get start code with
+    | [n] -> (
+        match go_through_predicted code n final with
+        | Some ln -> Some (start :: ln)
+        | None -> None
+      )
+    | _ -> None
+
+(* Unrolls the body of the inner loop once - duplicating the exit condition as well 
+ * 1) Clones body into body'
+ * 2) Links the last instruction of body (sb_final) into the first of body' 
+ * 3) Links the last instruction of body' into the first of body
+ *)
+let unroll_inner_loop_body code revmap iloop =
+  debug "iloop = "; print_inner_loop iloop;
+  let body = iloop.body in
+  let limit = !Clflags.option_funrollbody in
+  if count_ignore_nops code body > limit then begin
+    debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit;
+    (code, revmap)
+  end else if not @@ is_some iloop.sb_final then begin
+    debug "The loop body does not form a superblock OR we have predicted that we do not loop\n";
+    (code, revmap)
+  end else
+    let sb_final = get_some @@ iloop.sb_final in
+    let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in
+    let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in
+    let code' = ref code2 in
+    let head' = apply_map fwmap (iloop.head) in
+    let sb_final' = apply_map fwmap sb_final in
+    begin
+      code' := change_pointers !code' iloop.head head' [sb_final];
+      code' := change_pointers !code' head' iloop.head [sb_final']; 
+      (!code', revmap2)
+    end
+
+let unroll_inner_loops_body f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  debug "Number of loops found: %d\n" (List.length inner_loops);
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let extract_upto_icond f code head =
+  let rec extract h =
+    let inst = get_some @@ PTree.get h code in
+    match inst with
+    | Icond _ -> [h]
+    | _ -> ( match rtl_successors inst with
+        | [n] -> h :: (extract n)
+        | _ -> failwith "Found a node with more than one successor??"
+      )
+  in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+  let header = extract_upto_icond f code iloop.head in
+  let limit = !Clflags.option_flooprotate in
+  if count_ignore_nops code header > limit then begin
+    debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+    (code, revmap)
+  end else
+    let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+    let code' = ref code2 in
+    let head' = apply_map fwmap iloop.head in
+    begin
+      code' := change_pointers !code' iloop.head head' iloop.preds;
+      (!code', revmap2)
+    end
+
+let rotate_inner_loops f code revmap =
+  let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+  let inner_loops = get_inner_loops f code is_loop_header in
+  let code' = ref code in
+  let revmap' = ref revmap in
+  begin
+    print_inner_loops inner_loops;
+    List.iter (fun iloop ->
+      let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+      code' := new_code; revmap' := new_revmap
+    ) inner_loops;
+    (!code', !revmap')
+  end
+
+let loop_rotate f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_flooprotate > 0 then
+      rotate_inner_loops f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let static_predict f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  begin
+    reset_stats ();
+    set_stats_oc ();
+    let code =
+      if !Clflags.option_fpredict then
+        update_directions f code entrypoint
+      else code in
+    write_stats_oc ();
+    let code =
+      if !Clflags.option_fpredict then
+        invert_iconds code
+      else code in
+    ((code, entrypoint), revmap)
+  end
+
+let unroll_single f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_funrollsingle > 0 then
+      unroll_inner_loops_single f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let unroll_body f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_funrollbody > 0 then
+      unroll_inner_loops_body f code revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
+
+let tail_duplicate f =
+  let entrypoint = f.fn_entrypoint in
+  let code = f.fn_code in
+  let revmap = make_identity_ptree code in
+  let (code, revmap) =
+    if !Clflags.option_ftailduplicate > 0 then
+      let traces = select_traces code entrypoint in
+      let preds = get_predecessors_rtl code in
+      let is_loop_header = get_loop_headers code entrypoint in
+      superblockify_traces code preds is_loop_header traces revmap
+    else (code, revmap) in
+  ((code, entrypoint), revmap)
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
new file mode 100644
index 00000000..7e58eedf
--- /dev/null
+++ b/backend/Duplicatepasses.v
@@ -0,0 +1,58 @@
+Require Import RTL.
+Require Import Maps.
+Require Import Duplicate.
+Require Import Duplicateproof.
+
+(** Static Prediction *)
+
+Module StaticPredictOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.static_predict".
+End StaticPredictOracle.
+
+Module Staticpredictproof := DuplicateProof StaticPredictOracle.
+
+Module Staticpredict := Staticpredictproof.
+
+(** Unrolling one iteration out of the body *)
+
+Module UnrollSingleOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.unroll_single".
+End UnrollSingleOracle.
+
+Module Unrollsingleproof := DuplicateProof UnrollSingleOracle.
+
+Module Unrollsingle := Unrollsingleproof.
+
+(** Unrolling the body of innermost loops *)
+
+Module UnrollBodyOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux => "Duplicateaux.unroll_body".
+End UnrollBodyOracle.
+
+Module Unrollbodyproof := DuplicateProof UnrollBodyOracle.
+
+Module Unrollbody := Unrollbodyproof.
+
+(** Tail Duplication *)
+
+Module TailDuplicateOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux =>  "Duplicateaux.tail_duplicate".
+End TailDuplicateOracle.
+
+Module Tailduplicateproof := DuplicateProof TailDuplicateOracle.
+
+Module Tailduplicate := Tailduplicateproof.
+
+(** Loop Rotate *)
+
+Module LoopRotateOracle <: DuplicateOracle.
+  Axiom duplicate_aux : function -> code * node * (PTree.t node).
+  Extract Constant duplicate_aux =>  "Duplicateaux.loop_rotate".
+End LoopRotateOracle.
+
+Module Looprotateproof := DuplicateProof LoopRotateOracle.
+Module Looprotate := Looprotateproof.
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index 62455076..2f3bad2f 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -17,6 +17,9 @@ Require Import AST Linking Errors Globalenvs Smallstep.
 Require Import Coqlib Maps Events Values.
 Require Import Op RTL Duplicate.
 
+Module DuplicateProof (D: DuplicateOracle).
+Include Duplicate D.
+
 Local Open Scope positive_scope.
 
 (** * Definition of [match_states] (independently of the translation) *)
@@ -108,7 +111,7 @@ Proof.
   - monadInv H0. inversion H.
   - inversion H.
     + subst. monadInv H0. destruct x. assumption.
-    + monadInv H0. destruct x0. eapply IHlb; assumption.
+    + monadInv H0. destruct x. eapply IHlb; assumption.
 Qed.
 
 Lemma verify_is_copy_correct:
@@ -141,8 +144,8 @@ Proof.
   intros. unfold verify_match_inst in H.
   destruct i; try (inversion H; fail).
 (* Inop *)
-  - destruct i'; try (inversion H; fail). monadInv H.
-    destruct x. eapply verify_is_copy_correct in EQ.
+  - destruct i'; try (inversion H; fail).
+    eapply verify_is_copy_correct in H.
     constructor; eauto.
 (* Iop *)
   - destruct i'; try (inversion H; fail). monadInv H.
@@ -194,12 +197,12 @@ Proof.
     destruct (list_eq_dec _ _ _); try discriminate. subst.
     destruct (eq_condition _ _); try discriminate.
     + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
-      destruct x0. eapply verify_is_copy_correct in EQ1.
-      constructor; assumption.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
     + destruct (eq_condition _ _); try discriminate.
       monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
-      destruct x0. eapply verify_is_copy_correct in EQ1.
-      constructor; assumption.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
 (* Ijumptable *)
   - destruct i'; try (inversion H; fail). monadInv H.
     destruct x. eapply verify_is_copy_list_correct in EQ.
@@ -254,7 +257,7 @@ Proof.
   exists mp; constructor 1; simpl; auto.
   + (* correct *) 
   intros until n'. intros REVM i FNC.
-  unfold verify_mapping_match_nodes in EQ. simpl in EQ. destruct x1.
+  unfold verify_mapping_match_nodes in EQ1. simpl in EQ1. destruct x.
   eapply verify_mapping_mn_rec_correct; eauto.
   simpl; eauto.
   + (* entrypoint *)
@@ -535,3 +538,5 @@ Proof.
 Qed.
 
 End PRESERVATION.
+
+End DuplicateProof.
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 785b0a2d..29d224c8 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -102,7 +102,7 @@ after IRC elimination, when assigning a stack slot to a spilled variable. *)
 
 let name_of_loc = function
   | R r ->
-      begin match Machregsaux.name_of_register r with
+      begin match Machregsnames.name_of_register r with
                 | None -> "fixed-reg"
                 | Some s -> s
       end
@@ -247,12 +247,10 @@ let class_of_loc = function
 
 let no_spill_class = 2
 
-let reserved_registers = ref ([]: mreg list)
-
 let rec remove_reserved = function
   | [] -> []
   | hd :: tl ->
-      if List.mem hd !reserved_registers
+      if List.mem hd !CPragmas.reserved_registers
       then remove_reserved tl
       else hd :: remove_reserved tl
 
diff --git a/backend/IRC.mli b/backend/IRC.mli
index f7bbf9c5..254f27ff 100644
--- a/backend/IRC.mli
+++ b/backend/IRC.mli
@@ -13,7 +13,6 @@
 (* Iterated Register Coalescing: George and Appel's graph coloring algorithm *)
 
 open Registers
-open Machregs
 open Locations
 open XTL
 
@@ -39,8 +38,5 @@ val add_pref: graph -> var -> var -> unit
 (* Color the graph.  Return an assignment of locations to variables. *)
 val coloring: graph -> (var -> loc)
 
-(* Machine registers that are reserved and not available for allocation. *)
-val reserved_registers: mreg list ref
-
 (* Auxiliaries to deal with register classes *)
 val class_of_loc: loc -> int
diff --git a/backend/KillUselessMoves.v b/backend/KillUselessMoves.v
new file mode 100644
index 00000000..bdd7ec60
--- /dev/null
+++ b/backend/KillUselessMoves.v
@@ -0,0 +1,40 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL.
+Require List.
+
+Definition transf_ros (ros: reg + ident) : reg + ident := ros.
+
+Definition transf_instr (pc: node) (instr: instruction) :=
+  match instr with
+  | Iop op args res s =>
+    if (eq_operation op Omove) && (List.list_eq_dec peq args (res :: nil))
+    then Inop s
+    else instr
+  | _ => instr
+  end.
+
+Definition transf_function (f: function) : function :=
+  {| fn_sig := f.(fn_sig);
+     fn_params := f.(fn_params);
+     fn_stacksize := f.(fn_stacksize);
+     fn_code := PTree.map transf_instr f.(fn_code);
+     fn_entrypoint := f.(fn_entrypoint) |}.
+
+Definition transf_fundef (fd: fundef) : fundef :=
+  AST.transf_fundef transf_function fd.
+
+Definition transf_program (p: program) : program :=
+  transform_program transf_fundef p.
diff --git a/backend/KillUselessMovesproof.v b/backend/KillUselessMovesproof.v
new file mode 100644
index 00000000..629aa6aa
--- /dev/null
+++ b/backend/KillUselessMovesproof.v
@@ -0,0 +1,361 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           David Monniaux     CNRS, VERIMAG                  *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+Require Import Axioms.
+Require Import FunInd.
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import KillUselessMoves.
+
+
+Definition match_prog (p tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma functions_translated:
+  forall v f,
+  Genv.find_funct ge v = Some f ->
+  Genv.find_funct tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_transf TRANSL).
+
+Lemma function_ptr_translated:
+  forall v f,
+  Genv.find_funct_ptr ge v = Some f ->
+  Genv.find_funct_ptr tge v = Some (transf_fundef f).
+Proof (Genv.find_funct_ptr_transf TRANSL).
+
+Lemma symbols_preserved:
+  forall id,
+  Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_transf TRANSL).
+
+Lemma senv_preserved:
+  Senv.equiv ge tge.
+Proof (Genv.senv_transf TRANSL).
+
+Lemma sig_preserved:
+  forall f, funsig (transf_fundef f) = funsig f.
+Proof.
+  destruct f; reflexivity.
+Qed.
+
+Lemma find_function_translated:
+  forall ros rs fd,
+  find_function ge ros rs = Some fd ->
+  find_function tge ros rs = Some (transf_fundef fd).
+Proof.
+  unfold find_function; intros. destruct ros as [r|id].
+  eapply functions_translated; eauto.
+  rewrite symbols_preserved. destruct (Genv.find_symbol ge id); try congruence.
+  eapply function_ptr_translated; eauto.
+Qed.
+
+Lemma transf_function_at:
+  forall f pc i,
+  f.(fn_code)!pc = Some i ->
+  (transf_function f).(fn_code)!pc = Some(transf_instr pc i).
+Proof.
+  intros until i. intro Hcode.
+  unfold transf_function; simpl.
+  rewrite PTree.gmap.
+  unfold option_map.
+  rewrite Hcode.
+  reflexivity.
+Qed.
+
+Ltac TR_AT :=
+  match goal with
+  | [ A: (fn_code _)!_ = Some _ |- _ ] =>
+        generalize (transf_function_at _ _ _ A); intros
+  end.
+
+Section SAME_RS.
+  Context {A : Type}.
+  
+  Definition same_rs (rs rs' : Regmap.t A) :=
+    forall x, rs # x = rs' # x.
+
+  Lemma same_rs_refl : forall rs, same_rs rs rs.
+  Proof.
+    unfold same_rs.
+    reflexivity.
+  Qed.
+
+  Lemma same_rs_comm : forall rs rs', (same_rs rs rs') -> (same_rs rs' rs).
+  Proof.
+    unfold same_rs.
+    congruence.
+  Qed.
+
+  Lemma same_rs_trans : forall rs1 rs2 rs3,
+      (same_rs rs1 rs2) -> (same_rs rs2 rs3) -> (same_rs rs1 rs3).
+  Proof.
+    unfold same_rs.
+    congruence.
+  Qed.
+
+  Lemma same_rs_idem_write : forall rs r,
+      (same_rs rs (rs # r <- (rs # r))).
+  Proof.
+    unfold same_rs.
+    intros.
+    rewrite Regmap.gsident.
+    reflexivity.
+  Qed.
+
+  Lemma same_rs_read:
+    forall rs rs' r, (same_rs rs rs') -> rs # r = rs' # r.
+  Proof.
+    unfold same_rs.
+    auto.
+  Qed.
+  
+  Lemma same_rs_subst:
+    forall rs rs' l, (same_rs rs rs') -> rs ## l = rs' ## l.
+  Proof.
+    induction l; cbn; intuition congruence.
+  Qed.
+
+  Lemma same_rs_write: forall rs rs' r x,
+      (same_rs rs rs') -> (same_rs (rs # r <- x) (rs' # r <- x)).
+  Proof.
+    unfold same_rs.
+    intros.
+    destruct (peq r x0).
+    { subst x0.
+      rewrite Regmap.gss. rewrite Regmap.gss.
+      reflexivity.
+    }
+    rewrite Regmap.gso by congruence.
+    rewrite Regmap.gso by congruence.
+    auto.
+  Qed.
+
+  Lemma same_rs_setres:
+    forall rs rs' (SAME: same_rs rs rs') res vres,
+      same_rs (regmap_setres res vres rs) (regmap_setres res vres rs').
+  Proof.
+    induction res; cbn; auto using same_rs_write.
+  Qed.
+End SAME_RS.
+
+Lemma same_find_function: forall tge rs rs' (SAME: same_rs rs rs') ros,
+  find_function tge ros rs = find_function tge ros rs'.
+Proof.
+  destruct ros; cbn.
+  { rewrite (same_rs_read rs rs' r SAME).
+    reflexivity. }
+  reflexivity.
+Qed.
+
+Inductive match_frames: RTL.stackframe -> RTL.stackframe -> Prop :=
+| match_frames_intro: forall res f sp pc rs rs' (SAME : same_rs rs rs'),
+    match_frames (Stackframe res f sp pc rs)
+                 (Stackframe res (transf_function f) sp pc rs').
+
+Inductive match_states: RTL.state -> RTL.state -> Prop :=
+  | match_regular_states: forall stk f sp pc rs rs' m stk'
+        (SAME: same_rs rs rs')
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (State stk f sp pc rs m)
+                   (State stk' (transf_function f) sp pc rs' m)
+  | match_callstates: forall stk f args m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Callstate stk f args m)
+                   (Callstate stk' (transf_fundef f) args m)
+  | match_returnstates: forall stk v m stk'
+        (STACKS: list_forall2 match_frames stk stk'),
+      match_states (Returnstate stk v m)
+                   (Returnstate stk' v m).
+
+Lemma step_simulation:
+  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1', match_states S1 S1' ->
+  exists S2', RTL.step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+  induction 1; intros S1' MS; inv MS; try TR_AT.
+- (* nop *)
+  econstructor; split. eapply exec_Inop; eauto.
+  constructor; auto.
+- (* op *)
+  cbn in H1.
+  destruct (_ && _) eqn:IS_MOVE in H1.
+  {
+    destruct eq_operation in IS_MOVE. 2: discriminate.
+    destruct list_eq_dec in IS_MOVE. 2: discriminate.
+    subst op. subst args.
+    clear IS_MOVE.
+    cbn in H0.
+    inv H0.
+    econstructor; split.
+    { eapply exec_Inop; eauto. }
+    constructor.
+    2: assumption.
+    eapply same_rs_trans.
+    { apply same_rs_comm.
+      apply same_rs_idem_write.
+    }
+    assumption.
+  }
+  econstructor; split.
+  eapply exec_Iop with (v := v); eauto.
+  rewrite <- H0.
+  rewrite (same_rs_subst rs rs' args SAME).
+  apply eval_operation_preserved. exact symbols_preserved.
+  constructor; auto using same_rs_write.
+(* load *)
+- econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload; eauto.
+  constructor; auto using same_rs_write.
+- (* load notrap1 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = None).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload_notrap1; eauto.
+  constructor; auto using same_rs_write.
+- (* load notrap2 *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  eapply exec_Iload_notrap2; eauto.
+  constructor; auto using same_rs_write. 
+- (* store *)
+  econstructor; split.
+  assert (eval_addressing tge sp addr rs' ## args = Some a).
+  { rewrite <- H0.
+    rewrite (same_rs_subst rs rs' args SAME).
+    apply eval_addressing_preserved. exact symbols_preserved.
+  }
+  rewrite (same_rs_read rs rs' src SAME) in H1.
+  eapply exec_Istore; eauto.
+  constructor; auto.
+(* call *)
+- econstructor; split.
+  eapply exec_Icall with (fd := transf_fundef fd); eauto.
+  eapply find_function_translated; eauto.
+  { rewrite <- (same_find_function ge rs rs') by assumption.
+    assumption. }
+  apply sig_preserved.
+  rewrite (same_rs_subst rs rs' args SAME).  
+  constructor. constructor; auto. constructor; auto.
+(* tailcall *)
+- econstructor; split.
+  eapply exec_Itailcall with (fd := transf_fundef fd); eauto.
+    eapply find_function_translated; eauto.
+  { rewrite <- (same_find_function ge rs rs') by assumption.
+    assumption. }
+    apply sig_preserved.
+  rewrite (same_rs_subst rs rs' args SAME).  
+  constructor. auto.
+(* builtin *)
+- econstructor; split.
+  eapply exec_Ibuiltin; eauto.
+  eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+  {
+    replace (fun r : positive => rs' # r) with (fun r : positive => rs # r).
+    eassumption.
+    apply functional_extensionality.
+    auto using same_rs_read.
+  }
+  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+  auto using same_rs_setres.
+(* cond *)
+- econstructor; split.
+  eapply exec_Icond; eauto.
+  rewrite <- (same_rs_subst rs rs' args SAME); eassumption.
+  constructor; auto.
+(* jumptbl *)
+- econstructor; split.
+  eapply exec_Ijumptable; eauto.
+  rewrite <- (same_rs_read rs rs' arg SAME); eassumption.
+  constructor; auto.
+(* return *)
+- econstructor; split.
+  eapply exec_Ireturn; eauto.
+  destruct or; cbn.
+  + rewrite <- (same_rs_read rs rs' r SAME) by auto.
+    constructor; auto.
+  + constructor; auto.
+(* internal function *)
+-  simpl. econstructor; split.
+  eapply exec_function_internal; eauto.
+  constructor; auto.
+  cbn.
+  apply same_rs_refl.
+(* external function *)
+- econstructor; split.
+  eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  constructor; auto.
+(* return *)
+- inv STACKS. inv H1.
+  econstructor; split.
+  eapply exec_return; eauto.
+  constructor; auto using same_rs_write.
+Qed.
+
+Lemma transf_initial_states:
+  forall S1, RTL.initial_state prog S1 ->
+  exists S2, RTL.initial_state tprog S2 /\ match_states S1 S2.
+Proof.
+  intros. inv H. econstructor; split.
+  econstructor.
+    eapply (Genv.init_mem_transf TRANSL); eauto.
+    rewrite symbols_preserved. rewrite (match_program_main TRANSL). eauto.
+    eapply function_ptr_translated; eauto.
+    rewrite <- H3; apply sig_preserved.
+  constructor. constructor.
+Qed.
+
+Lemma transf_final_states:
+  forall S1 S2 r, match_states S1 S2 -> RTL.final_state S1 r -> RTL.final_state S2 r.
+Proof.
+  intros. inv H0. inv H. inv STACKS. constructor.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (RTL.semantics prog) (RTL.semantics tprog).
+Proof.
+  eapply forward_simulation_step.
+  apply senv_preserved.
+  eexact transf_initial_states.
+  eexact transf_final_states.
+  exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
index c3907809..b88dbc2d 100644
--- a/backend/LICMaux.ml
+++ b/backend/LICMaux.ml
@@ -16,9 +16,79 @@ open Maps;;
 open Kildall;;
 open HashedSet;;
 open Inject;;
+open DebugPrint;;
 
 type reg = P.t;;
 
+(** get_loop_headers moved from Duplicateaux.ml to LICMaux.ml to prevent cycle dependencies *)
+type vstate = Unvisited | Processed | Visited
+
+let get_some = function
+| None -> failwith "Did not get some"
+| Some thing -> thing
+
+let rtl_successors = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,_) -> [n1; n2]
+| Ijumptable (_,ln) -> ln
+
+(** Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come accross an edge to a Processed node, it's a loop!
+  *)
+let get_loop_backedges code entrypoint = begin
+  debug "get_loop_backedges\n";
+  let visited = ref (PTree.map (fun n i -> Unvisited) code)
+  and loop_backedge = ref (PTree.map (fun n i -> None) code)
+  in let rec dfs_visit code origin = function
+  | [] -> ()
+  | node :: ln ->
+      debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln;
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> begin
+          debug "\tNode %d is already Visited, skipping\n" (P.to_int node);
+          dfs_visit code origin ln
+        end
+      | Processed -> begin
+          debug "Node %d is a loop header\n" (P.to_int node);
+          debug "The backedge is from %d\n" (P.to_int @@ get_some origin);
+          loop_backedge := PTree.set node origin !loop_backedge;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code origin ln
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          debug "Node %d is Processed\n" (P.to_int node);
+          (match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some i -> let next_visits = rtl_successors i in begin
+              debug "About to visit: %a\n" print_intlist next_visits;
+              dfs_visit code (Some node) next_visits
+            end);
+          debug "Node %d is Visited!\n" (P.to_int node);
+          visited := PTree.set node Visited !visited;
+          dfs_visit code origin ln
+        end
+  in begin
+    dfs_visit code None [entrypoint];
+    debug "LOOP BACKEDGES: %a\n" print_ptree_opint !loop_backedge;
+    !loop_backedge
+  end
+end
+
+let get_loop_headers code entrypoint =
+  let backedges = get_loop_backedges code entrypoint in
+  PTree.map (fun _ ob ->
+    match ob with
+    | None -> false
+    | Some _ -> true
+  ) backedges
+
 module Dominator =
   struct
     type t = Unreachable | Dominated of int | Multiple
@@ -57,7 +127,7 @@ let apply_dominator (is_marked : node -> bool) (pc : node)
 
 let dominated_parts1 (f : coq_function) :
       (bool PTree.t) * (Dominator.t PMap.t option) =
-  let headers = Duplicateaux.get_loop_headers f.fn_code f.fn_entrypoint in
+  let headers = get_loop_headers f.fn_code f.fn_entrypoint in
   let dominated = Dominator_Solver.fixpoint f.fn_code RTL.successors_instr
     (apply_dominator (fun pc -> match PTree.get pc headers with
                                 | Some x -> x
@@ -152,7 +222,8 @@ let rewrite_loop_body (last_alloc : reg ref)
                                (List.map (map_reg mapper) args),
                                new_res));
                 PTree.set res new_res mapper
-             | Iload(trap, chunk, addr, args, res, pc')
+             | Iload(_, chunk, addr, args, v, pc')
+             | Istore(chunk, addr, args, v, pc')
                   when Archi.has_notrap_loads &&
                        !Clflags.option_fnontrap_loads ->
                 let new_res = P.succ !last_alloc in
@@ -160,7 +231,7 @@ let rewrite_loop_body (last_alloc : reg ref)
                 add_inj (INJload(chunk, addr,
                                  (List.map (map_reg mapper) args),
                                  new_res));
-                PTree.set res new_res mapper
+                PTree.set v new_res mapper
              | _ -> mapper in
            List.iter (fun x ->
                if PSet.contains loop_body x
@@ -248,7 +319,7 @@ let print_dominated_parts1 oc f =
        (PTree.elements f.fn_code);;
   
 let loop_headers (f : coq_function) : RTL.node list =
-  List.map fst (List.filter snd (PTree.elements (Duplicateaux.get_loop_headers f.fn_code f.fn_entrypoint)));;
+  List.map fst (List.filter snd (PTree.elements (get_loop_headers f.fn_code f.fn_entrypoint)));;
 
 let print_loop_headers f =
   print_endline "Loop headers";
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index 3f1a8b6e..5914f6a3 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -126,400 +126,64 @@ let enumerate_aux_flat f reach =
  * This is a slight alteration to the above heuristic, ensuring that any
  * superblock will be contiguous in memory, while still following the original
  * heuristic
+ *
+ * Slight change: instead of taking the minimum pc of the superblock, we just take
+ * the pc of the first block.
+ * (experimentally this leads to slightly better performance..)
  *)
 
-let get_some = function
-| None -> failwith "Did not get some"
-| Some thing -> thing
-
-exception EmptyList
-
-let rec last_element = function
-  | [] -> raise EmptyList
-  | e :: [] -> e
-  | e' :: e :: l -> last_element (e::l)
-
-let print_plist l =
-  let rec f = function
-  | [] -> ()
-  | n :: l -> Printf.printf "%d, " (P.to_int n); f l
-  in begin
-    if !debug_flag then begin
-      Printf.printf "[";
-      f l;
-      Printf.printf "]"
-    end
-  end
-
-(* adapted from the above join_points function, but with PTree *)
-let get_join_points code entry =
-  let reached = ref (PTree.map (fun n i -> false) code) in
-  let reached_twice = ref (PTree.map (fun n i -> false) code) in
-  let rec traverse pc =
-    if get_some @@ PTree.get pc !reached then begin
-      if not (get_some @@ PTree.get pc !reached_twice) then
-        reached_twice := PTree.set pc true !reached_twice
-    end else begin
-      reached := PTree.set pc true !reached;
-      traverse_succs (successors_block @@ get_some @@ PTree.get pc code)
-    end
-  and traverse_succs = function
-    | [] -> ()
-    | [pc] -> traverse pc
-    | pc :: l -> traverse pc; traverse_succs l
-  in traverse entry; !reached_twice
-
-let forward_sequences code entry =
-  let visited = ref (PTree.map (fun n i -> false) code) in
-  let join_points = get_join_points code entry in
-  (* returns the list of traversed nodes, and a list of nodes to start traversing next *)
-  let rec traverse_fallthrough code node =
-    (* debug "Traversing %d..\n" (P.to_int node); *)
-    if not (get_some @@ PTree.get node !visited) then begin
-      visited := PTree.set node true !visited;
-      match PTree.get node code with
-      | None -> failwith "No such node"
-      | Some bb ->
-          let ln, rem = match (last_element bb) with
-          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
-          | Lbuiltin _ -> assert false
-          | Ltailcall _ | Lreturn -> begin (* debug "STOP tailcall/return\n"; *) ([], []) end
-          | Lbranch n ->
-              if get_some @@ PTree.get n join_points then ([], [n])
-              else let ln, rem = traverse_fallthrough code n in (ln, rem)
-          | Lcond (_, _, ifso, ifnot, info) -> (match info with
-            | None -> begin (* debug "STOP Lcond None\n"; *) ([], [ifso; ifnot]) end
-            | Some false ->
-                if get_some @@ PTree.get ifnot join_points then ([], [ifso; ifnot])
-                else let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem)
-            | Some true ->
-                if get_some @@ PTree.get ifso join_points then ([], [ifso; ifnot])
-                else let ln, rem = traverse_fallthrough code ifso in (ln, [ifnot] @ rem)
-            )
-          | Ljumptable(_, ln) -> begin (* debug "STOP Ljumptable\n"; *) ([], ln) end
-          in ([node] @ ln, rem)
-      end
-    else ([], [])
-  in let rec f code = function
-  | [] -> []
-  | node :: ln ->
-      let fs, rem_from_node = traverse_fallthrough code node
-      in [fs] @ ((f code rem_from_node) @ (f code ln))
-  in (f code [entry])
-
-(** Unused code
-module PInt = struct
-  type t = P.t
-  let compare x y = compare (P.to_int x) (P.to_int y)
-end
-
-module PSet = Set.Make(PInt)
-
-module LPInt = struct
-  type t = P.t list
-  let rec compare x y =
-    match x with
-    | [] -> ( match y with
-      | [] -> 0
-      | _ -> 1 )
-    | e :: l -> match y with
-      | [] -> -1
-      | e' :: l' ->
-          let e_cmp = PInt.compare e e' in
-          if e_cmp == 0 then compare l l' else e_cmp
-end
-
-module LPSet = Set.Make(LPInt)
-
-let iter_lpset f s = Seq.iter f (LPSet.to_seq s)
-
-let first_of = function
-  | [] -> None
-  | e :: l -> Some e
-
-let rec last_of = function
-  | [] -> None
-  | e :: l -> (match l with [] -> Some e | e :: l -> last_of l)
-
-let can_be_merged code s s' =
-  let last_s = get_some @@ last_of s in
-  let first_s' = get_some @@ first_of s' in
-  match get_some @@ PTree.get last_s code with
-  | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
-  | Lbuiltin _ | Ltailcall _ | Lreturn -> false
-  | Lbranch n -> n == first_s'
-  | Lcond (_, _, ifso, ifnot, info) -> (match info with
-    | None -> false
-    | Some false -> ifnot == first_s'
-    | Some true -> failwith "Inconsistency detected - ifnot is not the preferred branch")
-  | Ljumptable (_, ln) ->
-      match ln with
-      | [] -> false
-      | n :: ln -> n == first_s'
-
-let merge s s' = Some s
-
-let try_merge code (fs: (BinNums.positive list) list) =
-  let seqs = ref (LPSet.of_list fs) in
-  let oldLength = ref (LPSet.cardinal !seqs) in
-  let continue = ref true in
-  let found = ref false in
-  while !continue do
-    begin
-      found := false;
-      iter_lpset (fun s ->
-        if !found then ()
-        else iter_lpset (fun s' ->
-          if (!found || s == s') then ()
-          else if (can_be_merged code s s') then
-            begin
-              seqs := LPSet.remove s !seqs;
-              seqs := LPSet.remove s' !seqs;
-              seqs := LPSet.add (get_some (merge s s')) !seqs;
-              found := true;
-            end
-          else ()
-        ) !seqs
-      ) !seqs;
-      if !oldLength == LPSet.cardinal !seqs then
-        continue := false
-      else
-        oldLength := LPSet.cardinal !seqs
-    end
-  done;
-  !seqs
-*)
-
-(** Code adapted from Duplicateaux.get_loop_headers
-  *
-  * Getting loop branches with a DFS visit :
-  * Each node is either Unvisited, Visited, or Processed
-  * pre-order: node becomes Processed
-  * post-order: node becomes Visited
-  *
-  * If we come accross an edge to a Processed node, it's a loop!
-  *)
-type pos = BinNums.positive
-
-module PP = struct
-  type t = pos * pos
-  let compare a b =
-    let ax, ay = a in
-    let bx, by = b in
-    let dx = compare ax bx in
-    if (dx == 0) then compare ay by
-    else dx
-end
-
-module PPMap = Map.Make(PP)
-
-type vstate = Unvisited | Processed | Visited
-
-let get_loop_edges code entry =
-  let visited = ref (PTree.map (fun n i -> Unvisited) code) in
-  let is_loop_edge = ref PPMap.empty
-  in let rec dfs_visit code from = function
-  | [] -> ()
-  | node :: ln ->
-      match (get_some @@ PTree.get node !visited) with
-      | Visited -> ()
-      | Processed -> begin
-          let from_node = get_some from in
-          is_loop_edge := PPMap.add (from_node, node) true !is_loop_edge;
-          visited := PTree.set node Visited !visited
-        end
-      | Unvisited -> begin
-          visited := PTree.set node Processed !visited;
-          let bb = get_some @@ PTree.get node code in
-          let next_visits = (match (last_element bb) with
-          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
-          | Lbuiltin _ -> assert false
-          | Ltailcall _ | Lreturn -> []
-          | Lbranch n -> [n]
-          | Lcond (_, _, ifso, ifnot, _) -> [ifso; ifnot]
-          | Ljumptable(_, ln) -> ln
-          ) in dfs_visit code (Some node) next_visits;
-          visited := PTree.set node Visited !visited;
-          dfs_visit code from ln
-        end
-  in begin
-    dfs_visit code None [entry];
-    !is_loop_edge
-  end
-
-let ppmap_is_true pp ppmap = PPMap.mem pp ppmap && PPMap.find pp ppmap
-
-module Int = struct
-  type t = int
-  let compare x y = compare x y
-end
-
-module ISet = Set.Make(Int)
-
-let print_iset s = begin
-  if !debug_flag then begin
-    Printf.printf "{";
-    ISet.iter (fun e -> Printf.printf "%d, " e) s;
-    Printf.printf "}"
-  end
-end
-
-let print_depmap dm = begin
-  if !debug_flag then begin
-    Printf.printf "[|";
-    Array.iter (fun s -> print_iset s; Printf.printf ", ") dm;
-    Printf.printf "|]\n"
-  end
-end
-
-let construct_depmap code entry fs =
-  let is_loop_edge = get_loop_edges code entry in
-  let visited = ref (PTree.map (fun n i -> false) code) in
-  let depmap = Array.map (fun e -> ISet.empty) fs in
-  let find_index_of_node n =
-    let index = ref 0 in
-    begin
-      Array.iteri (fun i s ->
-        match List.find_opt (fun e -> e == n) s with
-        | Some _ -> index := i
-        | None -> ()
-      ) fs;
-      !index
-    end
-  in let check_and_update_depmap from target =
-    (* debug "From %d to %d\n" (P.to_int from) (P.to_int target); *)
-    if not (ppmap_is_true (from, target) is_loop_edge) then
-      let in_index_fs = find_index_of_node from in
-      let out_index_fs = find_index_of_node target in
-      if out_index_fs != in_index_fs then
-        depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs)
-      else ()
-    else ()
-  in let rec dfs_visit code = function
-  | [] -> ()
-  | node :: ln ->
-      begin
-        match (get_some @@ PTree.get node !visited) with
-        | true -> ()
-        | false -> begin
-            visited := PTree.set node true !visited;
-            let bb = get_some @@ PTree.get node code in
-            let next_visits =
-              match (last_element bb) with
-              | Ltailcall _ | Lreturn -> []
-              | Lbranch n -> (check_and_update_depmap node n; [n])
-              | Lcond (_, _, ifso, ifnot, _) -> begin
-                  check_and_update_depmap node ifso;
-                  check_and_update_depmap node ifnot;
-                  [ifso; ifnot]
-                end
-              | Ljumptable(_, ln) -> begin
-                  List.iter (fun n -> check_and_update_depmap node n) ln;
-                  ln
-                end
-              (* end of bblocks should not be another value than one of the above *)
-              | _ -> failwith "last_element gave an invalid output"
-            in dfs_visit code next_visits
-          end;
-        dfs_visit code ln
-      end
-  in begin
-    dfs_visit code [entry];
-    depmap
-  end
-
-let print_sequence s =
-  if !debug_flag then begin
-    Printf.printf "[";
-    List.iter (fun n -> Printf.printf "%d, " (P.to_int n)) s;
-    Printf.printf "]\n"
-  end
-
-let print_ssequence ofs =
-  if !debug_flag then begin
-    Printf.printf "[";
-    List.iter (fun s -> print_sequence s) ofs;
-    Printf.printf "]\n"
-  end
-
-let order_sequences code entry fs =
-  let fs_a = Array.of_list fs in
-  let depmap = construct_depmap code entry fs_a in
-  let fs_evaluated = Array.map (fun e -> false) fs_a in
-  let ordered_fs = ref [] in
-  let evaluate s_id =
-    begin
-      assert (not fs_evaluated.(s_id));
-      ordered_fs := fs_a.(s_id) :: !ordered_fs;
-      fs_evaluated.(s_id) <- true;
-      (* debug "++++++\n";
-      debug "Scheduling %d\n" s_id;
-      debug "Initial depmap: "; print_depmap depmap; *)
-      Array.iteri (fun i deps ->
-        depmap.(i) <- ISet.remove s_id deps
-      ) depmap;
-      (* debug "Final depmap: "; print_depmap depmap; *)
+let super_blocks f joins =
+  let blocks = ref [] in
+  let visited = ref IntSet.empty in
+  (* start_block:
+       pc is the function entry point
+          or a join point
+          or the successor of a conditional test *)
+  let rec start_block pc =
+    let npc = P.to_int pc in
+    if not (IntSet.mem npc !visited) then begin
+      visited := IntSet.add npc !visited;
+      in_block [] npc pc
     end
-  in let choose_best_of candidates =
-    let current_best_id = ref None in
-    let current_best_score = ref None in
-    begin
-      List.iter (fun id ->
-        match !current_best_id with
-        | None -> begin
-            current_best_id := Some id;
-            match fs_a.(id) with
-            | [] -> current_best_score := None
-            | n::l -> current_best_score := Some (P.to_int n)
-          end
-        | Some b -> begin
-            match fs_a.(id) with
-            | [] -> ()
-            | n::l -> let nscore = P.to_int n in
-              match !current_best_score with
-              | None -> (current_best_id := Some id; current_best_score := Some nscore)
-              | Some bs -> if nscore > bs then (current_best_id := Some id; current_best_score := Some nscore)
+  (* in_block: add pc to block and check successors *)
+  and in_block blk minpc pc =
+    let blk = pc :: blk in
+    match PTree.get pc f.fn_code with
+    | None -> assert false
+    | Some b ->
+       let rec do_instr_list = function
+       | [] -> assert false
+       | Lbranch s :: _ -> next_in_block blk minpc s
+       | Ltailcall (sig0, ros) :: _ -> end_block blk minpc
+       | Lcond (cond, args, ifso, ifnot, pred) :: _ -> begin
+            match pred with
+            | None -> (end_block blk minpc; start_block ifso; start_block ifnot)
+            | Some true -> (next_in_block blk minpc ifso; start_block ifnot)
+            | Some false -> (next_in_block blk minpc ifnot; start_block ifso)
           end
-      ) candidates;
-      !current_best_id
-    end
-  in let select_next () =
-    let candidates = ref [] in
-    begin
-      Array.iteri (fun i deps ->
-        begin
-          (* debug "Deps of %d: " i; print_iset deps; debug "\n"; *)
-          (* FIXME - if we keep it that way (no dependency check), remove all the unneeded stuff *)
-          if ((* deps == ISet.empty && *) not fs_evaluated.(i)) then
-            candidates := i :: !candidates
-        end
-      ) depmap;
-      if not (List.length !candidates > 0) then begin
-        Array.iteri (fun i deps ->
-          if (not fs_evaluated.(i)) then candidates := i :: !candidates
-        ) depmap;
-      end;
-      get_some (choose_best_of !candidates)
-    end
-  in begin
-    debug "-------------------------------\n";
-    debug "depmap: "; print_depmap depmap;
-    debug "forward sequences identified: "; print_ssequence fs;
-    while List.length !ordered_fs != List.length fs do
-      let next_id = select_next () in
-      evaluate next_id
-    done;
-    debug "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs));
-    List.rev (!ordered_fs)
-  end
+       | Ljumptable(arg, tbl) :: _ ->
+             end_block blk minpc; List.iter start_block tbl
+       | Lreturn :: _ -> end_block blk minpc
+       | instr :: b' -> do_instr_list b' in
+       do_instr_list b
+  (* next_in_block: check if join point and either extend block
+     or start block *)
+  and next_in_block blk minpc pc =
+    let npc = P.to_int pc in
+    if IntSet.mem npc joins
+    then (end_block blk minpc; start_block pc)
+    else in_block blk minpc pc
+  (* end_block: record block that we just discovered *)
+  and end_block blk minpc =
+    blocks := (minpc, List.rev blk) :: !blocks
+  in
+    start_block f.fn_entrypoint; !blocks
+
+(* Build the enumeration *)
 
-let enumerate_aux_trace f reach =
-  let code = f.fn_code in
-  let entry = f.fn_entrypoint in
-  let fs = forward_sequences code entry in
-  let ofs = order_sequences code entry fs in
-  List.flatten ofs
+let enumerate_aux_sb f reach =
+  flatten_blocks (super_blocks f (join_points f))
 
 let enumerate_aux f reach =
-  if !Clflags.option_ftracelinearize then enumerate_aux_trace f reach
+  if !Clflags.option_ftracelinearize then enumerate_aux_sb f reach
   else enumerate_aux_flat f reach
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 3fe61470..22658fb7 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -324,7 +324,6 @@ Local Opaque mreg_type.
  
     apply wt_setreg; auto; try (apply wt_undef_regs; auto).
     eapply Val.has_subtype; eauto.
-    
     change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto.
     red; intros; subst op. simpl in ISMOVE.
     destruct args; try discriminate. destruct args; discriminate.
diff --git a/backend/Machregsnames.ml b/backend/Machregsnames.ml
new file mode 100644
index 00000000..fdcbd0e5
--- /dev/null
+++ b/backend/Machregsnames.ml
@@ -0,0 +1,24 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+let register_names : (Machregs.mreg, string) Hashtbl.t = Hashtbl.create 31
+
+let _ =
+  List.iter
+    (fun (s, r) -> Hashtbl.add register_names r (Camlcoq.camlstring_of_coqstring s))
+    Machregs.register_names
+
+let name_of_register r =
+  Hashtbl.find_opt register_names r
+
+let register_by_name s =
+  Machregs.register_by_name (Camlcoq.coqstring_uppercase_ascii_of_camlstring s)
diff --git a/backend/Machregsnames.mli b/backend/Machregsnames.mli
new file mode 100644
index 00000000..1b600d35
--- /dev/null
+++ b/backend/Machregsnames.mli
@@ -0,0 +1,16 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(** Auxiliary functions on machine registers *)
+
+val name_of_register: Machregs.mreg -> string option
+val register_by_name: string -> Machregs.mreg option
diff --git a/backend/NeedDomain.v b/backend/NeedDomain.v
index 3c2d8e20..d9e9e025 100644
--- a/backend/NeedDomain.v
+++ b/backend/NeedDomain.v
@@ -47,7 +47,7 @@ Definition iagree (p q mask: int) : Prop :=
   forall i, 0 <= i < Int.zwordsize -> Int.testbit mask i = true ->
             Int.testbit p i = Int.testbit q i.
 
-Fixpoint vagree (v w: val) (x: nval) {struct x}: Prop :=
+Definition vagree (v w: val) (x: nval) : Prop :=
   match x with
   | Nothing => True
   | I m =>
diff --git a/backend/PrintAsmaux.ml b/backend/PrintAsmaux.ml
index 7fa10aee..5cb693af 100644
--- a/backend/PrintAsmaux.ml
+++ b/backend/PrintAsmaux.ml
@@ -297,8 +297,8 @@ let print_inline_asm print_preg oc txt sg args res =
 
 let print_version_and_options oc comment =
   let version_string =
-    if Version.buildnr <> "" && Version.tag <> "" then
-      sprintf "Release: %s, Build: %s, Tag: %s" Version.version Version.buildnr Version.tag
+    if Version.buildnr <> "" && Version.tag <> "" && Version.branch <> "" then
+      sprintf "Release: %s, Build: %s, Tag: %s, Branch: %s" Version.version Version.buildnr Version.tag Version.branch
     else
       Version.version in
   fprintf oc "%s File generated by CompCert %s\n" comment version_string;
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index d8f2ac12..87e8a1fc 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -22,7 +22,7 @@ open PrintAST
 open PrintOp
 
 let mreg pp r =
-  match Machregsaux.name_of_register r with
+  match Machregsnames.name_of_register r with
   | Some s -> fprintf pp "%s" s
   | None -> fprintf pp "<unknown machreg>"
 
@@ -133,10 +133,10 @@ let print_program pp (prog: LTL.program) =
 
 let destination : string option ref = ref None
 
-let print_if prog =
+let print_if passno prog =
   match !destination with
   | None -> ()
   | Some f ->
-      let oc = open_out f in
+      let oc = open_out (f ^ "." ^ Z.to_string passno) in
       print_program oc prog;
       close_out oc
diff --git a/backend/PrintMach.ml b/backend/PrintMach.ml
index 70e65832..3481421b 100644
--- a/backend/PrintMach.ml
+++ b/backend/PrintMach.ml
@@ -16,12 +16,11 @@ open Printf
 open Camlcoq
 open Datatypes
 open AST
-open Machregsaux
 open Mach
 open PrintAST
 
 let reg pp r =
-  match name_of_register r with
+  match Machregsnames.name_of_register r with
   | Some s -> fprintf pp "%s" s
   | None -> fprintf pp "<unknown reg>"
 
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index d1b79623..6f2b1df9 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -22,7 +22,7 @@ open PrintOp
 open XTL
 
 let mreg pp r =
-  match Machregsaux.name_of_register r with
+  match Machregsnames.name_of_register r with
   | Some s -> fprintf pp "%s" s
   | None -> fprintf pp "<unknown machreg>"
 
diff --git a/backend/Selection.v b/backend/Selection.v
index 342bd8ca..8667922f 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -251,10 +251,16 @@ Function sel_known_builtin (bf: builtin_function) (args: exprlist) :=
       Some (sel_select ty a1 a2 a3)
   | BI_standard BI_fabs, a1 ::: Enil =>
       Some (SelectOp.absf a1)
+  | BI_standard BI_fabsf, a1 ::: Enil =>
+      Some (SelectOp.absfs a1)
   | _, _ =>
       None
   end.
 
+(** A CminorSel statement that does nothing, like [Sskip], but reduces. *)
+
+Definition Sno_op := Sseq Sskip Sskip.
+
 (** Builtin functions in general *)
 
 Definition sel_builtin_default (optid: option ident) (ef: external_function)
@@ -264,17 +270,22 @@ Definition sel_builtin_default (optid: option ident) (ef: external_function)
 
 Definition sel_builtin (optid: option ident) (ef: external_function)
                                (args: list Cminor.expr) :=
-  match optid, ef with
-  | Some id, EF_builtin name sg =>
+  match ef with
+  | EF_builtin name sg =>
       match lookup_builtin_function name sg with
       | Some bf =>
-          match sel_known_builtin bf (sel_exprlist args) with
-          | Some a => Sassign id a
-          | None => sel_builtin_default optid ef args
+          match optid with
+          | Some id =>
+              match sel_known_builtin bf (sel_exprlist args) with
+              | Some a => Sassign id a
+              | None => sel_builtin_default optid ef args
+              end
+          | None =>
+              Sno_op   (**r builtins with semantics are pure *)
           end
       | None => sel_builtin_default optid ef args
       end
-  | _, _ =>
+  | _ =>
       sel_builtin_default optid ef args
   end.
 
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 955c45a4..8f3f5f00 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -396,13 +396,10 @@ Proof.
   inv ARGS; try discriminate. inv H0; try discriminate.
   inv SEL.  
   simpl in SEM; inv SEM. apply eval_absf; auto.
-  (* + (* expect *)
-  inv ARGS; try discriminate.
-  inv H0; try discriminate.
-  inv H2; try discriminate.
-  simpl in SEM. inv SEM. inv SEL.
-  destruct v1; destruct v0.
-  all: econstructor; split; eauto. *)
++ (* fabsf *)
+  inv ARGS; try discriminate. inv H0; try discriminate.
+  inv SEL.  
+  simpl in SEM; inv SEM. apply eval_absfs; auto.
 - eapply eval_platform_builtin; eauto.
 Qed.
 
@@ -786,6 +783,8 @@ Lemma sel_select_opt_correct:
   Cminor.eval_expr ge sp e m cond vcond ->
   Cminor.eval_expr ge sp e m a1 v1 ->
   Cminor.eval_expr ge sp e m a2 v2 ->
+  Val.has_type v1 ty ->
+  Val.has_type v2 ty ->
   Val.bool_of_val vcond b ->
   env_lessdef e e' -> Mem.extends m m' ->
   exists v', eval_expr tge sp e' m' le a v' /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v'.
@@ -795,7 +794,7 @@ Proof.
   exploit sel_expr_correct. eexact H0. eauto. eauto. intros (vcond' & EVC & LDC).
   exploit sel_expr_correct. eexact H1. eauto. eauto. intros (v1' & EV1 & LD1).
   exploit sel_expr_correct. eexact H2. eauto. eauto. intros (v2' & EV2 & LD2).
-  assert (Val.bool_of_val vcond' b) by (inv H3; inv LDC; constructor).
+  assert (Val.bool_of_val vcond' b) by (inv H5; inv LDC; constructor).
   exploit eval_condition_of_expr. eexact EVC. eauto. rewrite C. intros (vargs' & EVARGS & EVCOND).
   exploit eval_select; eauto. intros (v' & X & Y). 
   exists v'; split; eauto. 
@@ -852,8 +851,8 @@ Lemma sel_builtin_default_correct:
   external_call ef ge vl m1 t v m2 ->
   env_lessdef e1 e1' -> Mem.extends m1 m1' ->
   exists e2' m2',
-     step tge (State f (sel_builtin_default optid ef al) k sp e1' m1')
-            t (State f Sskip k sp e2' m2')
+     plus step tge (State f (sel_builtin_default optid ef al) k sp e1' m1')
+                 t (State f Sskip k sp e2' m2')
   /\ env_lessdef (set_optvar optid v e1) e2'
   /\ Mem.extends m2 m2'.
 Proof.
@@ -861,6 +860,7 @@ Proof.
   exploit sel_builtin_args_correct; eauto. intros (vl' & A & B).
   exploit external_call_mem_extends; eauto. intros (v' & m2' & D & E & F & _).
   econstructor; exists m2'; split.
+  apply plus_one.
   econstructor. eexact A. eapply external_call_symbols_preserved. eexact senv_preserved. eexact D.
   split; auto. apply sel_builtin_res_correct; auto.
 Qed. 
@@ -871,8 +871,8 @@ Lemma sel_builtin_correct:
   external_call ef ge vl m1 t v m2 ->
   env_lessdef e1 e1' -> Mem.extends m1 m1' ->
   exists e2' m2',
-     step tge (State f (sel_builtin optid ef al) k sp e1' m1')
-            t (State f Sskip k sp e2' m2')
+     plus step tge (State f (sel_builtin optid ef al) k sp e1' m1')
+                 t (State f Sskip k sp e2' m2')
   /\ env_lessdef (set_optvar optid v e1) e2'
   /\ Mem.extends m2 m2'.
 Proof.
@@ -880,15 +880,18 @@ Proof.
   exploit sel_exprlist_correct; eauto. intros (vl' & A & B).
   exploit external_call_mem_extends; eauto. intros (v' & m2' & D & E & F & _).
   unfold sel_builtin.
-  destruct optid as [id|]; eauto using sel_builtin_default_correct.
   destruct ef; eauto using sel_builtin_default_correct.
   destruct (lookup_builtin_function name sg) as [bf|] eqn:LKUP; eauto using sel_builtin_default_correct.
-  destruct (sel_known_builtin bf (sel_exprlist al)) as [a|] eqn:SKB; eauto using sel_builtin_default_correct.
   simpl in D. red in D. rewrite LKUP in D. inv D.
+  destruct optid as [id|]; eauto using sel_builtin_default_correct.
+- destruct (sel_known_builtin bf (sel_exprlist al)) as [a|] eqn:SKB; eauto using sel_builtin_default_correct.
   exploit eval_sel_known_builtin; eauto. intros (v'' & U & V).
   econstructor; exists m2'; split.
-  econstructor. eexact U.
+  apply plus_one. econstructor. eexact U.
   split; auto. apply set_var_lessdef; auto. apply Val.lessdef_trans with v'; auto.
+- exists e1', m2'; split.
+  eapply plus_two. constructor. constructor. auto.
+  simpl; auto.  
 Qed.
 
 (** If-conversion *)
@@ -1179,8 +1182,8 @@ Remark sel_builtin_nolabel:
   forall (hf: helper_functions) optid ef args, nolabel' (sel_builtin optid ef args).
 Proof.
   unfold sel_builtin; intros; red; intros.
-  destruct optid; auto. destruct ef; auto. destruct lookup_builtin_function; auto.
-  destruct sel_known_builtin; auto. 
+  destruct ef; auto. destruct lookup_builtin_function; auto.
+  destruct optid; auto. destruct sel_known_builtin; auto. 
 Qed. 
 
 Remark find_label_commut:
@@ -1243,34 +1246,34 @@ Definition measure (s: Cminor.state) : nat :=
 Lemma sel_step_correct:
   forall S1 t S2, Cminor.step ge S1 t S2 ->
   forall T1, match_states S1 T1 -> wt_state S1 ->
-  (exists T2, step tge T1 t T2 /\ match_states S2 T2)
+  (exists T2, plus step tge T1 t T2 /\ match_states S2 T2)
   \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 T1)%nat
   \/ (exists S3 T2, star Cminor.step ge S2 E0 S3 /\ step tge T1 t T2 /\ match_states S3 T2).
 Proof.
   induction 1; intros T1 ME WTS; inv ME; try (monadInv TS).
 - (* skip seq *)
-  inv MC. left; econstructor; split. econstructor. econstructor; eauto.
+  inv MC. left; econstructor; split. apply plus_one; econstructor. econstructor; eauto.
   inv H.
 - (* skip block *)
-  inv MC. left; econstructor; split. econstructor. econstructor; eauto.
+  inv MC. left; econstructor; split. apply plus_one; econstructor. econstructor; eauto.
   inv H.
 - (* skip call *)
   exploit Mem.free_parallel_extends; eauto. intros [m2' [A B]].
   left; econstructor; split.
-  econstructor. eapply match_is_call_cont; eauto.
+  apply plus_one; econstructor. eapply match_is_call_cont; eauto.
   erewrite stackspace_function_translated; eauto.
   econstructor; eauto. eapply match_is_call_cont; eauto.
 - (* assign *)
   exploit sel_expr_correct; eauto. intros [v' [A B]].
   left; econstructor; split.
-  econstructor; eauto.
+  apply plus_one; econstructor; eauto.
   econstructor; eauto. apply set_var_lessdef; auto.
 - (* store *)
   exploit sel_expr_correct. try apply LINK. try apply HF. eexact H. eauto. eauto. intros [vaddr' [A B]].
   exploit sel_expr_correct. try apply LINK. try apply HF. eexact H0. eauto. eauto. intros [v' [C D]].
   exploit Mem.storev_extends; eauto. intros [m2' [P Q]].
   left; econstructor; split.
-  eapply eval_store; eauto.
+  apply plus_one; eapply eval_store; eauto.
   econstructor; eauto.
 - (* Scall *)
   exploit classify_call_correct; eauto.
@@ -1280,7 +1283,7 @@ Proof.
   exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
   exploit functions_translated; eauto. intros (cunit' & fd' & U & V & W).
   left; econstructor; split.
-  econstructor; eauto. econstructor; eauto.
+  apply plus_one; econstructor; eauto. econstructor; eauto.
   eapply sig_function_translated; eauto.
   eapply match_callstate with (cunit := cunit'); eauto.
   eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
@@ -1289,7 +1292,7 @@ Proof.
   exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
   exploit functions_translated; eauto. intros (cunit' & fd' & X & Y & Z).
   left; econstructor; split.
-  econstructor; eauto.
+  apply plus_one; econstructor; eauto.
   subst vf. econstructor; eauto. rewrite symbols_preserved; eauto.
   eapply sig_function_translated; eauto.
   eapply match_callstate with (cunit := cunit'); eauto.
@@ -1304,6 +1307,7 @@ Proof.
   exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
   exploit functions_translated; eauto. intros (cunit' & fd' & E & F & G).
   left; econstructor; split.
+  apply plus_one.
   exploit classify_call_correct. eexact LINK. eauto. eauto.
   destruct (classify_call (prog_defmap cunit)) as [ | id | ef]; intros.
   econstructor; eauto. econstructor; eauto. eapply sig_function_translated; eauto.
@@ -1317,7 +1321,7 @@ Proof.
   left; econstructor; split. eexact P. econstructor; eauto.
 - (* Seq *)
   left; econstructor; split.
-  constructor.
+  apply plus_one; constructor.
   econstructor; eauto. constructor; auto.
 - (* Sifthenelse *)
   simpl in TS. destruct (if_conversion (known_id f) env a s1 s2) as [s|] eqn:IFC; monadInv TS.
@@ -1329,21 +1333,21 @@ Proof.
 + exploit sel_expr_correct; eauto. intros [v' [A B]].
   assert (Val.bool_of_val v' b). inv B. auto. inv H0.
   left; exists (State f' (if b then x else x0) k' sp e' m'); split.
-  econstructor; eauto. eapply eval_condexpr_of_expr; eauto.
+  apply plus_one; econstructor; eauto. eapply eval_condexpr_of_expr; eauto.
   econstructor; eauto. destruct b; auto.
 - (* Sloop *)
-  left; econstructor; split. constructor. econstructor; eauto.
+  left; econstructor; split. apply plus_one; constructor. econstructor; eauto.
   constructor; auto. simpl; rewrite EQ; auto.
 - (* Sblock *)
-  left; econstructor; split. constructor. econstructor; eauto. constructor; auto.
+  left; econstructor; split. apply plus_one; constructor. econstructor; eauto. constructor; auto.
 - (* Sexit seq *)
-  inv MC. left; econstructor; split. constructor. econstructor; eauto.
+  inv MC. left; econstructor; split. apply plus_one; constructor. econstructor; eauto.
   inv H.
 - (* Sexit0 block *)
-  inv MC. left; econstructor; split. constructor. econstructor; eauto.
+  inv MC. left; econstructor; split. apply plus_one; constructor. econstructor; eauto.
   inv H.
 - (* SexitS block *)
-  inv MC. left; econstructor; split. constructor. econstructor; eauto.
+  inv MC. left; econstructor; split. apply plus_one; constructor. econstructor; eauto.
   inv H.
 - (* Sswitch *)
   inv H0; simpl in TS.
@@ -1351,29 +1355,29 @@ Proof.
   destruct (validate_switch Int.modulus default cases ct) eqn:VALID; inv TS.
   exploit sel_expr_correct; eauto. intros [v' [A B]]. inv B.
   left; econstructor; split.
-  econstructor. eapply sel_switch_int_correct; eauto.
+  apply plus_one; econstructor. eapply sel_switch_int_correct; eauto.
   econstructor; eauto.
 + set (ct := compile_switch Int64.modulus default cases) in *.
   destruct (validate_switch Int64.modulus default cases ct) eqn:VALID; inv TS.
   exploit sel_expr_correct; eauto. intros [v' [A B]]. inv B.
   left; econstructor; split.
-  econstructor. eapply sel_switch_long_correct; eauto.
+  apply plus_one; econstructor. eapply sel_switch_long_correct; eauto.
   econstructor; eauto.
 - (* Sreturn None *)
   exploit Mem.free_parallel_extends; eauto. intros [m2' [P Q]].
   erewrite <- stackspace_function_translated in P by eauto.
   left; econstructor; split.
-  econstructor. simpl; eauto.
+  apply plus_one; econstructor. simpl; eauto.
   econstructor; eauto. eapply call_cont_commut; eauto.
 - (* Sreturn Some *)
   exploit Mem.free_parallel_extends; eauto. intros [m2' [P Q]].
   erewrite <- stackspace_function_translated in P by eauto.
   exploit sel_expr_correct; eauto. intros [v' [A B]].
   left; econstructor; split.
-  econstructor; eauto.
+  apply plus_one; econstructor; eauto.
   econstructor; eauto. eapply call_cont_commut; eauto.
 - (* Slabel *)
-  left; econstructor; split. constructor. econstructor; eauto.
+  left; econstructor; split. apply plus_one; constructor. econstructor; eauto.
 - (* Sgoto *)
   assert (sel_stmt (prog_defmap cunit) (known_id f) env (Cminor.fn_body f) = OK (fn_body f')).
   { monadInv TF; simpl. congruence. }
@@ -1384,7 +1388,7 @@ Proof.
   as [[s'' k'']|] eqn:?; intros; try contradiction.
   destruct H1.
   left; econstructor; split.
-  econstructor; eauto.
+  apply plus_one; econstructor; eauto.
   econstructor; eauto.
 - (* internal function *)
   destruct TF as (hf & HF & TF). 
@@ -1392,7 +1396,7 @@ Proof.
   exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
   intros [m2' [A B]].
   left; econstructor; split.
-  econstructor; simpl; eauto.
+  apply plus_one; econstructor; simpl; eauto.
   econstructor; simpl; eauto.
   apply match_cont_other; auto.
   apply set_locals_lessdef. apply set_params_lessdef; auto.
@@ -1402,7 +1406,7 @@ Proof.
   exploit external_call_mem_extends; eauto.
   intros [vres' [m2 [A [B [C D]]]]].
   left; econstructor; split.
-  econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  apply plus_one; econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
   econstructor; eauto.
 - (* external call turned into a Sbuiltin *)
   exploit sel_builtin_correct; eauto. intros (e2' & m2' & P & Q & R).
@@ -1410,7 +1414,7 @@ Proof.
 - (* return *)
   inv MC.
   left; econstructor; split.
-  econstructor.
+  apply plus_one; econstructor.
   econstructor; eauto. destruct optid; simpl; auto. apply set_var_lessdef; auto.
 - (* return of an external call turned into a Sbuiltin *)
   right; left; split. simpl; omega. split. auto. econstructor; eauto.
@@ -1453,7 +1457,7 @@ Proof.
   unfold MS.
   exploit sel_step_correct; eauto.
   intros [(T2 & D & E) | [(D & E & F) | (S3 & T2 & D & E & F)]].
-+ exists S2, T2. intuition auto using star_refl, plus_one.
++ exists S2, T2. intuition auto using star_refl.
 + subst t. exists S2, T1. intuition auto using star_refl.
 + assert (wt_state S3) by (eapply subject_reduction_star; eauto using wt_prog).
   exists S3, T2. intuition auto using plus_one.
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 49d2956e..ed3eef04 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -869,7 +869,7 @@ Qed.
 Remark transl_destroyed_by_op:
   forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op.
 Proof.
-  intros; destruct op; reflexivity.
+  intros; destruct op; try reflexivity; simpl.
 Qed.
 
 Remark transl_destroyed_by_load:
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index 78458582..269ebb6f 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -3,6 +3,7 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -12,7 +13,7 @@
 
 (** Branch tunneling (optimization of branches to branches). *)
 
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
 Require Import AST.
 Require Import LTL.
 
@@ -21,10 +22,10 @@ Require Import LTL.
   so that they jump directly to the end of the branch sequence.
   For example:
 <<
-     L1: nop L2;                          L1: nop L3;
-     L2; nop L3;               becomes    L2: nop L3;
+     L1: if (cond) nop L2;                L1: nop L3;
+     L2: nop L3;               becomes    L2: nop L3;
      L3: instr;                           L3: instr;
-     L4: if (cond) goto L1;               L4: if (cond) goto L3;
+     L4: if (cond) goto L1;               L4: if (cond) nop L1;
 >>
   This optimization can be applied to several of our intermediate
   languages.  We choose to perform it on the [LTL] language,
@@ -37,11 +38,14 @@ Require Import LTL.
   dead code (as the "nop L3" in the example above).
 *)
 
-(** The naive implementation of branch tunneling would replace
-  any branch to a node [pc] by a branch to the node
-  [branch_target f pc], defined as follows:
+(** The implementation consists in two passes: the first pass
+    records the branch t of each "nop"
+    and the second pass replace any "nop" node to [pc]
+    by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
 <<
-  branch_target f pc = branch_target f pc'  if f(pc) = nop pc'
+  branch_t f pc = branch_t f pc'  if f(pc) = nop pc'
                      = pc                   otherwise
 >>
   However, this definition can fail to terminate if
@@ -50,56 +54,114 @@ Require Import LTL.
      L1: nop L1;
 >>
   or
-<<   L1: nop L2;
+<<
+     L1: nop L2;
      L2: nop L1;
 >>
   Coq warns us of this fact by not accepting the definition
-  of [branch_target] above.
+  of [branch_t] above.
+
+  To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+  for every instruction [pc: nop pc'] in the function.
+
+  Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+  we need to iterate until we reach a fixpoint.
+
+  Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+  in order to help the proof.
+
+  A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
 
-  To handle this problem, we proceed in two passes.  The first pass
-  populates a union-find data structure, adding equalities [pc = pc']
-  for every instruction [pc: nop pc'] in the function. *)
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF. 
+Extract Constant branch_target => "Tunnelingaux.branch_target".
 
-Module U := UnionFind.UF(PTree).
+Local Open Scope error_monad_scope.
 
-Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
-  match b with
-  | Lbranch s :: _ => U.union uf pc s
-  | _ => uf
+Definition get (td: UF) pc:node*Z :=
+  match td!pc with
+  | Some (t,d) => (t,Z.abs d)
+  | _ => (pc,0)
   end.
 
-Definition record_gotos (f: LTL.function) : U.t :=
-  PTree.fold record_goto f.(fn_code) U.empty.
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+  := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+   the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+    | None => OK tt
+    | Some (tpc, dpc) =>
+       let dpc := Z.abs dpc in
+       match bb with
+       | Lbranch s ::_ =>
+         let (ts, ds) := get td s in 
+         if peq tpc ts then
+            if zlt ds dpc then OK tt
+            else Error (msg "bad distance in Lbranch")
+         else Error (msg "invalid skip of Lbranch")
+       | Lcond _ _ s1 s2 _ :: _ =>
+          let (ts1, ds1) := get td s1 in
+          let (ts2, ds2) := get td s2 in
+          if peq tpc ts1 then
+            if peq tpc ts2 then
+              if zlt ds1 dpc then
+                if zlt ds2 dpc then OK tt
+                else Error (msg "bad distance on else branch")
+              else Error (msg "bad distance on then branch")
+            else Error (msg "invalid skip of else branch")
+          else Error (msg "invalid skip of then branch")
+      | _ => Error (msg "cannot skip this block")
+      end
+   end.
+
+Definition check_code (td: UF) (c:code): res unit
+  := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
 
 (** The second pass rewrites all LTL instructions, replacing every
-  successor [s] of every instruction by the canonical representative
+  successor [s] of every instruction by [t s], the canonical representative
   of its equivalence class in the union-find data structure. *)
 
-Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
   match i with
-  | Lbranch s => Lbranch (U.repr uf s)
+  | Lbranch s => Lbranch (t s)
   | Lcond cond args s1 s2 info =>
-      let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
+      let s1' := t s1 in let s2' := t s2 in
       if peq s1' s2'
       then Lbranch s1'
       else Lcond cond args s1' s2' info
-  | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+  | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
   | _ => i
   end.
 
-Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
-  List.map (tunnel_instr uf) b.
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+  List.map (tunnel_instr t) b.
 
-Definition tunnel_function (f: LTL.function) : LTL.function :=
-  let uf := record_gotos f in
-  mkfunction
-    (fn_sig f)
-    (fn_stacksize f)
-    (PTree.map1 (tunnel_block uf) (fn_code f))
-    (U.repr uf (fn_entrypoint f)).
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+  let td := branch_target f in
+  let c := (fn_code f) in
+  if check_included td c then 
+    do _ <- check_code td c ; OK
+    (mkfunction
+      (fn_sig f)
+      (fn_stacksize f)
+      (PTree.map1 (tunnel_block td) c)
+      (td (fn_entrypoint f)))
+  else 
+   Error (msg "Some node of the union-find is not in the CFG")
+   .
 
-Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
-  transf_fundef tunnel_function f.
+Definition tunnel_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef tunnel_function f.
 
-Definition transf_program (p: LTL.program) : LTL.program :=
-  transform_program tunnel_fundef p.
+Definition transf_program (p: program) : res program :=
+  transform_partial_program tunnel_fundef p.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
new file mode 100644
index 00000000..87e6d303
--- /dev/null
+++ b/backend/Tunnelingaux.ml
@@ -0,0 +1,283 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Sylvain Boulmé  Grenoble-INP, VERIMAG             *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function,
+and computes their target node with the distance (ie the number of cummulated nops) toward this target.
+
+See [Tunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+
+let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
+let debug_flag = ref false
+let final_dump = false   (* set to true to have a more verbose debugging *)
+
+let debug fmt =
+  if !debug_flag then Printf.eprintf fmt
+  else Printf.ifprintf stderr fmt
+
+exception BugOnPC of int
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analyzis *)
+type simple_inst = (* a simplified view of LTL instructions *)
+  LBRANCH of node
+| LCOND of node * node
+| OTHER
+and node = {
+    lab : label;
+    mutable inst: simple_inst;
+    mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+    mutable dist: int;
+    mutable tag: int
+  }
+
+(* type of the (simplified) CFG *)
+type cfg = {
+    nodes: (int, node) Hashtbl.t;
+    mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+    mutable num_rems: int;
+    mutable iter_num: int (* number of iterations in elimination of conditions *)
+  }
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+  match n.inst with
+  | LCOND(s1,s2) ->
+     if n.link != n
+     then update c n
+     else if n.tag < c.iter_num then (
+       (* we try to change the condition ... *)
+       n.tag <- c.iter_num; (* ... but at most once by iteration *)
+       let ts1 = target c s1 in
+       let ts2 = target c s2 in
+       if ts1 == ts2 then (n.link <- ts1; ts1) else n
+     ) else n
+  | _ ->
+     if n.link != n
+     then update c n
+     else n
+and update c n =
+  let t = target c n.link in
+  n.link <- t; t
+
+let get_node c p =
+  let li = P.to_int p in
+  try
+    Hashtbl.find c.nodes li
+  with
+    Not_found ->
+      let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 }  in
+      Hashtbl.add c.nodes li n;
+      n
+
+let set_branch c p s =
+  let li = P.to_int p in
+  try
+    let n = Hashtbl.find c.nodes li in
+    n.inst <- LBRANCH s;
+    n.link <- target c s
+  with
+    Not_found ->
+      let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in
+      Hashtbl.add c.nodes li n
+
+
+(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+let build_simplified_cfg c acc pc bb =
+  match bb with
+  | Lbranch s :: _ ->
+     let ns = get_node c s in
+     set_branch c pc ns;
+     acc
+  | Lcond (_, _, s1, s2, _) :: _ ->
+     c.num_rems <- c.num_rems + 1;
+     let ns1 = get_node c s1 in
+     let ns2 = get_node c s2 in
+     let npc = get_node c pc in
+     npc.inst <- LCOND(ns1, ns2);
+     npc::acc
+  | _ -> acc
+
+(* try to change a condition into a branch
+[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
+*)
+let try_change_cond c acc pc =
+  match pc.inst with
+  | LCOND(s1,s2) ->
+     let ts1 = target c s1 in
+     let ts2 = target c s2 in
+     if ts1 == ts2 then (
+       pc.link <- ts1;
+       c.num_rems <- c.num_rems - 1;
+       acc
+     ) else
+       pc::acc
+  | _  -> raise (BugOnPC (lab_i pc)) (* LCOND expected *)
+
+(* repeat [try_change_cond] until no condition is changed into a branch *)
+let rec repeat_change_cond c =
+  c.iter_num <- c.iter_num + 1;
+  debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
+  let old =  c.num_rems in
+  c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+  let curr = c.num_rems in
+  let continue =
+    match limit_tunneling with
+    | Some n -> curr < old && c.iter_num < n
+    | None -> curr < old
+  in
+  if continue
+  then repeat_change_cond c
+
+
+(* compute the final distance of each nop nodes to its target *)
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+  if n.dist = undef_dist
+  then (
+    n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+    n.dist <-
+      (match n.inst with
+       | OTHER -> 0
+       | LBRANCH p -> 1 + dist p
+       | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+    n.dist
+  ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+    else n.dist
+
+let final_export f c =
+  let count = ref 0 in
+  let filter_nops_init_dist _ n acc =
+    let tn = target c n in
+    if tn == n
+    then (
+      n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+      acc
+    ) else (
+      n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+      count := !count+1;
+      n::acc
+    )
+  in
+  let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+  let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in
+  debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count;
+  res
+
+(*********************************************)
+(*** START: printing and debugging functions *)
+
+let string_of_labeli nodes ipc =
+  try
+    let pc = Hashtbl.find nodes ipc in
+    if pc.link == pc
+    then Printf.sprintf "(Target@%d)" (dist pc)
+    else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+  with
+    Not_found -> ""
+
+let print_bblock c println (pc, bb) =
+  match bb with
+  | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+  | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+  | _ -> debug "%d " pc; true
+
+
+let print_cfg f c  =
+  let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in
+  Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+  let ep = P.to_int f.fn_entrypoint in
+  debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+  let println = Array.fold_left (print_bblock c) false a in
+  (if println then debug "\n");debug "remaining cond:";
+  List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+  debug "\n"
+
+(*************************************************************)
+(* Copy-paste of the extracted code of the verifier          *)
+(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+let get td pc =
+  match PTree.get pc td with
+  | Some p -> let (t0, d) = p in (t0, d)
+  | None -> (pc, Z.of_uint 0)
+
+let check_bblock td pc bb =
+  match PTree.get pc td with
+  | Some p ->
+    let (tpc, dpc) = p in
+    let dpc0 = dpc in
+    (match bb with
+     | [] ->
+       raise (BugOnPC (P.to_int pc))
+     | i :: _ ->
+       (match i with
+        | Lbranch s ->
+          let (ts, ds) = get td s in
+          if peq tpc ts
+          then if zlt ds dpc0
+               then ()
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | Lcond (_, _, s1, s2, _) ->
+          let (ts1, ds1) = get td s1 in
+          let (ts2, ds2) = get td s2 in
+          if peq tpc ts1
+          then if peq tpc ts2
+               then if zlt ds1 dpc0
+                    then if zlt ds2 dpc0
+                         then ()
+                         else raise (BugOnPC (P.to_int pc))
+                    else raise (BugOnPC (P.to_int pc))
+               else raise (BugOnPC (P.to_int pc))
+          else raise (BugOnPC (P.to_int pc))
+        | _ ->
+          raise (BugOnPC (P.to_int pc))))
+  | None -> ()
+
+(** val check_code : coq_UF -> code -> unit res **)
+
+let check_code td c =
+  PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (())
+
+(*** END: copy-paste & debugging functions *******)
+
+let branch_target f =
+  debug "* Tunneling.branch_target: starting on a new function\n";
+  if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+  let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+  c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
+  repeat_change_cond c;
+  let res = final_export f c in
+  if !debug_flag then (
+    try
+      check_code res f.fn_code;
+      if final_dump then print_cfg f c;
+    with e -> (
+      print_cfg f c;
+      check_code res f.fn_code
+    )
+  );
+  res
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index cdf6c800..126b7b87 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -3,6 +3,7 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Sylvain Boulmé  Grenoble-INP, VERIMAG                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -12,131 +13,163 @@
 
 (** Correctness proof for the branch tunneling optimization. *)
 
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
 Require Import AST Linking.
 Require Import Values Memory Events Globalenvs Smallstep.
 Require Import Op Locations LTL.
 Require Import Tunneling.
 
-Definition match_prog (p tp: program) :=
-  match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
+Local Open Scope nat.
 
-Lemma transf_program_match:
-  forall p, match_prog p (transf_program p).
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+   ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code): 
+  check_included_spec c td (check_included td c).
+Proof. 
+  apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+  unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_included_spec.
+  intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+  intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+     : target_bounds target bound pc ob
+ | TB_branch s bb
+     (EQ: target pc = target s)
+     (DECREASE: bound s < bound pc)
+     : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+     (EQ1: target pc = target s1)
+     (EQ2: target pc = target s2)
+     (DEC1: bound s1 < bound pc)
+     (DEC2: bound s2 < bound pc)
+     : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
 Proof.
-  intros. eapply match_transform_program; eauto.
+  unfold target, get. intros H; rewrite H; auto.
 Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
 
-(** * Properties of the branch map computed using union-find. *)
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+  unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; omega || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
 
-(** A variant of [record_goto] that also incrementally computes a measure [f: node -> nat]
-  counting the number of [Lnop] instructions starting at a given [pc] that were eliminated. *)
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
 
-Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
-  fun x => if peq (U.repr u s) pc then f x
-           else if peq (U.repr u x) pc then (f x + f s + 1)%nat
-           else f x.
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+  check_bblock td pc bb = OK tt -> 
+  target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+  unfold check_bblock, bound.
+  destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+  assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+  assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+  destruct bb as [|[ ] bb]; simpl; try congruence.
+  + destruct (get td s) as (ts, ds) eqn:Hs.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_branch.
+    * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+    * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+  + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+    destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+    repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+    intros; apply TB_cond.
+    * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+    * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+    * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+    * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
 
-Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) :=
-  match b with
-  | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f)
-  | _ => uf
-  end.
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+   ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
 
-Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop :=
-  forall pc,
-  match c!pc with
-  | Some(Lbranch s :: b) =>
-      U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat
-  | _ =>
-      U.repr (fst uf) pc = pc
-  end.
+Lemma check_code_correct (td:UF) c:
+   check_code_spec td c (check_code td c).
+Proof.
+  apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+  unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite  <- ! EQ; eauto.
+- (* base case *)
+  intros _ pc.  rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+  unfold check_code_spec.
+  intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+  intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+  intros X; inversion X; subst.
+  apply check_bblock_correct; auto.
+Qed.
 
-Lemma record_gotos'_correct:
-  forall c,
-  branch_map_correct c (PTree.fold record_goto' c (U.empty, fun (x: node) => O)).
+Theorem branch_target_bounds:
+  forall f tf pc, 
+  tunnel_function f = OK tf ->
+  target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
 Proof.
-  intros.
-  apply PTree_Properties.fold_rec with (P := fun c uf => branch_map_correct c uf).
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros _.
+  destruct ((fn_code f)!pc) eqn:X.
+  - exploit check_code_correct; eauto.
+  - exploit check_included_correct; eauto.
+    congruence.
+Qed.
 
-- (* extensionality *)
-  intros. red; intros. rewrite <- H. apply H0.
+Lemma tunnel_function_unfold:
+  forall f tf pc,
+  tunnel_function f = OK tf -> 
+  (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+  unfold tunnel_function; intros f f' pc.
+  destruct (check_included _ _) eqn:H1; try congruence.
+  destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+  intros X; inversion X; clear X; subst.
+  simpl. rewrite PTree.gmap1. auto.
+Qed.
 
-- (* base case *)
-  red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty.
+Lemma tunnel_fundef_Internal:
+  forall f tf, tunnel_fundef (Internal f) = OK tf
+  -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+  intros f tf; simpl.
+  destruct (tunnel_function f) eqn:X; simpl; try congruence.
+  intros EQ; inversion EQ.
+  eexists; split; eauto.
+Qed.
 
-- (* inductive case *)
-  intros m uf pc bb; intros. destruct uf as [u f].
-  assert (PC: U.repr u pc = pc).
-    generalize (H1 pc). rewrite H. auto.
-  assert (record_goto' (u, f) pc bb = (u, f)
-          \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)).
-    unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto.
-  destruct H2 as [B | [s [bb' [EQ B]]]].
-
-+ (* u and f are unchanged *)
-  rewrite B.
-  red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'.
-  destruct bb; auto. destruct i; auto.
-  apply H1.
-
-+ (* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *)
-  rewrite B.
-  red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ.
-
-* (* The new instruction *)
-  rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
-  unfold measure_edge. destruct (peq (U.repr u s) pc). auto. right. split. auto.
-  rewrite PC. rewrite peq_true. omega.
-
-* (* An old instruction *)
-  assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc').
-  { intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. }
-  generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto.
-  intros [P | [P Q]]. left; auto. right.
-  split. apply U.sameclass_union_2. auto.
-  unfold measure_edge. destruct (peq (U.repr u s) pc). auto.
-  rewrite P. destruct (peq (U.repr u s0) pc). omega. auto.
-Qed.
-
-Definition record_gotos' (f: function) :=
-  PTree.fold record_goto' f.(fn_code) (U.empty, fun (x: node) => O).
-
-Lemma record_gotos_gotos':
-  forall f, fst (record_gotos' f) = record_gotos f.
-Proof.
-  intros. unfold record_gotos', record_gotos.
-  repeat rewrite PTree.fold_spec.
-  generalize (PTree.elements (fn_code f)) (U.empty) (fun _ : node => O).
-  induction l; intros; simpl.
-  auto.
-  unfold record_goto' at 2. unfold record_goto at 2.
-  destruct (snd a). apply IHl. destruct i; apply IHl.
-Qed.
-
-Definition branch_target (f: function) (pc: node) : node :=
-  U.repr (record_gotos f) pc.
-
-Definition count_gotos (f: function) (pc: node) : nat :=
-  snd (record_gotos' f) pc.
-
-Theorem record_gotos_correct:
-  forall f pc,
-  match f.(fn_code)!pc with
-  | Some(Lbranch s :: b) =>
-       branch_target f pc = pc \/
-       (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat
-  | _ => branch_target f pc = pc
-  end.
+Lemma tunnel_fundef_External:
+  forall tf ef, tunnel_fundef (External ef) = OK tf
+  -> tf = External ef.
 Proof.
-  intros.
-  generalize (record_gotos'_correct f.(fn_code) pc). simpl.
-  fold (record_gotos' f). unfold branch_map_correct, branch_target, count_gotos.
-  rewrite record_gotos_gotos'. auto.
+  intros tf ef; simpl. intros H; inversion H; auto.
 Qed.
 
 (** * Preservation of semantics *)
 
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
 Section PRESERVATION.
 
 Variables prog tprog: program.
@@ -145,32 +178,65 @@ Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
 Lemma functions_translated:
-  forall v f,
+  forall (v: val) (f: fundef),
   Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_transf TRANSL).
+  exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+Qed.
 
 Lemma function_ptr_translated:
   forall v f,
   Genv.find_funct_ptr ge v = Some f ->
-  Genv.find_funct_ptr tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_ptr_transf TRANSL).
+  exists tf,
+  Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
 
-Lemma symbols_preserved:
-  forall id,
-  Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (Genv.find_symbol_transf TRANSL).
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
 
 Lemma senv_preserved:
   Senv.equiv ge tge.
-Proof (Genv.senv_transf TRANSL).
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
 
 Lemma sig_preserved:
-  forall f, funsig (tunnel_fundef f) = funsig f.
+  forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
 Proof.
-  destruct f; reflexivity.
+  intros. destruct f.
+  - simpl in H. monadInv H. unfold tunnel_function in EQ. 
+    destruct (check_included _ _); try congruence.
+    monadInv EQ. simpl; auto.
+  - simpl in H. monadInv H. reflexivity.
 Qed.
 
+Lemma fn_stacksize_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+  forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+  intros f tf; unfold tunnel_function.
+  destruct (check_included _ _); try congruence.
+  destruct (check_code _ _); simpl; try congruence.
+  intros H; inversion H; simpl; auto.
+Qed.
+
+
 (** The proof of semantic preservation is a simulation argument
   based on diagrams of the following form:
 <<
@@ -185,7 +251,7 @@ Qed.
   between states [st1] and [st2], as well as the postcondition between
   [st1'] and [st2'].  One transition in the source code (left) can correspond
   to zero or one transition in the transformed code (right).  The
-  "zero transition" case occurs when executing a [Lgoto] instruction
+  "zero transition" case occurs when executing a [Lnop] instruction
   in the source code that has been removed by tunneling.
 
   In the definition of [match_states], what changes between the original and
@@ -194,52 +260,52 @@ Qed.
   and memory states, since some [Vundef] values can become more defined
   as a consequence of eliminating useless [Lcond] instructions. *)
 
-Definition tunneled_block (f: function) (b: bblock) :=
-  tunnel_block (record_gotos f) b.
-
-Definition tunneled_code (f: function) :=
-  PTree.map1 (tunneled_block f) (fn_code f).
-
 Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
   forall l, Val.lessdef (ls1 l) (ls2 l).
 
 Inductive match_stackframes: stackframe -> stackframe -> Prop :=
   | match_stackframes_intro:
-      forall f sp ls0 bb tls0,
+      forall f tf sp ls0 bb tls0,
       locmap_lessdef ls0 tls0 ->
+      tunnel_function f = OK tf ->
       match_stackframes
          (Stackframe f sp ls0 bb)
-         (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)).
+         (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
 
 Inductive match_states: state -> state -> Prop :=
   | match_states_intro:
-      forall s f sp pc ls m ts tls tm
+      forall s f tf sp pc ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (State s f sp pc ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+                   (State ts tf sp (branch_target f pc) tls tm)
   | match_states_block:
-      forall s f sp bb ls m ts tls tm
+      forall s f tf sp bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_function f = OK tf),
       match_states (Block s f sp bb ls m)
-                   (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
+                   (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
   | match_states_interm:
-      forall s f sp pc bb ls m ts tls tm
+      forall s f tf sp pc i bb ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
-      match_states (Block s f sp (Lbranch pc :: bb) ls m)
-                   (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+        (MEM: Mem.extends m tm)
+        (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+        (TF: tunnel_function f = OK tf),
+      match_states (Block s f sp (i :: bb) ls m)
+                   (State ts tf sp pc tls tm)
   | match_states_call:
-      forall s f ls m ts tls tm
+      forall s f tf ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
         (LS: locmap_lessdef ls tls)
-        (MEM: Mem.extends m tm),
+        (MEM: Mem.extends m tm)
+        (TF: tunnel_fundef f = OK tf),
       match_states (Callstate s f ls m)
-                   (Callstate ts (tunnel_fundef f) tls tm)
+                   (Callstate ts tf tls tm)
   | match_states_return:
       forall s ls m ts tls tm
         (STK: list_forall2 match_stackframes s ts)
@@ -289,22 +355,6 @@ Proof.
   induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. 
 Qed.
 
-(*
-Lemma locmap_undef_lessdef:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2).
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto. 
-Qed.
-
-Lemma locmap_undef_lessdef_1:
-  forall ll ls1 ls2,
-  locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2.
-Proof.
-  induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto. 
-Qed.
-*)
-
 Lemma locmap_getpair_lessdef:
   forall p ls1 ls2,
   locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
@@ -348,15 +398,16 @@ Lemma find_function_translated:
   forall ros ls tls fd,
   locmap_lessdef ls tls ->
   find_function ge ros ls = Some fd ->
-  find_function tge ros tls = Some (tunnel_fundef fd).
+  exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
 Proof.
   intros. destruct ros; simpl in *.
 - assert (E: tls (R m) = ls (R m)).
   { exploit Genv.find_funct_inv; eauto. intros (b & EQ). 
     generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
-  rewrite E. apply functions_translated; auto.
+  rewrite E. exploit functions_translated; eauto.
 - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. 
-  apply function_ptr_translated; auto.
+  exploit function_ptr_translated; eauto.
+  intros (tf & X1 & X2). exists tf; intuition.
 Qed.
 
 Lemma call_regs_lessdef:
@@ -383,11 +434,12 @@ Qed.
 
 Definition measure (st: state) : nat :=
   match st with
-  | State s f sp pc ls m => (count_gotos f pc * 2)%nat
-  | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
-  | Block s f sp bb ls m => 0%nat
-  | Callstate s f ls m => 0%nat
-  | Returnstate s ls m => 0%nat
+  | State s f sp pc ls m => (bound (branch_target f) pc) * 2
+  | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1
+  | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1
+  | Block s f sp bb ls m => 0
+  | Callstate s f ls m => 0
+  | Returnstate s ls m => 0
   end.
 
 Lemma match_parent_locset:
@@ -406,24 +458,23 @@ Lemma tunnel_step_correct:
   (exists st2', step tge st1' t st2' /\ match_states st2 st2')
   \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
 Proof.
-  induction 1; intros; try inv MS.
+  induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
 
 - (* entering a block *)
-  assert (DEFAULT: branch_target f pc = pc ->
-    (exists st2' : state,
-     step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2'
-     /\ match_states (Block s f sp bb rs m) st2')).
-  { intros. rewrite H0. econstructor; split.
-    econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto.
-    econstructor; eauto. }
-
-  generalize (record_gotos_correct f pc). rewrite H.
-  destruct bb; auto. destruct i; auto.
-  intros [A | [B C]]. auto.
-  right. split. simpl. omega.
-  split. auto.
-  rewrite B. econstructor; eauto.
-
+  exploit (branch_target_bounds f tf pc); eauto.
+  rewrite H. intros X; inversion X.
+  + (* TB_default *) 
+    rewrite TB; left. econstructor; split.
+    * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+    * econstructor; eauto.
+  + (* FT_branch *)
+    simpl; right.
+    rewrite EQ; repeat (econstructor; omega || eauto).
+  + (* FT_cond *)
+    simpl; right.
+    repeat (econstructor; omega || eauto); simpl.
+    apply Nat.max_case; omega.
+    destruct (peq _ _); try congruence.
 - (* Lop *)
   exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. 
   intros (tv & EV & LD).
@@ -485,20 +536,25 @@ Proof.
   eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lcall *)
-  left; simpl; econstructor; split.
-  eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto.
-  rewrite sig_preserved. auto.
-  econstructor; eauto.
-  constructor; auto.
-  constructor; auto.
+  left; simpl.
+  exploit find_function_translated; eauto.
+  intros (tfd & Htfd & FIND).
+  econstructor; split.
+  + eapply exec_Lcall; eauto.
+    erewrite sig_preserved; eauto.
+  + econstructor; eauto.
+    constructor; auto.
+    constructor; auto.
 - (* Ltailcall *)
-  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
+  exploit find_function_translated. 2: eauto.
+  { eauto using return_regs_lessdef, match_parent_locset. }
+  intros (tfd & Htfd & FIND).
+  exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
   left; simpl; econstructor; split.
-  eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
-  eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset.
-  apply sig_preserved.
-  econstructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Ltailcall; eauto.
+    * eapply sig_preserved; eauto.
+    * erewrite fn_stacksize_preserved; eauto.
+  + econstructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* Lbuiltin *)
   exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
   exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
@@ -513,45 +569,58 @@ Proof.
   fold (branch_target f pc). econstructor; eauto.
 - (* Lbranch (eliminated) *)
   right; split. simpl. omega. split. auto. constructor; auto.
-
-- (* Lcond *)
-  simpl tunneled_block.
-  set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
-  destruct (peq s1 s2).
-+ left; econstructor; split.
-  eapply exec_Lbranch. 
-  destruct b.
-* constructor; eauto using locmap_undef_regs_lessdef_1.
-* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
-+ left; econstructor; split.
-  eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
-  destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-
+- (* Lcond (preserved) *)
+  simpl; left; destruct (peq _ _) eqn: EQ.
+  + econstructor; split.
+    eapply exec_Lbranch. 
+    destruct b.
+    * constructor; eauto using locmap_undef_regs_lessdef_1.
+    * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+  + econstructor; split.
+    eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+    destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+  destruct (peq _ _) eqn: EQ; try inv H1.
+  right; split; simpl. 
+  + destruct b.
+    generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+    generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+  + destruct b.
+    -- repeat (constructor; auto).
+    -- rewrite e; repeat (constructor; auto).
 - (* Ljumptable *)
   assert (tls (R arg) = Vint n).
   { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
   left; simpl; econstructor; split.
   eapply exec_Ljumptable.
-  eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
+  eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
   econstructor; eauto using locmap_undef_regs_lessdef.
 - (* Lreturn *)
   exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). 
   left; simpl; econstructor; split.
-  eapply exec_Lreturn; eauto.
-  constructor; eauto using return_regs_lessdef, match_parent_locset.
+  + eapply exec_Lreturn; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + constructor; eauto using return_regs_lessdef, match_parent_locset.
 - (* internal function *)
+  exploit tunnel_fundef_Internal; eauto.
+  intros (tf' & TF' & ITF). subst.
   exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
-  intros (tm' & ALLOC & MEM'). 
-  left; simpl; econstructor; split.
-  eapply exec_function_internal; eauto.
-  simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+  intros (tm' & ALLOC & MEM').
+  left; simpl.
+  econstructor; split.
+  + eapply exec_function_internal; eauto.
+    erewrite fn_stacksize_preserved; eauto.
+  + simpl.
+    erewrite (fn_entrypoint_preserved f tf'); auto.
+    econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
 - (* external function *)
   exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
   intros (tvres & tm' & A & B & C & D).
   left; simpl; econstructor; split.
-  eapply exec_function_external; eauto.
-  eapply external_call_symbols_preserved; eauto. apply senv_preserved.
-  simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+  + erewrite (tunnel_fundef_External tf ef); eauto.
+    eapply exec_function_external; eauto.
+    eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+  + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
 - (* return *)
   inv STK. inv H1.
   left; econstructor; split.
@@ -564,14 +633,15 @@ Lemma transf_initial_states:
   exists st2, initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H.
-  exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
+  exploit function_ptr_translated; eauto.
+  intros (tf & Htf & Hf).
+  exists (Callstate nil tf (Locmap.init Vundef) m0); split.
   econstructor; eauto.
-  apply (Genv.init_mem_transf TRANSL); auto.
+  apply (Genv.init_mem_transf_partial TRANSL); auto.
   rewrite (match_program_main TRANSL).
   rewrite symbols_preserved. eauto.
-  apply function_ptr_translated; auto.
-  rewrite <- H3. apply sig_preserved.
-  constructor. constructor. red; simpl; auto. apply Mem.extends_refl.
+  rewrite <- H3. apply sig_preserved. auto.
+  constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
 Qed.
 
 Lemma transf_final_states:
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index 779e7bb9..f1a46baa 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -2069,7 +2069,6 @@ Definition divfs := binop_single Float32.div.
 Lemma divfs_sound:
   forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y).
 Proof (binop_single_sound Float32.div).
-
 (** Conversions *)
 
 Definition zero_ext (nbits: Z) (v: aval) :=
@@ -2483,6 +2482,468 @@ Proof.
   destruct 1; simpl; auto with va.
 Qed.
 
+
+(* Extensions for KVX and Risc-V *)
+
+Definition intoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_int f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition intuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_intu f with
+      | Some i => I i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuoffloat_total (x: aval) :=
+  match x with
+  | F f =>
+      match Float.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_long f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Definition longuofsingle_total (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_longu f with
+      | Some i => L i
+      | None => ntop
+      end
+  | _ => ntop1 x
+  end.
+
+Lemma intoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intoffloat v)) (intoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x).
+Proof.
+  unfold Val.intoffloat, intoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intofsingle v)) (intofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma intuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x).
+Proof.
+  unfold Val.intofsingle, intofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofint v)) (singleofint x).
+Proof.
+  unfold Val.singleofint, singleofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleofintu v)) (singleofintu x).
+Proof.
+  unfold Val.singleofintu, singleofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma longoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longoffloat v)) (longoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuoffloat_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x).
+Proof.
+  unfold Val.longoffloat, longoffloat_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longofsingle v)) (longofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma longuofsingle_total_sound:
+  forall v x
+         (MATCH : vmatch v x),
+    vmatch (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x).
+Proof.
+  unfold Val.longofsingle, longofsingle_total. intros.
+  inv MATCH; simpl in *; try constructor.
+  all: destruct (Float32.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
+Qed.
+
+Lemma singleoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflong v)) (singleoflong x).
+Proof.
+  unfold Val.singleoflong, singleoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma singleoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.singleoflongu v)) (singleoflongu x).
+Proof.
+  unfold Val.singleoflongu, singleoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflong_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflong v)) (floatoflong x).
+Proof.
+  unfold Val.floatoflong, floatoflong; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatoflongu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatoflongu v)) (floatoflongu x).
+Proof.
+  unfold Val.floatoflongu, floatoflongu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofint_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofint v)) (floatofint x).
+Proof.
+  unfold Val.floatofint, floatofint; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+Lemma floatofintu_total_sound:
+  forall v x, vmatch v x ->
+              vmatch (Val.maketotal (Val.floatofintu v)) (floatofintu x).
+Proof.
+  unfold Val.floatofintu, floatofintu; intros.
+  inv H; simpl.
+  all: auto with va.
+  all: unfold ntop1, provenance.
+  all: try constructor.
+Qed.
+
+
+Definition divs_total (v w: aval) := 
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divs_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divs v w)) (divs_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition divu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+       then ntop
+      else I (Int.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divu v w)) (divu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct Int.eq eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition mods_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      || Int.eq i1 (Int.repr Int.min_signed) && Int.eq i2 Int.mone
+      then ntop
+      else I (Int.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma mods_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.mods v w)) (mods_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct (_ || _) eqn:E; cbn; unfold ntop; auto with va.
+  }
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2; cbn; auto with va.
+Qed.
+
+Definition modu_total (v w: aval) :=
+  match w, v with
+  | I i2, I i1 =>
+      if Int.eq i2 Int.zero
+      then ntop
+      else I (Int.modu i1 i2)
+  | I i2, _ => uns (provenance v) (usize i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modu v w)) (modu_total x y).
+Proof.
+  assert (UNS: forall i j, j <> Int.zero -> is_uns (usize j) (Int.modu i j)).
+  {
+    intros. apply is_uns_mon with (usize (Int.modu i j)).
+    { apply is_uns_usize.
+    }
+    unfold usize, Int.size.
+    apply Zsize_monotone.
+    generalize (Int.unsigned_range_2 j); intros RANGE.
+    assert (Int.unsigned j <> 0).
+    { red; intros; elim H. rewrite <- (Int.repr_unsigned j). rewrite H0. auto. }
+    exploit (Z_mod_lt (Int.unsigned i) (Int.unsigned j)). omega. intros MOD.
+    unfold Int.modu. rewrite Int.unsigned_repr. omega. omega.
+  }
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  { destruct Int.eq eqn:E; unfold ntop; cbn; auto with va.
+  }
+  all: try discriminate.
+  all: unfold ntop2; auto with va.
+  all: try (destruct Int.eq eqn:E; cbn; unfold ntop2; auto with va; fail).
+  all: try apply vmatch_uns_undef.
+  
+  all:
+    generalize (Int.eq_spec i0 Int.zero);
+    destruct (Int.eq i0 Int.zero);
+    cbn;
+    intro.
+  all: try apply vmatch_uns_undef.
+  all: apply vmatch_uns; auto.
+Qed.
+
+
+Lemma shrx_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrx v w)) (shrx x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
+
+Definition divls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.divs i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divls v w)) (divls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+Definition divlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+       then ntop
+      else L (Int64.divu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma divlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.divlu v w)) (divlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modls_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      || Int64.eq i1 (Int64.repr Int64.min_signed) && Int64.eq i2 Int64.mone
+      then ntop
+      else L (Int64.mods i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modls_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modls v w)) (modls_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct (_ || _) eqn:E; unfold ntop2, ntop; cbn; auto with va.
+Qed.
+
+
+Definition modlu_total (v w: aval) :=
+  match w, v with
+  | L i2, L i1 =>
+      if Int64.eq i2 Int64.zero
+      then ntop
+      else L (Int64.modu i1 i2)
+  | _, _ => ntop2 v w
+  end.
+
+Lemma modlu_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.modlu v w)) (modlu_total x y).
+Proof.
+  intros until y.
+  intros HX HY.
+  inv HX; inv HY; cbn in *.
+  all: unfold ntop2; auto with va.
+  all: destruct Int64.eq eqn:E; cbn; unfold ntop2, ntop; auto with va.
+Qed.
+
+Lemma shrxl_total_sound:
+  forall v w x y, vmatch v x -> vmatch w y -> vmatch (Val.maketotal (Val.shrxl v w)) (shrxl x y).
+Proof.
+  intros until y. intros HX HY.
+  inv HX; inv HY; cbn.
+  all: unfold ntop1; auto with va.
+  all: destruct Int.ltu eqn:LTU; cbn; unfold ntop; auto with va.
+Qed.
+
 (** Comparisons and variation intervals *)
 
 Definition cmp_intv (c: comparison) (i: Z * Z) (n: Z) : abool :=
@@ -4734,6 +5195,26 @@ Hint Resolve cnot_sound symbol_address_sound
        longoffloat_sound longuoffloat_sound floatoflong_sound floatoflongu_sound
        longofsingle_sound longuofsingle_sound singleoflong_sound singleoflongu_sound
        longofwords_sound loword_sound hiword_sound
+       intoffloat_total_sound
+       intuoffloat_total_sound
+       intofsingle_total_sound
+       intuofsingle_total_sound
+       singleofint_total_sound
+       singleofintu_total_sound
+       longoffloat_total_sound
+       longuoffloat_total_sound
+       longofsingle_total_sound
+       longuofsingle_total_sound
+       singleoflong_total_sound
+       singleoflongu_total_sound
+       floatoflong_total_sound
+       floatoflongu_total_sound
+       floatofint_total_sound
+       floatofintu_total_sound
+       divu_total_sound divs_total_sound
+       modu_total_sound mods_total_sound shrx_total_sound
+       divlu_total_sound divls_total_sound
+       modlu_total_sound modls_total_sound shrxl_total_sound
        cmpu_bool_sound cmp_bool_sound cmplu_bool_sound cmpl_bool_sound
        cmpf_bool_sound cmpfs_bool_sound
        maskzero_sound : va.
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index 75f5eb3e..d830ada6 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -194,12 +194,28 @@ let builtins_generic = {
       (TInt(IUInt, []), [TInt(IUInt, [])], false);
     "__builtin_bswap16",
       (TInt(IUShort, []), [TInt(IUShort, [])], false);
+    "__builtin_clz",
+      (TInt(IInt, []), [TInt(IUInt, [])], false);
+    "__builtin_clzl",
+      (TInt(IInt, []), [TInt(IULong, [])], false);
+    "__builtin_clzll",
+      (TInt(IInt, []), [TInt(IULongLong, [])], false);
+    "__builtin_ctz",
+      (TInt(IInt, []), [TInt(IUInt, [])], false);
+    "__builtin_ctzl",
+      (TInt(IInt, []), [TInt(IULong, [])], false);
+    "__builtin_ctzll",
+      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     (* Floating-point absolute value *)
     "__builtin_fabs",
     (TFloat(FDouble, []), [TFloat(FDouble, [])], false);
+    "__builtin_fabsf",
+    (TFloat(FFloat, []), [TFloat(FFloat, [])], false);
     (* Float arithmetic *)
     "__builtin_fsqrt",
     (TFloat(FDouble, []), [TFloat(FDouble, [])], false);
+    "__builtin_sqrt",
+    (TFloat(FDouble, []), [TFloat(FDouble, [])], false);
     (* Block copy *)
     "__builtin_memcpy_aligned",
          (TVoid [],
@@ -1540,7 +1556,7 @@ let convertProgram p =
         let p' =
           { prog_defs = gl2;
             prog_public = public_globals gl2;
-            prog_main = intern_string "main";
+            prog_main = intern_string !Clflags.main_function_name;
             prog_types = typs;
             prog_comp_env = ce } in
         Diagnostics.check_errors ();
diff --git a/cfrontend/CPragmas.ml b/cfrontend/CPragmas.ml
index 44660718..22ab2b5a 100644
--- a/cfrontend/CPragmas.ml
+++ b/cfrontend/CPragmas.ml
@@ -49,13 +49,15 @@ let process_use_section_pragma classname id =
 
 (* #pragma reserve_register *)
 
+let reserved_registers = ref ([]: Machregs.mreg list)
+
 let process_reserve_register_pragma name =
-  match Machregsaux.register_by_name name with
+  match Machregsnames.register_by_name name with
   | None ->
       C2C.error "unknown register in `reserve_register' pragma"
   | Some r ->
-      if Machregsaux.can_reserve_register r then
-        IRC.reserved_registers := r :: !IRC.reserved_registers
+      if Conventions1.is_callee_save r then
+        reserved_registers := r :: !reserved_registers
       else
         C2C.error "cannot reserve this register (not a callee-save)"
 
@@ -84,5 +86,8 @@ let process_pragma name =
   | _ ->
       false
 
+let reset () =
+  reserved_registers := []
+
 let initialize () =
   C2C.process_pragma_hook := process_pragma
diff --git a/cfrontend/PrintCsyntax.ml b/cfrontend/PrintCsyntax.ml
index beca056f..cfb2b584 100644
--- a/cfrontend/PrintCsyntax.ml
+++ b/cfrontend/PrintCsyntax.ml
@@ -270,6 +270,9 @@ let rec expr p (prec, e) =
   | Ebuiltin(EF_debug(kind,txt,_),_,args,_) ->
       fprintf p "__builtin_debug@[<hov 1>(%d,%S%a)@]"
         (P.to_int kind) (extern_atom txt) exprlist (false,args)
+  | Ebuiltin(EF_builtin(name, _), _, args, _) ->
+      fprintf p "%s@[<hov 1>(%a)@]"
+                (camlstring_of_coqstring name) exprlist (true, args)
   | Ebuiltin(_, _, args, _) ->
       fprintf p "<unknown builtin>@[<hov 1>(%a)@]" exprlist (true, args)
   | Eparen(a1, tycast, ty) ->
diff --git a/cfrontend/SimplExpr.v b/cfrontend/SimplExpr.v
index 7cdff468..c7e57a54 100644
--- a/cfrontend/SimplExpr.v
+++ b/cfrontend/SimplExpr.v
@@ -268,8 +268,13 @@ Fixpoint transl_expr (dst: destination) (a: Csyntax.expr) : mon (list statement
       do (sl2, a2) <- transl_expr For_val r2;
       ret (finish dst (sl1 ++ sl2) (Ebinop op a1 a2 ty))
   | Csyntax.Ecast r1 ty =>
-      do (sl1, a1) <- transl_expr For_val r1;
-      ret (finish dst sl1 (Ecast a1 ty))
+      match dst with
+      | For_val | For_set _ =>
+          do (sl1, a1) <- transl_expr For_val r1;
+          ret (finish dst sl1 (Ecast a1 ty))
+      | For_effects =>
+          transl_expr For_effects r1
+      end
   | Csyntax.Eseqand r1 r2 ty =>
       do (sl1, a1) <- transl_expr For_val r1;
       match dst with
diff --git a/cfrontend/SimplExprproof.v b/cfrontend/SimplExprproof.v
index ee1df409..9a3f32ec 100644
--- a/cfrontend/SimplExprproof.v
+++ b/cfrontend/SimplExprproof.v
@@ -145,18 +145,18 @@ Proof.
   assert (A: forall dst a, dst = For_val \/ dst = For_effects -> final dst a = nil).
     intros. destruct H; subst dst; auto.
   apply tr_expr_exprlist; intros; simpl in *; try discriminate; auto.
-  rewrite H0; auto. simpl; auto.
-  rewrite H0; auto. simpl; auto.
-  destruct H1; congruence.
-  destruct (andb_prop _ _ H6). inv H1.
+- rewrite H0; auto. simpl; auto.
+- rewrite H0; auto. simpl; auto.
+- destruct H1; congruence.
+- destruct (andb_prop _ _ H6). inv H1.
     rewrite H0; eauto. simpl; auto.
     unfold chunk_for_volatile_type in H9.
     destruct (type_is_volatile (Csyntax.typeof e1)); simpl in H8; congruence.
-  rewrite H0; auto. simpl; auto.
-  rewrite H0; auto. simpl; auto.
-  destruct (andb_prop _ _ H7). rewrite H0; auto. rewrite H2; auto. simpl; auto.
-  rewrite H0; auto. simpl; auto.
-  destruct (andb_prop _ _ H6). rewrite H0; auto.
+- rewrite H0; auto. simpl; auto.
+- rewrite H0; auto. simpl; auto.
+- destruct (andb_prop _ _ H7). rewrite H0; auto. rewrite H2; auto. simpl; auto.
+- rewrite H0; auto. simpl; auto.
+- destruct (andb_prop _ _ H6). rewrite H0; auto.
 Qed.
 
 Lemma tr_simple_expr_nil:
@@ -234,11 +234,11 @@ Proof.
 Opaque makeif.
   intros e m.
   apply (eval_simple_rvalue_lvalue_ind ge e m); intros until tmps; intros TR; inv TR.
-(* value *)
+- (* value *)
   auto.
-  auto.
-  exists a0; auto.
-(* rvalof *)
+- auto.
+- exists a0; auto.
+- (* rvalof *)
   inv H7; try congruence.
   exploit H0; eauto. intros [A [B C]].
   subst sl1; simpl.
@@ -248,53 +248,55 @@ Opaque makeif.
     exploit deref_loc_translated; eauto. unfold chunk_for_volatile_type; rewrite H2. tauto.
   destruct dst; auto.
   econstructor. split. simpl; eauto. auto.
-(* addrof *)
+- (* addrof *)
   exploit H0; eauto. intros [A [B C]].
   subst sl1; simpl.
   assert (eval_expr tge e le m (Eaddrof' a1 ty) (Vptr b ofs)) by (apply eval_Eaddrof'; auto).
   assert (typeof (Eaddrof' a1 ty) = ty) by (apply typeof_Eaddrof').
   destruct dst; auto. simpl; econstructor; eauto.  
-(* unop *)
+- (* unop *)
   exploit H0; eauto. intros [A [B C]].
   subst sl1; simpl.
   assert (eval_expr tge e le m (Eunop op a1 ty) v). econstructor; eauto. congruence.
   destruct dst; auto. simpl; econstructor; eauto.
-(* binop *)
+- (* binop *)
   exploit H0; eauto. intros [A [B C]].
   exploit H2; eauto. intros [D [E F]].
   subst sl1 sl2; simpl.
   assert (eval_expr tge e le m (Ebinop op a1 a2 ty) v). econstructor; eauto. rewrite comp_env_preserved; congruence.
   destruct dst; auto. simpl; econstructor; eauto.
-(* cast *)
+- (* cast effects *)
+  exploit H0; eauto.
+- (* cast val *)
   exploit H0; eauto. intros [A [B C]].
   subst sl1; simpl.
   assert (eval_expr tge e le m (Ecast a1 ty) v). econstructor; eauto. congruence.
   destruct dst; auto. simpl; econstructor; eauto.
-(* sizeof *)
+- (* sizeof *)
   rewrite <- comp_env_preserved.
   destruct dst.
   split; auto. split; auto. constructor.
   auto.
   exists (Esizeof ty1 ty). split. auto. split. auto. constructor.
-(* alignof *)
+- (* alignof *)
   rewrite <- comp_env_preserved.
   destruct dst.
   split; auto. split; auto. constructor.
   auto.
   exists (Ealignof ty1 ty). split. auto. split. auto. constructor.
-(* var local *)
+- (* var local *)
   split; auto. split; auto. apply eval_Evar_local; auto.
-(* var global *)
+- (* var global *)
   split; auto. split; auto. apply eval_Evar_global; auto.
     rewrite symbols_preserved; auto.
-(* deref *)
+- (* deref *)
   exploit H0; eauto. intros [A [B C]]. subst sl1.
   split; auto. split. rewrite typeof_Ederef'; auto. apply eval_Ederef'; auto. 
-(* field struct *)
+- (* field struct *)
   rewrite <- comp_env_preserved in *.
   exploit H0; eauto. intros [A [B C]]. subst sl1.
   split; auto. split; auto. rewrite B in H1. eapply eval_Efield_struct; eauto.
-(* field union *)
+- (* field union *)
   rewrite <- comp_env_preserved in *.
   exploit H0; eauto. intros [A [B C]]. subst sl1.
   split; auto. split; auto. rewrite B in H1. eapply eval_Efield_union; eauto.
@@ -408,43 +410,43 @@ Ltac UNCHANGED :=
   (*generalize compat_dest_change; intro CDC.*)
   apply leftcontext_leftcontextlist_ind; intros.
 
-(* base *)
+- (* base *)
   TR. rewrite <- app_nil_end; auto. red; auto.
   intros. rewrite <- app_nil_end; auto.
-(* deref *)
+- (* deref *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1; rewrite app_ass; eauto. auto.
   intros. rewrite <- app_ass. econstructor; eauto.
-(* field *)
+- (* field *)
   inv H1.
   exploit H0. eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1; rewrite app_ass; eauto. auto.
   intros. rewrite <- app_ass. econstructor; eauto.
-(* rvalof *)
+- (* rvalof *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1; rewrite app_ass; eauto. red; eauto.
   intros. rewrite <- app_ass; econstructor; eauto.
   exploit typeof_context; eauto. congruence.
-(* addrof *)
+- (* addrof *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1; rewrite app_ass; eauto. auto.
   intros. rewrite <- app_ass. econstructor; eauto.
-(* unop *)
+- (* unop *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1; rewrite app_ass; eauto. auto.
   intros. rewrite <- app_ass. econstructor; eauto.
-(* binop left *)
+- (* binop left *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
   red; auto.
   intros. rewrite <- app_ass. econstructor; eauto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
-(* binop right *)
+- (* binop right *)
   inv H2.
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
@@ -452,14 +454,19 @@ Ltac UNCHANGED :=
   red; auto.
   intros. rewrite <- app_ass. change (sl3 ++ sl2') with (nil ++ sl3 ++ sl2'). rewrite app_ass. econstructor; eauto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
-(* cast *)
+- (* cast *)
   inv H1.
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
-  TR. subst sl1; rewrite app_ass; eauto. auto.
+  TR. eauto. auto. 
+  intros. econstructor; eauto.
++ (* generic *)
+  exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
+  TR. subst sl1. rewrite app_ass. eauto. auto. 
   intros. rewrite <- app_ass. econstructor; eauto.
-(* seqand *)
+- (* seqand *)
   inv H1.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -467,15 +474,15 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
   red; auto.
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
-  auto. auto. auto. auto.
-  (* for set *)
+  auto. auto. auto.
++ (* for set *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -483,9 +490,9 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto.
-(* seqor *)
+- (* seqor *)
   inv H1.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -493,15 +500,15 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
   red; auto.
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
-  auto. auto. auto. auto.
-  (* for set *)
+  auto. auto. auto.
++ (* for set *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -509,9 +516,9 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto.
-(* condition *)
+- (* condition *)
   inv H1.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -520,7 +527,7 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto. auto. auto.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -529,7 +536,7 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto. auto.
-  (* for set *)
++ (* for set *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR.
   rewrite Q. rewrite app_ass. eauto.
@@ -538,16 +545,16 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto. auto. auto.
-(* assign left *)
+- (* assign left *)
   inv H1.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
   red; auto.
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
   red; auto.
@@ -556,9 +563,9 @@ Ltac UNCHANGED :=
   auto. auto. auto. auto. auto. auto.
   eapply typeof_context; eauto.
   auto.
-(* assign right *)
+- (* assign right *)
   inv H2.
-  (* for effects *)
++ (* for effects *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl2. rewrite app_ass. eauto.
@@ -567,7 +574,7 @@ Ltac UNCHANGED :=
   econstructor.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto. auto. auto. auto.
-  (* for val *)
++ (* for val *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl2. rewrite app_ass. eauto.
@@ -577,9 +584,9 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto. auto. auto. auto. auto. auto. auto. auto.
   eapply typeof_context; eauto.
-(* assignop left *)
+- (* assignop left *)
   inv H1.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
   red; auto.
@@ -587,7 +594,7 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   symmetry; eapply typeof_context; eauto. eauto.
   auto. auto. auto. auto. auto. auto.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
   red; auto.
@@ -595,9 +602,9 @@ Ltac UNCHANGED :=
   eapply tr_expr_invariant; eauto. UNCHANGED.
   eauto. auto. auto. auto. auto. auto. auto. auto. auto. auto. auto.
   eapply typeof_context; eauto.
-(* assignop right *)
+- (* assignop right *)
   inv H2.
-  (* for effects *)
++ (* for effects *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl2. rewrite app_ass. eauto.
@@ -605,7 +612,7 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. change (sl0 ++ sl2') with (nil ++ sl0 ++ sl2'). rewrite app_ass. econstructor.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto. auto. eauto. auto. auto. auto. auto. auto. auto.
-  (* for val *)
++ (* for val *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl2. rewrite app_ass. eauto.
@@ -613,35 +620,35 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. change (sl0 ++ sl2') with (nil ++ sl0 ++ sl2'). rewrite app_ass. econstructor.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto. eauto. auto. auto. auto. auto. auto. auto. auto. auto. auto. auto. auto.
-(* postincr *)
+- (* postincr *)
   inv H1.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto. red; auto.
   intros. rewrite <- app_ass. econstructor; eauto.
   symmetry; eapply typeof_context; eauto.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto. red; auto.
   intros. rewrite <- app_ass. econstructor; eauto.
   eapply typeof_context; eauto.
-(* call left *)
+- (* call left *)
   inv H1.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto. red; auto.
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_exprlist_invariant; eauto. UNCHANGED.
   auto. auto. auto.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto. red; auto.
   intros. rewrite <- app_ass. econstructor. auto. apply S; auto.
   eapply tr_exprlist_invariant; eauto. UNCHANGED.
   auto. auto. auto. auto.
-(* call right *)
+- (* call right *)
   inv H2.
-  (* for effects *)
++ (* for effects *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto.
@@ -650,7 +657,7 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. change (sl3++sl2') with (nil ++ sl3 ++ sl2'). rewrite app_ass. econstructor.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto. auto. auto. auto.
-  (* for val *)
++ (* for val *)
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto.
@@ -660,42 +667,42 @@ Ltac UNCHANGED :=
   auto. eapply tr_expr_invariant; eauto. UNCHANGED.
   apply S; auto.
   auto. auto. auto. auto.
-(* builtin *)
+- (* builtin *)
   inv H1.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto.
   red; auto.
   intros. rewrite <- app_ass. change (sl3++sl2') with (nil ++ sl3 ++ sl2'). rewrite app_ass. econstructor.
   apply S; auto. auto.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto.
   red; auto.
   intros. rewrite <- app_ass. change (sl3++sl2') with (nil ++ sl3 ++ sl2'). rewrite app_ass. econstructor.
   auto. apply S; auto. auto. auto.
-(* comma *)
+- (* comma *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q; rewrite app_ass; eauto. red; auto.
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_expr_invariant; eauto. UNCHANGED.
   auto. auto. auto.
-(* paren *)
+- (* paren *)
   inv H1.
-  (* for val *)
++ (* for val *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q. eauto. red; auto.
   intros. econstructor; eauto.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q. eauto. auto.
   intros. econstructor; eauto.
-  (* for set *)
++ (* for set *)
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. rewrite Q. eauto. auto.
   intros. econstructor; eauto.
-(* cons left *)
+- (* cons left *)
   inv H1.
   exploit H0; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
   TR. subst sl1. rewrite app_ass. eauto.
@@ -703,7 +710,7 @@ Ltac UNCHANGED :=
   intros. rewrite <- app_ass. econstructor. apply S; auto.
   eapply tr_exprlist_invariant; eauto.  UNCHANGED.
   auto. auto. auto.
-(* cons right *)
+- (* cons right *)
   inv H2.
   assert (sl1 = nil) by (eapply tr_simple_expr_nil; eauto). subst sl1; simpl.
   exploit H1; eauto. intros [dst' [sl1' [sl2' [a' [tmp' [P [Q [R S]]]]]]]].
diff --git a/cfrontend/SimplExprspec.v b/cfrontend/SimplExprspec.v
index 95e3957c..98425311 100644
--- a/cfrontend/SimplExprspec.v
+++ b/cfrontend/SimplExprspec.v
@@ -108,7 +108,12 @@ Inductive tr_expr: temp_env -> destination -> Csyntax.expr -> list statement ->
       tr_expr le dst (Csyntax.Ebinop op e1 e2 ty)
                    (sl1 ++ sl2 ++ final dst (Ebinop op a1 a2 ty))
                    (Ebinop op a1 a2 ty) tmp
-  | tr_cast: forall le dst e1 ty sl1 a1 tmp,
+  | tr_cast_effects: forall le e1 ty sl1 a1 any tmp,
+      tr_expr le For_effects e1 sl1 a1 tmp ->
+      tr_expr le For_effects (Csyntax.Ecast e1 ty)
+                   sl1
+                   any tmp
+  | tr_cast_val: forall le dst e1 ty sl1 a1 tmp,
       tr_expr le For_val e1 sl1 a1 tmp ->
       tr_expr le dst (Csyntax.Ecast e1 ty)
                    (sl1 ++ final dst (Ecast a1 ty))
@@ -767,58 +772,69 @@ Lemma transl_meets_spec:
     exists tmps, (forall le, tr_exprlist le rl sl al tmps) /\ contained tmps g g').
 Proof.
   apply expr_exprlist_ind; simpl add_dest; intros.
-(* val *)
+- (* val *)
   simpl in H. destruct v; monadInv H; exists (@nil ident); split; auto with gensym.
 Opaque makeif.
-- intros. destruct dst; simpl in *; inv H2.
++ intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-- intros. destruct dst; simpl in *; inv H2.
++ intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-- intros. destruct dst; simpl in *; inv H2.
++ intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-- intros. destruct dst; simpl in *; inv H2.
++ intros. destruct dst; simpl in *; inv H2.
     constructor. auto. intros; constructor.
     constructor.
     constructor. auto. intros; constructor.
-(* var *)
-- monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
-(* field *)
-- monadInv H0. exploit H; eauto. auto. intros [tmp [A B]]. UseFinish.
+- (* var *)
+  monadInv H; econstructor; split; auto with gensym. UseFinish. constructor.
+- (* field *)
+  monadInv H0. exploit H; eauto. auto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
-(* valof *)
-- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- (* valof *)
+  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   exploit transl_valof_meets_spec; eauto. intros [tmp2 [Csyntax D]]. UseFinish.
   exists (tmp1 ++ tmp2); split.
   intros; apply tr_expr_add_dest. econstructor; eauto with gensym.
   eauto with gensym.
-(* deref *)
-- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- (* deref *)
+  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
-(* addrof *)
-- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- (* addrof *)
+  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. econstructor; eauto.
-(* unop *)
-- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
+- (* unop *)
+  monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
   econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
-(* binop *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* binop *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]]. UseFinish.
   exists (tmp1 ++ tmp2); split.
   intros; apply tr_expr_add_dest. econstructor; eauto with gensym.
   eauto with gensym.
-(* cast *)
-- monadInv H0. exploit H; eauto. intros [tmp [A B]]. UseFinish.
-  econstructor; split; eauto. intros; apply tr_expr_add_dest. constructor; auto.
-(* seqand *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* cast *)
+  destruct dst.
++ (* for value *)
+  monadInv H0. exploit H; eauto. intros [tmp [A B]].
+  econstructor; split; eauto. intros; apply tr_expr_add_dest.
+  rewrite (app_nil_end sl).
+  apply tr_cast_val with (dst := For_val); auto.
++ (* for effects *)
+  exploit H; eauto. intros [tmp [A B]].
+  econstructor; split; eauto. intros; eapply tr_cast_effects; eauto.
++ (* for set *)
+  monadInv H0. exploit H; eauto. intros [tmp [A B]].
+  econstructor; split; eauto. intros; apply tr_expr_add_dest.
+  apply tr_cast_val with (dst := For_set sd); auto.
+- (* seqand *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
-  (* for value *)
++ (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   simpl add_dest in *.
   exists (x0 :: tmp1 ++ tmp2); split.
@@ -826,23 +842,23 @@ Opaque makeif.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto with gensym. intros [tmp2 [Csyntax D]].
   simpl add_dest in *.
   exists (tmp1 ++ tmp2); split.
   intros; eapply tr_seqand_effects; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for set *)
++ (* for set *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   simpl add_dest in *.
   exists (tmp1 ++ tmp2); split.
   intros; eapply tr_seqand_set; eauto with gensym.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_app; eauto with gensym.
-(* seqor *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* seqor *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
-  (* for value *)
++ (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [Csyntax D]].
   simpl add_dest in *.
   exists (x0 :: tmp1 ++ tmp2); split.
@@ -850,23 +866,23 @@ Opaque makeif.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   simpl add_dest in *.
   exists (tmp1 ++ tmp2); split.
   intros; eapply tr_seqor_effects; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for set *)
++ (* for set *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   simpl add_dest in *.
   exists (tmp1 ++ tmp2); split.
   intros; eapply tr_seqor_set; eauto with gensym.
   apply list_disjoint_cons_r; eauto with gensym.
   apply contained_app; eauto with gensym.
-(* condition *)
-- monadInv H2. exploit H; eauto. intros [tmp1 [A B]].
+- (* condition *)
+  monadInv H2. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0.
-  (* for value *)
++ (* for value *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   exploit H1; eauto with gensym. intros [tmp3 [E F]].
   simpl add_dest in *.
@@ -877,14 +893,14 @@ Opaque makeif.
   apply contained_cons. eauto with gensym.
   apply contained_app. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   exploit H1; eauto. intros [tmp3 [E F]].
   simpl add_dest in *.
   exists (tmp1 ++ tmp2 ++ tmp3); split.
   intros; eapply tr_condition_effects; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for test *)
++ (* for test *)
   exploit H0; eauto with gensym. intros [tmp2 [C D]].
   exploit H1; eauto 10 with gensym. intros [tmp3 [E F]].
   simpl add_dest in *.
@@ -895,70 +911,70 @@ Opaque makeif.
   apply contained_cons; eauto with gensym.
   apply contained_app; eauto with gensym.
   apply contained_app; eauto with gensym.
-(* sizeof *)
-- monadInv H. UseFinish.
+- (* sizeof *)
+  monadInv H. UseFinish.
   exists (@nil ident); split; auto with gensym. constructor.
-(* alignof *)
-- monadInv H. UseFinish.
+- (* alignof *)
+  monadInv H. UseFinish.
   exists (@nil ident); split; auto with gensym. constructor.
-(* assign *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* assign *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   destruct dst; monadInv EQ2; simpl add_dest in *.
-  (* for value *)
++ (* for value *)
   exists (x1 :: tmp1 ++ tmp2); split.
   intros. eapply tr_assign_val with (dst := For_val); eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for set *)
++ (* for set *)
   exists (x1 :: tmp1 ++ tmp2); split.
   repeat rewrite app_ass. simpl.
   intros. eapply tr_assign_val with (dst := For_set sd); eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-(* assignop *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* assignop *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   exploit transl_valof_meets_spec; eauto. intros [tmp3 [E F]].
   destruct dst; monadInv EQ3; simpl add_dest in *.
-  (* for value *)
++ (* for value *)
   exists (x2 :: tmp1 ++ tmp2 ++ tmp3); split.
   intros. eapply tr_assignop_val with (dst := For_val); eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exists (tmp1 ++ tmp2 ++ tmp3); split.
   econstructor; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for set *)
++ (* for set *)
   exists (x2 :: tmp1 ++ tmp2 ++ tmp3); split.
   repeat rewrite app_ass. simpl.
   intros. eapply tr_assignop_val with (dst := For_set sd); eauto with gensym.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-(* postincr *)
-- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- (* postincr *)
+  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0; simpl add_dest in *.
-  (* for value *)
++ (* for value *)
   exists (x0 :: tmp1); split.
   econstructor; eauto with gensym.
   apply contained_cons; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exploit transl_valof_meets_spec; eauto. intros [tmp2 [Csyntax D]].
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
   eauto with gensym.
-  (* for set *)
++ (* for set *)
   repeat rewrite app_ass; simpl.
   exists (x0 :: tmp1); split.
   econstructor; eauto with gensym.
   apply contained_cons; eauto with gensym.
-(* comma *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* comma *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto with gensym. intros [tmp2 [Csyntax D]].
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
@@ -967,47 +983,47 @@ Opaque makeif.
   simpl. eapply incl_tran. 2: apply add_dest_incl. auto with gensym.
   destruct dst; simpl; auto with gensym.
   apply contained_app; eauto with gensym.
-(* call *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* call *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   destruct dst; monadInv EQ2; simpl add_dest in *.
-  (* for value *)
++ (* for value *)
   exists (x1 :: tmp1 ++ tmp2); split.
   econstructor; eauto with gensym. congruence.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
   apply contained_app; eauto with gensym.
-  (* for set *)
++ (* for set *)
   exists (x1 :: tmp1 ++ tmp2); split.
   repeat rewrite app_ass. econstructor; eauto with gensym. congruence.
   apply contained_cons. eauto with gensym.
   apply contained_app; eauto with gensym.
-(* builtin *)
-- monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
+- (* builtin *)
+  monadInv H0. exploit H; eauto. intros [tmp1 [A B]].
   destruct dst; monadInv EQ0; simpl add_dest in *.
-  (* for value *)
++ (* for value *)
   exists (x0 :: tmp1); split.
   econstructor; eauto with gensym. congruence.
   apply contained_cons; eauto with gensym.
-  (* for effects *)
++ (* for effects *)
   exists tmp1; split.
   econstructor; eauto with gensym.
   auto.
-  (* for set *)
++ (* for set *)
   exists (x0 :: tmp1); split.
   repeat rewrite app_ass. econstructor; eauto with gensym. congruence.
   apply contained_cons; eauto with gensym.
-(* loc *)
-- monadInv H.
-(* paren *)
-- monadInv H0.
-(* nil *)
-- monadInv H; exists (@nil ident); split; auto with gensym. constructor.
-(* cons *)
-- monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
+- (* loc *)
+  monadInv H.
+- (* paren *)
+  monadInv H0.
+- (* nil *)
+  monadInv H; exists (@nil ident); split; auto with gensym. constructor.
+- (* cons *)
+  monadInv H1. exploit H; eauto. intros [tmp1 [A B]].
   exploit H0; eauto. intros [tmp2 [Csyntax D]].
   exists (tmp1 ++ tmp2); split.
   econstructor; eauto with gensym.
diff --git a/common/AST.v b/common/AST.v
index 268e13d5..979db4b9 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -105,7 +105,7 @@ Lemma rettype_eq: forall (t1 t2: rettype), {t1=t2} + {t1<>t2}.
 Proof. generalize typ_eq; decide equality. Defined.
 Global Opaque rettype_eq.
 
-Fixpoint proj_rettype (r: rettype) : typ :=
+Definition proj_rettype (r: rettype) : typ :=
   match r with
   | Tret t => t
   | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => Tint
diff --git a/common/Builtins0.v b/common/Builtins0.v
index 8da98314..d84c9112 100644
--- a/common/Builtins0.v
+++ b/common/Builtins0.v
@@ -332,6 +332,7 @@ End LOOKUP.
 Inductive standard_builtin : Type :=
   | BI_select (t: typ)
   | BI_fabs
+  | BI_fabsf
   | BI_fsqrt
   | BI_negl
   | BI_addl
@@ -364,7 +365,9 @@ Definition standard_builtin_table : list (string * standard_builtin) :=
  :: ("__builtin_sel", BI_select Tfloat)
  :: ("__builtin_sel", BI_select Tsingle)
  :: ("__builtin_fabs", BI_fabs)
+ :: ("__builtin_fabsf", BI_fabsf)
  :: ("__builtin_fsqrt", BI_fsqrt)
+ :: ("__builtin_sqrt", BI_fsqrt)
  :: ("__builtin_negl", BI_negl)
  :: ("__builtin_addl", BI_addl)
  :: ("__builtin_subl", BI_subl)
@@ -396,6 +399,8 @@ Definition standard_builtin_sig (b: standard_builtin) : signature :=
       mksignature (Tint :: t :: t :: nil) t cc_default
   | BI_fabs | BI_fsqrt =>
       mksignature (Tfloat :: nil) Tfloat cc_default
+  | BI_fabsf =>
+      mksignature (Tsingle :: nil) Tsingle cc_default
   | BI_negl =>
       mksignature (Tlong :: nil) Tlong cc_default
   | BI_addl | BI_subl | BI_i64_umulh| BI_i64_smulh 
@@ -428,6 +433,7 @@ Program Definition standard_builtin_sem (b: standard_builtin) : builtin_sem (sig
           | _ => None
         end) _ _
   | BI_fabs => mkbuiltin_n1t Tfloat Tfloat Float.abs
+  | BI_fabsf => mkbuiltin_n1t Tsingle Tsingle Float32.abs
   | BI_fsqrt => mkbuiltin_n1t Tfloat Tfloat Float.sqrt
   | BI_negl => mkbuiltin_n1t Tlong Tlong Int64.neg
   | BI_addl => mkbuiltin_v2t Tlong Val.addl _ _ 
diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml
new file mode 100644
index 00000000..6f8449ee
--- /dev/null
+++ b/common/DebugPrint.ml
@@ -0,0 +1,146 @@
+open Maps
+open Camlcoq
+open Registers
+
+let debug_flag = ref false
+
+let debug fmt =
+  if !debug_flag then (flush stderr; flush stdout; Printf.eprintf fmt)
+  else Printf.ifprintf stderr fmt
+
+let print_ptree_bool oc pt =
+  if !debug_flag then
+    let elements = PTree.elements pt in
+    begin
+      Printf.fprintf oc "[";
+      List.iter (fun (n, b) ->
+        if b then Printf.fprintf oc "%d, " (P.to_int n)
+      ) elements;
+      Printf.fprintf oc "]\n"
+    end
+  else ()
+
+let print_ptree_opint oc pt =
+  if !debug_flag then
+    let elements = PTree.elements pt in
+    begin
+      Printf.fprintf oc "[";
+      List.iter (fun (n, op) ->
+        match op with
+        | None -> ()
+        | Some p -> Printf.fprintf oc "%d -> %d, " (P.to_int n) (P.to_int p)
+      ) elements;
+      Printf.fprintf oc "]\n"
+    end
+  else ()
+
+let print_intlist oc l =
+  let rec f oc = function
+  | [] -> ()
+  | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
+  in begin
+    if !debug_flag then begin
+      Printf.fprintf oc "[%a]" f l
+    end
+  end
+
+let print_ptree_oplist oc pt =
+  if !debug_flag then
+    let elements = PTree.elements pt in
+    begin
+      Printf.fprintf oc "[";
+      List.iter (fun (n, ol) ->
+        match ol with
+        | None -> ()
+        | Some l -> Printf.fprintf oc "%d -> %a,\n" (P.to_int n) print_intlist l
+      ) elements;
+      Printf.fprintf oc "]\n"
+    end
+  else ()
+
+(* Adapted from backend/PrintRTL.ml: print_function *)
+let print_code code = let open PrintRTL in let open Printf in
+  if (!debug_flag) then begin
+    fprintf stdout "{\n";
+    let instrs =
+      List.sort
+        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+        (List.rev_map
+          (fun (pc, i) -> (P.to_int pc, i))
+          (PTree.elements code)) in
+    List.iter (print_instruction stdout) instrs;
+    fprintf stdout "}"
+  end
+
+let ptree_printbool pt =
+  let elements = PTree.elements pt
+  in begin
+    if !debug_flag then begin
+      Printf.printf "[";
+      List.iter (fun (n, b) ->
+        if b then Printf.printf "%d, " (P.to_int n) else ()
+      ) elements;
+      Printf.printf "]"
+    end
+  end
+
+let print_ptree printer pt =
+  let elements = PTree.elements pt in
+  begin
+    debug "[\n";
+    List.iter (fun (n, elt) ->
+      debug "\t%d: %a\n" (P.to_int n) printer elt
+    ) elements;
+    debug "]\n"
+  end
+
+let print_option_pint oc o =
+  if !debug_flag then
+    match o with
+    | None -> Printf.fprintf oc "None"
+    | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
+
+let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+
+let print_regset rs = begin
+  debug "[";
+  List.iter (fun n -> debug "%d " (P.to_int n)) (Regset.elements rs);
+  debug "]"
+end
+
+let print_ptree_regset pt = begin
+  debug "[";
+  List.iter (fun (n, rs) ->
+    debug "\n\t";
+    debug "%d: " (P.to_int n);
+    print_regset rs
+  ) (PTree.elements pt);
+  debug "]"
+end
+
+let print_true_nodes booltree = begin
+  debug "[";
+  List.iter (fun (n,b) ->
+    if b then debug "%d " (P.to_int n)
+  ) (PTree.elements booltree);
+  debug "]";
+end
+
+
+let print_instructions insts code =
+  let get_some = function
+  | None -> failwith "Did not get some"
+  | Some thing -> thing
+  in if (!debug_flag) then begin
+    debug "[ ";
+    List.iter (
+      fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
+    ) insts; debug "]"
+  end
+
+let print_arrayp arr = begin
+  debug "[| ";
+  Array.iter (fun n -> debug "%d, " (P.to_int n)) arr;
+  debug "|]"
+end
+
diff --git a/common/Memory.v b/common/Memory.v
index cd8a2001..65f36966 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -1322,6 +1322,18 @@ Proof.
   eapply load_store_same.
   eassumption.
 Qed.
+
+Theorem storev_preserv_valid (b : block) (ofs: Z): valid_pointer m1 b ofs = valid_pointer m2 b ofs.
+Proof.
+  unfold storev in STORE.
+  cut (valid_pointer m1 b ofs = true <-> valid_pointer m2 b ofs = true).
+  { destruct (valid_pointer _ _ _), (valid_pointer _ _ _); intuition congruence. }
+  destruct addr; try congruence.
+  rewrite! valid_pointer_valid_access. split.
+  - intros; eapply store_valid_access_1; eauto.
+  - intros; eapply store_valid_access_2; eauto.
+Qed.
+
 End STOREV.
 
 Lemma load_store_overlap:
diff --git a/common/Values.v b/common/Values.v
index 6401ba52..5d32e54e 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -89,6 +89,27 @@ Definition has_type (v: val) (t: typ) : Prop :=
   | _, _ => False
   end.
 
+Definition has_type_b (v: val) (t: typ) :=
+  match v, t with
+  | Vundef, _ => true
+  | Vint _, Tint => true
+  | Vlong _, Tlong => true
+  | Vfloat _, Tfloat => true
+  | Vsingle _, Tsingle => true
+  | Vptr _ _, Tint => negb Archi.ptr64
+  | Vptr _ _, Tlong => Archi.ptr64
+  | (Vint _ | Vsingle _), Tany32 => true
+  | Vptr _ _, Tany32 => negb Archi.ptr64
+  | _, Tany64 => true
+  | _, _ => false
+  end.
+
+Lemma has_type_b_correct: forall v t,
+    has_type_b v t = true <-> has_type v t.
+Proof.
+  destruct v; destruct t; cbn; destruct Archi.ptr64; cbn; split; intros; auto; discriminate.
+Qed.
+
 Fixpoint has_type_list (vl: list val) (tl: list typ) {struct vl} : Prop :=
   match vl, tl with
   | nil, nil => True
@@ -2613,6 +2634,55 @@ Qed.
 
 End VAL_INJ_OPS.
 
+(* Specializations of cmpu_bool, cmpu, cmplu_bool, and cmplu for maximal pointer validity *)
+
+Definition mxcmpu_bool cmp v1 v2: option bool :=
+  cmpu_bool (fun _ _ => true) cmp v1 v2.
+
+Lemma mxcmpu_bool_correct vptr (cmp: comparison) (v1 v2: val) b:
+   cmpu_bool vptr cmp v1 v2 = Some b
+   -> mxcmpu_bool cmp v1 v2 = Some b.
+Proof.
+  intros; eapply cmpu_bool_lessdef; (econstructor 1 || eauto).
+Qed.
+
+Definition mxcmpu cmp v1 v2 := of_optbool (mxcmpu_bool cmp v1 v2).
+
+Lemma mxcmpu_correct vptr (cmp: comparison) (v1 v2: val):
+   lessdef (cmpu vptr cmp v1 v2) (mxcmpu cmp v1 v2).
+Proof.
+  unfold cmpu, mxcmpu.
+  remember (cmpu_bool _ cmp v1 v2) as ob.
+  destruct ob; simpl.
+  - erewrite mxcmpu_bool_correct; eauto.
+    econstructor.
+  - econstructor.
+Qed.
+
+Definition mxcmplu_bool (cmp: comparison) (v1 v2: val)
+ := (cmplu_bool (fun _ _ => true) cmp v1 v2).
+
+Lemma mxcmplu_bool_correct vptr (cmp: comparison) (v1 v2: val) b:
+   (cmplu_bool vptr cmp v1 v2) = Some b
+   -> (mxcmplu_bool cmp v1 v2) = Some b.
+Proof.
+  intros; eapply cmplu_bool_lessdef; (econstructor 1 || eauto).
+Qed.
+
+Definition mxcmplu cmp v1 v2 := of_optbool (mxcmplu_bool cmp v1 v2).
+
+Lemma mxcmplu_correct vptr (cmp: comparison) (v1 v2: val):
+   lessdef (maketotal (cmplu vptr cmp v1 v2))
+               (mxcmplu cmp v1 v2).
+Proof.
+  unfold cmplu, mxcmplu.
+  remember (cmplu_bool _ cmp v1 v2) as ob.
+  destruct ob as [b|]; simpl.
+  - erewrite mxcmplu_bool_correct; eauto.
+    simpl. econstructor.
+  - econstructor.
+Qed.
+
 End Val.
 
 Notation meminj := Val.meminj.
@@ -2706,3 +2776,24 @@ Proof.
   unfold compose_meminj; rewrite H1; rewrite H3; eauto.
   rewrite Ptrofs.add_assoc. decEq. unfold Ptrofs.add. apply Ptrofs.eqm_samerepr. auto with ints.
 Qed.
+
+
+(** Particular cases of extensionality lemma *)
+
+Lemma cmpu_bool_valid_pointer_eq vptr1 vptr2 c v1 v2:
+   (forall (b : block) (z : Z), vptr1 b z = vptr2 b z) ->
+   Val.cmpu_bool vptr1 c v1 v2 = Val.cmpu_bool vptr2 c v1 v2.
+Proof.
+  intros EQ; unfold Val.cmpu_bool; destruct v1; try congruence;
+  destruct v2; try congruence;
+  rewrite !EQ; auto.
+Qed.
+
+Lemma cmplu_bool_valid_pointer_eq  vptr1 vptr2 c v1 v2:
+   (forall (b : block) (z : Z), vptr1 b z = vptr2 b z) ->
+   Val.cmplu_bool vptr1 c v1 v2 = Val.cmplu_bool vptr2 c v1 v2.
+Proof.
+  intros EQ; unfold Val.cmplu_bool; destruct v1; try congruence;
+  destruct v2; try congruence;
+  rewrite !EQ; auto.
+Qed.
diff --git a/config_rv32.sh b/config_rv32.sh
index a5a5cf1c..654cacfa 100755
--- a/config_rv32.sh
+++ b/config_rv32.sh
@@ -1 +1 @@
-exec ./config_simple.sh rv32-linux --toolprefix riscv64-linux-gnu- "$@"
+exec ./config_simple.sh rv32-linux --toolprefix riscv64-unknown-elf- "$@"
diff --git a/configure b/configure
index d0bbd0c1..e8ebb6f8 100755
--- a/configure
+++ b/configure
@@ -27,6 +27,8 @@ clightgen=false
 install_coqdev=false
 responsefile="gnu"
 ignore_coq_version=false
+library_Flocq=local
+library_MenhirLib=local
 
 usage='Usage: ./configure [options] target
 For help on options and targets, do: ./configure -help
@@ -48,11 +50,11 @@ Supported targets:
   armeb-hardfloat      (ARM, EABI using hardware FP registers, big endian)
   x86_32-linux         (x86 32 bits, Linux)
   x86_32-bsd           (x86 32 bits, BSD)
-  x86_32-macosx        (x86 32 bits, MacOS X)
   x86_32-cygwin        (x86 32 bits, Cygwin environment under Windows)
   x86_64-linux         (x86 64 bits, Linux)
   x86_64-bsd           (x86 64 bits, BSD)
   x86_64-macosx        (x86 64 bits, MacOS X)
+  x86_64-cygwin        (x86 64 bits, Cygwin environment under Windows)
   rv32-linux           (RISC-V 32 bits, Linux)
   rv64-linux           (RISC-V 64 bits, Linux)
   kvx-mbr              (Kalray KVX, bare runtime)
@@ -87,6 +89,8 @@ Options:
   -libdir <dir>        Install libraries in <dir>
   -coqdevdir <dir>     Install Coq development (.vo files) in <dir>
   -toolprefix <pref>   Prefix names of tools ("gcc", etc) with <pref>
+  -use-external-Flocq  Use an already-installed Flocq library
+  -use-external-MenhirLib  Use an already-installed MenhirLib library
   -no-runtime-lib      Do not compile nor install the runtime support library
   -no-standard-headers Do not install nor use the standard .h headers
   -clightgen           Also compile and install the clightgen tool
@@ -127,6 +131,10 @@ while : ; do
         ignore_coq_version=true;;
     -install-coqdev|--install-coqdev|-install-coq-dev|--install-coq-dev)
         install_coqdev=true;;
+    -use-external-Flocq|--use-external-Flocq)
+        library_Flocq=external;;
+    -use-external-MenhirLib|--use-external-MenhirLib)
+        library_MenhirLib=external;;
     -help|--help)
         echo "$help"; exit 0;;
     -*)
@@ -332,29 +340,6 @@ if test "$arch" = "x86" -a "$bitsize" = "32"; then
         libmath="-lm"
         system="linux"
         ;;
-    macosx)
-        # kernel major versions count upwards from 4 for OSX 10.0 to 15 for OSX 10.11
-        kernel_major=`uname -r | cut -d "." -f 1`
-
-        abi="macosx"
-        casm="${toolprefix}gcc"
-        casm_options="-arch i386 -c"
-        cc="${toolprefix}gcc -arch i386"
-        clinker="${toolprefix}gcc"
-        clinker_needs_no_pie=false
-        cprepro="${toolprefix}gcc"
-        cprepro_options="-std=c99 -arch i386 -U__GNUC__ -U__clang__ -U__BLOCKS__ '-D__attribute__(x)=' '-D__asm(x)=' '-D_Nullable=' '-D_Nonnull=' -E"
-        libmath=""
-        system="macosx"
-
-        if [[ $kernel_major -gt 11 ]]; then
-          # OSX >= 10.8
-          clinker_options="-arch i386 -Wl,-no_pie"
-        else
-          # OSX <= 10.7
-          clinker_options="-arch i386"
-        fi
-        ;;
     *)
         echo "Error: invalid eabi/system '$target' for architecture IA32/X86_32." 1>&2
         echo "$usage" 1>&2
@@ -407,6 +392,18 @@ if test "$arch" = "x86" -a "$bitsize" = "64"; then
         libmath=""
         system="macosx"
         ;;
+    cygwin)
+        abi="standard"
+        casm="${toolprefix}gcc"
+        casm_options="-m64 -c"
+        cc="${toolprefix}gcc -m64"
+        clinker="${toolprefix}gcc"
+        clinker_options="-m64"
+        cprepro="${toolprefix}gcc"
+        cprepro_options="-std=c99 -m64 -U__GNUC__ '-D__attribute__(x)=' -E"
+        libmath="-lm"
+        system="cygwin"
+        ;;
     *)
         echo "Error: invalid eabi/system '$target' for architecture X86_64." 1>&2
         echo "$usage" 1>&2
@@ -568,24 +565,24 @@ missingtools=false
 echo "Testing Coq... " | tr -d '\n'
 coq_ver=$(${COQBIN}coqc -v 2>/dev/null | sed -n -e 's/The Coq Proof Assistant, version \([^ ]*\).*$/\1/p')
 case "$coq_ver" in
-  8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0|8.11.1|8.11.2)
+  8.8.0|8.8.1|8.8.2|8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0|8.11.1|8.11.2|8.12.0|8.12.1|8.12.2)
         echo "version $coq_ver -- good!";;
   ?*)
         echo "version $coq_ver -- UNSUPPORTED"
         if $ignore_coq_version; then
             echo "Warning: this version of Coq is unsupported, proceed at your own risks."
         else
-            echo "Error: CompCert requires one of the following Coq versions: 8.11.1, 8.11.0, 8.10.2, 8.10.1, 8.10.0, 8.9.1, 8.9.0"
+            echo "Error: CompCert requires a version of Coq between 8.8.0 and 8.12.1"
             missingtools=true
         fi;;
   "")
         echo "NOT FOUND"
-        echo "Error: make sure Coq version 8.9.1 is installed."
+        echo "Error: make sure Coq version 8.11.2 is installed."
         missingtools=true;;
 esac
 
 echo "Testing OCaml... " | tr -d '\n'
-ocaml_ver=`ocamlopt -version 2>/dev/null`
+ocaml_ver=`ocamlc -version 2>/dev/null`
 case "$ocaml_ver" in
   4.00.*|4.01.*| 4.02.*|4.03.*|4.04.*)
         echo "version $ocaml_ver -- UNSUPPORTED"
@@ -603,9 +600,19 @@ case "$ocaml_ver" in
         missingtools=true;;
 esac
 
+echo "Testing OCaml native-code compiler..." | tr -d '\n'
+ocamlopt_ver=`ocamlopt -version 2>/dev/null`
+if test "$ocamlopt_ver" = "$ocaml_ver"; then
+  echo "yes"
+  ocaml_native_comp=true
+else
+  echo "no, will build to bytecode only"
+  ocaml_native_comp=false
+fi
+
 echo "Testing OCaml .opt compilers... " | tr -d '\n'
-ocaml_opt_ver=`ocamlopt.opt -version 2>/dev/null`
-if test "$ocaml_opt_ver" = "$ocaml_ver"; then
+ocamlopt_opt_ver=`ocamlopt.opt -version 2>/dev/null`
+if test "$ocamlopt_opt_ver" = "$ocaml_ver"; then
   echo "yes"
   ocaml_opt_comp=true
 else
@@ -620,8 +627,11 @@ case "$menhir_ver" in
   20[0-9][0-9][0-9][0-9][0-9][0-9])
       if test "$menhir_ver" -ge $MENHIR_REQUIRED; then
           echo "version $menhir_ver -- good!"
-          menhir_dir=$(menhir --suggest-menhirLib | tr -d '\r' | tr '\\' '/')
-          if test -z "$menhir_dir"; then
+          menhir_dir=$(ocamlfind query menhirLib 2>/dev/null) || \
+          menhir_dir=$(menhir --suggest-menhirLib) || \
+          menhir_dir=""
+          menhir_dir=$(echo "$menhir_dir" | tr -d '\r' | tr '\\' '/')
+          if test ! -d "$menhir_dir"; then
               echo "Error: cannot determine the location of the Menhir API library."
               echo "This can be due to an incorrect Menhir package."
               echo "Consider using the OPAM package for Menhir."
@@ -660,47 +670,6 @@ if $missingtools; then
   exit 2
 fi
 
-cat > .merlin <<EOF
-S lib
-S common
-S $arch
-S backend
-S cfrontend
-S driver
-S debug
-S exportclight
-S cparser
-S extraction
-
-B lib
-B common
-B $arch
-B backend
-B cfrontend
-B driver
-B debug
-B exportclight
-B cparser
-B extraction
-
-EOF
-
-echo "-R lib compcert.lib \
--R common compcert.common \
--R ${arch} compcert.${arch} \
--R backend compcert.backend \
--R cfrontend compcert.cfrontend \
--R driver compcert.driver \
--R flocq compcert.flocq \
--R exportclight compcert.exportclight \
--R cparser compcert.cparser \
--R MenhirLib compcert.MenhirLib" > _CoqProject
-case $arch in
-    x86)
-        echo "-R x86_${bitsize} compcert.x86_${bitsize}" >> _CoqProject
-        ;;
-esac
-
 #
 # Generate Makefile.config
 #
@@ -714,6 +683,7 @@ LIBDIR=$libdir
 MANDIR=$sharedir/man
 SHAREDIR=$sharedir
 COQDEVDIR=$coqdevdir
+OCAML_NATIVE_COMP=$ocaml_native_comp
 OCAML_OPT_COMP=$ocaml_opt_comp
 MENHIR_DIR=$menhir_dir
 COMPFLAGS=-bin-annot
@@ -740,8 +710,11 @@ HAS_STANDARD_HEADERS=$has_standard_headers
 INSTALL_COQDEV=$install_coqdev
 LIBMATH=$libmath
 MODEL=$model
+OS=${os:-unspecified}
 SYSTEM=$system
 RESPONSEFILE=$responsefile
+LIBRARY_FLOCQ=$library_Flocq
+LIBRARY_MENHIRLIB=$library_MenhirLib
 EOF
 else
 cat >> Makefile.config <<'EOF'
@@ -835,12 +808,30 @@ CLIGHTGEN=false
 # Whether the other tools support responsefiles in gnu syntax
 RESPONSEFILE="none"
 
+# Whether to use the local copies of Flocq and MenhirLib
+LIBRARY_FLOCQ=local      # external
+LIBRARY_MENHIRLIB=local  # external
+EOF
+fi
+
+if [ "$arch" = "aarch64" ]; then # for aarch64 scheduling
+cat >> Makefile.config <<EOF
+ARCHDIRS=$arch scheduling/abstractbb scheduling/postpass_lib
+BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v OptionMonad.v IterList.v \\
+    Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asm.v Asmblockprops.v\\
+    ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\
+    Asmblockdeps.v\\
+    AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v\\
+    ImpConfig.v ImpCore.v ImpExtern.v ImpHCons.v ImpIO.v ImpLoops.v ImpMonads.v ImpPrelude.v
+    # TODO: UPDATE THIS
+    # DecBoolOps.v Chunks.v Peephole.v ExtValues.v ExtFloats.v
+EXTRA_EXTRACTION=   Asmgen.Asmgen_expand.loadimm32 Asmgen.Asmgen_expand.addimm64 Asmgen.Asmgen_expand.storeptr
 EOF
 fi
 
 if [ "$arch" = "kvx" ]; then
 cat >> Makefile.config <<EOF
-ARCHDIRS=$arch $arch/lib $arch/abstractbb $arch/abstractbb/Impure
+ARCHDIRS=$arch scheduling/abstractbb scheduling/postpass_lib
 EXECUTE=kvx-cluster --syscall=libstd_scalls.so --
 CFLAGS= -D __KVX_COS__
 SIMU=kvx-cluster -- 
@@ -848,12 +839,47 @@ BACKENDLIB=Machblock.v Machblockgen.v Machblockgenproof.v\\
     Asmblock.v Asmblockgen.v Asmblockgenproof0.v Asmblockgenproof1.v Asmblockgenproof.v Asmvliw.v Asmblockprops.v\\
     ForwardSimulationBlock.v PostpassScheduling.v PostpassSchedulingproof.v\\
     Asmblockdeps.v DecBoolOps.v Chunks.v Peephole.v ExtValues.v ExtFloats.v\\
-    AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v\\
-    ImpConfig.v ImpCore.v ImpExtern.v ImpHCons.v ImpIO.v ImpLoops.v ImpMonads.v ImpPrelude.v
+    AbstractBasicBlocksDef.v SeqSimuTheory.v ImpSimuTest.v Parallelizability.v
+EOF
+fi
+
+if [ "$arch" = "riscV" ] ; then
+cat >> Makefile.config <<EOF
+EXTRA_EXTRACTION=Asm.ireg_eq Asm.ireg0_eq
+BACKENDLIB=Asmgenproof0.v Asmgenproof1.v ExtValues.v
 EOF
 fi
 
 #
+# Generate Merlin and CoqProject files to simplify development
+#
+cat > .merlin <<EOF
+S lib
+S common
+S $arch
+S backend
+S cfrontend
+S driver
+S debug
+S exportclight
+S cparser
+S extraction
+
+B lib
+B common
+B $arch
+B backend
+B cfrontend
+B driver
+B debug
+B exportclight
+B cparser
+B extraction
+EOF
+
+make CoqProject
+
+#
 # Clean up target-dependent files to force their recompilation
 #
 rm -f .depend $arch/Archi.vo ${arch}_${bitsize}/Archi.vo runtime/*.o
@@ -890,6 +916,9 @@ CompCert configuration:
     Linker needs '-no-pie'........ $clinker_needs_no_pie
     Math library.................. $libmath
     Build command to use.......... $make
+    Menhir API library............ $menhir_dir
+    The Flocq library............. $library_Flocq
+    The MenhirLib library......... $library_MenhirLib
     Binaries installed in......... $bindirexp
     Runtime library provided...... $has_runtime_lib
     Library files installed in.... $libdirexp
diff --git a/coq b/coq
index fcf744fd..925bc4b9 100755
--- a/coq
+++ b/coq
@@ -1,10 +1,8 @@
 #!/bin/sh
-# Start coqide with the right -I options
+# Start coqide with the right options
 # Use the Makefile to rebuild dependencies if needed
 # Recompile the modified file after coqide editing
 
-INCLUDES=`make print-includes`
-
 make -q ${1}o || {
   make -n ${1}o | grep -v "\\b${1}\\b" | \
   (while read cmd; do
@@ -12,4 +10,4 @@ make -q ${1}o || {
    done)
 }
 
-"${COQBIN}coqide" -async-proofs off $INCLUDES $1 && make ${1}o
+"${COQBIN}coqide" -async-proofs off $1 && make ${1}o
diff --git a/cparser/Bitfields.ml b/cparser/Bitfields.ml
index 696a9a8d..7a00f719 100644
--- a/cparser/Bitfields.ml
+++ b/cparser/Bitfields.ml
@@ -50,8 +50,7 @@ let bitfield_table =
       (Hashtbl.create 57: (ident * string, bitfield_info) Hashtbl.t)
 
 let is_bitfield structid fieldname =
-  try Some (Hashtbl.find bitfield_table (structid, fieldname))
-  with Not_found -> None
+  Hashtbl.find_opt bitfield_table (structid, fieldname)
 
 (* Mapping struct/union identifier -> list of members after transformation,
    including the carrier fields, but without the bit fields.
@@ -189,10 +188,12 @@ let rec transf_union_members env id count = function
           { fld_name = carrier; fld_typ = carrier_typ; fld_bitfield = None; fld_anonymous = false;}
           :: transf_union_members env id (count + 1) ms)
 
-let transf_composite env su id attr ml =
+let transf_composite env loc su id attr ml =
   if List.for_all (fun f -> f.fld_bitfield = None) ml then
     (attr, ml)
   else begin
+    if find_custom_attributes ["packed";"__packed__"] attr <> [] then
+      Diagnostics.error loc "bitfields in packed structs not allowed";
     let ml' =
       match su with
       | Struct -> transf_struct_members env id 1 ml
@@ -550,7 +551,7 @@ and transf_init env i =
 
 (* Declarations *)
 
-let transf_decl env (sto, id, ty, init_opt) =
+let transf_decl env loc (sto, id, ty, init_opt) =
   (sto, id, ty,
    match init_opt with None -> None | Some i -> Some(transf_init env i))
 
@@ -559,12 +560,12 @@ let transf_decl env (sto, id, ty, init_opt) =
 let transf_stmt env s =
   Transform.stmt
      ~expr:(fun loc env ctx e -> transf_exp env ctx e)
-     ~decl:transf_decl
+     ~decl:(fun env (sto, id, ty, init_opt) -> transf_decl env s.sloc (sto, id, ty, init_opt))
      env s
 
 (* Functions *)
 
-let transf_fundef env f =
+let transf_fundef env loc f =
   Transform.fundef transf_stmt env f
 
 (* Programs *)
diff --git a/cparser/Cabs.v b/cparser/Cabs.v
index 2dae061a..174261ef 100644
--- a/cparser/Cabs.v
+++ b/cparser/Cabs.v
@@ -92,6 +92,7 @@ with parameter :=
 (* The optional expression is the bitfield *)
 with field_group :=
   | Field_group : list spec_elem -> list (option name * option expression) -> loc -> field_group
+  | Field_group_static_assert : expression -> loc -> constant -> loc -> loc -> field_group
 
 (* The decl_type is in the order in which they are printed. Only the name of
  * the declared identifier is pulled out. *)
@@ -197,6 +198,7 @@ Inductive definition :=
  | FUNDEF : list spec_elem -> name -> list definition -> statement -> loc -> definition
  | DECDEF : init_name_group -> loc -> definition  (* global variable(s), or function prototype *)
  | PRAGMA : string -> loc -> definition
+ | STATIC_ASSERT : expression -> loc -> constant -> loc -> loc -> definition
 
 (*
 ** statements
diff --git a/cparser/Cabshelper.ml b/cparser/Cabshelper.ml
index 22f3b3c7..7cffef08 100644
--- a/cparser/Cabshelper.ml
+++ b/cparser/Cabshelper.ml
@@ -44,6 +44,7 @@ let get_definitionloc (d : definition) : loc =
   | FUNDEF(_, _, _, _, l) -> l
   | DECDEF(_, l) -> l
   | PRAGMA(_, l) -> l
+  | STATIC_ASSERT(_, _, _, _, l) -> l
 
 let get_statementloc (s : statement) : loc =
 begin
diff --git a/cparser/Diagnostics.ml b/cparser/Diagnostics.ml
index 7957375c..86a5e522 100644
--- a/cparser/Diagnostics.ml
+++ b/cparser/Diagnostics.ml
@@ -400,16 +400,16 @@ let raise_on_errors () =
     raise Abort
 
 let crash exn =
-  if Version.buildnr <> "" && Version.tag <> "" then begin
+  if Version.buildnr <> "" && Version.tag <> "" && Version.branch <> "" then begin
     let backtrace = Printexc.get_backtrace () in
-    eprintf "%tThis is CompCert, Release %s, Build:%s, Tag:%s%t\n"
-      bc Version.version Version.buildnr Version.tag rsc;
+    eprintf "%tThis is CompCert, Release %s, Build:%s, Tag:%s, Branch:%s%t\n"
+      bc Version.version Version.buildnr Version.tag Version.branch rsc;
     eprintf "Backtrace (please include this in your support request):\n%s"
       backtrace;
     eprintf "%tUncaught exception: %s.\n\
 \    Please report this problem to our support.\n\
-\    Error occurred in Build: %s, Tag: %s.\n%t"
-      rc (Printexc.to_string exn) Version.buildnr Version.tag rsc;
+\    Error occurred in Build: %s, Tag: %s, Branch %s.\n%t"
+      rc (Printexc.to_string exn) Version.buildnr Version.tag Version.branch rsc;
     exit 2
   end else begin
     let backtrace = Printexc.get_backtrace ()
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 8e24e29f..3b242233 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -346,10 +346,7 @@ let integer_representable v ik =
     v >= 0L && v < Int64.shift_left 1L (bitsize - 1)
 
 let elab_int_constant loc s0 =
-  let s = String.map (fun d -> match d with
-  | '0'..'9' | 'A'..'F' | 'L' | 'U' | 'X' -> d
-  | 'a'..'f' | 'l' | 'u' | 'x' -> Char.chr (Char.code d - 32)
-  | _ -> error loc "bad digit '%c' in integer literal '%s'" d s0; d) s0 in
+  let s = String.uppercase_ascii s0 in
   (* Determine possible types and chop type suffix *)
   let (s, dec_kinds, hex_kinds) =
     if has_suffix s "ULL" || has_suffix s "LLU" then
@@ -479,6 +476,23 @@ let elab_simple_string loc wide chars =
   | CStr s -> s
   | _ -> error loc "cannot use wide string literal in 'asm'"; ""
 
+(** Elaboration and checking of static assertions *)
+
+let elab_static_assert env exp loc_exp msg loc_msg loc =
+  let (exp, env) = !elab_expr_f loc_exp env exp in
+  match Ceval.integer_expr env exp  with
+  | None ->
+      error loc_exp "expression in static assertion is not an integer constant"
+  | Some n ->
+      if n = 0L then begin
+        match elab_constant loc_msg msg with
+          | CStr s ->
+              error loc "static assertion failed: \"%s\"" s
+          | _ ->
+              (* This can happen with a wide string literal *)
+              error loc "static assertion failed (cannot display associated message)"
+      end
+
 
 (** * Elaboration of type expressions, type specifiers, name declarations *)
 
@@ -987,7 +1001,9 @@ and elab_name_group loc env  (spec, namelist) =
 
 (* Elaboration of a field group *)
 
-and elab_field_group env (Field_group (spec, fieldlist, loc)) =
+and elab_field_group env = function
+
+| Field_group (spec, fieldlist, loc) ->
 
   let fieldlist = List.map
     (function (None, x) -> (Name ("", JUSTBASE, [], loc), x)
@@ -999,6 +1015,7 @@ and elab_field_group env (Field_group (spec, fieldlist, loc)) =
     elab_name_group loc env  (spec, List.map fst fieldlist) in
 
   if sto <> Storage_default then
+    (* This should actually never be triggered, catched by pre-parser *)
     error loc "non-default storage in struct or union";
   if fieldlist = [] then
       (* This should actually never be triggered, empty structs are captured earlier *)
@@ -1052,6 +1069,10 @@ and elab_field_group env (Field_group (spec, fieldlist, loc)) =
   in
   (mmap2 elab_bitfield env' fieldlist names)
 
+| Field_group_static_assert(exp, loc_exp, msg, loc_msg, loc) ->
+    elab_static_assert env exp loc_exp msg loc_msg loc;
+    ([], env)
+  
 (* Elaboration of a struct or union. C99 section 6.7.2.1 *)
 
 and elab_struct_or_union_info kind loc env members attrs =
@@ -1719,11 +1740,12 @@ let elab_expr ctx loc env a =
   let check_ptr_arith env ty s =
     match unroll env ty with
     | TVoid _ ->
-        error "illegal arithmetic on a pointer to void in binary '%c'" s
+        error "illegal arithmetic on a pointer to void in %s" s
     | TFun _ ->
-        error "illegal arithmetic on a pointer to the function type %a in binary '%c'" (print_typ env) ty s
-    | _ -> if incomplete_type env ty then
-        error "arithmetic on a pointer to an incomplete type %a in binary '%c'" (print_typ env) ty s
+        error "illegal arithmetic on a pointer to the function type %a in %s" (print_typ env) ty s
+    | _ ->
+        if incomplete_type env ty then
+          error "arithmetic on a pointer to an incomplete type %a in %s" (print_typ env) ty s
   in
 
   let check_static_var env id sto ty =
@@ -1818,14 +1840,18 @@ let elab_expr ctx loc env a =
       (preprocessing) --> __builtin_va_arg(ap, ty)
       (elaboration)   --> __builtin_va_arg(ap, sizeof(ty))
 *)
-  | CALL((VARIABLE "__builtin_va_start" as a1), [a2; a3]) ->
+  | CALL((VARIABLE "__builtin_va_start" as a1), args) ->
       if not ctx.ctx_vararg then
         error "'va_start' used in function with fixed args";
-      let b1,env = elab env a1 in
-      let b2,env = elab env a2 in
-      let _b3,env = elab env a3 in
-      { edesc = ECall(b1, [b2]);
-        etyp = TVoid [] },env
+      let b1, env = elab env a1 in
+      begin match args with
+        | [a2; a3] ->
+          let b2,env = elab env a2 in
+          let _b3,env = elab env a3 in
+          { edesc = ECall(b1, [b2]);
+            etyp = TVoid [] },env
+        | _ -> fatal_error "'__builtin_va_start' expects 2 arguments"
+      end
 
   | BUILTIN_VA_ARG (a2, a3) ->
       let ident =
@@ -1842,6 +1868,16 @@ let elab_expr ctx loc env a =
           (print_typ env) ty (print_typ env) ty'  (print_typ env) ty'  (print_typ env) ty;
       { edesc = ECall(ident, [b2; b3]); etyp = ty },env
 
+  | CALL(VARIABLE "__builtin_constant_p", al) ->
+      begin match al with
+      | [a1] ->
+          let b1,env = elab env a1 in
+          let v = if Ceval.is_constant_expr env b1 then 1L else 0L in
+          intconst v IInt, env
+      | _ ->
+          fatal_error "'__builtin_constant_p' expects one argument"
+      end
+
   | CALL((VARIABLE "__builtin_sel" as a0), al) ->
       begin match al with
       | [a1; a2; a3] ->
@@ -2115,7 +2151,7 @@ let elab_expr ctx loc env a =
             | _, _ -> fatal_error "invalid operands to binary '+' (%a and %a)"
                   (print_typ env) b1.etyp (print_typ env) b2.etyp
           in
-          check_ptr_arith env ty '+';
+          check_ptr_arith env ty "binary '+'";
           TPtr(ty, [])
         end in
       { edesc = EBinop(Oadd, b1, b2, tyres); etyp = tyres },env
@@ -2130,20 +2166,20 @@ let elab_expr ctx loc env a =
         end else begin
           match wrap unroll loc env b1.etyp, wrap  unroll loc env b2.etyp with
           | (TPtr(ty, a) | TArray(ty, _, a)), (TInt _ | TEnum _) ->
-              if not (wrap pointer_arithmetic_ok loc env ty) then
-                error "illegal pointer arithmetic in binary '-'";
+              check_ptr_arith env ty "binary '-'";
               (TPtr(ty, []), TPtr(ty, []))
           | (TPtr(ty1, a1) | TArray(ty1, _, a1)),
             (TPtr(ty2, a2) | TArray(ty2, _, a2)) ->
               if not (compatible_types AttrIgnoreAll env ty1 ty2) then
                 error "%a and %a are not pointers to compatible types"
                    (print_typ env) b1.etyp (print_typ env) b1.etyp;
-              check_ptr_arith env ty1 '-';
-              check_ptr_arith env ty2 '-';
+              check_ptr_arith env ty1 "binary '-'";
+              check_ptr_arith env ty2 "binary '-'";
               if wrap sizeof loc env ty1 = Some 0 then
                 error "subtraction between two pointers to zero-sized objects";
               (TPtr(ty1, []), TInt(ptrdiff_t_ikind(), []))
-          | _, _ -> fatal_error "invalid operands to binary '-' (%a and %a)"
+          | _, _ ->
+              fatal_error "invalid operands to binary '-' (%a and %a)"
                 (print_typ env) b1.etyp (print_typ env) b2.etyp
         end in
       { edesc = EBinop(Osub, b1, b2, tyop); etyp = tyres },env
@@ -2301,6 +2337,11 @@ let elab_expr ctx loc env a =
       error "expression is not assignable";
     if not (is_scalar_type env b1.etyp) then
       error "cannot %s value of type %a" msg (print_typ env) b1.etyp;
+    begin match unroll env b1.etyp with
+    | TPtr (ty, _) | TArray (ty, _ , _) ->
+      check_ptr_arith env ty ("unary " ^ msg)
+    | _ -> ()
+    end;
     { edesc = EUnop(op, b1); etyp = b1.etyp },env
 
 (* Elaboration of binary operators over integers *)
@@ -2657,6 +2698,8 @@ let elab_fundef genv spec name defs body loc =
        and structs and unions defined in the parameter list. *)
   let (fun_id, sto, inline, noret, ty, kr_params, genv, lenv) =
     elab_fundef_name genv spec name in
+  if Env.is_builtin fun_id.C.name then
+    error loc "definition of builtin function '%s'" fun_id.C.name;
   let s = fun_id.C.name in
   if sto = Storage_auto || sto = Storage_register then
     fatal_error loc "invalid storage class %s on function"
@@ -2847,6 +2890,7 @@ let elab_definition (for_loop: bool) (local: bool) (nonstatic_inline: bool)
   (* "int f(int x) { ... }" *)
   (* "int f(x, y) double y; { ... }" *)
   | FUNDEF(spec, name, defs, body, loc) ->
+      (* This should actually never be triggered, catched by pre-parser *)
       if local then error loc "function definition is not allowed here";
       let env1 = elab_fundef env spec name defs body loc in
       ([], env1)
@@ -2860,6 +2904,11 @@ let elab_definition (for_loop: bool) (local: bool) (nonstatic_inline: bool)
       emit_elab env loc (Gpragma s);
       ([], env)
 
+  (* static assertion *)
+  | STATIC_ASSERT(exp, loc_exp, msg, loc_msg, loc) ->
+      elab_static_assert env exp loc_exp msg loc_msg loc;
+      ([], env)
+
 (* Extended asm *)
 
 let elab_asm_operand ctx loc env (ASMOPERAND(label, wide, chars, e)) =
diff --git a/cparser/Env.ml b/cparser/Env.ml
index 4723a725..00806be1 100644
--- a/cparser/Env.ml
+++ b/cparser/Env.ml
@@ -316,6 +316,9 @@ let set_builtins blt =
   List.iter Init.add_typedef blt.builtin_typedefs;
   List.iter Init.add_function blt.builtin_functions
 
+let is_builtin name =
+  ident_is_bound !Init.env name
+
 (* Error reporting *)
 
 open Printf
diff --git a/cparser/Env.mli b/cparser/Env.mli
index 1baab68f..589a76c7 100644
--- a/cparser/Env.mli
+++ b/cparser/Env.mli
@@ -84,3 +84,4 @@ val initial: unit -> t
 val initial_identifiers: unit -> C.ident list
 val initial_declarations: unit -> C.globdecl list
 val set_builtins: C.builtins -> unit
+val is_builtin : string -> bool
diff --git a/cparser/ExtendedAsm.ml b/cparser/ExtendedAsm.ml
index 257e9cf7..df2da2a2 100644
--- a/cparser/ExtendedAsm.ml
+++ b/cparser/ExtendedAsm.ml
@@ -156,7 +156,7 @@ let transf_outputs loc env = function
 let check_clobbers loc clob =
   List.iter
     (fun c ->
-      if Machregsaux.register_by_name c <> None
+      if Machregsnames.register_by_name c <> None
       || Machregsaux.is_scratch_register c
       || c = "memory" || c = "cc" (* GCC does not accept MEMORY or CC *)
       then ()
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index b36b3e81..f5e8edb3 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -35,6 +35,7 @@ let () =
       ("_Bool", fun loc -> UNDERSCORE_BOOL loc);
       ("_Complex", fun loc -> reserved_keyword loc "_Complex");
       ("_Imaginary", fun loc -> reserved_keyword loc "_Imaginary");
+      ("_Static_assert", fun loc -> STATIC_ASSERT loc);
       ("__alignof", fun loc -> ALIGNOF loc);
       ("__alignof__", fun loc -> ALIGNOF loc);
       ("__asm", fun loc -> ASM loc);
@@ -579,6 +580,7 @@ and singleline_comment = parse
       | Pre_parser.SLASH loc -> loop (Parser.SLASH loc)
       | Pre_parser.STAR loc -> loop (Parser.STAR loc)
       | Pre_parser.STATIC loc -> loop (Parser.STATIC loc)
+      | Pre_parser.STATIC_ASSERT loc -> loop (Parser.STATIC_ASSERT loc)
       | Pre_parser.STRING_LITERAL (wide, str, loc) ->
           (* Merge consecutive string literals *)
           let rec doConcat wide str =
diff --git a/cparser/Machine.ml b/cparser/Machine.ml
index 97ca9223..4f5a93d2 100644
--- a/cparser/Machine.ml
+++ b/cparser/Machine.ml
@@ -61,6 +61,7 @@ type t = {
   supports_unaligned_accesses: bool;
   struct_passing_style: struct_passing_style;
   struct_return_style : struct_return_style;
+  has_non_trapping_loads : bool;
 }
 
 let ilp32ll64 = {
@@ -96,6 +97,7 @@ let ilp32ll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 let i32lpll64 = {
@@ -131,6 +133,7 @@ let i32lpll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 let il32pll64 = {
@@ -166,6 +169,7 @@ let il32pll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 (* Canned configurations for some ABIs *)
@@ -238,7 +242,7 @@ let rv64 =
                    struct_passing_style = SP_ref_callee; (* Wrong *)
                    struct_return_style = SR_ref } (* to check *)
 
-let kvx =
+let kvxbase =
   { name = "kvx";
     char_signed = true;
     wchar_signed = true;
@@ -270,7 +274,17 @@ let kvx =
     bitfields_msb_first = false; (* TO CHECK *)
     supports_unaligned_accesses = true;
     struct_passing_style = SP_value32_ref_callee;
-    struct_return_style = SR_int1to4 }
+    struct_return_style = SR_int1to4;
+    has_non_trapping_loads = false;
+}
+
+let kvxcos =
+  { kvxbase with has_non_trapping_loads = false;
+}
+
+let kvxmbr =
+  { kvxbase with has_non_trapping_loads = true;
+}
 
 let aarch64 =
   { i32lpll64 with name = "aarch64";
@@ -323,6 +337,7 @@ let undef = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 (* The current configuration.  Must be initialized before use. *)
diff --git a/cparser/Machine.mli b/cparser/Machine.mli
index 0e1e22d1..07b55832 100644
--- a/cparser/Machine.mli
+++ b/cparser/Machine.mli
@@ -60,6 +60,7 @@ type t = {
   supports_unaligned_accesses: bool;
   struct_passing_style: struct_passing_style;
   struct_return_style: struct_return_style;
+  has_non_trapping_loads: bool;
 }
 
 (* The current configuration *)
@@ -87,7 +88,8 @@ val arm_littleendian : t
 val arm_bigendian : t
 val rv32 : t
 val rv64 : t
-val kvx : t
+val kvxmbr : t
+val kvxcos : t
 val aarch64 : t
 
 val gcc_extensions : t -> t
diff --git a/cparser/Parse.ml b/cparser/Parse.ml
index 29245083..d9f9aa1c 100644
--- a/cparser/Parse.ml
+++ b/cparser/Parse.ml
@@ -18,7 +18,14 @@
 module CharSet = Set.Make(struct type t = char let compare = compare end)
 
 let transform_program t p name =
-  let run_pass pass flag p = if CharSet.mem flag t then pass p else p in
+  let run_pass pass flag p =
+    if CharSet.mem flag t then begin
+      let p = pass p in
+      Diagnostics.check_errors ();
+      p
+    end else
+      p
+  in
   let p1 = (run_pass StructPassing.program 's'
   (run_pass PackedStructs.program 'p'
   (run_pass Unblock.program 'b'
diff --git a/cparser/Parser.vy b/cparser/Parser.vy
index 4f3b9789..ebed6e34 100644
--- a/cparser/Parser.vy
+++ b/cparser/Parser.vy
@@ -37,7 +37,7 @@ Require Cabs.
   STRUCT UNION ENUM UNDERSCORE_BOOL PACKED ALIGNAS ATTRIBUTE ASM
 
 %token<Cabs.loc> CASE DEFAULT IF_ ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK
-  RETURN BUILTIN_VA_ARG BUILTIN_OFFSETOF
+  RETURN BUILTIN_VA_ARG BUILTIN_OFFSETOF STATIC_ASSERT
 
 %token EOF
 
@@ -55,6 +55,8 @@ Require Cabs.
 %type<list Cabs.spec_elem> declaration_specifiers_typespec_opt
 %type<list Cabs.init_name (* Reverse order *)> init_declarator_list
 %type<Cabs.init_name> init_declarator
+%type<(Cabs.expression * Cabs.loc) * (Cabs.constant * Cabs.loc) * Cabs.loc>
+  static_assert_declaration
 %type<Cabs.storage * Cabs.loc> storage_class_specifier
 %type<Cabs.typeSpecifier * Cabs.loc> type_specifier struct_or_union_specifier enum_specifier
 %type<Cabs.structOrUnion * Cabs.loc> struct_or_union
@@ -343,6 +345,9 @@ declaration:
     { Cabs.DECDEF (fst decspec, rev' decls) (snd decspec) }
 | decspec = declaration_specifiers SEMICOLON
     { Cabs.DECDEF (fst decspec, []) (snd decspec) }
+| asrt = static_assert_declaration
+    { let '((e, loc_e), (s, loc_s), loc) := asrt in
+      Cabs.STATIC_ASSERT e loc_e s loc_s loc }
 
 declaration_specifiers_typespec_opt:
 | storage = storage_class_specifier rest = declaration_specifiers_typespec_opt
@@ -461,6 +466,10 @@ struct_declaration:
 (* Extension to C99 grammar needed to parse some GNU header files. *)
 | decspec = specifier_qualifier_list SEMICOLON
     { Cabs.Field_group (fst decspec) [(None,None)] (snd decspec) }
+(* C11 static assertions *)
+| asrt = static_assert_declaration
+    { let '((e, loc_e), (s, loc_s), loc) := asrt in
+      Cabs.Field_group_static_assert e loc_e s loc_s loc }
 
 specifier_qualifier_list:
 | typ = type_specifier rest = specifier_qualifier_list
@@ -751,6 +760,14 @@ designator:
 | DOT id = OTHER_NAME
     { Cabs.INFIELD_INIT (fst id) }
 
+(* C11 6.7.10 *)
+
+static_assert_declaration:
+| loc = STATIC_ASSERT LPAREN expr = constant_expression
+                        COMMA str = STRING_LITERAL RPAREN SEMICOLON
+    { let '((wide, chars), locs) := str in
+      (expr, (Cabs.CONST_STRING wide chars, locs), loc) }
+
 (* 6.8 *)
 statement_dangerous:
 | stmt = labeled_statement(statement_dangerous)
diff --git a/cparser/StructPassing.ml b/cparser/StructPassing.ml
index 3aff090e..6d63b8f9 100644
--- a/cparser/StructPassing.ml
+++ b/cparser/StructPassing.ml
@@ -433,7 +433,7 @@ and transf_init env = function
 
 (* Declarations *)
 
-let transf_decl env (sto, id, ty, init) =
+let transf_decl env loc (sto, id, ty, init) =
   (sto, id, transf_type env ty,
    match init with None -> None | Some i -> Some (transf_init env i))
 
@@ -503,7 +503,7 @@ let rec transf_stmt s =
   | Sblock sl ->
       {s with sdesc = Sblock(List.map transf_stmt sl)}
   | Sdecl d ->
-      {s with sdesc = Sdecl(transf_decl env d)}
+      {s with sdesc = Sdecl(transf_decl env s.sloc d)}
   | Sasm(attr, template, outputs, inputs, clob) ->
       {s with sdesc = Sasm(attr, template,
                            List.map transf_asm_operand outputs,
@@ -549,13 +549,13 @@ let rec transf_funparams loc env params =
                             actions,
            IdentMap.add x (ereinterpret tx' y) subst)
 
-let transf_fundef env f =
+let transf_fundef env loc f =
   reset_temps();
   let ret = transf_type env f.fd_ret in
   let (params, actions, subst) =
     transf_funparams f.fd_body.sloc env f.fd_params in
   let locals =
-    List.map (fun d -> transf_decl env (subst_decl subst d)) f.fd_locals in
+    List.map (fun d -> transf_decl env loc (subst_decl subst d)) f.fd_locals in
   let (attr1, ret1, params1, body1) =
     match classify_return env f.fd_ret with
     | Ret_scalar ->
@@ -586,7 +586,7 @@ let transf_fundef env f =
 
 (* Composites *)
 
-let transf_composite env su id attr fl =
+let transf_composite env loc su id attr fl =
   (attr, List.map (fun f -> {f with fld_typ = transf_type env f.fld_typ}) fl)
 
 (* Entry point *)
@@ -604,5 +604,5 @@ let program p =
     ~decl:transf_decl
     ~fundef:transf_fundef
     ~composite:transf_composite
-    ~typedef:(fun env id ty -> transf_type env ty)
+    ~typedef:(fun env loc id ty -> transf_type env ty)
     p
diff --git a/cparser/Transform.ml b/cparser/Transform.ml
index 349a3155..a57d94c4 100644
--- a/cparser/Transform.ml
+++ b/cparser/Transform.ml
@@ -161,8 +161,8 @@ let stmt ~expr ?(decl = fun env decl -> assert false) env s =
   | Scontinue -> s
   | Sswitch(e, s1) ->
       {s with sdesc = Sswitch(expr s.sloc env Val e, stm s1)}
-  | Slabeled(lbl, s) ->
-      {s with sdesc = Slabeled(lbl, stm s)}
+  | Slabeled(lbl, s1) ->
+      {s with sdesc = Slabeled(lbl, stm s1)}
   | Sgoto lbl -> s
   | Sreturn None -> s
   | Sreturn (Some e) ->
@@ -191,12 +191,12 @@ let fundef trstmt env f =
 (* Generic transformation of a program *)
 
 let program
-    ?(decl = fun env d -> d)
-    ?(fundef = fun env fd -> fd)
-    ?(composite = fun env su id attr fl -> (attr, fl))
-    ?(typedef = fun env id ty -> ty)
-    ?(enum = fun env id attr members -> (attr, members))
-    ?(pragma = fun env s -> s)
+    ?(decl = fun env loc d -> d)
+    ?(fundef = fun env loc fd -> fd)
+    ?(composite = fun env loc su id attr fl -> (attr, fl))
+    ?(typedef = fun env loc id ty -> ty)
+    ?(enum = fun env loc id attr members -> (attr, members))
+    ?(pragma = fun env loc s -> s)
     p =
 
   let rec transf_globdecls env accu = function
@@ -205,25 +205,25 @@ let program
       let (desc', env') =
         match g.gdesc with
         | Gdecl((sto, id, ty, init) as d) ->
-           (Gdecl(decl env d), Env.add_ident env id sto ty)
+           (Gdecl(decl env g.gloc d), Env.add_ident env id sto ty)
         | Gfundef f ->
-           (Gfundef(fundef env f),
+            (Gfundef(fundef env g.gloc f),
             Env.add_ident env f.fd_name f.fd_storage (fundef_typ f))
         | Gcompositedecl(su, id, attr) ->
             (Gcompositedecl(su, id, attr),
              Env.add_composite env id (composite_info_decl su attr))
         | Gcompositedef(su, id, attr, fl) ->
-            let (attr', fl') = composite env su id attr fl in
+            let (attr', fl') = composite env  g.gloc su id attr fl in
             (Gcompositedef(su, id, attr', fl'),
              Env.add_composite env id (composite_info_def env su attr fl))
         | Gtypedef(id, ty) ->
-            (Gtypedef(id, typedef env id ty), Env.add_typedef env id ty)
+            (Gtypedef(id, typedef env g.gloc id ty), Env.add_typedef env id ty)
         | Genumdef(id, attr, members) ->
-            let (attr', members') = enum env id attr members in
+            let (attr', members') = enum env g.gloc id attr members in
             (Genumdef(id, attr', members'),
              Env.add_enum env id {ei_members =  members; ei_attr = attr})
         | Gpragma s ->
-            (Gpragma(pragma env s), env)
+            (Gpragma(pragma env g.gloc s), env)
       in
         transf_globdecls env' ({g with gdesc = desc'} :: accu) gl
 
diff --git a/cparser/Transform.mli b/cparser/Transform.mli
index dbd8e575..220b7944 100644
--- a/cparser/Transform.mli
+++ b/cparser/Transform.mli
@@ -62,14 +62,14 @@ val fundef : (Env.t -> C.stmt -> C.stmt) -> Env.t -> C.fundef -> C.fundef
 (** Generic transformation of a program *)
 
 val program :
-  ?decl:(Env.t -> C.decl -> C.decl) ->
-  ?fundef:(Env.t -> C.fundef -> C.fundef) ->
-  ?composite:(Env.t -> C.struct_or_union ->
+  ?decl:(Env.t -> C.location -> C.decl -> C.decl) ->
+  ?fundef:(Env.t -> C.location -> C.fundef -> C.fundef) ->
+  ?composite:(Env.t -> C.location -> C.struct_or_union ->
                 C.ident -> C.attributes -> C.field list ->
                   C.attributes * C.field list) ->
-  ?typedef:(Env.t -> C.ident -> C.typ -> C.typ) ->
-  ?enum:(Env.t -> C.ident -> C.attributes -> C.enumerator list ->
+  ?typedef:(Env.t -> C.location -> C.ident -> C.typ -> C.typ) ->
+  ?enum:(Env.t -> C.location -> C.ident -> C.attributes -> C.enumerator list ->
                   C.attributes * C.enumerator list) ->
-  ?pragma:(Env.t -> string -> string) ->
+  ?pragma:(Env.t -> C.location -> string -> string) ->
   C.program ->
   C.program
diff --git a/cparser/deLexer.ml b/cparser/deLexer.ml
index 43c1a679..e2f4f77f 100644
--- a/cparser/deLexer.ml
+++ b/cparser/deLexer.ml
@@ -117,6 +117,7 @@ let delex (symbol : string) : string =
   | "DOT" -> "."
   | "PRAGMA" -> "#pragma \n"
   | "BUILTIN_OFFSETOF" -> "__builtin_offsetof"
+  | "STATIC_ASSERT" -> "_Static_assert"
   | "EOF" -> ""                             (* this should be ok *)
   | _ -> raise Not_found               (* this should not happen *)
 
diff --git a/cparser/handcrafted.messages b/cparser/handcrafted.messages
index 6d972439..23e90b3e 100644
--- a/cparser/handcrafted.messages
+++ b/cparser/handcrafted.messages
@@ -179,22 +179,22 @@
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN INT XOR_ASSIGN
 ##
-## Ends in an error in state: 314.
+## Ends in an error in state: 322.
 ##
 ## attribute_specifier -> ALIGNAS LPAREN type_name . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNAS LPAREN type_name 
+## ALIGNAS LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 # Maybe the type name was not complete, but we have reduced anyway
@@ -212,40 +212,40 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF LPAREN VOID XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF LPAREN VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 304.
+## Ends in an error in state: 312.
 ##
 ## unary_expression -> ALIGNOF LPAREN type_name . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNOF LPAREN type_name 
+## ALIGNOF LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF LPAREN VOID XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF LPAREN VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 388.
+## Ends in an error in state: 396.
 ##
 ## postfix_expression -> LPAREN type_name . RPAREN LBRACE initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## unary_expression -> SIZEOF LPAREN type_name . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## SIZEOF LPAREN type_name 
+## SIZEOF LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 Ill-formed use of $2.
@@ -256,22 +256,22 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME COMMA VOID XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME COMMA VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 333.
+## Ends in an error in state: 341.
 ##
 ## postfix_expression -> BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name 
+## BUILTIN_VA_ARG LPAREN assignment_expression COMMA type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 Ill-formed use of __builtin_va_arg.
@@ -282,22 +282,22 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN VOID XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 363.
+## Ends in an error in state: 371.
 ##
 ## postfix_expression -> LPAREN type_name . RPAREN LBRACE initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name 
+## LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 # gcc simply says it expects a closing parenthesis,
@@ -311,23 +311,23 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN VOID XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 385.
+## Ends in an error in state: 393.
 ##
 ## cast_expression -> LPAREN type_name . RPAREN cast_expression [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## postfix_expression -> LPAREN type_name . RPAREN LBRACE initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name 
+## LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 # gcc and clang say they expect a closing parenthesis.
@@ -339,35 +339,35 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: ALIGNAS LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 316.
+## Ends in an error in state: 324.
 ##
 ## argument_expression_list -> argument_expression_list . COMMA assignment_expression [ RPAREN COMMA ]
 ## attribute_specifier -> ALIGNAS LPAREN argument_expression_list . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNAS LPAREN argument_expression_list 
+## ALIGNAS LPAREN argument_expression_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 230, spurious reduction of production argument_expression_list -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 155, spurious reduction of production argument_expression_list -> assignment_expression 
 ##
 
 # We are trying to recognize an alignas specifier.
@@ -389,27 +389,27 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT LBRACK RPAREN 
+translation_unit_file: ALIGNAS LPAREN INT LBRACK RPAREN
 ##
-## Ends in an error in state: 151.
+## Ends in an error in state: 248.
 ##
 ## direct_abstract_declarator -> option(direct_abstract_declarator) LBRACK option(type_qualifier_list) . optional(assignment_expression,RBRACK) [ RPAREN LPAREN LBRACK COMMA ]
 ## type_qualifier_list -> option(type_qualifier_list) . type_qualifier_noattr [ VOLATILE TILDE STRING_LITERAL STAR SIZEOF RESTRICT RBRACK PRE_NAME PLUS PACKED MINUS LPAREN INC DEC CONSTANT CONST BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ## type_qualifier_list -> option(type_qualifier_list) . attribute_specifier [ VOLATILE TILDE STRING_LITERAL STAR SIZEOF RESTRICT RBRACK PRE_NAME PLUS PACKED MINUS LPAREN INC DEC CONSTANT CONST BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## option(direct_abstract_declarator) LBRACK option(type_qualifier_list) 
+## option(direct_abstract_declarator) LBRACK option(type_qualifier_list)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LBRACK RPAREN 
+translation_unit_file: INT PRE_NAME VAR_NAME LBRACK RPAREN
 ##
-## Ends in an error in state: 257.
+## Ends in an error in state: 265.
 ##
 ## direct_declarator -> direct_declarator LBRACK option(type_qualifier_list) . optional(assignment_expression,RBRACK) [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## type_qualifier_list -> option(type_qualifier_list) . type_qualifier_noattr [ VOLATILE TILDE STRING_LITERAL STAR SIZEOF RESTRICT RBRACK PRE_NAME PLUS PACKED MINUS LPAREN INC DEC CONSTANT CONST BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ## type_qualifier_list -> option(type_qualifier_list) . attribute_specifier [ VOLATILE TILDE STRING_LITERAL STAR SIZEOF RESTRICT RBRACK PRE_NAME PLUS PACKED MINUS LPAREN INC DEC CONSTANT CONST BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## direct_declarator LBRACK option(type_qualifier_list) 
+## direct_declarator LBRACK option(type_qualifier_list)
 ##
 
 # We are trying to recognize an array declarator.
@@ -434,32 +434,32 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT LPAREN INT COMMA ELLIPSIS XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN INT LPAREN INT COMMA ELLIPSIS XOR_ASSIGN
 ##
-## Ends in an error in state: 268.
+## Ends in an error in state: 276.
 ##
 ## direct_abstract_declarator -> LPAREN option(context_parameter_type_list) . RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN option(context_parameter_type_list) 
+## LPAREN option(context_parameter_type_list)
 ##
-translation_unit_file: ALIGNAS LPAREN INT LBRACK RBRACK LPAREN INT COMMA ELLIPSIS XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN INT LBRACK RBRACK LPAREN INT COMMA ELLIPSIS XOR_ASSIGN
 ##
-## Ends in an error in state: 251.
+## Ends in an error in state: 259.
 ##
 ## direct_abstract_declarator -> direct_abstract_declarator LPAREN option(context_parameter_type_list) . RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## direct_abstract_declarator LPAREN option(context_parameter_type_list) 
+## direct_abstract_declarator LPAREN option(context_parameter_type_list)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT COMMA ELLIPSIS XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT COMMA ELLIPSIS XOR_ASSIGN
 ##
-## Ends in an error in state: 285.
+## Ends in an error in state: 293.
 ##
 ## direct_declarator -> direct_declarator LPAREN context_parameter_type_list . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## direct_declarator LPAREN context_parameter_type_list 
+## direct_declarator LPAREN context_parameter_type_list
 ##
 
 # Unlikely error, since only the ELLIPSIS allows us to tell that
@@ -469,20 +469,20 @@ At this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT LPAREN LPAREN RPAREN COMMA 
+translation_unit_file: ALIGNAS LPAREN INT LPAREN LPAREN RPAREN COMMA
 ##
-## Ends in an error in state: 266.
+## Ends in an error in state: 274.
 ##
 ## direct_abstract_declarator -> LPAREN save_context abstract_declarator(type_name) . RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN save_context abstract_declarator(type_name) 
+## LPAREN save_context abstract_declarator(type_name)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 259, spurious reduction of production abstract_declarator(type_name) -> direct_abstract_declarator 
+## In state 267, spurious reduction of production abstract_declarator(type_name) -> direct_abstract_declarator 
 ##
 #
 # The first LPAREN in this example must be the beginning of an abstract_declarator.
@@ -511,15 +511,15 @@ At this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT LPAREN XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN INT LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 307.
+## Ends in an error in state: 315.
 ##
 ## direct_abstract_declarator -> LPAREN . save_context abstract_declarator(type_name) RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ## direct_abstract_declarator -> LPAREN . option(context_parameter_type_list) RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN 
+## LPAREN
 ##
 
 # gcc and clang both say they want a closing parenthesis.
@@ -534,16 +534,16 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 145.
+## Ends in an error in state: 242.
 ##
 ## direct_abstract_declarator -> LPAREN . save_context abstract_declarator(type_name) RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ## direct_abstract_declarator -> LPAREN . option(context_parameter_type_list) RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ## direct_declarator -> LPAREN . save_context declarator RPAREN [ RPAREN PACKED LPAREN LBRACK COMMA ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN 
+## LPAREN
 ##
 
 # Analogous to the above, but has a third item.
@@ -557,16 +557,16 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN VOLATILE ADD_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN VOLATILE ADD_ASSIGN
 ##
-## Ends in an error in state: 299.
+## Ends in an error in state: 307.
 ##
 ## option(type_qualifier_list) -> type_qualifier_list . [ VOLATILE RESTRICT PACKED CONST ATTRIBUTE ALIGNAS ]
 ## specifier_qualifier_list(type_name) -> type_qualifier_list . typedef_name option(type_qualifier_list) [ STAR RPAREN LPAREN LBRACK COMMA ]
 ## specifier_qualifier_list(type_name) -> type_qualifier_list . type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) [ STAR RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## type_qualifier_list 
+## type_qualifier_list
 ##
 
 # We are trying to recognize a specifier-qualifier-list, and have not yet seen
@@ -582,7 +582,7 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 61.
 ##
@@ -590,7 +590,7 @@ translation_unit_file: ALIGNAS LPAREN XOR_ASSIGN
 ## attribute_specifier -> ALIGNAS LPAREN . type_name RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNAS LPAREN 
+## ALIGNAS LPAREN
 ##
 
 # This one seems easy. We have recognized ALIGNAS LPAREN, and nothing that makes sense beyond that.
@@ -604,7 +604,7 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS XOR_ASSIGN 
+translation_unit_file: ALIGNAS XOR_ASSIGN
 ##
 ## Ends in an error in state: 60.
 ##
@@ -612,7 +612,7 @@ translation_unit_file: ALIGNAS XOR_ASSIGN
 ## attribute_specifier -> ALIGNAS . LPAREN type_name RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNAS 
+## ALIGNAS
 ##
 
 # Fingers in the nose.
@@ -622,14 +622,14 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN COMMA XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 345.
+## Ends in an error in state: 353.
 ##
 ## gcc_attribute_list -> gcc_attribute_list COMMA . gcc_attribute [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_list COMMA 
+## gcc_attribute_list COMMA
 ##
 
 # We are expecting a gcc_attribute. This symbol is nullable, so
@@ -644,14 +644,14 @@ At this point, a gcc attribute is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN RPAREN XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 343.
+## Ends in an error in state: 351.
 ##
 ## attribute_specifier -> ATTRIBUTE LPAREN LPAREN gcc_attribute_list RPAREN . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ATTRIBUTE LPAREN LPAREN gcc_attribute_list RPAREN 
+## ATTRIBUTE LPAREN LPAREN gcc_attribute_list RPAREN
 ##
 
 Ill-formed attribute specifier.
@@ -659,15 +659,15 @@ At this point, a second closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN RPAREN XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 342.
+## Ends in an error in state: 350.
 ##
 ## attribute_specifier -> ATTRIBUTE LPAREN LPAREN gcc_attribute_list . RPAREN RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ## gcc_attribute_list -> gcc_attribute_list . COMMA gcc_attribute [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## ATTRIBUTE LPAREN LPAREN gcc_attribute_list 
+## ATTRIBUTE LPAREN LPAREN gcc_attribute_list
 ##
 
 # We have a seen a (non-empty) attribute list, so we expect either
@@ -682,35 +682,35 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME COMMA PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME COMMA PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 338.
+## Ends in an error in state: 346.
 ##
 ## argument_expression_list -> argument_expression_list . COMMA assignment_expression [ RPAREN COMMA ]
 ## gcc_attribute -> gcc_attribute_word LPAREN typedef_name COMMA argument_expression_list . RPAREN [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_word LPAREN typedef_name COMMA argument_expression_list 
+## gcc_attribute_word LPAREN typedef_name COMMA argument_expression_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 230, spurious reduction of production argument_expression_list -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 155, spurious reduction of production argument_expression_list -> assignment_expression 
 ##
 
 # We know for sure that we are parsing a gcc attribute.
@@ -726,14 +726,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME COMMA XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 337.
+## Ends in an error in state: 345.
 ##
 ## gcc_attribute -> gcc_attribute_word LPAREN typedef_name COMMA . argument_expression_list RPAREN [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_word LPAREN typedef_name COMMA 
+## gcc_attribute_word LPAREN typedef_name COMMA
 ##
 
 # gcc/clang agree.
@@ -743,14 +743,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 336.
+## Ends in an error in state: 344.
 ##
 ## gcc_attribute -> gcc_attribute_word LPAREN typedef_name . COMMA argument_expression_list RPAREN [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_word LPAREN typedef_name 
+## gcc_attribute_word LPAREN typedef_name
 ##
 
 # gcc and clang complain about the TYPEDEF_NAME, not sure why.
@@ -760,7 +760,7 @@ At this point, a comma ',' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 47.
 ##
@@ -768,7 +768,7 @@ translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME LPAREN XOR_ASSI
 ## gcc_attribute -> gcc_attribute_word LPAREN . typedef_name COMMA argument_expression_list RPAREN [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_word LPAREN 
+## gcc_attribute_word LPAREN
 ##
 
 # gcc and clang just say they expect an expression.
@@ -780,7 +780,7 @@ At this point, a list of expressions is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME XOR_ASSIGN
 ##
 ## Ends in an error in state: 46.
 ##
@@ -789,7 +789,7 @@ translation_unit_file: ATTRIBUTE LPAREN LPAREN PRE_NAME VAR_NAME XOR_ASSIGN
 ## gcc_attribute -> gcc_attribute_word . LPAREN typedef_name COMMA argument_expression_list RPAREN [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## gcc_attribute_word 
+## gcc_attribute_word
 ##
 
 # gcc and clang say they expect a closing parenthesis (as usual).
@@ -806,14 +806,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN LPAREN XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 39.
 ##
 ## attribute_specifier -> ATTRIBUTE LPAREN LPAREN . gcc_attribute_list RPAREN RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ATTRIBUTE LPAREN LPAREN 
+## ATTRIBUTE LPAREN LPAREN
 ##
 
 # A non-empty attribute list is expected.
@@ -828,14 +828,14 @@ At this point, a gcc attribute is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE LPAREN XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 38.
 ##
 ## attribute_specifier -> ATTRIBUTE LPAREN . LPAREN gcc_attribute_list RPAREN RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ATTRIBUTE LPAREN 
+## ATTRIBUTE LPAREN
 ##
 
 Ill-formed gcc attribute specifier.
@@ -843,14 +843,14 @@ At this point, a second opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ATTRIBUTE XOR_ASSIGN 
+translation_unit_file: ATTRIBUTE XOR_ASSIGN
 ##
 ## Ends in an error in state: 37.
 ##
 ## attribute_specifier -> ATTRIBUTE . LPAREN LPAREN gcc_attribute_list RPAREN RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ATTRIBUTE 
+## ATTRIBUTE
 ##
 
 Ill-formed gcc attribute specifier.
@@ -858,15 +858,15 @@ At this point, two opening parentheses '((' are expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 353.
+## Ends in an error in state: 361.
 ##
 ## enumerator_list -> enumerator_list COMMA . declare_varname(enumerator) [ RBRACE COMMA ]
 ## option(COMMA) -> COMMA . [ RBRACE ]
 ##
 ## The known suffix of the stack is as follows:
-## enumerator_list COMMA 
+## enumerator_list COMMA
 ##
 
 # We omit the possibility of a closing brace.
@@ -879,36 +879,36 @@ At this point, an enumerator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME EQ CONSTANT SEMICOLON 
+translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME EQ CONSTANT SEMICOLON
 ##
-## Ends in an error in state: 352.
+## Ends in an error in state: 360.
 ##
 ## enum_specifier -> ENUM attribute_specifier_list option(other_identifier) LBRACE enumerator_list . option(COMMA) RBRACE [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## enumerator_list -> enumerator_list . COMMA declare_varname(enumerator) [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## ENUM attribute_specifier_list option(other_identifier) LBRACE enumerator_list 
+## ENUM attribute_specifier_list option(other_identifier) LBRACE enumerator_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 154, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 357, spurious reduction of production enumerator -> enumeration_constant EQ conditional_expression 
-## In state 354, spurious reduction of production declare_varname(enumerator) -> enumerator 
-## In state 361, spurious reduction of production enumerator_list -> declare_varname(enumerator) 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 79, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 365, spurious reduction of production enumerator -> enumeration_constant EQ conditional_expression 
+## In state 362, spurious reduction of production declare_varname(enumerator) -> enumerator 
+## In state 369, spurious reduction of production enumerator_list -> declare_varname(enumerator) 
 ##
 #
 # At first sight, it seems that the last enumerator that we have recognized
@@ -937,14 +937,14 @@ then at this point, a closing brace '}' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME EQ XOR_ASSIGN 
+translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME EQ XOR_ASSIGN
 ##
-## Ends in an error in state: 356.
+## Ends in an error in state: 364.
 ##
 ## enumerator -> enumeration_constant EQ . conditional_expression [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## enumeration_constant EQ 
+## enumeration_constant EQ
 ##
 
 Ill-formed enumeration specifier.
@@ -952,15 +952,15 @@ At this point, a constant expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: ENUM LBRACE PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 355.
+## Ends in an error in state: 363.
 ##
 ## enumerator -> enumeration_constant . [ RBRACE COMMA ]
 ## enumerator -> enumeration_constant . EQ conditional_expression [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## enumeration_constant 
+## enumeration_constant
 ##
 
 # Here, both clang and gcc give an incomplete diagnostic message.
@@ -973,14 +973,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM LBRACE XOR_ASSIGN 
+translation_unit_file: ENUM LBRACE XOR_ASSIGN
 ##
-## Ends in an error in state: 350.
+## Ends in an error in state: 358.
 ##
 ## enum_specifier -> ENUM attribute_specifier_list option(other_identifier) LBRACE . enumerator_list option(COMMA) RBRACE [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ENUM attribute_specifier_list option(other_identifier) LBRACE 
+## ENUM attribute_specifier_list option(other_identifier) LBRACE
 ##
 
 # gcc says it expects an identifier.
@@ -991,15 +991,15 @@ At this point, an enumerator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ENUM XOR_ASSIGN 
+translation_unit_file: ENUM XOR_ASSIGN
 ##
-## Ends in an error in state: 348.
+## Ends in an error in state: 356.
 ##
 ## enum_specifier -> ENUM attribute_specifier_list . option(other_identifier) LBRACE enumerator_list option(COMMA) RBRACE [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## enum_specifier -> ENUM attribute_specifier_list . general_identifier [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ENUM attribute_specifier_list 
+## ENUM attribute_specifier_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
@@ -1018,16 +1018,16 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 65.
 ##
 ## unary_expression -> ALIGNOF LPAREN . type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNOF LPAREN 
+## ALIGNOF LPAREN
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 28.
 ##
@@ -1036,7 +1036,7 @@ translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF LPAREN XOR_ASSIGN
 ## unary_expression -> SIZEOF LPAREN . type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## SIZEOF LPAREN 
+## SIZEOF LPAREN
 ##
 
 # Tricky because we could be looking at the beginning of a compound
@@ -1051,14 +1051,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ ALIGNOF XOR_ASSIGN
 ##
 ## Ends in an error in state: 64.
 ##
 ## unary_expression -> ALIGNOF . LPAREN type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## ALIGNOF 
+## ALIGNOF
 ##
 
 Ill-formed use of $0.
@@ -1067,7 +1067,7 @@ followed with a type name.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF XOR_ASSIGN
 ##
 ## Ends in an error in state: 23.
 ##
@@ -1075,7 +1075,7 @@ translation_unit_file: INT PRE_NAME VAR_NAME EQ SIZEOF XOR_ASSIGN
 ## unary_expression -> SIZEOF . LPAREN type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## SIZEOF 
+## SIZEOF
 ##
 
 # Let's not reveal that sizeof can be used without parentheses.
@@ -1089,33 +1089,33 @@ followed with an expression or a type name.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 331.
+## Ends in an error in state: 339.
 ##
 ## postfix_expression -> BUILTIN_VA_ARG LPAREN assignment_expression . COMMA type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_VA_ARG LPAREN assignment_expression 
+## BUILTIN_VA_ARG LPAREN assignment_expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
 ##
 
 Ill-formed use of $2.
@@ -1126,14 +1126,14 @@ then at this point, a comma ',' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 332.
+## Ends in an error in state: 340.
 ##
 ## postfix_expression -> BUILTIN_VA_ARG LPAREN assignment_expression COMMA . type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_VA_ARG LPAREN assignment_expression COMMA 
+## BUILTIN_VA_ARG LPAREN assignment_expression COMMA
 ##
 
 Ill-formed use of $3.
@@ -1141,14 +1141,14 @@ At this point, a type name is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 51.
 ##
 ## postfix_expression -> BUILTIN_VA_ARG LPAREN . assignment_expression COMMA type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_VA_ARG LPAREN 
+## BUILTIN_VA_ARG LPAREN
 ##
 
 Ill-formed use of $1.
@@ -1156,14 +1156,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ BUILTIN_VA_ARG XOR_ASSIGN
 ##
 ## Ends in an error in state: 50.
 ##
 ## postfix_expression -> BUILTIN_VA_ARG . LPAREN assignment_expression COMMA type_name RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_VA_ARG 
+## BUILTIN_VA_ARG
 ##
 
 Ill-formed use of $0.
@@ -1171,23 +1171,23 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ DEC XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ DEC XOR_ASSIGN
 ##
 ## Ends in an error in state: 48.
 ##
 ## unary_expression -> DEC . unary_expression [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## DEC 
+## DEC
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ INC XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ INC XOR_ASSIGN
 ##
 ## Ends in an error in state: 33.
 ##
 ## unary_expression -> INC . unary_expression [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## INC 
+## INC
 ##
 
 Ill-formed expression.
@@ -1195,14 +1195,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN INT RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN INT RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 364.
+## Ends in an error in state: 372.
 ##
 ## postfix_expression -> LPAREN type_name RPAREN . LBRACE initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name RPAREN 
+## LPAREN type_name RPAREN
 ##
 
 # Here, we seem to be certain that this must be the beginning of a
@@ -1226,7 +1226,7 @@ If this is intended to be the beginning of a cast expression,
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 34.
 ##
@@ -1234,7 +1234,7 @@ translation_unit_file: INT PRE_NAME VAR_NAME EQ INC LPAREN XOR_ASSIGN
 ## primary_expression -> LPAREN . expression RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN 
+## LPAREN
 ##
 
 # gcc and clang expect an expression.
@@ -1247,35 +1247,35 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 382.
+## Ends in an error in state: 390.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
 ## primary_expression -> LPAREN expression . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN expression 
+## LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 # Since we are saying "if this expression is complete",
@@ -1291,36 +1291,36 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN LBRACE PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN LBRACE PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 379.
+## Ends in an error in state: 387.
 ##
 ## initializer_list -> initializer_list . COMMA option(designation) c_initializer [ RBRACE COMMA ]
 ## postfix_expression -> LPAREN type_name RPAREN LBRACE initializer_list . option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name RPAREN LBRACE initializer_list 
+## LPAREN type_name RPAREN LBRACE initializer_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 372, spurious reduction of production c_initializer -> assignment_expression 
-## In state 378, spurious reduction of production initializer_list -> option(designation) c_initializer 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 380, spurious reduction of production c_initializer -> assignment_expression 
+## In state 386, spurious reduction of production initializer_list -> option(designation) c_initializer 
 ##
 
 # Let's ignore the fact that a comma can precede a closing brace.
@@ -1333,14 +1333,14 @@ then at this point, a closing brace '}' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN LBRACE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN LBRACE XOR_ASSIGN
 ##
-## Ends in an error in state: 365.
+## Ends in an error in state: 373.
 ##
 ## postfix_expression -> LPAREN type_name RPAREN LBRACE . initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name RPAREN LBRACE 
+## LPAREN type_name RPAREN LBRACE
 ##
 
 # gcc and clang say an expression is expected, which is incomplete.
@@ -1350,15 +1350,15 @@ At this point, an initializer is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN INT RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 386.
+## Ends in an error in state: 394.
 ##
 ## cast_expression -> LPAREN type_name RPAREN . cast_expression [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## postfix_expression -> LPAREN type_name RPAREN . LBRACE initializer_list option(COMMA) RBRACE [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN type_name RPAREN 
+## LPAREN type_name RPAREN
 ##
 
 # clang and gcc expect an expression.
@@ -1372,7 +1372,7 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 30.
 ##
@@ -1381,7 +1381,7 @@ translation_unit_file: INT PRE_NAME VAR_NAME EQ LPAREN XOR_ASSIGN
 ## primary_expression -> LPAREN . expression RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN 
+## LPAREN
 ##
 
 # clang and gcc expect an expression.
@@ -1394,14 +1394,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ TILDE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ TILDE XOR_ASSIGN
 ##
-## Ends in an error in state: 153.
+## Ends in an error in state: 78.
 ##
 ## unary_expression -> unary_operator . cast_expression [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LEQ LEFT_ASSIGN LEFT HAT GT GEQ EQEQ EQ DIV_ASSIGN COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## unary_operator 
+## unary_operator
 ##
 
 # clang and gcc expect an expression.
@@ -1411,95 +1411,95 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME AND XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME AND XOR_ASSIGN
 ##
-## Ends in an error in state: 213.
+## Ends in an error in state: 138.
 ##
 ## and_expression -> and_expression AND . equality_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION HAT COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## and_expression AND 
+## and_expression AND
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME ANDAND XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME ANDAND XOR_ASSIGN
 ##
-## Ends in an error in state: 202.
+## Ends in an error in state: 127.
 ##
 ## logical_and_expression -> logical_and_expression ANDAND . inclusive_or_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION COMMA COLON BARBAR ANDAND ]
 ##
 ## The known suffix of the stack is as follows:
-## logical_and_expression ANDAND 
+## logical_and_expression ANDAND
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME BAR XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME BAR XOR_ASSIGN
 ##
-## Ends in an error in state: 204.
+## Ends in an error in state: 129.
 ##
 ## inclusive_or_expression -> inclusive_or_expression BAR . exclusive_or_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION COMMA COLON BARBAR BAR ANDAND ]
 ##
 ## The known suffix of the stack is as follows:
-## inclusive_or_expression BAR 
+## inclusive_or_expression BAR
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME BARBAR XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME BARBAR XOR_ASSIGN
 ##
-## Ends in an error in state: 225.
+## Ends in an error in state: 150.
 ##
 ## logical_or_expression -> logical_or_expression BARBAR . logical_and_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION COMMA COLON BARBAR ]
 ##
 ## The known suffix of the stack is as follows:
-## logical_or_expression BARBAR 
+## logical_or_expression BARBAR
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME HAT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME HAT XOR_ASSIGN
 ##
-## Ends in an error in state: 206.
+## Ends in an error in state: 131.
 ##
 ## exclusive_or_expression -> exclusive_or_expression HAT . and_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION HAT COMMA COLON BARBAR BAR ANDAND ]
 ##
 ## The known suffix of the stack is as follows:
-## exclusive_or_expression HAT 
+## exclusive_or_expression HAT
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LT XOR_ASSIGN
 ##
-## Ends in an error in state: 196.
+## Ends in an error in state: 121.
 ##
 ## relational_expression -> relational_expression relational_operator . shift_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION NEQ LT LEQ HAT GT GEQ EQEQ COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## relational_expression relational_operator 
+## relational_expression relational_operator
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME NEQ XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME NEQ XOR_ASSIGN
 ##
-## Ends in an error in state: 210.
+## Ends in an error in state: 135.
 ##
 ## equality_expression -> equality_expression equality_operator . relational_expression [ SEMICOLON RPAREN RBRACK RBRACE QUESTION NEQ HAT EQEQ COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## equality_expression equality_operator 
+## equality_expression equality_operator
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME PLUS XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME PLUS XOR_ASSIGN
 ##
-## Ends in an error in state: 189.
+## Ends in an error in state: 114.
 ##
 ## additive_expression -> additive_expression additive_operator . multiplicative_expression [ SEMICOLON RPAREN RIGHT RBRACK RBRACE QUESTION PLUS NEQ MINUS LT LEQ LEFT HAT GT GEQ EQEQ COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## additive_expression additive_operator 
+## additive_expression additive_operator
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME RIGHT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME RIGHT XOR_ASSIGN
 ##
-## Ends in an error in state: 178.
+## Ends in an error in state: 103.
 ##
 ## shift_expression -> shift_expression shift_operator . additive_expression [ SEMICOLON RPAREN RIGHT RBRACK RBRACE QUESTION NEQ LT LEQ LEFT HAT GT GEQ EQEQ COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## shift_expression shift_operator 
+## shift_expression shift_operator
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME STAR XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME STAR XOR_ASSIGN
 ##
-## Ends in an error in state: 183.
+## Ends in an error in state: 108.
 ##
 ## multiplicative_expression -> multiplicative_expression multiplicative_operator . cast_expression [ STAR SLASH SEMICOLON RPAREN RIGHT RBRACK RBRACE QUESTION PLUS PERCENT NEQ MINUS LT LEQ LEFT HAT GT GEQ EQEQ COMMA COLON BARBAR BAR ANDAND AND ]
 ##
 ## The known suffix of the stack is as follows:
-## multiplicative_expression multiplicative_operator 
+## multiplicative_expression multiplicative_operator
 ##
 
 # clang and gcc expect an expression.
@@ -1509,14 +1509,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME XOR_ASSIGN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME XOR_ASSIGN XOR_ASSIGN
 ##
-## Ends in an error in state: 174.
+## Ends in an error in state: 99.
 ##
 ## assignment_expression -> unary_expression assignment_operator . assignment_expression [ SEMICOLON RPAREN RBRACK RBRACE COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## unary_expression assignment_operator 
+## unary_expression assignment_operator
 ##
 
 # clang and gcc expect an expression.
@@ -1526,14 +1526,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 232.
+## Ends in an error in state: 157.
 ##
 ## argument_expression_list -> argument_expression_list COMMA . assignment_expression [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## argument_expression_list COMMA 
+## argument_expression_list COMMA
 ##
 
 # Here, we could say more about the context if we parameterized
@@ -1547,23 +1547,23 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME DOT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME DOT XOR_ASSIGN
 ##
-## Ends in an error in state: 238.
+## Ends in an error in state: 163.
 ##
 ## postfix_expression -> postfix_expression DOT . general_identifier [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## postfix_expression DOT 
+## postfix_expression DOT
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME PTR XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME PTR XOR_ASSIGN
 ##
-## Ends in an error in state: 159.
+## Ends in an error in state: 84.
 ##
 ## postfix_expression -> postfix_expression PTR . general_identifier [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## postfix_expression PTR 
+## postfix_expression PTR
 ##
 
 # clang and gcc expect an identifier.
@@ -1573,35 +1573,35 @@ At this point, the name of a struct or union member is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LBRACK PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LBRACK PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 235.
+## Ends in an error in state: 160.
 ##
 ## expression -> expression . COMMA assignment_expression [ RBRACK COMMA ]
 ## postfix_expression -> postfix_expression LBRACK expression . RBRACK [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## postfix_expression LBRACK expression 
+## postfix_expression LBRACK expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 # We know for sure that an array subscript expression has begun, and
@@ -1618,14 +1618,14 @@ then at this point, a closing bracket ']' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LBRACK XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LBRACK XOR_ASSIGN
 ##
-## Ends in an error in state: 234.
+## Ends in an error in state: 159.
 ##
 ## postfix_expression -> postfix_expression LBRACK . expression RBRACK [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## postfix_expression LBRACK 
+## postfix_expression LBRACK
 ##
 
 Ill-formed expression.
@@ -1633,35 +1633,35 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 231.
+## Ends in an error in state: 156.
 ##
 ## argument_expression_list -> argument_expression_list . COMMA assignment_expression [ RPAREN COMMA ]
 ## option(argument_expression_list) -> argument_expression_list . [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## argument_expression_list 
+## argument_expression_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 230, spurious reduction of production argument_expression_list -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 155, spurious reduction of production argument_expression_list -> assignment_expression 
 ##
 
 Up to this point, a list of expressions has been recognized:
@@ -1671,14 +1671,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 161.
+## Ends in an error in state: 86.
 ##
 ## postfix_expression -> postfix_expression LPAREN . option(argument_expression_list) RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## postfix_expression LPAREN 
+## postfix_expression LPAREN
 ##
 
 # gcc and clang expect an expression: this is incomplete.
@@ -1689,23 +1689,23 @@ followed with a closing parenthesis ')', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION PRE_NAME VAR_NAME COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION PRE_NAME VAR_NAME COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 222.
+## Ends in an error in state: 147.
 ##
 ## conditional_expression -> logical_or_expression QUESTION expression COLON . conditional_expression [ SEMICOLON RPAREN RBRACK RBRACE COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## logical_or_expression QUESTION expression COLON 
+## logical_or_expression QUESTION expression COLON
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION XOR_ASSIGN
 ##
-## Ends in an error in state: 200.
+## Ends in an error in state: 125.
 ##
 ## conditional_expression -> logical_or_expression QUESTION . expression COLON conditional_expression [ SEMICOLON RPAREN RBRACK RBRACE COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## logical_or_expression QUESTION 
+## logical_or_expression QUESTION
 ##
 
 Ill-formed conditional expression.
@@ -1713,35 +1713,35 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME VAR_NAME QUESTION PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 218.
+## Ends in an error in state: 143.
 ##
 ## conditional_expression -> logical_or_expression QUESTION expression . COLON conditional_expression [ SEMICOLON RPAREN RBRACK RBRACE COMMA COLON ]
 ## expression -> expression . COMMA assignment_expression [ COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## logical_or_expression QUESTION expression 
+## logical_or_expression QUESTION expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 # gcc and clang simply expect a colon.
@@ -1756,35 +1756,35 @@ then at this point, a colon ':' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: PACKED LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 391.
+## Ends in an error in state: 399.
 ##
 ## argument_expression_list -> argument_expression_list . COMMA assignment_expression [ RPAREN COMMA ]
 ## attribute_specifier -> PACKED LPAREN argument_expression_list . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PACKED LPAREN argument_expression_list 
+## PACKED LPAREN argument_expression_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 230, spurious reduction of production argument_expression_list -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 155, spurious reduction of production argument_expression_list -> assignment_expression 
 ##
 
 Ill-formed $2 attribute.
@@ -1795,14 +1795,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 19.
 ##
 ## attribute_specifier -> PACKED LPAREN . argument_expression_list RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PACKED LPAREN 
+## PACKED LPAREN
 ##
 
 # clang expects a "parameter declarator" (?).
@@ -1814,14 +1814,14 @@ At this point, a list of expressions is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME TYPEDEF_NAME 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ PRE_NAME TYPEDEF_NAME
 ##
 ## Ends in an error in state: 24.
 ##
 ## primary_expression -> PRE_NAME . VAR_NAME [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
 
 Ill-formed expression.
@@ -1830,14 +1830,14 @@ The following identifier is used as a variable, but has been defined as a type:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: PACKED XOR_ASSIGN 
+translation_unit_file: PACKED XOR_ASSIGN
 ##
 ## Ends in an error in state: 18.
 ##
 ## attribute_specifier -> PACKED . LPAREN argument_expression_list RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER RBRACK PRE_NAME PLUS PACKED NORETURN MINUS LPAREN LONG LBRACK LBRACE INT INLINE INC FLOAT EXTERN EQ ENUM DOUBLE DEC CONSTANT CONST COMMA COLON CHAR BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AUTO ATTRIBUTE AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PACKED 
+## PACKED
 ##
 
 # This one seems important, since CompCert currently does not support __packed__
@@ -1850,15 +1850,15 @@ is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: XOR_ASSIGN 
+translation_unit_file: XOR_ASSIGN
 ##
 ## Ends in an error in state: 2.
 ##
-## list(translation_item) -> list(translation_item) . translation_item [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PRAGMA PACKED NORETURN LONG INT INLINE FLOAT EXTERN EOF ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
+## list(translation_item) -> list(translation_item) . translation_item [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC_ASSERT STATIC SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PRAGMA PACKED NORETURN LONG INT INLINE FLOAT EXTERN EOF ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## translation_unit_file -> list(translation_item) . EOF [ # ]
 ##
 ## The known suffix of the stack is as follows:
-## list(translation_item) 
+## list(translation_item)
 ##
 
 # We are at the toplevel.
@@ -1873,9 +1873,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 394.
+## Ends in an error in state: 402.
 ##
 ## declaration_specifiers_typedef -> TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -1884,11 +1884,11 @@ translation_unit_file: TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN 
+translation_unit_file: PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN
 ##
-## Ends in an error in state: 403.
+## Ends in an error in state: 411.
 ##
 ## declaration_specifiers_typedef -> typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -1897,11 +1897,11 @@ translation_unit_file: PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) 
+## typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type)
 ##
-translation_unit_file: VOLATILE TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: VOLATILE TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 413.
+## Ends in an error in state: 422.
 ##
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -1910,11 +1910,11 @@ translation_unit_file: VOLATILE TYPEDEF PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN 
+translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN
 ##
-## Ends in an error in state: 419.
+## Ends in an error in state: 428.
 ##
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -1923,47 +1923,47 @@ translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME TYPEDEF XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type)
 ##
-translation_unit_file: TYPEDEF INT XOR_ASSIGN 
+translation_unit_file: TYPEDEF INT XOR_ASSIGN
 ##
-## Ends in an error in state: 396.
+## Ends in an error in state: 404.
 ##
 ## declaration_specifiers_typedef -> TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: INT TYPEDEF XOR_ASSIGN 
+translation_unit_file: INT TYPEDEF XOR_ASSIGN
 ##
-## Ends in an error in state: 407.
+## Ends in an error in state: 415.
 ##
 ## declaration_specifiers_typedef -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) 
+## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: VOLATILE TYPEDEF INT XOR_ASSIGN 
+translation_unit_file: VOLATILE TYPEDEF INT XOR_ASSIGN
 ##
-## Ends in an error in state: 415.
+## Ends in an error in state: 424.
 ##
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: VOLATILE INT TYPEDEF XOR_ASSIGN 
+translation_unit_file: VOLATILE INT TYPEDEF XOR_ASSIGN
 ##
-## Ends in an error in state: 423.
+## Ends in an error in state: 432.
 ##
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name)
 ##
 
 # We have begun a type definition (a.k.a. declaration_specifiers_typedef).
@@ -1994,7 +1994,7 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: TYPEDEF XOR_ASSIGN 
+translation_unit_file: TYPEDEF XOR_ASSIGN
 ##
 ## Ends in an error in state: 9.
 ##
@@ -2006,11 +2006,11 @@ translation_unit_file: TYPEDEF XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT RESTRICT REGISTER PRE_NAME PACKED NORETURN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## TYPEDEF list(declaration_specifier_no_type) 
+## TYPEDEF list(declaration_specifier_no_type)
 ##
-translation_unit_file: VOLATILE TYPEDEF XOR_ASSIGN 
+translation_unit_file: VOLATILE TYPEDEF XOR_ASSIGN
 ##
-## Ends in an error in state: 411.
+## Ends in an error in state: 420.
 ##
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) . typedef_name list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2020,7 +2020,7 @@ translation_unit_file: VOLATILE TYPEDEF XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT RESTRICT REGISTER PRE_NAME PACKED NORETURN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type)
 ##
 
 # We have seen the TYPEDEF keyword, and possibly some declaration_specifiers_no_type.
@@ -2042,21 +2042,21 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 133.
+## Ends in an error in state: 230.
 ##
 ## declaration_specifiers(parameter_declaration) -> rlist(declaration_specifier_no_type) . typedef_name list(declaration_specifier_no_type) [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ## declaration_specifiers(parameter_declaration) -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 125, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
+## In state 222, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
 ##
 
 # Analogous to the above, except we are in the context of a parameter declaration,
@@ -2078,9 +2078,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 401.
+## Ends in an error in state: 409.
 ##
 ## declaration_specifiers(declaration(external_declaration)) -> typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> typedef_name list(declaration_specifier_no_type) . TYPEDEF list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2090,11 +2090,11 @@ translation_unit_file: PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name list(declaration_specifier_no_type) 
+## typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 417.
+## Ends in an error in state: 426.
 ##
 ## declaration_specifiers(declaration(external_declaration)) -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . TYPEDEF list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2104,29 +2104,29 @@ translation_unit_file: VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT XOR_ASSIGN 
+translation_unit_file: INT XOR_ASSIGN
 ##
-## Ends in an error in state: 405.
+## Ends in an error in state: 413.
 ##
 ## declaration_specifiers(declaration(external_declaration)) -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: VOLATILE INT XOR_ASSIGN 
+translation_unit_file: VOLATILE INT XOR_ASSIGN
 ##
-## Ends in an error in state: 421.
+## Ends in an error in state: 430.
 ##
 ## declaration_specifiers(declaration(external_declaration)) -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
 
 # We have seen a TYPEDEF_NAME or a primitive type specifier,
@@ -2178,9 +2178,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 112.
+## Ends in an error in state: 209.
 ##
 ## declaration_specifiers(parameter_declaration) -> typedef_name list(declaration_specifier_no_type) . [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
@@ -2189,11 +2189,11 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME TYPEDEF_NAME XOR_AS
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name list(declaration_specifier_no_type) 
+## typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 135.
+## Ends in an error in state: 232.
 ##
 ## declaration_specifiers(parameter_declaration) -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
@@ -2202,27 +2202,27 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE PRE_NAME TYPEDEF_NA
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT XOR_ASSIGN
 ##
-## Ends in an error in state: 118.
+## Ends in an error in state: 215.
 ##
 ## declaration_specifiers(parameter_declaration) -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN VOLATILE INT XOR_ASSIGN
 ##
-## Ends in an error in state: 137.
+## Ends in an error in state: 234.
 ##
 ## declaration_specifiers(parameter_declaration) -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR RPAREN PRE_NAME LPAREN LBRACK COMMA ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
 
 # Analogous to the above situation, except this time, we are in the
@@ -2263,9 +2263,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: VOLATILE XOR_ASSIGN 
+translation_unit_file: VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 409.
+## Ends in an error in state: 418.
 ##
 ## declaration_specifiers(declaration(external_declaration)) -> rlist(declaration_specifier_no_type) . typedef_name list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers(declaration(external_declaration)) -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2275,13 +2275,13 @@ translation_unit_file: VOLATILE XOR_ASSIGN
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 125, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
+## In state 222, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
 ##
 
 # We have seen some specifiers or qualifiers. We have probably seen at least
@@ -2308,9 +2308,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 518.
+## Ends in an error in state: 528.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) . typedef_name list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2320,13 +2320,13 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 125, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
+## In state 222, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
 ##
 # Identical to the previous one, except we are not at the top level,
 # so we know this cannot be the beginning of a function definition.
@@ -2339,9 +2339,9 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME TYPEDEF_NAME VOLATILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME TYPEDEF_NAME VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 515.
+## Ends in an error in state: 524.
 ##
 ## declaration_specifiers(declaration(block_item)) -> typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> typedef_name list(declaration_specifier_no_type) . TYPEDEF list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2351,11 +2351,11 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name list(declaration_specifier_no_type) 
+## typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 520.
+## Ends in an error in state: 530.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . TYPEDEF list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
@@ -2365,29 +2365,29 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE INT XOR_ASSIGN
 ##
-## Ends in an error in state: 517.
+## Ends in an error in state: 526.
 ##
 ## declaration_specifiers(declaration(block_item)) -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE VOLATILE INT XOR_ASSIGN
 ##
-## Ends in an error in state: 522.
+## Ends in an error in state: 532.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . TYPEDEF list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
 
 # This is analogous to the error sentence TYPEDEF_NAME VOLATILE XOR_ASSIGN,
@@ -2418,20 +2418,20 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: UNION LBRACE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 92.
+## Ends in an error in state: 189.
 ##
-## struct_declaration -> specifier_qualifier_list(struct_declaration) . option(struct_declarator_list) SEMICOLON [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT SIGNED SHORT RESTRICT RBRACE PRE_NAME PACKED LONG INT FLOAT ENUM DOUBLE CONST CHAR ATTRIBUTE ALIGNAS ]
+## struct_declaration -> specifier_qualifier_list(struct_declaration) . option(struct_declarator_list) SEMICOLON [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC_ASSERT SIGNED SHORT RESTRICT RBRACE PRE_NAME PACKED LONG INT FLOAT ENUM DOUBLE CONST CHAR ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## specifier_qualifier_list(struct_declaration) 
+## specifier_qualifier_list(struct_declaration)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 78, spurious reduction of production specifier_qualifier_list(struct_declaration) -> typedef_name option(type_qualifier_list) 
+## In state 174, spurious reduction of production specifier_qualifier_list(struct_declaration) -> typedef_name option(type_qualifier_list) 
 ##
 
 # We have (spuriously) recognized a specifier_qualifier_list,
@@ -2463,35 +2463,35 @@ at this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE LONG COLON CONSTANT RPAREN 
+translation_unit_file: UNION LBRACE LONG COLON CONSTANT RPAREN
 ##
-## Ends in an error in state: 287.
+## Ends in an error in state: 295.
 ##
 ## option(struct_declarator_list) -> struct_declarator_list . [ SEMICOLON ]
 ## struct_declarator_list -> struct_declarator_list . COMMA struct_declarator [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## struct_declarator_list 
+## struct_declarator_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 154, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 292, spurious reduction of production struct_declarator -> option(declarator) COLON conditional_expression 
-## In state 294, spurious reduction of production struct_declarator_list -> struct_declarator 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 79, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 300, spurious reduction of production struct_declarator -> option(declarator) COLON conditional_expression 
+## In state 302, spurious reduction of production struct_declarator_list -> struct_declarator 
 ##
 
 # We have seen a non-empty struct_declarator_list.
@@ -2507,14 +2507,14 @@ then at this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE INT COLON XOR_ASSIGN 
+translation_unit_file: UNION LBRACE INT COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 291.
+## Ends in an error in state: 299.
 ##
 ## struct_declarator -> option(declarator) COLON . conditional_expression [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## option(declarator) COLON 
+## option(declarator) COLON
 ##
 
 Ill-formed struct declarator.
@@ -2522,14 +2522,14 @@ At this point, a constant expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: UNION LBRACE INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 288.
+## Ends in an error in state: 296.
 ##
 ## struct_declarator_list -> struct_declarator_list COMMA . struct_declarator [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## struct_declarator_list COMMA 
+## struct_declarator_list COMMA
 ##
 
 Ill-formed struct declaration.
@@ -2537,23 +2537,23 @@ At this point, a struct declarator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE INT PRE_NAME VAR_NAME RPAREN 
+translation_unit_file: UNION LBRACE INT PRE_NAME VAR_NAME RPAREN
 ##
-## Ends in an error in state: 293.
+## Ends in an error in state: 301.
 ##
 ## option(declarator) -> declarator . [ COLON ]
 ## struct_declarator -> declarator . [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## declarator 
+## declarator
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 255, spurious reduction of production declarator_noattrend -> direct_declarator 
-## In state 260, spurious reduction of production attribute_specifier_list -> 
-## In state 261, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
+## In state 263, spurious reduction of production declarator_noattrend -> direct_declarator 
+## In state 268, spurious reduction of production attribute_specifier_list -> 
+## In state 269, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
 ##
 
 # Assuming the declarator so far is complete, we expect
@@ -2574,16 +2574,16 @@ then at this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE VOLATILE ADD_ASSIGN 
+translation_unit_file: UNION LBRACE VOLATILE ADD_ASSIGN
 ##
-## Ends in an error in state: 86.
+## Ends in an error in state: 182.
 ##
 ## option(type_qualifier_list) -> type_qualifier_list . [ VOLATILE RESTRICT PACKED CONST ATTRIBUTE ALIGNAS ]
 ## specifier_qualifier_list(struct_declaration) -> type_qualifier_list . typedef_name option(type_qualifier_list) [ STAR SEMICOLON PRE_NAME LPAREN COLON ]
 ## specifier_qualifier_list(struct_declaration) -> type_qualifier_list . type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## type_qualifier_list 
+## type_qualifier_list
 ##
 
 # A list of qualifiers has been read.
@@ -2597,15 +2597,15 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION LBRACE XOR_ASSIGN 
+translation_unit_file: UNION LBRACE XOR_ASSIGN
 ##
 ## Ends in an error in state: 75.
 ##
-## struct_declaration_list -> struct_declaration_list . struct_declaration [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT SIGNED SHORT RESTRICT RBRACE PRE_NAME PACKED LONG INT FLOAT ENUM DOUBLE CONST CHAR ATTRIBUTE ALIGNAS ]
+## struct_declaration_list -> struct_declaration_list . struct_declaration [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC_ASSERT SIGNED SHORT RESTRICT RBRACE PRE_NAME PACKED LONG INT FLOAT ENUM DOUBLE CONST CHAR ATTRIBUTE ALIGNAS ]
 ## struct_or_union_specifier -> struct_or_union attribute_specifier_list option(other_identifier) LBRACE struct_declaration_list . RBRACE [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## struct_or_union attribute_specifier_list option(other_identifier) LBRACE struct_declaration_list 
+## struct_or_union attribute_specifier_list option(other_identifier) LBRACE struct_declaration_list
 ##
 
 # gcc and clang do not seem prepared to accept a struct or union with
@@ -2617,7 +2617,7 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: UNION XOR_ASSIGN 
+translation_unit_file: UNION XOR_ASSIGN
 ##
 ## Ends in an error in state: 72.
 ##
@@ -2625,7 +2625,7 @@ translation_unit_file: UNION XOR_ASSIGN
 ## struct_or_union_specifier -> struct_or_union attribute_specifier_list . general_identifier [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF STRUCT STATIC STAR SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK INT INLINE FLOAT EXTERN ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## struct_or_union attribute_specifier_list 
+## struct_or_union attribute_specifier_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
@@ -2646,22 +2646,22 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 264.
+## Ends in an error in state: 272.
 ##
 ## direct_declarator -> LPAREN save_context declarator . RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN save_context declarator 
+## LPAREN save_context declarator
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 255, spurious reduction of production declarator_noattrend -> direct_declarator 
-## In state 260, spurious reduction of production attribute_specifier_list -> 
-## In state 261, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
+## In state 263, spurious reduction of production declarator_noattrend -> direct_declarator 
+## In state 268, spurious reduction of production attribute_specifier_list -> 
+## In state 269, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
 ##
 
 Up to this point, a declarator has been recognized:
@@ -2671,14 +2671,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT LPAREN XOR_ASSIGN 
+translation_unit_file: INT LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 98.
+## Ends in an error in state: 195.
 ##
 ## direct_declarator -> LPAREN save_context . declarator RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## LPAREN save_context 
+## LPAREN save_context
 ##
 
 # clang and gcc expect identifier or '(', as usual.
@@ -2688,15 +2688,15 @@ At this point, a declarator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 281.
+## Ends in an error in state: 289.
 ##
 ## identifier_list -> identifier_list . COMMA PRE_NAME VAR_NAME [ RPAREN COMMA ]
 ## option(identifier_list) -> identifier_list . [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## identifier_list 
+## identifier_list
 ##
 
 Ill-formed K&R function definition.
@@ -2707,15 +2707,15 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 104.
+## Ends in an error in state: 201.
 ##
 ## context_parameter_type_list -> save_context . parameter_type_list save_context [ RPAREN ]
 ## direct_declarator -> direct_declarator LPAREN save_context . option(identifier_list) RPAREN [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## direct_declarator LPAREN save_context 
+## direct_declarator LPAREN save_context
 ##
 
 # Ignore K&R syntax, just request ANSI syntax.
@@ -2731,9 +2731,9 @@ followed with a closing parenthesis ')', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT STAR RPAREN 
+translation_unit_file: INT STAR RPAREN
 ##
-## Ends in an error in state: 101.
+## Ends in an error in state: 198.
 ##
 ## declarator_noattrend -> list(pointer1) STAR option(type_qualifier_list) . direct_declarator [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LONG LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## list(pointer1) -> list(pointer1) STAR option(type_qualifier_list) . [ STAR ]
@@ -2741,7 +2741,7 @@ translation_unit_file: INT STAR RPAREN
 ## type_qualifier_list -> option(type_qualifier_list) . attribute_specifier [ VOLATILE STAR RESTRICT PRE_NAME PACKED LPAREN CONST ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## list(pointer1) STAR option(type_qualifier_list) 
+## list(pointer1) STAR option(type_qualifier_list)
 ##
 
 # If the pointer isn't finished, we expect
@@ -2764,26 +2764,26 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: TYPEDEF INT PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: TYPEDEF INT PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 534.
+## Ends in an error in state: 544.
 ##
 ## option(typedef_declarator_list) -> typedef_declarator_list . [ SEMICOLON ]
 ## typedef_declarator_list -> typedef_declarator_list . COMMA typedef_declarator [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_declarator_list 
+## typedef_declarator_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 255, spurious reduction of production declarator_noattrend -> direct_declarator 
-## In state 260, spurious reduction of production attribute_specifier_list -> 
-## In state 261, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
-## In state 538, spurious reduction of production declare_typename(declarator) -> declarator 
-## In state 537, spurious reduction of production typedef_declarator -> declare_typename(declarator) 
-## In state 539, spurious reduction of production typedef_declarator_list -> typedef_declarator 
+## In state 263, spurious reduction of production declarator_noattrend -> direct_declarator 
+## In state 268, spurious reduction of production attribute_specifier_list -> 
+## In state 269, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
+## In state 548, spurious reduction of production declare_typename(declarator) -> declarator 
+## In state 547, spurious reduction of production typedef_declarator -> declare_typename(declarator) 
+## In state 549, spurious reduction of production typedef_declarator_list -> typedef_declarator 
 ##
 
 # Because attribute_specifier_list, declarator and declarator_noattrend have been marked
@@ -2807,28 +2807,28 @@ then at this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: TYPEDEF INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: TYPEDEF INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 535.
+## Ends in an error in state: 545.
 ##
 ## typedef_declarator_list -> typedef_declarator_list COMMA . typedef_declarator [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_declarator_list COMMA 
+## typedef_declarator_list COMMA
 ##
 
 At this point, a declarator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN INT LPAREN RPAREN LPAREN XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN INT LPAREN RPAREN LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 249.
+## Ends in an error in state: 257.
 ##
 ## direct_abstract_declarator -> direct_abstract_declarator LPAREN . option(context_parameter_type_list) RPAREN [ RPAREN LPAREN LBRACK COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## direct_abstract_declarator LPAREN 
+## direct_abstract_declarator LPAREN
 ##
 
 At this point, a list of parameter declarations,
@@ -2836,32 +2836,32 @@ followed with a closing parenthesis ')', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM CONST XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM CONST XOR_ASSIGN
 ##
-## Ends in an error in state: 450.
+## Ends in an error in state: 459.
 ##
 ## asm_attributes -> CONST . asm_attributes [ LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## CONST 
+## CONST
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM VOLATILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 449.
+## Ends in an error in state: 458.
 ##
 ## asm_attributes -> VOLATILE . asm_attributes [ LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## VOLATILE 
+## VOLATILE
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM XOR_ASSIGN
 ##
-## Ends in an error in state: 448.
+## Ends in an error in state: 457.
 ##
-## asm_statement -> ASM . asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## asm_statement -> ASM . asm_attributes LPAREN string_literals_list asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ASM 
+## ASM
 ##
 
 Ill-formed assembly statement.
@@ -2871,26 +2871,26 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON STRING_LITERAL COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON STRING_LITERAL COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 474.
+## Ends in an error in state: 483.
 ##
 ## asm_flags -> asm_flags COMMA . string_literals_list [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_flags COMMA 
+## asm_flags COMMA
 ##
 # We are in the clobber list.
 # We have seen a comma, so we expect a string literal.
 # first(asm_flags) = STRING_LITERAL
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 471.
+## Ends in an error in state: 480.
 ##
 ## asm_arguments -> COLON asm_operands COLON asm_operands COLON . asm_flags [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## COLON asm_operands COLON asm_operands COLON 
+## COLON asm_operands COLON asm_operands COLON
 ##
 # We are at the beginning of the clobber list.
 # first(asm_flags) = STRING_LITERAL
@@ -2903,21 +2903,21 @@ Examples of clobbered resources:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON STRING_LITERAL XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON COLON STRING_LITERAL XOR_ASSIGN
 ##
-## Ends in an error in state: 473.
+## Ends in an error in state: 482.
 ##
 ## asm_arguments -> COLON asm_operands COLON asm_operands COLON asm_flags . [ RPAREN ]
 ## asm_flags -> asm_flags . COMMA string_literals_list [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## COLON asm_operands COLON asm_operands COLON asm_flags 
+## COLON asm_operands COLON asm_operands COLON asm_flags
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 472, spurious reduction of production asm_flags -> string_literals_list 
+## In state 481, spurious reduction of production asm_flags -> string_literals_list 
 ##
 
 # Let's ignore the possibility of concatenating string literals.
@@ -2932,22 +2932,22 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN CONSTANT RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN CONSTANT RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 468.
+## Ends in an error in state: 477.
 ##
 ## asm_arguments -> COLON asm_operands . [ RPAREN ]
 ## asm_arguments -> COLON asm_operands . COLON asm_operands [ RPAREN ]
 ## asm_arguments -> COLON asm_operands . COLON asm_operands COLON asm_flags [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## COLON asm_operands 
+## COLON asm_operands
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 460, spurious reduction of production asm_operands -> asm_operands_ne 
+## In state 469, spurious reduction of production asm_operands -> asm_operands_ne 
 ##
 
 # We have seen one COLON, hence the outputs. (The list of outputs may be empty.)
@@ -2962,21 +2962,21 @@ then at this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 470.
+## Ends in an error in state: 479.
 ##
 ## asm_arguments -> COLON asm_operands COLON asm_operands . [ RPAREN ]
 ## asm_arguments -> COLON asm_operands COLON asm_operands . COLON asm_flags [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## COLON asm_operands COLON asm_operands 
+## COLON asm_operands COLON asm_operands
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 469, spurious reduction of production asm_operands -> 
+## In state 478, spurious reduction of production asm_operands -> 
 ##
 
 # We have seen two COLONs, hence the outputs and inputs. (The list of inputs may be empty.)
@@ -2994,14 +2994,14 @@ then at this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK PRE_NAME VAR_NAME RBRACK XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK PRE_NAME VAR_NAME RBRACK XOR_ASSIGN
 ##
-## Ends in an error in state: 463.
+## Ends in an error in state: 472.
 ##
 ## asm_operand -> asm_op_name . string_literals_list LPAREN expression RPAREN [ RPAREN COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_op_name 
+## asm_op_name
 ##
 
 # Example of asm_operand: [oldval]"=r"(res)
@@ -3013,14 +3013,14 @@ At this point, a string literal, representing a constraint, is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 458.
+## Ends in an error in state: 467.
 ##
 ## asm_op_name -> LBRACK general_identifier . RBRACK [ STRING_LITERAL ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACK general_identifier 
+## LBRACK general_identifier
 ##
 
 Ill-formed assembly operand.
@@ -3028,14 +3028,14 @@ At this point, a closing bracket ']' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON LBRACK XOR_ASSIGN
 ##
-## Ends in an error in state: 457.
+## Ends in an error in state: 466.
 ##
 ## asm_op_name -> LBRACK . general_identifier RBRACK [ STRING_LITERAL ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACK 
+## LBRACK
 ##
 
 Ill-formed assembly operand.
@@ -3043,14 +3043,14 @@ At this point, an identifier is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN CONSTANT RPAREN COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN CONSTANT RPAREN COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 461.
+## Ends in an error in state: 470.
 ##
 ## asm_operands_ne -> asm_operands_ne COMMA . asm_operand [ RPAREN COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_operands_ne COMMA 
+## asm_operands_ne COMMA
 ##
 
 # clang and gcc request a string literal (which is incomplete).
@@ -3060,35 +3060,35 @@ At this point, an assembly operand is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 466.
+## Ends in an error in state: 475.
 ##
 ## asm_operand -> asm_op_name string_literals_list LPAREN expression . RPAREN [ RPAREN COMMA COLON ]
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_op_name string_literals_list LPAREN expression 
+## asm_op_name string_literals_list LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Ill-formed assembly operand.
@@ -3099,14 +3099,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 465.
+## Ends in an error in state: 474.
 ##
 ## asm_operand -> asm_op_name string_literals_list LPAREN . expression RPAREN [ RPAREN COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_op_name string_literals_list LPAREN 
+## asm_op_name string_literals_list LPAREN
 ##
 
 Ill-formed assembly operand.
@@ -3114,15 +3114,15 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL COLON STRING_LITERAL XOR_ASSIGN
 ##
-## Ends in an error in state: 464.
+## Ends in an error in state: 473.
 ##
 ## asm_operand -> asm_op_name string_literals_list . LPAREN expression RPAREN [ RPAREN COMMA COLON ]
 ## string_literals_list -> string_literals_list . STRING_LITERAL [ STRING_LITERAL LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## asm_op_name string_literals_list 
+## asm_op_name string_literals_list
 ##
 
 # If we disregard the concatenation of string literals, then
@@ -3134,15 +3134,15 @@ followed with an expression and a closing parenthesis ')', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL XOR_ASSIGN
 ##
-## Ends in an error in state: 455.
+## Ends in an error in state: 464.
 ##
-## asm_statement -> ASM asm_attributes LPAREN string_literals_list . asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## asm_statement -> ASM asm_attributes LPAREN string_literals_list . asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ## string_literals_list -> string_literals_list . STRING_LITERAL [ STRING_LITERAL RPAREN COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## ASM asm_attributes LPAREN string_literals_list 
+## ASM asm_attributes LPAREN string_literals_list
 ##
 # Expecting either one more string literal, or COLON, or RPAREN.
 
@@ -3156,14 +3156,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 454.
+## Ends in an error in state: 463.
 ##
-## asm_statement -> ASM asm_attributes LPAREN . string_literals_list asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## asm_statement -> ASM asm_attributes LPAREN . string_literals_list asm_arguments RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ASM asm_attributes LPAREN 
+## ASM asm_attributes LPAREN
 ##
 
 Ill-formed assembly statement.
@@ -3171,50 +3171,50 @@ At this point, a string literal, representing an instruction, is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE BREAK XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE BREAK XOR_ASSIGN
 ##
-## Ends in an error in state: 446.
+## Ends in an error in state: 455.
 ##
-## jump_statement -> BREAK . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## jump_statement -> BREAK . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## BREAK 
+## BREAK
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CONTINUE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CONTINUE XOR_ASSIGN
 ##
-## Ends in an error in state: 441.
+## Ends in an error in state: 450.
 ##
-## jump_statement -> CONTINUE . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## jump_statement -> CONTINUE . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## CONTINUE 
+## CONTINUE
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 565.
+## Ends in an error in state: 575.
 ##
-## iteration_statement -> save_context do_statement1 WHILE LPAREN expression RPAREN . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context do_statement1 WHILE LPAREN expression RPAREN . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context do_statement1 WHILE LPAREN expression RPAREN 
+## save_context do_statement1 WHILE LPAREN expression RPAREN
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE GOTO PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE GOTO PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 437.
+## Ends in an error in state: 446.
 ##
-## jump_statement -> GOTO general_identifier . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## jump_statement -> GOTO general_identifier . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## GOTO general_identifier 
+## GOTO general_identifier
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE ASM LPAREN STRING_LITERAL RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 478.
+## Ends in an error in state: 487.
 ##
-## asm_statement -> ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## asm_statement -> ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN 
+## ASM asm_attributes LPAREN string_literals_list asm_arguments RPAREN
 ##
 
 Ill-formed statement.
@@ -3222,32 +3222,32 @@ At this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE CONSTANT COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE CONSTANT COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 445.
+## Ends in an error in state: 454.
 ##
-## labeled_statement -> CASE conditional_expression COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> CASE conditional_expression COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## CASE conditional_expression COLON 
+## CASE conditional_expression COLON
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DEFAULT COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DEFAULT COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 440.
+## Ends in an error in state: 449.
 ##
-## labeled_statement -> DEFAULT COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> DEFAULT COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## DEFAULT COLON 
+## DEFAULT COLON
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME VAR_NAME COLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME VAR_NAME COLON XOR_ASSIGN
 ##
-## Ends in an error in state: 494.
+## Ends in an error in state: 503.
 ##
-## labeled_statement -> general_identifier COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> general_identifier COLON . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## general_identifier COLON 
+## general_identifier COLON
 ##
 
 # gcc and clang request an expression, which seems misleading (incomplete).
@@ -3257,32 +3257,32 @@ At this point, a statement is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE CONSTANT SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE CONSTANT SEMICOLON
 ##
-## Ends in an error in state: 444.
+## Ends in an error in state: 453.
 ##
-## labeled_statement -> CASE conditional_expression . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> CASE conditional_expression . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## CASE conditional_expression 
+## CASE conditional_expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 154, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 79, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
 ##
 
 Ill-formed labeled statement.
@@ -3293,14 +3293,14 @@ then at this point, a colon ':' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE CASE XOR_ASSIGN
 ##
-## Ends in an error in state: 443.
+## Ends in an error in state: 452.
 ##
-## labeled_statement -> CASE . conditional_expression COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> CASE . conditional_expression COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## CASE 
+## CASE
 ##
 
 Ill-formed labeled statement.
@@ -3308,23 +3308,23 @@ At this point, a constant expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DEFAULT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DEFAULT XOR_ASSIGN
 ##
-## Ends in an error in state: 439.
+## Ends in an error in state: 448.
 ##
-## labeled_statement -> DEFAULT . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> DEFAULT . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## DEFAULT 
+## DEFAULT
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 493.
+## Ends in an error in state: 502.
 ##
-## labeled_statement -> general_identifier . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## labeled_statement -> general_identifier . COLON statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## general_identifier 
+## general_identifier
 ##
 
 # gcc and clang apparently do not allow a TYPEDEF_NAME to be reclassified as a label.
@@ -3334,35 +3334,35 @@ At this point, a colon ':' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 564.
+## Ends in an error in state: 574.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
-## iteration_statement -> save_context do_statement1 WHILE LPAREN expression . RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context do_statement1 WHILE LPAREN expression . RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context do_statement1 WHILE LPAREN expression 
+## save_context do_statement1 WHILE LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Ill-formed 'do' ... 'while' statement.
@@ -3373,14 +3373,14 @@ then at this point, a closing parenthesis ')' and a semicolon ';' are expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 563.
+## Ends in an error in state: 573.
 ##
-## iteration_statement -> save_context do_statement1 WHILE LPAREN . expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context do_statement1 WHILE LPAREN . expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context do_statement1 WHILE LPAREN 
+## save_context do_statement1 WHILE LPAREN
 ##
 
 Ill-formed 'do' ... 'while' statement.
@@ -3388,14 +3388,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON WHILE XOR_ASSIGN
 ##
-## Ends in an error in state: 562.
+## Ends in an error in state: 572.
 ##
-## iteration_statement -> save_context do_statement1 WHILE . LPAREN expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context do_statement1 WHILE . LPAREN expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context do_statement1 WHILE 
+## save_context do_statement1 WHILE
 ##
 
 Ill-formed 'do' ... 'while' statement.
@@ -3403,14 +3403,14 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO SEMICOLON XOR_ASSIGN
 ##
-## Ends in an error in state: 561.
+## Ends in an error in state: 571.
 ##
-## iteration_statement -> save_context do_statement1 . WHILE LPAREN expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context do_statement1 . WHILE LPAREN expression RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context do_statement1 
+## save_context do_statement1
 ##
 
 # Quite nicely, in this case, there is no doubt that the statement is
@@ -3424,14 +3424,14 @@ At this point, a 'while' keyword is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO XOR_ASSIGN
 ##
-## Ends in an error in state: 557.
+## Ends in an error in state: 567.
 ##
 ## do_statement1 -> save_context DO . statement [ WHILE ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context DO 
+## save_context DO
 ##
 
 # gcc and clang expect an expression.
@@ -3441,14 +3441,14 @@ At this point, a statement (the loop body) is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 527.
+## Ends in an error in state: 537.
 ##
-## iteration_statement -> save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) 
+## save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN)
 ##
 
 Ill-formed 'for' statement.
@@ -3456,35 +3456,35 @@ At this point, a statement (the loop body) is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 529.
+## Ends in an error in state: 539.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
 ## optional(expression,RPAREN) -> expression . RPAREN [ WHILE TILDE SWITCH STRING_LITERAL STAR SIZEOF SEMICOLON RETURN PRE_NAME PLUS MINUS LPAREN LBRACE INC IF GOTO FOR DO DEFAULT DEC CONTINUE CONSTANT CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG ASM AND ALIGNOF ]
 ##
 ## The known suffix of the stack is as follows:
-## expression 
+## expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 # The use of optional(expression,RPAREN) tells us that we are in a FOR statement.
@@ -3498,14 +3498,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON SEMICOLON XOR_ASSIGN
 ##
-## Ends in an error in state: 525.
+## Ends in an error in state: 535.
 ##
-## iteration_statement -> save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) . optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) . optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON) 
+## save_context FOR LPAREN for_statement_header optional(expression,SEMICOLON)
 ##
 
 # Expecting the third part of the loop header -- the expression
@@ -3518,14 +3518,14 @@ followed with a closing parenthesis ')', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN SEMICOLON XOR_ASSIGN
 ##
-## Ends in an error in state: 524.
+## Ends in an error in state: 534.
 ##
-## iteration_statement -> save_context FOR LPAREN for_statement_header . optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context FOR LPAREN for_statement_header . optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context FOR LPAREN for_statement_header 
+## save_context FOR LPAREN for_statement_header
 ##
 
 # Expecting the second part of the loop header -- the controlling expression.
@@ -3537,35 +3537,35 @@ followed with a semicolon ';', is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN PRE_NAME VAR_NAME RPAREN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN PRE_NAME VAR_NAME RPAREN
 ##
-## Ends in an error in state: 531.
+## Ends in an error in state: 541.
 ##
 ## expression -> expression . COMMA assignment_expression [ SEMICOLON COMMA ]
 ## optional(expression,SEMICOLON) -> expression . SEMICOLON [ TILDE STRING_LITERAL STAR SIZEOF SEMICOLON RPAREN PRE_NAME PLUS MINUS LPAREN INC DEC CONSTANT BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AND ALIGNOF ]
 ##
 ## The known suffix of the stack is as follows:
-## expression 
+## expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 # At the time of writing, optional(expression,SEMICOLON) is used only in FOR
@@ -3579,14 +3579,14 @@ then at this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 512.
+## Ends in an error in state: 521.
 ##
-## iteration_statement -> save_context FOR LPAREN . for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context FOR LPAREN . for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context FOR LPAREN 
+## save_context FOR LPAREN
 ##
 
 # gcc and clang say they expect an expression, which is incomplete.
@@ -3600,14 +3600,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE FOR XOR_ASSIGN
 ##
-## Ends in an error in state: 511.
+## Ends in an error in state: 520.
 ##
-## iteration_statement -> save_context FOR . LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context FOR . LPAREN for_statement_header optional(expression,SEMICOLON) optional(expression,RPAREN) statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context FOR 
+## save_context FOR
 ##
 
 Ill-formed 'for' statement.
@@ -3615,14 +3615,14 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE GOTO XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE GOTO XOR_ASSIGN
 ##
-## Ends in an error in state: 436.
+## Ends in an error in state: 445.
 ##
-## jump_statement -> GOTO . general_identifier SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## jump_statement -> GOTO . general_identifier SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## GOTO 
+## GOTO
 ##
 
 Ill-formed 'goto' statement.
@@ -3630,14 +3630,14 @@ At this point, an identifier (a 'goto' label) is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO IF LPAREN CONSTANT RPAREN SEMICOLON ELSE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE DO IF LPAREN CONSTANT RPAREN SEMICOLON ELSE XOR_ASSIGN
 ##
-## Ends in an error in state: 559.
+## Ends in an error in state: 569.
 ##
-## selection_statement -> save_context ifelse_statement1 . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context ifelse_statement1 . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context ifelse_statement1 
+## save_context ifelse_statement1
 ##
 
 Ill-formed 'if' ... 'else' statement.
@@ -3645,15 +3645,15 @@ At this point, a statement is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 508.
+## Ends in an error in state: 517.
 ##
 ## ifelse_statement1 -> IF LPAREN expression RPAREN save_context . statement ELSE [ WHILE TILDE SWITCH STRING_LITERAL STAR SIZEOF SEMICOLON RETURN PRE_NAME PLUS MINUS LPAREN LBRACE INC IF GOTO FOR DO DEFAULT DEC CONTINUE CONSTANT CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG ASM AND ALIGNOF ]
-## selection_statement -> save_context IF LPAREN expression RPAREN save_context . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context IF LPAREN expression RPAREN save_context . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context IF LPAREN expression RPAREN save_context 
+## save_context IF LPAREN expression RPAREN save_context
 ##
 
 Ill-formed 'if' statement.
@@ -3661,36 +3661,36 @@ At this point, a statement is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 506.
+## Ends in an error in state: 515.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
 ## ifelse_statement1 -> IF LPAREN expression . RPAREN save_context statement ELSE [ WHILE TILDE SWITCH STRING_LITERAL STAR SIZEOF SEMICOLON RETURN PRE_NAME PLUS MINUS LPAREN LBRACE INC IF GOTO FOR DO DEFAULT DEC CONTINUE CONSTANT CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG ASM AND ALIGNOF ]
-## selection_statement -> save_context IF LPAREN expression . RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context IF LPAREN expression . RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context IF LPAREN expression 
+## save_context IF LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Ill-formed 'if' statement.
@@ -3701,15 +3701,15 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 505.
+## Ends in an error in state: 514.
 ##
 ## ifelse_statement1 -> IF LPAREN . expression RPAREN save_context statement ELSE [ WHILE TILDE SWITCH STRING_LITERAL STAR SIZEOF SEMICOLON RETURN PRE_NAME PLUS MINUS LPAREN LBRACE INC IF GOTO FOR DO DEFAULT DEC CONTINUE CONSTANT CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG ASM AND ALIGNOF ]
-## selection_statement -> save_context IF LPAREN . expression RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context IF LPAREN . expression RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context IF LPAREN 
+## save_context IF LPAREN
 ##
 
 Ill-formed 'if' statement.
@@ -3717,15 +3717,15 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE IF XOR_ASSIGN
 ##
-## Ends in an error in state: 504.
+## Ends in an error in state: 513.
 ##
 ## ifelse_statement1 -> IF . LPAREN expression RPAREN save_context statement ELSE [ WHILE TILDE SWITCH STRING_LITERAL STAR SIZEOF SEMICOLON RETURN PRE_NAME PLUS MINUS LPAREN LBRACE INC IF GOTO FOR DO DEFAULT DEC CONTINUE CONSTANT CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG ASM AND ALIGNOF ]
-## selection_statement -> save_context IF . LPAREN expression RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context IF . LPAREN expression RPAREN save_context statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context IF 
+## save_context IF
 ##
 
 Ill-formed 'if' statement.
@@ -3733,14 +3733,14 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 502.
+## Ends in an error in state: 511.
 ##
-## selection_statement -> save_context SWITCH LPAREN expression RPAREN . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context SWITCH LPAREN expression RPAREN . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context SWITCH LPAREN expression RPAREN 
+## save_context SWITCH LPAREN expression RPAREN
 ##
 
 
@@ -3757,35 +3757,35 @@ enclosed within braces '{' and '}'.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 501.
+## Ends in an error in state: 510.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
-## selection_statement -> save_context SWITCH LPAREN expression . RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context SWITCH LPAREN expression . RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context SWITCH LPAREN expression 
+## save_context SWITCH LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Ill-formed 'switch' statement.
@@ -3796,14 +3796,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 500.
+## Ends in an error in state: 509.
 ##
-## selection_statement -> save_context SWITCH LPAREN . expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context SWITCH LPAREN . expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context SWITCH LPAREN 
+## save_context SWITCH LPAREN
 ##
 
 Ill-formed 'switch' statement.
@@ -3811,14 +3811,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE SWITCH XOR_ASSIGN
 ##
-## Ends in an error in state: 499.
+## Ends in an error in state: 508.
 ##
-## selection_statement -> save_context SWITCH . LPAREN expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## selection_statement -> save_context SWITCH . LPAREN expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context SWITCH 
+## save_context SWITCH
 ##
 
 Ill-formed 'switch' statement.
@@ -3826,14 +3826,14 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN PRE_NAME VAR_NAME RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 486.
+## Ends in an error in state: 495.
 ##
-## iteration_statement -> save_context WHILE LPAREN expression RPAREN . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context WHILE LPAREN expression RPAREN . statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context WHILE LPAREN expression RPAREN 
+## save_context WHILE LPAREN expression RPAREN
 ##
 
 Ill-formed 'while' statement.
@@ -3841,35 +3841,35 @@ At this point, a statement (the loop body) is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 485.
+## Ends in an error in state: 494.
 ##
 ## expression -> expression . COMMA assignment_expression [ RPAREN COMMA ]
-## iteration_statement -> save_context WHILE LPAREN expression . RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context WHILE LPAREN expression . RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context WHILE LPAREN expression 
+## save_context WHILE LPAREN expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Ill-formed 'while' statement.
@@ -3880,14 +3880,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE LPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 484.
+## Ends in an error in state: 493.
 ##
-## iteration_statement -> save_context WHILE LPAREN . expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context WHILE LPAREN . expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context WHILE LPAREN 
+## save_context WHILE LPAREN
 ##
 
 Ill-formed 'while' statement.
@@ -3895,14 +3895,14 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE WHILE XOR_ASSIGN
 ##
-## Ends in an error in state: 483.
+## Ends in an error in state: 492.
 ##
-## iteration_statement -> save_context WHILE . LPAREN expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## iteration_statement -> save_context WHILE . LPAREN expression RPAREN statement [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context WHILE 
+## save_context WHILE
 ##
 
 Ill-formed 'while' statement.
@@ -3910,15 +3910,15 @@ At this point, an opening parenthesis '(' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE XOR_ASSIGN
 ##
-## Ends in an error in state: 427.
+## Ends in an error in state: 436.
 ##
-## block_item_list -> option(block_item_list) . block_item [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
-## compound_statement -> save_context LBRACE option(block_item_list) . RBRACE [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## block_item_list -> option(block_item_list) . block_item [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## compound_statement -> save_context LBRACE option(block_item_list) . RBRACE [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## save_context LBRACE option(block_item_list) 
+## save_context LBRACE option(block_item_list)
 ##
 # We are possibly at the end of a block.
 #
@@ -3938,14 +3938,14 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE RETURN XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE RETURN XOR_ASSIGN
 ##
-## Ends in an error in state: 428.
+## Ends in an error in state: 437.
 ##
-## jump_statement -> RETURN . option(expression) SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## jump_statement -> RETURN . option(expression) SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN ENUM ELSE DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## RETURN 
+## RETURN
 ##
 
 # clang and gcc expect an expression.
@@ -3957,37 +3957,37 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE STRING_LITERAL RPAREN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE STRING_LITERAL RPAREN
 ##
-## Ends in an error in state: 431.
+## Ends in an error in state: 440.
 ##
 ## expression -> expression . COMMA assignment_expression [ SEMICOLON COMMA ]
 ## option(expression) -> expression . [ SEMICOLON ]
 ##
 ## The known suffix of the stack is as follows:
-## expression 
+## expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 155, spurious reduction of production primary_expression -> string_literals_list 
-## In state 157, spurious reduction of production postfix_expression -> primary_expression 
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 224, spurious reduction of production expression -> assignment_expression 
+## In state 80, spurious reduction of production primary_expression -> string_literals_list 
+## In state 82, spurious reduction of production postfix_expression -> primary_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 149, spurious reduction of production expression -> assignment_expression 
 ##
 
 Up to this point, an expression has been recognized:
@@ -3997,16 +3997,16 @@ then at this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 568.
+## Ends in an error in state: 578.
 ##
 ## declaration_specifiers(declaration(block_item)) -> typedef_name . list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers_typedef -> typedef_name . list(declaration_specifier_no_type) TYPEDEF list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## general_identifier -> typedef_name . [ COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name 
+## typedef_name
 ##
 
 # We see a type name "foo" at the beginning of a block_item, it seems.
@@ -4032,14 +4032,14 @@ at this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME RPAREN LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 219.
+## Ends in an error in state: 144.
 ##
 ## expression -> expression COMMA . assignment_expression [ SEMICOLON RPAREN RBRACK COMMA COLON ]
 ##
 ## The known suffix of the stack is as follows:
-## expression COMMA 
+## expression COMMA
 ##
 
 Ill-formed use of the sequencing operator ','.
@@ -4047,26 +4047,26 @@ At this point, an expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME COMMA PRE_NAME VAR_NAME RPAREN 
+translation_unit_file: INT PRE_NAME VAR_NAME COMMA PRE_NAME VAR_NAME RPAREN
 ##
-## Ends in an error in state: 545.
+## Ends in an error in state: 555.
 ##
 ## init_declarator_list -> init_declarator_list . COMMA init_declarator [ SEMICOLON COMMA ]
 ## option(init_declarator_list) -> init_declarator_list . [ SEMICOLON ]
 ##
 ## The known suffix of the stack is as follows:
-## init_declarator_list 
+## init_declarator_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 255, spurious reduction of production declarator_noattrend -> direct_declarator 
-## In state 553, spurious reduction of production declare_varname(declarator_noattrend) -> declarator_noattrend 
-## In state 548, spurious reduction of production save_context -> 
-## In state 549, spurious reduction of production attribute_specifier_list -> 
-## In state 550, spurious reduction of production init_declarator -> declare_varname(declarator_noattrend) save_context attribute_specifier_list 
-## In state 547, spurious reduction of production init_declarator_list -> init_declarator_list COMMA init_declarator 
+## In state 263, spurious reduction of production declarator_noattrend -> direct_declarator 
+## In state 563, spurious reduction of production declare_varname(declarator_noattrend) -> declarator_noattrend 
+## In state 558, spurious reduction of production save_context -> 
+## In state 559, spurious reduction of production attribute_specifier_list -> 
+## In state 560, spurious reduction of production init_declarator -> declare_varname(declarator_noattrend) save_context attribute_specifier_list 
+## In state 557, spurious reduction of production init_declarator_list -> init_declarator_list COMMA init_declarator 
 ##
 
 Up to this point, a list of declarators has been recognized:
@@ -4076,14 +4076,14 @@ then at this point, a semicolon ';' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 546.
+## Ends in an error in state: 556.
 ##
 ## init_declarator_list -> init_declarator_list COMMA . init_declarator [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## init_declarator_list COMMA 
+## init_declarator_list COMMA
 ##
 
 Ill-formed declaration.
@@ -4091,23 +4091,23 @@ At this point, an init declarator is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT PRE_NAME VAR_NAME EQ ALIGNAS 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT PRE_NAME VAR_NAME EQ ALIGNAS
 ##
-## Ends in an error in state: 366.
+## Ends in an error in state: 374.
 ##
 ## initializer_list -> option(designation) . c_initializer [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## option(designation) 
+## option(designation)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE PRE_NAME VAR_NAME COMMA DOT PRE_NAME VAR_NAME EQ ALIGNAS 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE PRE_NAME VAR_NAME COMMA DOT PRE_NAME VAR_NAME EQ ALIGNAS
 ##
-## Ends in an error in state: 370.
+## Ends in an error in state: 378.
 ##
 ## initializer_list -> initializer_list COMMA option(designation) . c_initializer [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## initializer_list COMMA option(designation) 
+## initializer_list COMMA option(designation)
 ##
 
 Ill-formed initializer list.
@@ -4115,15 +4115,15 @@ At this point, an initializer is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT PRE_NAME VAR_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT PRE_NAME VAR_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 373.
+## Ends in an error in state: 381.
 ##
 ## designation -> designator_list . EQ [ TILDE STRING_LITERAL STAR SIZEOF PRE_NAME PLUS MINUS LPAREN LBRACE INC DEC CONSTANT BUILTIN_VA_ARG BUILTIN_OFFSETOF BANG AND ALIGNOF ]
 ## option(designator_list) -> designator_list . [ LBRACK DOT ]
 ##
 ## The known suffix of the stack is as follows:
-## designator_list 
+## designator_list
 ##
 
 # We are expecting either one more designator,
@@ -4137,14 +4137,14 @@ then at this point, an equals sign '=' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE DOT XOR_ASSIGN
 ##
-## Ends in an error in state: 326.
+## Ends in an error in state: 334.
 ##
 ## designator -> DOT . general_identifier [ RPAREN LBRACK EQ DOT ]
 ##
 ## The known suffix of the stack is as follows:
-## DOT 
+## DOT
 ##
 
 # clang gives examples of designators.
@@ -4154,32 +4154,32 @@ At this point, the name of a struct or union member is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE LBRACK PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE LBRACK PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 324.
+## Ends in an error in state: 332.
 ##
 ## designator -> LBRACK conditional_expression . RBRACK [ RPAREN LBRACK EQ DOT ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACK conditional_expression 
+## LBRACK conditional_expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 154, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 79, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
 ##
 
 Ill-formed designator.
@@ -4190,14 +4190,14 @@ then at this point, a closing bracket ']' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE LBRACK XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE LBRACK XOR_ASSIGN
 ##
-## Ends in an error in state: 323.
+## Ends in an error in state: 331.
 ##
 ## designator -> LBRACK . conditional_expression RBRACK [ RPAREN LBRACK EQ DOT ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACK 
+## LBRACK
 ##
 
 Ill-formed designator.
@@ -4205,15 +4205,15 @@ At this point, a constant expression is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 369.
+## Ends in an error in state: 377.
 ##
 ## initializer_list -> initializer_list COMMA . option(designation) c_initializer [ RBRACE COMMA ]
 ## option(COMMA) -> COMMA . [ RBRACE ]
 ##
 ## The known suffix of the stack is as follows:
-## initializer_list COMMA 
+## initializer_list COMMA
 ##
 
 # This could be a trailing comma, in which case a closing brace is legal.
@@ -4226,36 +4226,36 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE CONSTANT SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE CONSTANT SEMICOLON
 ##
-## Ends in an error in state: 368.
+## Ends in an error in state: 376.
 ##
 ## c_initializer -> LBRACE initializer_list . option(COMMA) RBRACE [ SEMICOLON RBRACE COMMA ]
 ## initializer_list -> initializer_list . COMMA option(designation) c_initializer [ RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACE initializer_list 
+## LBRACE initializer_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
-## In state 372, spurious reduction of production c_initializer -> assignment_expression 
-## In state 378, spurious reduction of production initializer_list -> option(designation) c_initializer 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 380, spurious reduction of production c_initializer -> assignment_expression 
+## In state 386, spurious reduction of production initializer_list -> option(designation) c_initializer 
 ##
 
 # Omitting the fact that the closing brace can be preceded with a comma.
@@ -4268,14 +4268,14 @@ then at this point, a closing brace '}' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ LBRACE XOR_ASSIGN
 ##
-## Ends in an error in state: 367.
+## Ends in an error in state: 375.
 ##
 ## c_initializer -> LBRACE . initializer_list option(COMMA) RBRACE [ SEMICOLON RBRACE COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## LBRACE 
+## LBRACE
 ##
 
 # An initializer list is expected.
@@ -4289,14 +4289,14 @@ followed with an initializer, is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME EQ XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME EQ XOR_ASSIGN
 ##
-## Ends in an error in state: 551.
+## Ends in an error in state: 561.
 ##
 ## init_declarator -> declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ . c_initializer [ SEMICOLON COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ 
+## declare_varname(declarator_noattrend) save_context attribute_specifier_list EQ
 ##
 
 # clang and gcc expect an expression (incomplete).
@@ -4306,33 +4306,33 @@ At this point, an initializer is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LBRACK CONSTANT SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LBRACK CONSTANT SEMICOLON
 ##
-## Ends in an error in state: 243.
+## Ends in an error in state: 251.
 ##
 ## optional(assignment_expression,RBRACK) -> assignment_expression . RBRACK [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## assignment_expression 
+## assignment_expression
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 158, spurious reduction of production unary_expression -> postfix_expression 
-## In state 162, spurious reduction of production cast_expression -> unary_expression 
-## In state 185, spurious reduction of production multiplicative_expression -> cast_expression 
-## In state 179, spurious reduction of production additive_expression -> multiplicative_expression 
-## In state 198, spurious reduction of production shift_expression -> additive_expression 
-## In state 175, spurious reduction of production relational_expression -> shift_expression 
-## In state 191, spurious reduction of production equality_expression -> relational_expression 
-## In state 207, spurious reduction of production and_expression -> equality_expression 
-## In state 215, spurious reduction of production exclusive_or_expression -> and_expression 
-## In state 216, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
-## In state 217, spurious reduction of production logical_and_expression -> inclusive_or_expression 
-## In state 201, spurious reduction of production logical_or_expression -> logical_and_expression 
-## In state 199, spurious reduction of production conditional_expression -> logical_or_expression 
-## In state 220, spurious reduction of production assignment_expression -> conditional_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 87, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+## In state 145, spurious reduction of production assignment_expression -> conditional_expression 
 ##
 
 # At the time of writing, optional(expression,RBRACK) is used only in direct
@@ -4348,14 +4348,14 @@ then at this point, a closing bracket ']' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 282.
+## Ends in an error in state: 290.
 ##
 ## identifier_list -> identifier_list COMMA . PRE_NAME VAR_NAME [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## identifier_list COMMA 
+## identifier_list COMMA
 ##
 
 # Strangely, gcc requests ')'.
@@ -4365,14 +4365,14 @@ At this point, an identifier is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA PRE_NAME TYPEDEF_NAME 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME COMMA PRE_NAME TYPEDEF_NAME
 ##
-## Ends in an error in state: 283.
+## Ends in an error in state: 291.
 ##
 ## identifier_list -> identifier_list COMMA PRE_NAME . VAR_NAME [ RPAREN COMMA ]
 ##
 ## The known suffix of the stack is as follows:
-## identifier_list COMMA PRE_NAME 
+## identifier_list COMMA PRE_NAME
 ##
 
 Ill-formed K&R function definition.
@@ -4381,29 +4381,29 @@ The following type name is used as a K&R parameter name:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN INT XOR_ASSIGN
 ##
-## Ends in an error in state: 586.
+## Ends in an error in state: 596.
 ##
 ## declaration_specifiers(declaration(block_item)) -> type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE INT XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE INT XOR_ASSIGN
 ##
-## Ends in an error in state: 591.
+## Ends in an error in state: 601.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_typedef_name) -> list(declaration_specifier_no_typedef_name) . declaration_specifier_no_typedef_name [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC STAR SIGNED SHORT SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) 
+## rlist(declaration_specifier_no_type) type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 584.
+## Ends in an error in state: 594.
 ##
 ## declaration_specifiers(declaration(block_item)) -> typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -4412,11 +4412,11 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN PRE
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## typedef_name list(declaration_specifier_no_type) 
+## typedef_name list(declaration_specifier_no_type)
 ##
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 589.
+## Ends in an error in state: 599.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) . [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . storage_class_specifier_no_typedef [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
@@ -4425,7 +4425,7 @@ translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOL
 ## list(declaration_specifier_no_type) -> list(declaration_specifier_no_type) . attribute_specifier [ VOLATILE STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type) typedef_name list(declaration_specifier_no_type)
 ##
 
 # We omit the case of the empty list of declarators
@@ -4439,21 +4439,21 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN VOLATILE XOR_ASSIGN
 ##
-## Ends in an error in state: 587.
+## Ends in an error in state: 597.
 ##
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) . typedef_name list(declaration_specifier_no_type) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ## declaration_specifiers(declaration(block_item)) -> rlist(declaration_specifier_no_type) . type_specifier_no_typedef_name list(declaration_specifier_no_typedef_name) [ STAR SEMICOLON PRE_NAME LPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## rlist(declaration_specifier_no_type) 
+## rlist(declaration_specifier_no_type)
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 125, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
+## In state 222, spurious reduction of production rlist(declaration_specifier_no_type) -> type_qualifier_noattr 
 ##
 
 Ill-formed K&R parameter declaration.
@@ -4464,16 +4464,16 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: VOID PRE_NAME TYPEDEF_NAME PACKED LPAREN CONSTANT RPAREN XOR_ASSIGN 
+translation_unit_file: VOID PRE_NAME TYPEDEF_NAME PACKED LPAREN CONSTANT RPAREN XOR_ASSIGN
 ##
-## Ends in an error in state: 600.
+## Ends in an error in state: 610.
 ##
 ## attribute_specifier_list -> attribute_specifier . attribute_specifier_list [ SEMICOLON LBRACE EQ COMMA ]
 ## rlist(declaration_specifier_no_type) -> attribute_specifier . [ VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT SIGNED SHORT PRE_NAME LONG INT FLOAT ENUM DOUBLE CHAR ]
 ## rlist(declaration_specifier_no_type) -> attribute_specifier . rlist(declaration_specifier_no_type) [ VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT SIGNED SHORT PRE_NAME LONG INT FLOAT ENUM DOUBLE CHAR ]
 ##
 ## The known suffix of the stack is as follows:
-## attribute_specifier 
+## attribute_specifier
 ##
 
 # We have just parsed a list of attribute specifiers, but we cannot
@@ -4497,15 +4497,15 @@ If this is the parameter declaration of a K&R function definition,
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT COMMA XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 141.
+## Ends in an error in state: 238.
 ##
 ## parameter_list -> parameter_list COMMA . parameter_declaration [ RPAREN COMMA ]
 ## parameter_type_list -> parameter_list COMMA . ELLIPSIS [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## parameter_list COMMA 
+## parameter_list COMMA
 ##
 
 At this point, one of the following is expected:
@@ -4514,27 +4514,27 @@ At this point, one of the following is expected:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME SEMICOLON 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN INT PRE_NAME VAR_NAME SEMICOLON
 ##
-## Ends in an error in state: 140.
+## Ends in an error in state: 237.
 ##
 ## parameter_list -> parameter_list . COMMA parameter_declaration [ RPAREN COMMA ]
 ## parameter_type_list -> parameter_list . [ RPAREN ]
 ## parameter_type_list -> parameter_list . COMMA ELLIPSIS [ RPAREN ]
 ##
 ## The known suffix of the stack is as follows:
-## parameter_list 
+## parameter_list
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
-## In state 255, spurious reduction of production declarator_noattrend -> direct_declarator 
-## In state 260, spurious reduction of production attribute_specifier_list -> 
-## In state 261, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
-## In state 277, spurious reduction of production declare_varname(declarator) -> declarator 
-## In state 276, spurious reduction of production parameter_declaration -> declaration_specifiers(parameter_declaration) declare_varname(declarator) 
-## In state 148, spurious reduction of production parameter_list -> parameter_declaration 
+## In state 263, spurious reduction of production declarator_noattrend -> direct_declarator 
+## In state 268, spurious reduction of production attribute_specifier_list -> 
+## In state 269, spurious reduction of production declarator -> declarator_noattrend attribute_specifier_list 
+## In state 285, spurious reduction of production declare_varname(declarator) -> declarator 
+## In state 284, spurious reduction of production parameter_declaration -> declaration_specifiers(parameter_declaration) declare_varname(declarator) 
+## In state 245, spurious reduction of production parameter_list -> parameter_declaration 
 ##
 
 # We omit the possibility of an ellipsis.
@@ -4550,14 +4550,14 @@ then at this point, a closing parenthesis ')' is expected.
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: PRE_NAME VAR_NAME 
+translation_unit_file: PRE_NAME VAR_NAME
 ##
 ## Ends in an error in state: 16.
 ##
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA COLON AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
 
 # This can only happen in a declaration
@@ -4568,15 +4568,15 @@ The following identifier is used as a type, but has not been defined as such:
 
 # ------------------------------------------------------------------------------
 
-translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN INT SEMICOLON XOR_ASSIGN 
+translation_unit_file: INT PRE_NAME VAR_NAME LPAREN PRE_NAME VAR_NAME RPAREN INT SEMICOLON XOR_ASSIGN
 ##
-## Ends in an error in state: 596.
+## Ends in an error in state: 606.
 ##
 ## declaration_list -> declaration_list . kr_param_declaration [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT RESTRICT REGISTER PRE_NAME PACKED NORETURN LONG LBRACE INT INLINE FLOAT EXTERN ENUM DOUBLE CONST CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## function_definition1 -> declaration_specifiers(declaration(external_declaration)) declare_varname(declarator_noattrend) save_context declaration_list . [ LBRACE ]
 ##
 ## The known suffix of the stack is as follows:
-## declaration_specifiers(declaration(external_declaration)) declare_varname(declarator_noattrend) save_context declaration_list 
+## declaration_specifiers(declaration(external_declaration)) declare_varname(declarator_noattrend) save_context declaration_list
 ##
 # clang requests the function body; gcc requests a declaration :-)
 
@@ -4587,7 +4587,7 @@ At this point, one of the following is expected:
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF XOR_ASSIGN
 ##
 ## Ends in an error in state: 52.
 ##
@@ -4595,7 +4595,7 @@ translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF XOR_ASSIGN
 ## postfix_expression -> BUILTIN_OFFSETOF . LPAREN type_name COMMA general_identifier designator_list RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF 
+## BUILTIN_OFFSETOF
 ##
 
 Ill-formed __builtin_offsetof.
@@ -4603,7 +4603,7 @@ At this point, an opening parenthesis '(' is expected.
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN XOR_ASSIGN
 ##
 ## Ends in an error in state: 53.
 ##
@@ -4611,7 +4611,7 @@ translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN XOR_ASSIGN
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN . type_name COMMA general_identifier designator_list RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF LPAREN 
+## BUILTIN_OFFSETOF LPAREN
 ##
 
 Ill-formed __builtin_offsetof.
@@ -4619,23 +4619,23 @@ At this point, a struct or union name is expected.
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID XOR_ASSIGN
 ##
-## Ends in an error in state: 318.
+## Ends in an error in state: 326.
 ##
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name . COMMA general_identifier RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name . COMMA general_identifier designator_list RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF LPAREN type_name 
+## BUILTIN_OFFSETOF LPAREN type_name
 ##
 ## WARNING: This example involves spurious reductions.
 ## This implies that, although the LR(1) items shown above provide an
 ## accurate view of the past (what has been recognized so far), they
 ## may provide an INCOMPLETE view of the future (what was expected next).
 ## In state 67, spurious reduction of production specifier_qualifier_list(type_name) -> type_specifier_no_typedef_name list(specifier_qualifier_no_typedef_name) 
-## In state 306, spurious reduction of production option(abstract_declarator(type_name)) -> 
-## In state 312, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
+## In state 314, spurious reduction of production option(abstract_declarator(type_name)) -> 
+## In state 320, spurious reduction of production type_name -> specifier_qualifier_list(type_name) option(abstract_declarator(type_name)) 
 ##
 
 Ill-formed __builtin_offsetof.
@@ -4643,15 +4643,15 @@ At this point, a colon ',' is expected
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA XOR_ASSIGN
 ##
-## Ends in an error in state: 319.
+## Ends in an error in state: 327.
 ##
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name COMMA . general_identifier RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name COMMA . general_identifier designator_list RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF LPAREN type_name COMMA 
+## BUILTIN_OFFSETOF LPAREN type_name COMMA
 ##
 
 Ill-formed __builtin_offsetof.
@@ -4659,15 +4659,15 @@ At this point, a member-designator is expected.
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA PRE_NAME TYPEDEF_NAME XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA PRE_NAME TYPEDEF_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 320.
+## Ends in an error in state: 328.
 ##
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier . designator_list RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier 
+## BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier
 ##
 
 Ill-formed __builtin_offsetof.
@@ -4675,24 +4675,134 @@ At this point, a member-designator is expected.
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA PRE_NAME TYPEDEF_NAME LBRACK STRING_LITERAL RBRACK XOR_ASSIGN 
+translation_unit_file: PACKED LPAREN BUILTIN_OFFSETOF LPAREN VOID COMMA PRE_NAME TYPEDEF_NAME LBRACK STRING_LITERAL RBRACK XOR_ASSIGN
 ##
-## Ends in an error in state: 329.
+## Ends in an error in state: 337.
 ##
 ## option(designator_list) -> designator_list . [ LBRACK DOT ]
 ## postfix_expression -> BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier designator_list . RPAREN [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RBRACK RBRACE QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA COLON BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier designator_list 
+## BUILTIN_OFFSETOF LPAREN type_name COMMA general_identifier designator_list
 ##
 
 
 Ill-formed __builtin_offsetof.
 At this point, a member-designator is expected.
+# ------------------------------------------------------------------------------
+
+translation_unit_file: STATIC_ASSERT XOR_ASSIGN
+##
+## Ends in an error in state: 76.
+##
+## static_assert_declaration -> STATIC_ASSERT . LPAREN conditional_expression COMMA string_literals_list RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT
+##
+
+Ill-formed _Static_assert.
+At this point, an opening parenthesis '(' is expected.
 
 #------------------------------------------------------------------------------
 
-translation_unit_file: ALIGNAS LPAREN PRE_NAME XOR_ASSIGN 
+translation_unit_file: STATIC_ASSERT LPAREN XOR_ASSIGN
+##
+## Ends in an error in state: 77.
+##
+## static_assert_declaration -> STATIC_ASSERT LPAREN . conditional_expression COMMA string_literals_list RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT LPAREN
+##
+
+Ill-formed _Static_assert.
+At this point, a constant expression is expected.
+
+#------------------------------------------------------------------------------
+
+translation_unit_file: STATIC_ASSERT LPAREN STRING_LITERAL XOR_ASSIGN
+##
+## Ends in an error in state: 167.
+##
+## static_assert_declaration -> STATIC_ASSERT LPAREN conditional_expression . COMMA string_literals_list RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT LPAREN conditional_expression
+##
+## WARNING: This example involves spurious reductions.
+## This implies that, although the LR(1) items shown above provide an
+## accurate view of the past (what has been recognized so far), they
+## may provide an INCOMPLETE view of the future (what was expected next).
+## In state 80, spurious reduction of production primary_expression -> string_literals_list 
+## In state 82, spurious reduction of production postfix_expression -> primary_expression 
+## In state 83, spurious reduction of production unary_expression -> postfix_expression 
+## In state 79, spurious reduction of production cast_expression -> unary_expression 
+## In state 110, spurious reduction of production multiplicative_expression -> cast_expression 
+## In state 104, spurious reduction of production additive_expression -> multiplicative_expression 
+## In state 123, spurious reduction of production shift_expression -> additive_expression 
+## In state 100, spurious reduction of production relational_expression -> shift_expression 
+## In state 116, spurious reduction of production equality_expression -> relational_expression 
+## In state 132, spurious reduction of production and_expression -> equality_expression 
+## In state 140, spurious reduction of production exclusive_or_expression -> and_expression 
+## In state 141, spurious reduction of production inclusive_or_expression -> exclusive_or_expression 
+## In state 142, spurious reduction of production logical_and_expression -> inclusive_or_expression 
+## In state 126, spurious reduction of production logical_or_expression -> logical_and_expression 
+## In state 124, spurious reduction of production conditional_expression -> logical_or_expression 
+##
+
+Ill-formed _Static_assert.
+At this point, a comma ',' is expected.
+
+#------------------------------------------------------------------------------
+
+translation_unit_file: STATIC_ASSERT LPAREN STRING_LITERAL COMMA XOR_ASSIGN
+##
+## Ends in an error in state: 168.
+##
+## static_assert_declaration -> STATIC_ASSERT LPAREN conditional_expression COMMA . string_literals_list RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT LPAREN conditional_expression COMMA
+##
+
+Ill-formed _Static_assert.
+At this point, a string literal is expected.
+
+#------------------------------------------------------------------------------
+
+translation_unit_file: STATIC_ASSERT LPAREN STRING_LITERAL COMMA STRING_LITERAL XOR_ASSIGN
+##
+## Ends in an error in state: 169.
+##
+## static_assert_declaration -> STATIC_ASSERT LPAREN conditional_expression COMMA string_literals_list . RPAREN SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+## string_literals_list -> string_literals_list . STRING_LITERAL [ STRING_LITERAL RPAREN ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT LPAREN conditional_expression COMMA string_literals_list
+##
+
+Ill-formed _Static_assert.
+At this point, a closing parenthesis ')' is expected.
+
+#------------------------------------------------------------------------------
+
+translation_unit_file: STATIC_ASSERT LPAREN STRING_LITERAL COMMA STRING_LITERAL RPAREN XOR_ASSIGN
+##
+## Ends in an error in state: 170.
+##
+## static_assert_declaration -> STATIC_ASSERT LPAREN conditional_expression COMMA string_literals_list RPAREN . SEMICOLON [ WHILE VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF TILDE SWITCH STRUCT STRING_LITERAL STATIC_ASSERT STATIC STAR SIZEOF SIGNED SHORT SEMICOLON RETURN RESTRICT REGISTER RBRACE PRE_NAME PRAGMA PLUS PACKED NORETURN MINUS LPAREN LONG LBRACE INT INLINE INC IF GOTO FOR FLOAT EXTERN EOF ENUM DOUBLE DO DEFAULT DEC CONTINUE CONSTANT CONST CHAR CASE BUILTIN_VA_ARG BUILTIN_OFFSETOF BREAK BANG AUTO ATTRIBUTE ASM AND ALIGNOF ALIGNAS ]
+##
+## The known suffix of the stack is as follows:
+## STATIC_ASSERT LPAREN conditional_expression COMMA string_literals_list RPAREN
+##
+
+Ill-formed _Static_assert.
+At this point, a semicolon ';' is expected.
+
+#------------------------------------------------------------------------------
+
+translation_unit_file: ALIGNAS LPAREN PRE_NAME XOR_ASSIGN
 ##
 ## Ends in an error in state: 29.
 ##
@@ -4700,20 +4810,20 @@ translation_unit_file: ALIGNAS LPAREN PRE_NAME XOR_ASSIGN
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
-translation_unit_file: ALIGNAS LPAREN VOID LPAREN VOID LPAREN PRE_NAME XOR_ASSIGN 
+translation_unit_file: ALIGNAS LPAREN VOID LPAREN VOID LPAREN PRE_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 147.
+## Ends in an error in state: 244.
 ##
 ## declarator_identifier -> PRE_NAME . low_prec TYPEDEF_NAME [ RPAREN PACKED LPAREN LBRACK ATTRIBUTE ALIGNAS ]
 ## declarator_identifier -> PRE_NAME . VAR_NAME [ RPAREN PACKED LPAREN LBRACK ATTRIBUTE ALIGNAS ]
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
-translation_unit_file: UNION PRE_NAME XOR_ASSIGN 
+translation_unit_file: UNION PRE_NAME XOR_ASSIGN
 ##
 ## Ends in an error in state: 40.
 ##
@@ -4721,38 +4831,38 @@ translation_unit_file: UNION PRE_NAME XOR_ASSIGN
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ XOR_ASSIGN VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL TYPEDEF SUB_ASSIGN STRUCT STATIC STAR SLASH SIGNED SHORT SEMICOLON RPAREN RIGHT_ASSIGN RIGHT RESTRICT REGISTER RBRACK RBRACE QUESTION PTR PRE_NAME PLUS PERCENT PACKED OR_ASSIGN NORETURN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LONG LEQ LEFT_ASSIGN LEFT LBRACK LBRACE INT INLINE INC HAT GT GEQ FLOAT EXTERN EQEQ EQ ENUM DOUBLE DOT DIV_ASSIGN DEC CONST COMMA COLON CHAR BARBAR BAR AUTO ATTRIBUTE AND_ASSIGN ANDAND AND ALIGNAS ADD_ASSIGN ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
-translation_unit_file: VOID PRE_NAME TYPEDEF_NAME LBRACE PRE_NAME XOR_ASSIGN 
+translation_unit_file: VOID PRE_NAME TYPEDEF_NAME LBRACE PRE_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 433.
+## Ends in an error in state: 442.
 ##
 ## general_identifier -> PRE_NAME . VAR_NAME [ COLON ]
 ## primary_expression -> PRE_NAME . VAR_NAME [ XOR_ASSIGN SUB_ASSIGN STAR SLASH SEMICOLON RIGHT_ASSIGN RIGHT QUESTION PTR PLUS PERCENT OR_ASSIGN NEQ MUL_ASSIGN MOD_ASSIGN MINUS LT LPAREN LEQ LEFT_ASSIGN LEFT LBRACK INC HAT GT GEQ EQEQ EQ DOT DIV_ASSIGN DEC COMMA BARBAR BAR AND_ASSIGN ANDAND AND ADD_ASSIGN ]
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ VOLATILE TYPEDEF STATIC STAR SEMICOLON RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN INLINE EXTERN CONST COLON AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
-translation_unit_file: VOID PRE_NAME TYPEDEF_NAME LPAREN PRE_NAME XOR_ASSIGN 
+translation_unit_file: VOID PRE_NAME TYPEDEF_NAME LPAREN PRE_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 105.
+## Ends in an error in state: 202.
 ##
 ## identifier_list -> PRE_NAME . VAR_NAME [ RPAREN COMMA ]
 ## typedef_name -> PRE_NAME . TYPEDEF_NAME [ VOLATILE STATIC STAR RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LBRACK INLINE EXTERN CONST COMMA AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
-translation_unit_file: VOID PRE_NAME XOR_ASSIGN 
+translation_unit_file: VOID PRE_NAME XOR_ASSIGN
 ##
-## Ends in an error in state: 93.
+## Ends in an error in state: 190.
 ##
 ## declarator_identifier -> PRE_NAME . low_prec TYPEDEF_NAME [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ## declarator_identifier -> PRE_NAME . VAR_NAME [ VOLATILE VOID UNSIGNED UNION UNDERSCORE_BOOL STRUCT STATIC SIGNED SHORT SEMICOLON RPAREN RESTRICT REGISTER PRE_NAME PACKED NORETURN LPAREN LONG LBRACK LBRACE INT INLINE FLOAT EXTERN EQ ENUM DOUBLE CONST COMMA COLON CHAR AUTO ATTRIBUTE ALIGNAS ]
 ##
 ## The known suffix of the stack is as follows:
-## PRE_NAME 
+## PRE_NAME
 ##
 
 # This is not supposed to be possible, since the Lexer can only emit a
diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly
index e21a3519..822c7011 100644
--- a/cparser/pre_parser.mly
+++ b/cparser/pre_parser.mly
@@ -57,7 +57,7 @@
   AUTO REGISTER INLINE THREAD_LOCAL NORETURN CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE
   UNDERSCORE_BOOL CONST VOLATILE VOID STRUCT UNION ENUM CASE DEFAULT IF ELSE
   SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN BUILTIN_VA_ARG ALIGNOF
-  ATTRIBUTE ALIGNAS PACKED ASM BUILTIN_OFFSETOF
+  ATTRIBUTE ALIGNAS PACKED ASM BUILTIN_OFFSETOF STATIC_ASSERT
 
 %token EOF
 
@@ -404,6 +404,7 @@ expression:
 declaration(phantom):
 | declaration_specifiers(declaration(phantom)) init_declarator_list?    SEMICOLON
 | declaration_specifiers_typedef               typedef_declarator_list? SEMICOLON
+| static_assert_declaration
     {}
 
 init_declarator_list:
@@ -519,6 +520,7 @@ struct_declaration_list:
 
 struct_declaration:
 | specifier_qualifier_list(struct_declaration) struct_declarator_list? SEMICOLON
+| static_assert_declaration
     {}
 
 (* As in the standard, except it also encodes the constraint described
@@ -608,6 +610,10 @@ gcc_attribute_word:
 | PACKED
     {}
 
+static_assert_declaration:
+|  STATIC_ASSERT LPAREN constant_expression COMMA string_literals_list RPAREN SEMICOLON
+    {}
+
 function_specifier:
 | INLINE
 | NORETURN
diff --git a/debug/Dwarfgen.ml b/debug/Dwarfgen.ml
index 6c1d0846..020ac60e 100644
--- a/debug/Dwarfgen.ml
+++ b/debug/Dwarfgen.ml
@@ -543,8 +543,8 @@ let diab_file_loc sec (f,l)  =
 
 let prod_name =
   let version_string =
-    if Version.buildnr <> "" && Version.tag <> "" then
-      Printf.sprintf "Release: %s, Build: %s, Tag: %s" Version.version Version.buildnr Version.tag
+    if Version.buildnr <> "" && Version.tag <> "" && Version.branch <> "" then
+      Printf.sprintf "Release: %s, Build: %s, Tag: %s, Branch:%s" Version.version Version.buildnr Version.tag Version.branch
     else
       Version.version in
   Printf.sprintf "AbsInt Angewandte Informatik GmbH:CompCert Version %s:(%s,%s,%s,%s)"
diff --git a/doc/index-kvx.html b/doc/index-kvx.html
index 97eefc24..62afb423 100644
--- a/doc/index-kvx.html
+++ b/doc/index-kvx.html
@@ -22,25 +22,28 @@ a:active {color : Red; text-decoration : underline; }
 </HEAD>
 <BODY>
 
-<font color=gray><H1 align="center">The CompCert verified compiler</H1>
+<font color=gray>
+<H1 align="center">The CompCert verified compiler</H1>
 <H2 align="center">Commented Coq development</H2>
-<H3 align="center">Version 3.7, 2020-03-31</H3></font>
+<H3 align="center">Version 3.8, 2020-11-16</H3>
+</font>
 <H3 align="center">PATCHED for the Kalray MPPA-KVX VLIW CORE<!--@DATE@--></H3>
 
 <H2>Introduction</H2>
 
-<p>This web page is a patched version of the table of contents of the official CompCert documentation,
-  as given on <A HREF="http://compcert.inria.fr/doc/index.html">the CompCert Web site</A>.
+<p>This web page is a patched version of the table of contents of the official CompCert sources documentation,
+  as given on <A HREF="http://compcert.org/doc/index.html">the CompCert Web site</A>.
   The unmodified parts of this table appear in <font color=gray>gray</font>.
   <br>
   <br>
-  A high-level view of this backend of CompCert is provided by this HAL preprint of Six, Boulm&eacute; and Monniaux (2019):
-  <div><a href=https://hal.archives-ouvertes.fr/hal-02185883>Certified Compiler Backends for VLIW Processors (Highly Modular Postpass-Scheduling in the CompCert Certified Compiler)</a></div>
+  A high-level view of this CompCert backend is provided by this OOPSLA'20 paper (of Six, Boulm&eacute; and Monniaux):
+  <div><a href=https://hal.archives-ouvertes.fr/hal-02185883>Certified and Efficient Instruction Scheduling. Application to Interlocked VLIW Processors.</a></div>
   <br>
-  Our source code is available on our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a> (see conditions in the LICENSE file).
+  See also the <tt>README.md</tt> of our <a href=https://gricad-gitlab.univ-grenoble-alpes.fr/certicompil/compcert-kvx>GitLab public repository</a>.
 </p>
 
-<font color=gray><H2>Table of contents</H2>
+<font color=gray>
+<H2>Table of contents</H2>
 
 <H3>General-purpose libraries, data structures and algorithms</H3>
 
@@ -62,9 +65,9 @@ inequations by fixpoint iteration.
 
 <H4>The <tt>abstractbb</tt> library, introduced for KVX core</H4>
 <UL>
-<LI> <A HREF="html/compcert.kvx.abstractbb.AbstractBasicBlocksDef.html">AbstractBasicBlocksDef</A>: an IR for verifying some semantic properties on basic-blocks.
-<LI> <A HREF="html/compcert.kvx.abstractbb.Parallelizability.html">Parallelizability</A>: verifying that sequential and parallel semantics are equivalent for a given abstract basic-block.
-<LI> <A HREF="html/compcert.kvx.abstractbb.ImpSimuTest.html">ImpSimuTest</A>: verifying that a given abstract basic-block is simulated by another one for sequential semantics. This module refines <A HREF="html/compcert.kvx.abstractbb.SeqSimuTheory.html">SeqSimuTheory</A> with hash-consing and uses <A HREF=https://github.com/boulme/ImpureDemo>the Impure library</A> to reason on physical equality and handling of imperative code in Coq.
+<LI> <A HREF="html/compcert.scheduling.abstractbb.AbstractBasicBlocksDef.html">AbstractBasicBlocksDef</A>: an IR for verifying some semantic properties on basic-blocks.
+<LI> <A HREF="html/compcert.scheduling.abstractbb.Parallelizability.html">Parallelizability</A>: verifying that sequential and parallel semantics are equivalent for a given abstract basic-block.
+<LI> <A HREF="html/compcert.scheduling.abstractbb.ImpSimuTest.html">ImpSimuTest</A>: verifying that a given abstract basic-block is simulated by another one for sequential semantics. This module refines <A HREF="html/compcert.scheduling.abstractbb.SeqSimuTheory.html">SeqSimuTheory</A> with hash-consing and uses <A HREF=https://github.com/boulme/ImpureDemo>the Impure library</A> to reason on physical equality and handling of imperative code in Coq.
 </UL>
 
 <font color=gray>
@@ -85,6 +88,8 @@ See also: <A HREF="html/compcert.common.Memdata.html">Memdata</A> (in-memory rep
 <LI> <A HREF="html/compcert.common.Determinism.html">Determinism</A>: determinism properties of small-step semantics.
 <LI> <A HREF="html/compcert.kvx.Op.html"><I>Op</I></A>: operators, addressing modes and their
 semantics.
+<LI> <A HREF="html/compcert.common.Builtins.html">Builtins</A>: semantics of built-in functions. <BR>
+See also: <A HREF="html/compcert.common.Builtins0.html">Builtins0</A> (target-independent part), <A HREF="html/compcert.kvx.Builtins1.html"><I>Builtins1</I></A> (target-dependent part).
 <LI> <A HREF="html/compcert.common.Unityping.html">Unityping</A>: a solver for atomic unification constraints.
 </UL>
 
@@ -123,7 +128,9 @@ view of the activation record.
 </font>
 <H4>Languages introduced for KVX core</H4>
 <UL>
-<LI> <A HREF="html/compcert.kvx.lib.Machblock.html">Machblock</A>: a variant of Mach, with a syntax for basic-blocks, and a block-step semantics (execute one basic-block in one step).
+  <LI> <A HREF="html/compcert.scheduling.RTLpath.html">RTLpath</A>: extends RTL with annotations for delimitating superblocks (with possible liveness information).
+    This IR is generic over the processor, and used for prepass scheduling.
+<LI> <A HREF="html/compcert.scheduling.postpass_lib.Machblock.html">Machblock</A>: a variant of Mach, with a syntax for basic-blocks, and a block-step semantics (execute one basic-block in one step).
 This IR is generic over the processor, even if currently, only used for KVX.
 <LI> <A HREF="html/compcert.kvx.Asmvliw.html"><I>Asmvliw</I></A>: abstract syntax and semantics for KVX VLIW assembly: atomic instructions are grouped by "bundles". These bundles are executed sequentially, but execution is parallel within bundles.
 <LI> <A HREF="html/compcert.kvx.Asmblock.html"><I>Asmblock</I></A>: a variant of Asmvliw, with a sequential semantics within bundles, which make them corresponds here to usual basic-blocks.
@@ -131,14 +138,14 @@ This IR is generic over the processor, even if currently, only used for KVX.
 <LI> <A HREF="html/compcert.kvx.Asm.html"><I>Asm</I></A>: a variant of Asmvliw with a flat syntax for bundles, instead of a structured one (bundle termination is encoded as a pseudo-instruction). This IR is mainly a wrapper of <I>Asmvliw</I> for a smooth integration in CompCert (and an easier pretty-printing of the abstract syntax).
 </UL>
 
-<font color=gray><H3>Compiler passes</H3></font>
+<H3>Compiler passes</H3>
 
 <TABLE cellpadding="5%" style="color:#808080">
 <TR valign="top">
-  <TH>Pass</TH>
-  <TH>Source &amp; target</TH>
-  <TH>Compiler&nbsp;code</TH>
-  <TH>Correctness&nbsp;proof</TH>
+  <TH align=left>Pass</TH>
+  <TH align=left>Source &amp; target</TH>
+  <TH align=left>Compiler&nbsp;code</TH>
+  <TH align=left>Correctness&nbsp;proof</TH>
 </TR>
 
 <TR valign="top">
@@ -172,7 +179,8 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 
 <TR valign="top">
-  <TD>Recognition of operators<br>and addressing modes</TD>
+  <TD>Recognition of operators<br>and addressing modes;<br>
+      if-conversion</TD>
   <TD>Cminor to CminorSel</TD>
   <TD><A HREF="html/compcert.backend.Selection.html">Selection</A><br>
       <A HREF="html/compcert.kvx.SelectOp.html"><I>SelectOp</I></A><br>
@@ -215,16 +223,6 @@ This IR is generic over the processor, even if currently, only used for KVX.
   <TD><A HREF="html/compcert.backend.Renumber.html">Renumber</A></TD>
   <TD><A HREF="html/compcert.backend.Renumberproof.html">Renumberproof</A></TD>
 </TR>
-
-<TR valign="top">
-  <TD>Constant propagation</TD>
-  <TD>RTL to RTL</TD>
-  <TD><A HREF="html/compcert.backend.Constprop.html">Constprop</A><br>
-      <A HREF="html/compcert.kvx.ConstpropOp.html"><I>ConstpropOp</I></A></TD>
-  <TD><A HREF="html/compcert.backend.Constpropproof.html">Constpropproof</A><br>
-      <A HREF="html/compcert.kvx.ConstpropOpproof.html"><I>ConstproppOproof</I></A></TD>
-</TR>
-
 <TR valign="top">
   <TD>Common subexpression elimination</TD>
   <TD>RTL to RTL</TD>
@@ -235,12 +233,19 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 
 <TR valign="top">
+  <TD>Constant propagation</TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.Constprop.html">Constprop</A><br>
+      <A HREF="html/compcert.kvx.ConstpropOp.html"><I>ConstpropOp</I></A></TD>
+  <TD><A HREF="html/compcert.backend.Constpropproof.html">Constpropproof</A><br>
+      <A HREF="html/compcert.kvx.ConstpropOpproof.html"><I>ConstproppOproof</I></A></TD>
+</TR>
+<TR valign="top">
   <TD>Redundancy elimination</TD>
   <TD>RTL to RTL</TD>
   <TD><A HREF="html/compcert.backend.Deadcode.html">Deadcode</A></TD>
   <TD><A HREF="html/compcert.backend.Deadcodeproof.html">Deadcodeproof</A></TD>
 </TR>
-
 <TR valign="top">
   <TD>Removal of unused static globals</TD>
   <TD>RTL to RTL</TD>
@@ -248,6 +253,60 @@ This IR is generic over the processor, even if currently, only used for KVX.
   <TD><A HREF="html/compcert.backend.Unusedglobproof.html">Unusedglobproof</A></TD>
 </TR>
 
+<TR valign="top" style="color:#000000">
+  <TD colspan="4"><b>Passes introduced for profiling (for later use in trace selection)</b></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Insert profiling annotations (for recording experiments -- see PROFILE.md).
+  </TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.Profiling.html">Profiling</A></TD>
+  <TD><A HREF="html/compcert.backend.Profilingproof.html">Profilingproof</A></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Update ICond nodes (from recorded experiments -- see PROFILE.md).
+  </TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.ProfilingExploit.html">ProfilingExploit</A></TD>
+  <TD><A HREF="html/compcert.backend.ProfilingExploitproof.html">ProfilingExploitproof</A></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD colspan="4"><b>Passes introduced for superblock prepass scheduling</b></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Code duplications (trace selection, loop unrollings, etc)
+  </TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.Duplicate.html">Duplicate</A> (generic checker)</TD>
+  <TD><A HREF="html/compcert.backend.Duplicateproof.html">Duplicateproof</A> (generic proof)<BR>
+    <a href="html/compcert.backend.Duplicatepasses.html">Duplicatepasses</a> (several passes from several oracles)</TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Superblock selection (with Liveness information)</TD>
+  <TD>RTL to RTLPath</TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpathLivegen.html">RTLpathLivegen</A></TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpathLivegenproof.html">RTLpathLivegenproof</A></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Superblock prepass scheduling</TD>
+  <TD>RTLPath to RTLPath</TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpathScheduler.html">RTLpathScheduler</A></TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpathSchedulerproof.html">RTLpathSchedulerproof</A><BR>
+    with <A HREF="html/compcert.scheduling.RTLpathSE_theory.html">RTLpathSE_theory</A> (the theory of symbolic execution on RTLpath)<BR>
+    and <A HREF="html/compcert.scheduling.RTLpathSE_simu_specs.html">RTLpathSE_simu_specs</A> (the low-level specifications of the simulation checker)<BR>
+    and <A HREF="html/compcert.scheduling.RTLpathSE_impl.html">RTLpathSE_impl</A> (the simulation checker with hash-consing)</TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Forgeting superblocks</TD>
+  <TD>RTLPath to RTL</TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpath.html#transf_program">RTLpath.transf_program</A></TD>
+  <TD><A HREF="html/compcert.scheduling.RTLpathproof.html">RTLpathproof</A></TD>
+</TR>
+
+<TR valign="top">
+  <TD colspan="4"><b>Passes from register allocation</b></TD>
+</TR>
+
 <TR valign="top">
   <TD>Register allocation (validation a posteriori)</TD>
   <TD>RTL to LTL</TD>
@@ -292,36 +351,37 @@ This IR is generic over the processor, even if currently, only used for KVX.
   <TD><A HREF="html/compcert.backend.Stackingproof.html">Stackingproof</A><br>
       <A HREF="html/compcert.common.Separation.html">Separation</A></TD>
 </TR>
-</TABLE>
 
-<H4>Compilation passes introduced for KVX VLIW</H4>
-<TABLE cellpadding="5%">
-<TR valign="top">
+<TR valign="top" style="color:#000000">
+  <TD colspan="4"><b>Passes introduced for KVX VLIW</b></TD>
+</TR>
+<TR valign="top" style="color:#000000">
   <TD>Reconstruction of basic-blocks at Mach level</TD>
   <TD>Mach to Machblock</TD>
-  <TD><A HREF="html/compcert.kvx.lib.Machblockgen.html">Machblockgen</A></TD>
-  <TD><A HREF="html/compcert.kvx.lib.ForwardSimulationBlock.html">ForwardSimulationBlock</A><BR>
-      <A HREF="html/compcert.kvx.lib.Machblockgenproof.html">Machblockgenproof</A></TD>
+  <TD><A HREF="html/compcert.scheduling.postpass_lib.Machblockgen.html">Machblockgen</A></TD>
+  <TD><A HREF="html/compcert.scheduling.postpass_lib.ForwardSimulationBlock.html">ForwardSimulationBlock</A><BR>
+      <A HREF="html/compcert.scheduling.postpass_lib.Machblockgenproof.html">Machblockgenproof</A></TD>
 </TR>
 
-<TR valign="top">
+<TR valign="top" style="color:#000000">
   <TD>Emission of purely sequential assembly code</TD>
   <TD>Machblock to Asmblock</TD>
   <TD><A HREF="html/compcert.kvx.Asmblockgen.html"><I>Asmblockgen</I></A></TD>
-  <TD><A HREF="html/compcert.kvx.lib.Asmblockgenproof0.html"><I>Asmblockgenproof0</I></A><BR>
+  <TD><A HREF="html/compcert.kvx.Asmblockgenproof0.html"><I>Asmblockgenproof0</I></A><BR>
       <A HREF="html/compcert.kvx.Asmblockgenproof1.html"><I>Asmblockgenproof1</I></A><BR>
       <A HREF="html/compcert.kvx.Asmblockgenproof.html"><I>Asmblockgenproof</I></A></TD>
 </TR>
 
-<TR valign="top">
+<TR valign="top" style="color:#000000">
   <TD>Bundling (and basic-block scheduling)</TD>
   <TD>Asmblock to Asmvliw</TD>
-  <TD><A HREF="html/compcert.kvx.PostpassScheduling.html"><I>PostpassScheduling</I></A> using<BR>
-      <A HREF="html/compcert.kvx.Asmblockdeps.html"><I>Asmblockdeps</I></A> and the <tt>abstractbb</tt> library</TD>
+  <TD><A HREF="html/compcert.kvx.PostpassScheduling.html"><I>PostpassScheduling</I></A><BR>
+    using <A HREF="html/compcert.kvx.Asmblockdeps.html"><I>Asmblockdeps</I></A><BR>
+    and the <tt>abstractbb</tt> library</TD>
   <TD><A HREF="html/compcert.kvx.PostpassSchedulingproof.html"><I>PostpassSchedulingproof</I></A></TD>
 </TR>
 
-<TR valign="top">
+<TR valign="top" style="color:#000000">
   <TD>Flattening bundles (only a bureaucratic operation)</TD>
   <TD>Asmvliw to Asm</TD>
   <TD><A HREF="html/compcert.kvx.Asmgen.html"><I>Asmgen</I></A></TD>
@@ -329,12 +389,13 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 </TABLE>
 
-<font color=gray>
-<H3>All together</H3>
+<H3>All together (there are many more RTL passes than on vanilla CompCert: their order is specified in Compiler)</H3>
 
 <UL>
+</font>
 <LI> <A HREF="html/compcert.driver.Compiler.html">Compiler</A>: composing the passes together;
 whole-compiler semantic preservation theorems.
+<font color=gray>
 <LI> <A HREF="html/compcert.driver.Complements.html">Complements</A>: interesting consequences of the semantic preservation theorems.
 </UL>
 
diff --git a/doc/index.html b/doc/index.html
index 5f4ac5e1..6c97fb15 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -24,7 +24,7 @@ a:active {color : Red; text-decoration : underline; }
 
 <H1 align="center">The CompCert verified compiler</H1>
 <H2 align="center">Commented Coq development</H2>
-<H3 align="center">Version 3.7, 2020-03-31</H3>
+<H3 align="center">Version 3.8, 2020-11-16</H3>
 
 <H2>Introduction</H2>
 
@@ -56,12 +56,14 @@ substantially changed since the overview papers above were
 written.</P>
 
 <P>The complete sources for CompCert can be downloaded from
-<A HREF="http://compcert.inria.fr/">the CompCert Web site</A>.</P>
+<A HREF="https://github.com/AbsInt/CompCert/">the Git repository</A>
+or <A HREF="https://compcert.org/">the CompCert Web site</A>.
+</P>
 
 <P>This document and the CompCert sources are copyright Institut
 National de Recherche en Informatique et en Automatique (INRIA) and
 AbsInt Angewandte Informatik GmbH, and are distributed under the terms of the
-following <A HREF="LICENSE">license</A>.
+following <A HREF="LICENSE.txt">license</A>.
 </P>
 
 <H2>Table of contents</H2>
@@ -349,7 +351,7 @@ reconstruction.
 </UL>
 
 <HR>
-<ADDRESS>Xavier.Leroy@inria.fr</ADDRESS>
+<ADDRESS>xavier.leroy@college-de-france.fr</ADDRESS>
 <HR>
 
 </BODY>
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index eb21b3f8..9b7b5c4d 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -27,17 +27,35 @@ let option_ftailcalls = ref true
 let option_fconstprop = ref true
 let option_fcse = ref true
 let option_fcse2 = ref false
+
 let option_fcse3 = ref true
 let option_fcse3_alias_analysis = ref true
 let option_fcse3_across_calls = ref false
 let option_fcse3_across_merges = ref true
 let option_fcse3_glb = ref true
+let option_fcse3_trivial_ops = ref false
+let option_fcse3_refine = ref true
+let option_fcse3_conditions = ref true
+                        
 let option_fredundancy = ref true
-let option_fduplicate = ref (-1)
-let option_finvertcond = ref true
-let option_ftracelinearize = ref false
+
+(** Options relative to superblock scheduling *)
+let option_fpredict = ref true (* insert static branch prediction information, and swaps ifso/ifnot branches accordingly *)
+let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size n *)
+let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *)
+let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *)
+let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
+let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
+
+(* Scheduling *)
+let option_mtune = ref ""
+                
+let option_fprepass = ref true
+let option_fprepass_sched = ref "list"
+
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
+
 let option_fifconversion = ref true
 let option_Obranchless = ref false
 let option_falignfunctions = ref (None: int option)
@@ -88,10 +106,11 @@ let option_div_i32 = ref "stsud"
 let option_div_i64 = ref "stsud" 
 let option_fcoalesce_mem = ref true
 let option_fforward_moves = ref false
-let option_fmove_loop_invariants = ref true
+let option_fmove_loop_invariants = ref false
 let option_fnontrap_loads = ref true
 let option_all_loads_nontrap = ref false
 let option_inline_auto_threshold = ref 0
 let option_profile_arcs = ref false
 let option_fbranch_probabilities = ref true
 let option_debug_compcert = ref 0
+let main_function_name = ref "main"
diff --git a/driver/CommonOptions.ml b/driver/CommonOptions.ml
index c151ecf2..e8a6941c 100644
--- a/driver/CommonOptions.ml
+++ b/driver/CommonOptions.ml
@@ -15,8 +15,9 @@ open Commandline
 
 (* The version string for [tool_name] *)
 let version_string tool_name =
-  if Version.buildnr <> "" && Version.tag <> "" then
-    Printf.sprintf "The CompCert %s, Release: %s, Build: %s, Tag: %s\n" tool_name Version.version Version.buildnr Version.tag
+  if Version.buildnr <> "" && Version.tag <> "" && Version.branch <> "" then
+    Printf.sprintf "The CompCert %s, Release: %s, Build: %s, Tag: %s, Branch: %s\n"
+      tool_name Version.version Version.buildnr Version.tag Version.branch
   else
     Printf.sprintf "The CompCert %s, version %s\n" tool_name Version.version
 
@@ -26,7 +27,7 @@ let print_version_and_exit tool_name () =
 
 let version_options tool_name =
   [ Exact "-version", Unit (print_version_and_exit tool_name);
-    Exact "--version", Unit (print_version_and_exit tool_name);]
+    Exact "--version", Unit (print_version_and_exit tool_name) ]
 
 (* Language support options *)
 
@@ -76,6 +77,7 @@ let general_help =
   -v             Print external commands before invoking them
   -timings       Show the time spent in various compiler passes
   -version       Print the version string and exit
+  -version-file <file> Print version inforation to <file> and exit
   -target <value> Generate code for the given target
   -conf <file>   Read configuration from file
   @<file>        Read command line options from <file>
@@ -86,4 +88,4 @@ let general_options =
     Exact "-target", Ignore;(* Ignore option since it is already handled *)
     Exact "-v", Set option_v;
     Exact "-stdlib", String(fun s -> stdlib_path := s);
-    Exact "-timings", Set option_timings;]
+    Exact "-timings", Set option_timings ]
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index 0f59aab7..a751b232 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -35,6 +35,7 @@ Require Cshmgen.
 Require Cminorgen.
 Require Selection.
 Require RTLgen.
+Require Import Duplicatepasses.
 EXPAND_RTL_REQUIRE
 Require Asmgen.
 (** Proofs of semantic preservation. *)
@@ -53,7 +54,7 @@ Require Import Compopts.
 Parameter print_Clight: Clight.program -> unit.
 Parameter print_Cminor: Cminor.program -> unit.
 Parameter print_RTL: Z -> RTL.program -> unit.
-Parameter print_LTL: LTL.program -> unit.
+Parameter print_LTL: Z -> LTL.program -> unit.
 Parameter print_Mach: Mach.program -> unit.
 
 Local Open Scope string_scope.
@@ -297,6 +298,14 @@ EXPAND_ASM_SEMANTICS
     eapply RTLgenproof.transf_program_correct; eassumption.
 EXPAND_RTL_FORWARD_SIMULATIONS
   eapply compose_forward_simulations.
+    eapply RTLpathLivegenproof.transf_program_correct; eassumption.
+  pose proof RTLpathLivegenproof.all_fundef_liveness_ok as X.
+    refine (modusponens _ _ (X _ _ _) _); eauto. intro.
+  eapply compose_forward_simulations.
+    eapply RTLpathSchedulerproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
+    eapply RTLpathproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
     eapply Allocationproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply Tunnelingproof.transf_program_correct; eassumption.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index d576ede6..65264124 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -27,9 +27,6 @@ Parameter generate_float_constants: unit -> bool.
 (** For value analysis.  Currently always false. *)
 Parameter va_strict: unit -> bool.
 
-(** Flag -fduplicate. Branch prediction annotation + tail duplication *)
-Parameter optim_duplicate: unit -> bool.
-
 (** Flag -ftailcalls.  For tail call optimization. *)
 Parameter optim_tailcalls: unit -> bool.
 
@@ -57,6 +54,12 @@ Parameter optim_CSE3_across_merges: unit -> bool.
 (** Flag -fcse3-glb *)
 Parameter optim_CSE3_glb: unit -> bool.
 
+(** Flag -fcse3-trivial-ops. For DMonniaux's common subexpression elimination, simplify trivial operations as well. *)
+Parameter optim_CSE3_trivial_ops: unit -> bool.
+
+(** Flag -fcse3-conditions. For DMonniaux's common subexpression elimination: remove redundant conditional branches. *)
+Parameter optim_CSE3_conditions: unit -> bool.
+
 (** Flag -fmove-loop-invariants. *)
 Parameter optim_move_loop_invariants: unit -> bool.
 
diff --git a/driver/Configuration.ml b/driver/Configuration.ml
index 1d40214a..ecc2aba6 100644
--- a/driver/Configuration.ml
+++ b/driver/Configuration.ml
@@ -126,6 +126,7 @@ let arch =
   | "powerpc"|"arm"|"x86"|"riscV"|"kvx"|"aarch64" as a -> a
   | v -> bad_config "arch" [v]
 let model = get_config_string "model"
+let os = get_config_string "os"
 let abi = get_config_string "abi"
 let is_big_endian =
   match get_config_string "endianness" with
diff --git a/driver/Configuration.mli b/driver/Configuration.mli
index a71da72d..75e547ff 100644
--- a/driver/Configuration.mli
+++ b/driver/Configuration.mli
@@ -19,6 +19,9 @@ val model: string
 val abi: string
   (** ABI to use *)
 
+val os: string
+  (** ABI to use *)
+
 val is_big_endian: bool
   (** Endianness to use *)
 
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 90afb812..c9eacadc 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -202,20 +202,26 @@ Processing options:
   -fcse3-across-calls   Propagate CSE3 information across function calls [off]
   -fcse3-across-merges  Propagate CSE3 information across control-flow merges [on]
   -fcse3-glb            Refine CSE3 information using greatest lower bounds [on]
+  -fcse3-trivial-ops    Replace trivial operations as well using CSE3 [off]
+  -fcse3-refine         Refine CSE3 invariants by descending iteration [on]
+  -fcse3-conditions     Remove redundant conditions using CSE3 [on]
   -fmove-loop-invariants Perform loop-invariant code motion [off]
   -fredundancy   Perform redundancy elimination [on]
-  -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
+  -mtune=         Type of CPU (for scheduling on some architectures)
+  -fprepass      Perform prepass scheduling (only on some architectures) [on]
+  -fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
+                   (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -fpostpass     Perform postpass scheduling (only for K1 architecture) [on] 
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
-  -fduplicate <nb_nodes> Perform tail duplication to form superblocks on predicted traces
-    nb_nodes control the heuristic deciding to duplicate or not
-    A value of -1 desactivates the entire pass (including branch prediction)
-    A value of 0 desactivates the duplication (but activates the branch prediction)
-    FIXME : this is desactivated by default for now
-    -finvertcond    Invert conditions based on predicted paths (to prefer fallthrough).
-                    Requires -fduplicate to be also activated [on]
-    -ftracelinearize Linearizes based on the traces identified by duplicate phase
-                    It is heavily recommended to activate -finvertcond with this pass [off]
+  -fpredict      Insert static branch prediction information [on]
+                 Also swaps ifso/ifnot branches accordingly at RTL level
+  -ftailduplicate n Perform tail duplication for RTL code blocks of size n (not counting Inops) [0]
+  -ftracelinearize  Uses branch prediction information to improve the Linearize [on]
+  -funrollsingle n  Unrolls a single iteration of innermost loops of size n (not counting Inops) [0]
+  -funrollbody n  Unrolls once the body of innermost loops of size n (not counting Inops) [0]
+  -flooprotate n  Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0]
+                  Doesn't duplicate if the size of that header is strictly greater than n
   -fforward-moves   Forward moves after CSE
   -finline       Perform inlining of functions [on]
   -finline-functions-called-once Integrate functions only required by their
@@ -258,17 +264,12 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>)
   -trace         Have the interpreter produce a detailed trace of reductions
   -random        Randomize execution order
   -all           Simulate all possible execution orders
+  -main <name>   Start executing at function <name> instead of main()
 |}
 
 let print_usage_and_exit () =
   printf "%s" usage_string; exit 0
 
-let enforce_buildnr nr =
-  let build = int_of_string Version.buildnr in
-  if nr != build then
-    fatal_error no_loc "Mismatching builds: This is CompCert build %d, but QSK requires build %d.\n\
-Please use matching builds of QSK and CompCert." build nr
-
 let dump_mnemonics destfile =
   let oc = open_out_bin destfile in
   let pp = Format.formatter_of_out_channel oc in
@@ -280,6 +281,7 @@ let dump_mnemonics destfile =
 let optimization_options = [
     option_ftailcalls; option_fifconversion; option_fconstprop;
     option_fcse; option_fcse2; option_fcse3;
+    option_fpredict; option_ftracelinearize;
     option_fpostpass;
     option_fredundancy; option_finline; option_finline_functions_called_once;
 ]
@@ -314,10 +316,7 @@ let cmdline_actions =
   @ version_options tool_name @
 (* Enforcing CompCert build numbers for QSKs and mnemonics dump *)
   (if Version.buildnr <> "" then
-    [ Exact "-qsk-enforce-build", Integer enforce_buildnr;
-      Exact "--qsk-enforce-build", Integer enforce_buildnr;
-      Exact "-dump-mnemonics", String  dump_mnemonics;
-    ]
+     [Exact "-dump-mnemonics", String  dump_mnemonics;]
    else []) @
 (* Processing options *)
  [ Exact "-c", Set option_c;
@@ -403,7 +402,8 @@ let cmdline_actions =
   Exact "-quiet", Unit (fun () -> Interp.trace := 0);
   Exact "-trace", Unit (fun () -> Interp.trace := 2);
   Exact "-random", Unit (fun () -> Interp.mode := Interp.Random);
-  Exact "-all", Unit (fun () -> Interp.mode := Interp.All)
+  Exact "-all", Unit (fun () -> Interp.mode := Interp.All);
+  Exact "-main", String (fun s -> main_function_name := s)
  ]
 (* Optimization options *)
 (* -f options: come in -f and -fno- variants *)
@@ -417,12 +417,21 @@ let cmdline_actions =
   @ f_opt "cse3-across-calls" option_fcse3_across_calls
   @ f_opt "cse3-across-merges" option_fcse3_across_merges
   @ f_opt "cse3-glb" option_fcse3_glb
+  @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops
+  @ f_opt "cse3-refine" option_fcse3_refine
+  @ f_opt "cse3-conditions" option_fcse3_conditions
   @ f_opt "move-loop-invariants" option_fmove_loop_invariants
   @ f_opt "redundancy" option_fredundancy
+  @ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
+  @ f_opt "prepass" option_fprepass
   @ f_opt "postpass" option_fpostpass
-  @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ]
-  @ f_opt "invertcond" option_finvertcond
+  @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ]
+  @ f_opt "predict" option_fpredict
+  @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ]
+  @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
+  @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
   @ f_opt "tracelinearize" option_ftracelinearize
+  @ f_opt_str "prepass" option_fprepass option_fprepass_sched
   @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
   @ f_opt "inline" option_finline
   @ f_opt "inline-functions-called-once" option_finline_functions_called_once
@@ -483,6 +492,8 @@ let _ =
       fatal_error no_loc "ambiguous '-o' option (multiple source files)";
     if !num_input_files = 0 then
       fatal_error no_loc "no input file";
+    if not !option_interp && !main_function_name <> "main" then
+      fatal_error no_loc "option '-main' requires option '-interp'";
     let linker_args = time "Total compilation time" perform_actions () in
     if not (nolink ()) && linker_args <> [] then begin
       linker (output_filename_default "a.out") linker_args
diff --git a/driver/Frontend.ml b/driver/Frontend.ml
index 5db0040f..c8890046 100644
--- a/driver/Frontend.ml
+++ b/driver/Frontend.ml
@@ -75,6 +75,7 @@ let preprocess ifile ofile =
 let parse_c_file sourcename ifile =
   Debug.init_compile_unit sourcename;
   Sections.initialize();
+  CPragmas.reset();
   (* Simplification options *)
   let simplifs =
     "b" (* blocks: mandatory *)
@@ -116,7 +117,10 @@ let init () =
     | "riscV"   -> if Configuration.model = "64"
                    then Machine.rv64
                    else Machine.rv32
-    | "kvx" -> Machine.kvx
+    | "kvx" -> if Configuration.os = "cos" then Machine.kvxcos
+               else if Configuration.os = "mbr" then Machine.kvxmbr
+               else (Printf.eprintf "Configuration OS = %s\n" Configuration.os;
+                 failwith "Wrong OS configuration for KVX")
     | "aarch64" -> Machine.aarch64
     | _         -> assert false
   end;
diff --git a/driver/Interp.ml b/driver/Interp.ml
index d4286779..6c83e819 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -587,41 +587,60 @@ let world_program prog =
 
 (* Massaging the program to get a suitable "main" function *)
 
-let change_main_function p old_main old_main_ty =
-  let old_main = Evalof(Evar(old_main, old_main_ty), old_main_ty) in
+let change_main_function p new_main_fn =
+  let new_main_id = intern_string "%main%" in
+  { p with
+    Ctypes.prog_main = new_main_id;
+    Ctypes.prog_defs =
+      (new_main_id, Gfun(Internal new_main_fn)) :: p.Ctypes.prog_defs }
+
+let call_main3_function main_id main_ty =
+  let main_var = Evalof(Evar(main_id, main_ty), main_ty) in
   let arg1 = Eval(Vint(coqint_of_camlint 0l), type_int32s) in
   let arg2 = arg1 in
   let body =
-    Sreturn(Some(Ecall(old_main, Econs(arg1, Econs(arg2, Enil)), type_int32s))) in
-  let new_main_fn =
-    { fn_return = type_int32s; fn_callconv = cc_default;
-      fn_params = []; fn_vars = []; fn_body = body } in
-  let new_main_id = intern_string "___main" in
-  { prog_main = new_main_id;
-    Ctypes.prog_defs = (new_main_id, Gfun(Ctypes.Internal new_main_fn)) :: p.Ctypes.prog_defs;
-    Ctypes.prog_public = p.Ctypes.prog_public;
-    prog_types = p.prog_types;
-    prog_comp_env = p.prog_comp_env }
+    Sreturn(Some(Ecall(main_var, Econs(arg1, Econs(arg2, Enil)), type_int32s)))
+  in
+  { fn_return = type_int32s; fn_callconv = cc_default;
+    fn_params = []; fn_vars = []; fn_body = body }
+
+let call_other_main_function main_id main_ty main_ty_res =
+  let main_var = Evalof(Evar(main_id, main_ty), main_ty) in
+  let body =
+    Ssequence(Sdo(Ecall(main_var, Enil, main_ty_res)),
+              Sreturn(Some(Eval(Vint(coqint_of_camlint 0l), type_int32s)))) in
+  { fn_return = type_int32s; fn_callconv = cc_default;
+    fn_params = []; fn_vars = []; fn_body = body }
 
 let rec find_main_function name = function
   | [] -> None
-  | (id, Gfun fd) :: gdl -> if id = name then Some fd else find_main_function name gdl
-  | (id, Gvar v) :: gdl -> find_main_function name gdl
+  | (id, Gfun fd) :: gdl ->
+       if id = name then Some fd else find_main_function name gdl
+  | (id, Gvar v) :: gdl ->
+       find_main_function name gdl
 
 let fixup_main p =
   match find_main_function p.Ctypes.prog_main p.Ctypes.prog_defs with
   | None ->
-      fprintf err_formatter "ERROR: no main() function@.";
+      fprintf err_formatter "ERROR: no entry function %s()@."
+                            (extern_atom p.Ctypes.prog_main);
       None
   | Some main_fd ->
       match type_of_fundef main_fd with
       | Tfunction(Tnil, Ctypes.Tint(I32, Signed, _), _) ->
           Some p
-      | Tfunction(Tcons(Ctypes.Tint _, Tcons(Tpointer(Tpointer(Ctypes.Tint(I8,_,_),_),_), Tnil)),
+      | Tfunction(Tcons(Ctypes.Tint _,
+                  Tcons(Tpointer(Tpointer(Ctypes.Tint(I8,_,_),_),_), Tnil)),
                   Ctypes.Tint _, _) as ty ->
-          Some (change_main_function p p.Ctypes.prog_main ty)
+          Some (change_main_function p
+                   (call_main3_function p.Ctypes.prog_main ty))
+      | Tfunction(Tnil, ty_res, _) as ty ->
+          Some (change_main_function p
+                   (call_other_main_function p.Ctypes.prog_main ty ty_res))
       | _ ->
-          fprintf err_formatter "ERROR: wrong type for main() function@.";
+          fprintf err_formatter
+             "ERROR: wrong type for entry function %s()@."
+             (extern_atom p.Ctypes.prog_main);
           None
 
 (* Execution of a whole program *)
diff --git a/exportclight/Clightdefs.v b/exportclight/Clightdefs.v
index 83d82d88..8af920df 100644
--- a/exportclight/Clightdefs.v
+++ b/exportclight/Clightdefs.v
@@ -15,7 +15,7 @@
 
 (** All imports and definitions used by .v Clight files generated by clightgen *)
 
-From Coq Require Import String List ZArith.
+From Coq Require Import Ascii String List ZArith.
 From compcert Require Import Integers Floats Maps Errors AST Ctypes Cop Clight.
 
 Definition tvoid := Tvoid.
@@ -80,3 +80,212 @@ Definition mkprogram (types: list composite_definition)
      prog_types := types;
      prog_comp_env := ce;
      prog_comp_env_eq := EQ |}.
+
+(** The following encoding of character strings as positive numbers
+    must be kept consistent with the OCaml function [Camlcoq.pos_of_string]. *)
+
+Definition append_bit_pos (b: bool) (p: positive) : positive :=
+  if b then xI p else xO p.
+
+Definition append_char_pos_default (c: ascii) (p: positive) : positive :=
+  let '(Ascii b7 b6 b5 b4 b3 b2 b1 b0) := c in
+  xI (xI (xI (xI (xI (xI
+    (append_bit_pos b0 (append_bit_pos b1
+    (append_bit_pos b2 (append_bit_pos b3
+    (append_bit_pos b4 (append_bit_pos b5
+    (append_bit_pos b6 (append_bit_pos b7 p))))))))))))).
+
+Definition append_char_pos (c: ascii) (p: positive) : positive :=
+  match c with
+  | "0"%char => xO (xO (xO (xO (xO (xO p)))))
+  | "1"%char => xI (xO (xO (xO (xO (xO p)))))
+  | "2"%char => xO (xI (xO (xO (xO (xO p)))))
+  | "3"%char => xI (xI (xO (xO (xO (xO p)))))
+  | "4"%char => xO (xO (xI (xO (xO (xO p)))))
+  | "5"%char => xI (xO (xI (xO (xO (xO p)))))
+  | "6"%char => xO (xI (xI (xO (xO (xO p)))))
+  | "7"%char => xI (xI (xI (xO (xO (xO p)))))
+  | "8"%char => xO (xO (xO (xI (xO (xO p)))))
+  | "9"%char => xI (xO (xO (xI (xO (xO p)))))
+  | "a"%char => xO (xI (xO (xI (xO (xO p)))))
+  | "b"%char => xI (xI (xO (xI (xO (xO p)))))
+  | "c"%char => xO (xO (xI (xI (xO (xO p)))))
+  | "d"%char => xI (xO (xI (xI (xO (xO p)))))
+  | "e"%char => xO (xI (xI (xI (xO (xO p)))))
+  | "f"%char => xI (xI (xI (xI (xO (xO p)))))
+  | "g"%char => xO (xO (xO (xO (xI (xO p)))))
+  | "h"%char => xI (xO (xO (xO (xI (xO p)))))
+  | "i"%char => xO (xI (xO (xO (xI (xO p)))))
+  | "j"%char => xI (xI (xO (xO (xI (xO p)))))
+  | "k"%char => xO (xO (xI (xO (xI (xO p)))))
+  | "l"%char => xI (xO (xI (xO (xI (xO p)))))
+  | "m"%char => xO (xI (xI (xO (xI (xO p)))))
+  | "n"%char => xI (xI (xI (xO (xI (xO p)))))
+  | "o"%char => xO (xO (xO (xI (xI (xO p)))))
+  | "p"%char => xI (xO (xO (xI (xI (xO p)))))
+  | "q"%char => xO (xI (xO (xI (xI (xO p)))))
+  | "r"%char => xI (xI (xO (xI (xI (xO p)))))
+  | "s"%char => xO (xO (xI (xI (xI (xO p)))))
+  | "t"%char => xI (xO (xI (xI (xI (xO p)))))
+  | "u"%char => xO (xI (xI (xI (xI (xO p)))))
+  | "v"%char => xI (xI (xI (xI (xI (xO p)))))
+  | "w"%char => xO (xO (xO (xO (xO (xI p)))))
+  | "x"%char => xI (xO (xO (xO (xO (xI p)))))
+  | "y"%char => xO (xI (xO (xO (xO (xI p)))))
+  | "z"%char => xI (xI (xO (xO (xO (xI p)))))
+  | "A"%char => xO (xO (xI (xO (xO (xI p)))))
+  | "B"%char => xI (xO (xI (xO (xO (xI p)))))
+  | "C"%char => xO (xI (xI (xO (xO (xI p)))))
+  | "D"%char => xI (xI (xI (xO (xO (xI p)))))
+  | "E"%char => xO (xO (xO (xI (xO (xI p)))))
+  | "F"%char => xI (xO (xO (xI (xO (xI p)))))
+  | "G"%char => xO (xI (xO (xI (xO (xI p)))))
+  | "H"%char => xI (xI (xO (xI (xO (xI p)))))
+  | "I"%char => xO (xO (xI (xI (xO (xI p)))))
+  | "J"%char => xI (xO (xI (xI (xO (xI p)))))
+  | "K"%char => xO (xI (xI (xI (xO (xI p)))))
+  | "L"%char => xI (xI (xI (xI (xO (xI p)))))
+  | "M"%char => xO (xO (xO (xO (xI (xI p)))))
+  | "N"%char => xI (xO (xO (xO (xI (xI p)))))
+  | "O"%char => xO (xI (xO (xO (xI (xI p)))))
+  | "P"%char => xI (xI (xO (xO (xI (xI p)))))
+  | "Q"%char => xO (xO (xI (xO (xI (xI p)))))
+  | "R"%char => xI (xO (xI (xO (xI (xI p)))))
+  | "S"%char => xO (xI (xI (xO (xI (xI p)))))
+  | "T"%char => xI (xI (xI (xO (xI (xI p)))))
+  | "U"%char => xO (xO (xO (xI (xI (xI p)))))
+  | "V"%char => xI (xO (xO (xI (xI (xI p)))))
+  | "W"%char => xO (xI (xO (xI (xI (xI p)))))
+  | "X"%char => xI (xI (xO (xI (xI (xI p)))))
+  | "Y"%char => xO (xO (xI (xI (xI (xI p)))))
+  | "Z"%char => xI (xO (xI (xI (xI (xI p)))))
+  | "_"%char => xO (xI (xI (xI (xI (xI p)))))
+  | _ => append_char_pos_default c p
+  end.
+
+Fixpoint ident_of_string (s: string) : ident :=
+  match s with
+  | EmptyString => xH
+  | String c s => append_char_pos c (ident_of_string s)
+  end.
+
+(** A convenient notation [$ "ident"] to force evaluation of
+    [ident_of_string "ident"] *)
+
+Ltac ident_of_string s :=
+  let x := constr:(ident_of_string s) in
+  let y := eval compute in x in
+  exact y.
+
+Notation "$ s" := (ltac:(ident_of_string s))
+                  (at level 1, only parsing) : string_scope.
+
+(** The inverse conversion, from encoded strings to strings *)
+
+Section DECODE_BITS.
+
+Variable rec: positive -> string.
+
+Fixpoint decode_n_bits (n: nat) (l: list bool) (p: positive) : string :=
+  match n with
+  | O => 
+      match l with 
+      | b7 :: b6 :: b5 :: b4 :: b3 :: b2 :: b1 :: b0 :: _ =>
+          String (Ascii b7 b6 b5 b4 b3 b2 b1 b0) (rec p)
+      | _ => EmptyString
+      end
+  | S n =>
+      match p with
+      | xO q => decode_n_bits n (false :: l) q
+      | xI q => decode_n_bits n (true :: l) q
+      | xH => EmptyString
+      end
+  end.
+
+Definition decode_8_bits := Eval compute in (decode_n_bits 8%nat nil).
+
+End DECODE_BITS.
+
+Fixpoint string_of_ident (p: positive) : string :=
+  match p with
+  | xO (xO (xO (xO (xO (xO p))))) => String "0"%char (string_of_ident p)
+  | xI (xO (xO (xO (xO (xO p))))) => String "1"%char (string_of_ident p)
+  | xO (xI (xO (xO (xO (xO p))))) => String "2"%char (string_of_ident p)
+  | xI (xI (xO (xO (xO (xO p))))) => String "3"%char (string_of_ident p)
+  | xO (xO (xI (xO (xO (xO p))))) => String "4"%char (string_of_ident p)
+  | xI (xO (xI (xO (xO (xO p))))) => String "5"%char (string_of_ident p)
+  | xO (xI (xI (xO (xO (xO p))))) => String "6"%char (string_of_ident p)
+  | xI (xI (xI (xO (xO (xO p))))) => String "7"%char (string_of_ident p)
+  | xO (xO (xO (xI (xO (xO p))))) => String "8"%char (string_of_ident p)
+  | xI (xO (xO (xI (xO (xO p))))) => String "9"%char (string_of_ident p)
+  | xO (xI (xO (xI (xO (xO p))))) => String "a"%char (string_of_ident p)
+  | xI (xI (xO (xI (xO (xO p))))) => String "b"%char (string_of_ident p)
+  | xO (xO (xI (xI (xO (xO p))))) => String "c"%char (string_of_ident p)
+  | xI (xO (xI (xI (xO (xO p))))) => String "d"%char (string_of_ident p)
+  | xO (xI (xI (xI (xO (xO p))))) => String "e"%char (string_of_ident p)
+  | xI (xI (xI (xI (xO (xO p))))) => String "f"%char (string_of_ident p)
+  | xO (xO (xO (xO (xI (xO p))))) => String "g"%char (string_of_ident p)
+  | xI (xO (xO (xO (xI (xO p))))) => String "h"%char (string_of_ident p)
+  | xO (xI (xO (xO (xI (xO p))))) => String "i"%char (string_of_ident p)
+  | xI (xI (xO (xO (xI (xO p))))) => String "j"%char (string_of_ident p)
+  | xO (xO (xI (xO (xI (xO p))))) => String "k"%char (string_of_ident p)
+  | xI (xO (xI (xO (xI (xO p))))) => String "l"%char (string_of_ident p)
+  | xO (xI (xI (xO (xI (xO p))))) => String "m"%char (string_of_ident p)
+  | xI (xI (xI (xO (xI (xO p))))) => String "n"%char (string_of_ident p)
+  | xO (xO (xO (xI (xI (xO p))))) => String "o"%char (string_of_ident p)
+  | xI (xO (xO (xI (xI (xO p))))) => String "p"%char (string_of_ident p)
+  | xO (xI (xO (xI (xI (xO p))))) => String "q"%char (string_of_ident p)
+  | xI (xI (xO (xI (xI (xO p))))) => String "r"%char (string_of_ident p)
+  | xO (xO (xI (xI (xI (xO p))))) => String "s"%char (string_of_ident p)
+  | xI (xO (xI (xI (xI (xO p))))) => String "t"%char (string_of_ident p)
+  | xO (xI (xI (xI (xI (xO p))))) => String "u"%char (string_of_ident p)
+  | xI (xI (xI (xI (xI (xO p))))) => String "v"%char (string_of_ident p)
+  | xO (xO (xO (xO (xO (xI p))))) => String "w"%char (string_of_ident p)
+  | xI (xO (xO (xO (xO (xI p))))) => String "x"%char (string_of_ident p)
+  | xO (xI (xO (xO (xO (xI p))))) => String "y"%char (string_of_ident p)
+  | xI (xI (xO (xO (xO (xI p))))) => String "z"%char (string_of_ident p)
+  | xO (xO (xI (xO (xO (xI p))))) => String "A"%char (string_of_ident p)
+  | xI (xO (xI (xO (xO (xI p))))) => String "B"%char (string_of_ident p)
+  | xO (xI (xI (xO (xO (xI p))))) => String "C"%char (string_of_ident p)
+  | xI (xI (xI (xO (xO (xI p))))) => String "D"%char (string_of_ident p)
+  | xO (xO (xO (xI (xO (xI p))))) => String "E"%char (string_of_ident p)
+  | xI (xO (xO (xI (xO (xI p))))) => String "F"%char (string_of_ident p)
+  | xO (xI (xO (xI (xO (xI p))))) => String "G"%char (string_of_ident p)
+  | xI (xI (xO (xI (xO (xI p))))) => String "H"%char (string_of_ident p)
+  | xO (xO (xI (xI (xO (xI p))))) => String "I"%char (string_of_ident p)
+  | xI (xO (xI (xI (xO (xI p))))) => String "J"%char (string_of_ident p)
+  | xO (xI (xI (xI (xO (xI p))))) => String "K"%char (string_of_ident p)
+  | xI (xI (xI (xI (xO (xI p))))) => String "L"%char (string_of_ident p)
+  | xO (xO (xO (xO (xI (xI p))))) => String "M"%char (string_of_ident p)
+  | xI (xO (xO (xO (xI (xI p))))) => String "N"%char (string_of_ident p)
+  | xO (xI (xO (xO (xI (xI p))))) => String "O"%char (string_of_ident p)
+  | xI (xI (xO (xO (xI (xI p))))) => String "P"%char (string_of_ident p)
+  | xO (xO (xI (xO (xI (xI p))))) => String "Q"%char (string_of_ident p)
+  | xI (xO (xI (xO (xI (xI p))))) => String "R"%char (string_of_ident p)
+  | xO (xI (xI (xO (xI (xI p))))) => String "S"%char (string_of_ident p)
+  | xI (xI (xI (xO (xI (xI p))))) => String "T"%char (string_of_ident p)
+  | xO (xO (xO (xI (xI (xI p))))) => String "U"%char (string_of_ident p)
+  | xI (xO (xO (xI (xI (xI p))))) => String "V"%char (string_of_ident p)
+  | xO (xI (xO (xI (xI (xI p))))) => String "W"%char (string_of_ident p)
+  | xI (xI (xO (xI (xI (xI p))))) => String "X"%char (string_of_ident p)
+  | xO (xO (xI (xI (xI (xI p))))) => String "Y"%char (string_of_ident p)
+  | xI (xO (xI (xI (xI (xI p))))) => String "Z"%char (string_of_ident p)
+  | xO (xI (xI (xI (xI (xI p))))) => String "_"%char (string_of_ident p)
+  | xI (xI (xI (xI (xI (xI p))))) => decode_8_bits string_of_ident p
+  | _ => EmptyString
+  end.
+
+Lemma string_of_ident_of_string:
+  forall s, string_of_ident (ident_of_string s) = s.
+Proof.
+  induction s as [ | c s]; simpl.
+- auto.
+- rewrite <- IHs at 2. destruct c as [[] [] [] [] [] [] [] []]; reflexivity.
+Qed.
+
+Corollary ident_of_string_injective:
+  forall s1 s2, ident_of_string s1 = ident_of_string s2 -> s1 = s2.
+Proof.
+  intros. rewrite <- (string_of_ident_of_string s1), <- (string_of_ident_of_string s2).
+  congruence.
+Qed.
diff --git a/exportclight/Clightgen.ml b/exportclight/Clightgen.ml
index f7279a5e..5e27370e 100644
--- a/exportclight/Clightgen.ml
+++ b/exportclight/Clightgen.ml
@@ -91,13 +91,15 @@ let process_i_file sourcename =
   compile_c_file sourcename sourcename ofile
 
 let usage_string =
-  version_string tool_name^
+  version_string tool_name ^
 {|Usage: clightgen [options] <source files>
 Recognized source files:
   .c             C source file
   .i or .p       C source file that should not be preprocessed
 Processing options:
   -normalize     Normalize the generated Clight code w.r.t. loads in expressions
+  -canonical-idents  Use canonical numbers to represent identifiers  (default)
+  -short-idents  Use small, non-canonical numbers to represent identifiers
   -E             Preprocess only, send result to standard output
   -o <file>      Generate output in <file>
 |} ^
@@ -142,6 +144,8 @@ let cmdline_actions =
 (* Processing options *)
  [ Exact "-E", Set option_E;
   Exact "-normalize", Set option_normalize;
+  Exact "-canonical-idents", Set Camlcoq.use_canonical_atoms;
+  Exact "-short-idents", Unset Camlcoq.use_canonical_atoms;
   Exact "-o", String(fun s -> option_o := Some s);
   Prefix "-o", Self (fun s -> let s = String.sub s 2 ((String.length s) - 2) in
                               option_o := Some s);]
@@ -175,12 +179,13 @@ let cmdline_actions =
   ]
 
 let _ =
-  try
+try
   Gc.set { (Gc.get()) with
               Gc.minor_heap_size = 524288; (* 512k *)
               Gc.major_heap_increment = 4194304 (* 4M *)
          };
   Printexc.record_backtrace true;
+  Camlcoq.use_canonical_atoms := true;
   Frontend.init ();
   parse_cmdline cmdline_actions;
   if !option_o <> None && !num_input_files >= 2 then
@@ -188,7 +193,7 @@ let _ =
   if !num_input_files = 0 then
     fatal_error no_loc "no input file";
   perform_actions ()
-      with
+with
   | Sys_error msg
   | CmdError msg -> error no_loc "%s" msg; exit 2
   | Abort -> exit 2
diff --git a/exportclight/Clightnorm.ml b/exportclight/Clightnorm.ml
index a0001250..a6158b60 100644
--- a/exportclight/Clightnorm.ml
+++ b/exportclight/Clightnorm.ml
@@ -143,7 +143,18 @@ and norm_lbl_stmt ls =
   | LSnil -> LSnil
   | LScons(n, s, ls) -> LScons(n, norm_stmt s, norm_lbl_stmt ls)
 
-let next_var curr (v, _) = if P.lt v curr then curr else P.succ v
+(* In "canonical atoms" mode, temporaries are between 2^7 and 2^12 - 1.
+   Below 2^7 are single-letter identifiers and above 2^12 are all
+   other identifiers. *)
+
+let first_temp = P.of_int 128
+let last_temp  = P.of_int 4095
+
+let next_var curr (v, _) =
+  if P.lt v curr
+  || !use_canonical_atoms && (P.lt v first_temp || P.gt v last_temp)
+  then curr
+  else P.succ v
 
 let next_var_list vars start = List.fold_left next_var start vars
 
diff --git a/exportclight/ExportClight.ml b/exportclight/ExportClight.ml
index c9d6fced..4ff901eb 100644
--- a/exportclight/ExportClight.ml
+++ b/exportclight/ExportClight.ml
@@ -43,6 +43,48 @@ let print_list fn p l =
       in plist l;
       fprintf p ")@]"
 
+(* Numbers *)
+
+let coqint p n =
+  let n = camlint_of_coqint n in
+  if n >= 0l
+  then fprintf p "(Int.repr %ld)" n
+  else fprintf p "(Int.repr (%ld))" n
+
+let coqptrofs p n =
+  let s = Z.to_string n in
+  if Z.ge n Z.zero
+  then fprintf p "(Ptrofs.repr %s)" s
+  else fprintf p "(Ptrofs.repr (%s))" s
+
+let coqint64 p n =
+  let n = camlint64_of_coqint n in
+  if n >= 0L
+  then fprintf p "(Int64.repr %Ld)" n
+  else fprintf p "(Int64.repr (%Ld))" n
+
+let coqfloat p n =
+  fprintf p "(Float.of_bits %a)" coqint64 (Floats.Float.to_bits n)
+
+let coqsingle p n =
+  fprintf p "(Float32.of_bits %a)" coqint (Floats.Float32.to_bits n)
+
+let positive p n =
+  fprintf p "%s%%positive" (Z.to_string (Z.Zpos n))
+
+let coqN p n =
+  fprintf p "%s%%N" (Z.to_string (Z.of_N n))
+
+let coqZ p n =
+  if Z.ge n Z.zero
+  then fprintf p "%s" (Z.to_string n)
+  else fprintf p "(%s)" (Z.to_string n)
+
+(* Coq strings *)
+
+let coqstring p s =
+  fprintf p "\"%s\"" (camlstring_of_coqstring s)
+
 (* Identifiers *)
 
 exception Not_an_identifier
@@ -69,7 +111,7 @@ let ident p id =
     let s = Hashtbl.find temp_names id in
     fprintf p "%s" s
   with Not_found ->
-    fprintf p "%ld%%positive" (P.to_int32 id)
+    positive p id
 
 let iter_hashtbl_sorted (h: ('a, string) Hashtbl.t) (f: 'a * string -> unit) =
   List.iter f
@@ -81,65 +123,33 @@ let define_idents p =
     string_of_atom
     (fun (id, name) ->
       try
-        fprintf p "Definition _%s : ident := %ld%%positive.@ "
-                  (sanitize name) (P.to_int32 id)
+        if !use_canonical_atoms && id = pos_of_string name then
+          fprintf p "Definition _%s : ident := $\"%s\".@ "
+                    (sanitize name) name
+        else
+          fprintf p "Definition _%s : ident := %a.@ "
+                    (sanitize name) positive id
       with Not_an_identifier ->
         ());
   iter_hashtbl_sorted
     temp_names
     (fun (id, name) ->
-      fprintf p "Definition %s : ident := %ld%%positive.@ "
-                name (P.to_int32 id));
+      fprintf p "Definition %s : ident := %a.@ "
+                name positive id);
   fprintf p "@ "
 
 let name_temporary t =
-  let t1 = P.to_int t and t0 = P.to_int (first_unused_ident ()) in
-  if t1 >= t0 && not (Hashtbl.mem temp_names t)
-  then Hashtbl.add temp_names t (sprintf "_t'%d" (t1 - t0 + 1))
+  if not (Hashtbl.mem string_of_atom t) && not (Hashtbl.mem temp_names t)
+  then begin
+    let t0 = first_unused_ident () in
+    let d = Z.succ (Z.sub (Z.Zpos t) (Z.Zpos t0)) in
+    Hashtbl.add temp_names t ("_t'" ^ Z.to_string d)
+  end
 
 let name_opt_temporary = function
   | None -> ()
   | Some id -> name_temporary id
 
-(* Numbers *)
-
-let coqint p n =
-  let n = camlint_of_coqint n in
-  if n >= 0l
-  then fprintf p "(Int.repr %ld)" n
-  else fprintf p "(Int.repr (%ld))" n
-
-let coqptrofs p n =
-  let s = Z.to_string n in
-  if Z.ge n Z.zero
-  then fprintf p "(Ptrofs.repr %s)" s
-  else fprintf p "(Ptrofs.repr (%s))" s
-
-let coqint64 p n =
-  let n = camlint64_of_coqint n in
-  if n >= 0L
-  then fprintf p "(Int64.repr %Ld)" n
-  else fprintf p "(Int64.repr (%Ld))" n
-
-let coqfloat p n =
-  fprintf p "(Float.of_bits %a)" coqint64 (Floats.Float.to_bits n)
-
-let coqsingle p n =
-  fprintf p "(Float32.of_bits %a)" coqint (Floats.Float32.to_bits n)
-
-let coqN p n =
-  fprintf p "%ld%%N" (N.to_int32 n)
-
-let coqZ p n =
-  if Z.ge n Z.zero
-  then fprintf p "%s" (Z.to_string n)
-  else fprintf p "(%s)" (Z.to_string n)
-
-(* Coq strings *)
-
-let coqstring p s =
-  fprintf p "\"%s\"" (camlstring_of_coqstring s)
-
 (* Raw attributes *)
 
 let attribute p a =
@@ -247,8 +257,6 @@ let signatur p sg =
      astrettype sg.sig_res
      callconv sg.sig_cc
 
-let assertions = ref ([]: (string * typ list) list)
-
 let external_function p = function
   | EF_external(name, sg) ->
       fprintf p "@[<hov 2>(EF_external %a@ %a)@]" coqstring name signatur sg
@@ -264,14 +272,15 @@ let external_function p = function
   | EF_free -> fprintf p "EF_free"
   | EF_memcpy(sz, al) ->
       fprintf p "(EF_memcpy %ld %ld)" (Z.to_int32 sz) (Z.to_int32 al)
-  | EF_annot(kind,text, targs) ->
-      assertions := (camlstring_of_coqstring text, targs) :: !assertions;
-      fprintf p "(EF_annot %a %a)" coqstring text (print_list asttype) targs
-  | EF_annot_val(kind,text, targ) ->
-      assertions := (camlstring_of_coqstring text, [targ]) :: !assertions;
-      fprintf p "(EF_annot_val %a %a)" coqstring text asttype targ
+  | EF_annot(kind, text, targs) ->
+      fprintf p "(EF_annot %a %a %a)"
+                positive kind coqstring text (print_list asttype) targs
+  | EF_annot_val(kind, text, targ) ->
+      fprintf p "(EF_annot_val %a %a %a)"
+                positive kind coqstring text asttype targ
   | EF_debug(kind, text, targs) ->
-      fprintf p "(EF_debug %ld%%positive %ld%%positive %a)" (P.to_int32 kind) (P.to_int32 text) (print_list asttype) targs
+      fprintf p "(EF_debug %a %a %a)"
+                positive kind positive text (print_list asttype) targs
   | EF_inline_asm(text, sg, clob) ->
       fprintf p "@[<hov 2>(EF_inline_asm %a@ %a@ %a)@]"
               coqstring text
@@ -441,61 +450,13 @@ let print_composite_definition p (Composite(id, su, m, a)) =
     (print_list (print_pair ident typ)) m
     attribute a
 
-(* Assertion processing *)
-
-let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*"
-
-type fragment = Text of string | Param of int
-
-(* For compatibility with OCaml < 4.00 *)
-let list_iteri f l =
-  let rec iteri i = function
-  | [] -> ()
-  | a::l -> f i a; iteri (i + 1) l
-  in iteri 0 l
-
-let print_assertion p (txt, targs) =
-  let frags =
-    List.map
-      (function
-       | Str.Text s -> Text s
-       | Str.Delim "%%" -> Text "%"
-       | Str.Delim s -> Param(int_of_string(String.sub s 1 (String.length s - 1))))
-      (Str.full_split re_annot_param txt) in
-  let max_param = ref 0 in
-  List.iter
-    (function
-     | Text _ -> ()
-     | Param n -> max_param := max n !max_param)
-    frags;
-  fprintf p "  | \"%s\"%%string, " txt;
-  list_iteri
-    (fun i targ -> fprintf p "_x%d :: " (i + 1))
-    targs;
-  fprintf p "nil =>@ ";
-  fprintf p "    ";
-  List.iter
-    (function
-     | Text s -> fprintf p "%s" s
-     | Param n -> fprintf p "_x%d" n)
-    frags;
-  fprintf p "@ "
-
-let print_assertions p =
-  if !assertions <> [] then begin
-    fprintf p "Definition assertions (txt: string) args : Prop :=@ ";
-    fprintf p "  match txt, args with@ ";
-    List.iter (print_assertion p) !assertions;
-    fprintf p "  | _, _ => False@ ";
-    fprintf p "  end.@ @ "
-  end
-
 (* The prologue *)
 
 let prologue = "\
 From Coq Require Import String List ZArith.\n\
 From compcert Require Import Coqlib Integers Floats AST Ctypes Cop Clight Clightdefs.\n\
-Local Open Scope Z_scope.\n"
+Local Open Scope Z_scope.\n\
+Local Open Scope string_scope.\n"
 
 (* Naming the compiler-generated temporaries occurring in the program *)
 
@@ -554,15 +515,16 @@ let name_program p =
 
 let print_clightgen_info p sourcefile normalized =
   fprintf p "@[<v 2>Module Info.";
-  fprintf p "@ Definition version := %S%%string." Version.version;
-  fprintf p "@ Definition build_number := %S%%string." Version.buildnr;
-  fprintf p "@ Definition build_tag := %S%%string." Version.tag;
-  fprintf p "@ Definition arch := %S%%string." Configuration.arch;
-  fprintf p "@ Definition model := %S%%string." Configuration.model;
-  fprintf p "@ Definition abi := %S%%string." Configuration.abi;
+  fprintf p "@ Definition version := %S." Version.version;
+  fprintf p "@ Definition build_number := %S." Version.buildnr;
+  fprintf p "@ Definition build_tag := %S." Version.tag;
+  fprintf p "@ Definition build_branch := %S." Version.branch;
+  fprintf p "@ Definition arch := %S." Configuration.arch;
+  fprintf p "@ Definition model := %S." Configuration.model;
+  fprintf p "@ Definition abi := %S." Configuration.abi;
   fprintf p "@ Definition bitsize := %d." (if Archi.ptr64 then 64 else 32);
   fprintf p "@ Definition big_endian := %B." Archi.big_endian;
-  fprintf p "@ Definition source_file := %S%%string." sourcefile;
+  fprintf p "@ Definition source_file := %S." sourcefile;
   fprintf p "@ Definition normalized := %B." normalized;
   fprintf p "@]@ End Info.@ @ "  
   
@@ -588,5 +550,4 @@ let print_program p prog sourcefile normalized =
   fprintf p "Definition prog : Clight.program := @ ";
   fprintf p "  mkprogram composites global_definitions public_idents %a Logic.I.@ @ "
             ident prog.Ctypes.prog_main;
-  print_assertions p;
   fprintf p "@]@."
diff --git a/extraction/extraction.v b/extraction/extraction.vexpand
index e43594fc..55ca3b5c 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.vexpand
@@ -13,6 +13,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+Require Import ZArith PeanoNat.
 Require Coqlib.
 Require Wfsimpl.
 Require DecidableClass Decidableplus.
@@ -111,8 +112,6 @@ Extract Constant Compopts.generate_float_constants =>
   "fun _ -> !Clflags.option_ffloatconstprop >= 2".
 Extract Constant Compopts.optim_tailcalls =>
   "fun _ -> !Clflags.option_ftailcalls".
-Extract Constant Compopts.optim_duplicate =>
-  "fun _ -> (if !Clflags.option_fduplicate = -1 then false else true)".
 Extract Constant Compopts.optim_constprop =>
   "fun _ -> !Clflags.option_fconstprop".
 Extract Constant Compopts.optim_CSE =>
@@ -129,6 +128,10 @@ Extract Constant Compopts.optim_CSE3_across_merges =>
   "fun _ -> !Clflags.option_fcse3_across_merges".
 Extract Constant Compopts.optim_CSE3_glb =>
   "fun _ -> !Clflags.option_fcse3_glb".
+Extract Constant Compopts.optim_CSE3_trivial_ops =>
+  "fun _ -> !Clflags.option_fcse3_trivial_ops".
+Extract Constant Compopts.optim_CSE3_conditions =>
+  "fun _ -> !Clflags.option_fcse3_conditions".
 Extract Constant Compopts.optim_move_loop_invariants =>
   "fun _ -> !Clflags.option_fmove_loop_invariants".
 
@@ -220,8 +223,9 @@ Set Extraction AccessOpaque.
 
 Cd "extraction".
 
-Separate Extraction
-   CSE3analysis.internal_analysis CSE3analysis.eq_depends_on_mem
+Separate Extraction 
+   Z.ldiff Z.lnot Nat.leb
+   CSE3analysis.eq_cond_depends_on_mem CSE3analysis.apply_instr'
    Compiler.transf_c_program Compiler.transf_cminor_program
    Cexec.do_initial_state Cexec.do_step Cexec.at_final_state
    Ctypes.merge_attributes Ctypes.remove_attributes Ctypes.build_composite_env
@@ -245,4 +249,4 @@ Separate Extraction
    Globalenvs.Senv.invert_symbol
    Parser.translation_unit_file
    Compopts.optim_postpass
-   Archi.has_notrap_loads.
+   Archi.has_notrap_loads
diff --git a/filter_peeplog.fish b/filter_peeplog.fish
new file mode 100755
index 00000000..72a0eaf1
--- /dev/null
+++ b/filter_peeplog.fish
@@ -0,0 +1,39 @@
+echo "LDP_CONSEC_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" | wc -l)
+echo "\n"
+echo "LDP_CONSEC_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32f" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64f" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" | wc -l)
+echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l)
+echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l)
+echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l)
+echo "\n"
+echo "STP_CONSEC_PEEP_IMM_INC_str32" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32" | wc -l)
+echo "STP_CONSEC_PEEP_IMM_INC_str64" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64" | wc -l)
+echo "STP_FORW_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32" | wc -l)
+echo "STP_FORW_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64" | wc -l)
+echo "STP_BACK_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32" | wc -l)
+echo "STP_BACK_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64" | wc -l)
+echo "\n"
+echo "STP_CONSEC_PEEP_IMM_INC_str32f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32f" | wc -l)
+echo "STP_CONSEC_PEEP_IMM_INC_str64f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64f" | wc -l)
+echo "STP_FORW_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32f" | wc -l)
+echo "STP_FORW_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64f" | wc -l)
+echo "STP_BACK_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32f" | wc -l)
+echo "STP_BACK_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64f" | wc -l)
diff --git a/kvx/Asm.v b/kvx/Asm.v
index 30aafc55..fd20316c 100644
--- a/kvx/Asm.v
+++ b/kvx/Asm.v
@@ -35,12 +35,14 @@ Require Import Smallstep.
 Require Import Locations.
 Require Stacklayout.
 Require Import Conventions.
-Require Import Asmvliw.
+Require Export Asmvliw.
 Require Import Linking.
 Require Import Errors.
 
 (** Definitions for OCaml code *)
 Definition label := positive.
+
+(* Necessary definition for Asmexpandaux.mli *)
 Definition preg := preg.
 
 Inductive addressing : Type :=
@@ -102,6 +104,9 @@ Inductive instruction : Type :=
   | Palclrd (dst: ireg) (addr: ireg)
   | Palclrw (dst: ireg) (addr: ireg)
   | Pclzll (rd rs: ireg)
+  | Pclzw  (rd rs: ireg)
+  | Pctzll (rd rs: ireg)
+  | Pctzw  (rd rs: ireg)
   | Pstsud (rd rs1 rs2: ireg)
             
   (** Loads *)
@@ -611,15 +616,15 @@ Program Definition genv_trans (ge: genv) : Asmvliw.genv :=
       Genv.genv_defs := PTree.map1 globdef_proj (Genv.genv_defs ge);
       Genv.genv_next := Genv.genv_next ge |}.
 Next Obligation.
-  destruct ge. simpl in *. eauto.
+  destruct ge. cbn in *. eauto.
 Qed. Next Obligation.
-  destruct ge; simpl in *.
+  destruct ge; cbn in *.
   rewrite PTree.gmap1 in H.
   destruct (genv_defs ! b) eqn:GEN.
   - eauto.
   - discriminate.
 Qed. Next Obligation.
-  destruct ge; simpl in *.
+  destruct ge; cbn in *.
   eauto.
 Qed.
 
@@ -655,14 +660,14 @@ Program Definition transf_function (f: Asmvliw.function) : function :=
 
 Lemma transf_function_proj: forall f, function_proj (transf_function f) = f.
 Proof.
-  intros f. destruct f as [sig blks]. unfold function_proj. simpl. auto.
+  intros f. destruct f as [sig blks]. unfold function_proj. cbn. auto.
 Qed.
 
 Definition transf_fundef : Asmvliw.fundef -> fundef := AST.transf_fundef transf_function.
 
 Lemma transf_fundef_proj: forall f, fundef_proj (transf_fundef f) = f.
 Proof.
-  intros f. destruct f as [f|e]; simpl; auto.
+  intros f. destruct f as [f|e]; cbn; auto.
   rewrite transf_function_proj. auto.
 Qed.
 
@@ -674,18 +679,18 @@ Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
   prog_main p1 = prog_main p2 ->
   p1 = p2.
 Proof.
-  intros. destruct p1. destruct p2. simpl in *. subst. auto.
+  intros. destruct p1. destruct p2. cbn in *. subst. auto.
 Qed.
 
 Lemma transf_program_proj: forall p, program_proj (transf_program p) = p.
 Proof.
-  intros p. destruct p as [defs pub main]. unfold program_proj. simpl.
-  apply program_equals; simpl; auto.
+  intros p. destruct p as [defs pub main]. unfold program_proj. cbn.
+  apply program_equals; cbn; auto.
   induction defs.
-  - simpl; auto.
-  - simpl. rewrite IHdefs. 
-    destruct a as [id gd]; simpl.
-    destruct gd as [f|v]; simpl; auto.
+  - cbn; auto.
+  - cbn. rewrite IHdefs. 
+    destruct a as [id gd]; cbn.
+    destruct gd as [f|v]; cbn; auto.
     rewrite transf_fundef_proj. auto.
 Qed.
 
@@ -707,16 +712,16 @@ Lemma match_program_transf:
   forall p tp, match_prog p tp -> transf_program p = tp.
 Proof.
   intros p tp H. inversion_clear H. inv H1.
-  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. simpl in *.
-  subst. unfold transf_program. unfold transform_program. simpl.
-  apply program_equals; simpl; auto.
-  induction H0; simpl; auto.
+  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. cbn in *.
+  subst. unfold transf_program. unfold transform_program. cbn.
+  apply program_equals; cbn; auto.
+  induction H0; cbn; auto.
   rewrite IHlist_forall2. apply cons_extract.
   destruct a1 as [ida gda]. destruct b1 as [idb gdb].
-  simpl in *.
+  cbn in *.
   inv H. inv H2.
-  - simpl in *. subst. auto.
-  - simpl in *. subst. inv H. auto.
+  - cbn in *. subst. auto.
+  - cbn in *. subst. inv H. auto.
 Qed.
 
 Section PRESERVATION.
@@ -744,7 +749,7 @@ Proof.
   pose proof (match_program_transf prog tprog TRANSF) as TR.
   subst. unfold semantics. rewrite transf_program_proj.
 
-  eapply forward_simulation_step with (match_states := match_states); simpl; auto.
+  eapply forward_simulation_step with (match_states := match_states); cbn; auto.
   - intros. exists s1. split; auto. congruence.
   - intros. inv H. auto.
   - intros. exists s1'. inv H0. split; auto. congruence.
diff --git a/kvx/Asmblock.v b/kvx/Asmblock.v
index 9c8e4cc3..64b2c535 100644
--- a/kvx/Asmblock.v
+++ b/kvx/Asmblock.v
@@ -78,7 +78,7 @@ Fixpoint code_to_basics (c: code) :=
 
 Lemma code_to_basics_id: forall c, code_to_basics (basics_to_code c) = Some c.
 Proof.
-  intros. induction c as [|i c]; simpl; auto.
+  intros. induction c as [|i c]; cbn; auto.
   rewrite IHc. auto.
 Qed.
 
@@ -88,8 +88,8 @@ Lemma code_to_basics_dist:
   code_to_basics c' = Some l' ->
   code_to_basics (c ++ c') = Some (l ++ l').
 Proof.
-  induction c as [|i c]; simpl; auto.
-  - intros. inv H. simpl. auto.
+  induction c as [|i c]; cbn; auto.
+  - intros. inv H. cbn. auto.
   - intros. destruct i; try discriminate. destruct (code_to_basics c) eqn:CTB; try discriminate.
     inv H. erewrite IHc; eauto. auto.
 Qed.
@@ -138,9 +138,9 @@ Lemma non_empty_bblock_refl:
 Proof.
   intros. split.
   - destruct body; destruct exit.
-    all: simpl; auto. intros. inversion H; contradiction.
+    all: cbn; auto. intros. inversion H; contradiction.
   - destruct body; destruct exit.
-    all: simpl; auto.
+    all: cbn; auto.
     all: intros; try (right; discriminate); try (left; discriminate).
     contradiction.
 Qed.
@@ -155,14 +155,14 @@ Lemma builtin_alone_refl:
 Proof.
   intros. split.
   - destruct body; destruct exit.
-    all: simpl; auto.
-    all: exploreInst; simpl; auto.
+    all: cbn; auto.
+    all: exploreInst; cbn; auto.
     unfold builtin_alone. intros. assert (Some (Pbuiltin e l b0) = Some (Pbuiltin e l b0)); auto.
     assert (b :: body = nil). eapply H; eauto. discriminate.
   - destruct body; destruct exit.
-    all: simpl; auto; try constructor.
+    all: cbn; auto; try constructor.
     + exploreInst; try discriminate.
-        simpl. contradiction.
+        cbn. contradiction.
     + intros. discriminate.
 Qed.
 
@@ -185,14 +185,14 @@ Ltac bblock_auto_correct := (apply non_empty_bblock_refl; try discriminate; try
 
 Lemma Istrue_proof_irrelevant (b: bool): forall (p1 p2:Is_true b), p1=p2.
 Proof.
-  destruct b; simpl; auto.
+  destruct b; cbn; auto.
   - destruct p1, p2; auto.
   - destruct p1.
 Qed.
 
 Lemma bblock_equality bb1 bb2: header bb1=header bb2 -> body bb1 = body bb2 -> exit bb1 = exit bb2 -> bb1 = bb2.
 Proof.
-  destruct bb1 as [h1 b1 e1 c1], bb2 as [h2 b2 e2 c2]; simpl.
+  destruct bb1 as [h1 b1 e1 c1], bb2 as [h2 b2 e2 c2]; cbn.
   intros; subst.
   rewrite (Istrue_proof_irrelevant _ c1 c2).
   auto.
@@ -212,51 +212,51 @@ Qed.
 Lemma length_nonil {A: Type} : forall l:(list A), l <> nil -> (length l > 0)%nat.
 Proof.
   intros. destruct l; try (contradict H; auto; fail).
-  simpl. omega.
+  cbn. omega.
 Qed.
 
 Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0.
 Proof.
   intros. destruct z; auto.
-  - contradict H. simpl. apply gt_irrefl.
+  - contradict H. cbn. apply gt_irrefl.
   - apply Zgt_pos_0.
-  - contradict H. simpl. apply gt_irrefl.
+  - contradict H. cbn. apply gt_irrefl.
 Qed.
 
 Lemma size_positive (b:bblock): size b > 0.
 Proof.
-  unfold size. destruct b as [hd bdy ex cor]. simpl.
-  destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; simpl; omega).
-  inversion cor; contradict H; simpl; auto.
+  unfold size. destruct b as [hd bdy ex cor]. cbn.
+  destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; omega).
+  inversion cor; contradict H; cbn; auto.
 Qed.
 
 
 Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}.
 Next Obligation.
-  destruct bb; simpl. assumption.
+  destruct bb; cbn. assumption.
 Defined.
 
 Lemma no_header_size:
   forall bb, size (no_header bb) = size bb.
 Proof.
-  intros. destruct bb as [hd bdy ex COR]. unfold no_header. simpl. reflexivity.
+  intros. destruct bb as [hd bdy ex COR]. unfold no_header. cbn. reflexivity.
 Qed.
 
 Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}.
 Next Obligation.
-  destruct bb; simpl. assumption.
+  destruct bb; cbn. assumption.
 Defined.
 
 Lemma stick_header_size:
   forall h bb, size (stick_header h bb) = size bb.
 Proof.
-  intros. destruct bb. unfold stick_header. simpl. reflexivity.
+  intros. destruct bb. unfold stick_header. cbn. reflexivity.
 Qed.
 
 Lemma stick_header_no_header:
   forall bb, stick_header (header bb) (no_header bb) = bb.
 Proof.
-  intros. destruct bb as [hd bdy ex COR]. simpl. unfold no_header; unfold stick_header; simpl. reflexivity.
+  intros. destruct bb as [hd bdy ex COR]. cbn. unfold no_header; unfold stick_header; cbn. reflexivity.
 Qed.
 
 (** * Sequential Semantics of basic blocks *)
@@ -308,7 +308,7 @@ Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
 Theorem builtin_body_nil:
   forall bb ef args res, exit bb = Some (PExpand (Pbuiltin ef args res)) -> body bb = nil.
 Proof.
-  intros. destruct bb as [hd bdy ex WF]. simpl in *.
+  intros. destruct bb as [hd bdy ex WF]. cbn in *.
   apply wf_bblock_refl in WF. inv WF. unfold builtin_alone in H1.
   eapply H1; eauto.
 Qed.
@@ -321,11 +321,11 @@ Theorem exec_body_app:
     /\ exec_body l' rs' m' = Next rs'' m''.
 Proof.
   induction l.
-  - intros. simpl in H. repeat eexists. auto.
-  - intros. rewrite <- app_comm_cons in H. simpl in H.
+  - intros. cbn in H. repeat eexists. auto.
+  - intros. rewrite <- app_comm_cons in H. cbn in H.
     destruct (exec_basic_instr a rs m) eqn:EXEBI.
     + apply IHl in H. destruct H as (rs1 & m1 & EXEB1 & EXEB2).
-      repeat eexists. simpl. rewrite EXEBI. eauto. auto.
+      repeat eexists. cbn. rewrite EXEBI. eauto. auto.
     + discriminate.
 Qed.
 
diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v
index 3d981100..b6d18c3e 100644
--- a/kvx/Asmblockdeps.v
+++ b/kvx/Asmblockdeps.v
@@ -40,6 +40,10 @@ Require Import Chunks.
 
 Require Import Lia.
 
+
+Import ListNotations.
+Local Open Scope list_scope.
+
 Open Scope impure.
 
 (** Definition of [L] *)
@@ -394,8 +398,8 @@ Definition control_eval (o: control_op) (l: list value) :=
     end
   | Ocbu bt l, [Val v; Val vpc] =>
     match cmpu_for_btest bt with
-    | (Some c, Int) => eval_branch_deps fn l vpc (Val_cmpu_bool c v (Vint (Int.repr 0)))
-    | (Some c, Long) => eval_branch_deps fn l vpc (Val_cmplu_bool c v (Vlong (Int64.repr 0)))
+    | (Some c, Int) => eval_branch_deps fn l vpc (Val.mxcmpu_bool c v (Vint (Int.repr 0)))
+    | (Some c, Long) => eval_branch_deps fn l vpc (Val.mxcmplu_bool c v (Vlong (Int64.repr 0)))
     | (None, _) => None
     end
   | Odiv, [Val v1; Val v2] => 
@@ -706,19 +710,20 @@ Proof.
   destruct r; destruct r'.
   all: try discriminate; try contradiction.
   - intros. apply not_eq_add. apply ireg_to_pos_discr. congruence.
-  - intros. unfold ppos. cutrewrite (3 + ireg_to_pos g = (1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
-    apply eq_sym. rewrite Pos.add_comm. rewrite Pos.add_assoc. reflexivity.
+  - intros. unfold ppos. replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2).
+    apply Pos.add_no_neutral.
+    rewrite Pos.add_comm, Pos.add_assoc. reflexivity.
   - intros. unfold ppos. rewrite Pos.add_comm. apply Pos.add_no_neutral.
   - intros. unfold ppos. apply not_eq_sym.
-    cutrewrite (3 + ireg_to_pos g = (1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
-    apply eq_sym. rewrite Pos.add_comm. rewrite Pos.add_assoc. reflexivity.
+    replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
+    rewrite Pos.add_comm, Pos.add_assoc. reflexivity.
   - intros. unfold ppos. apply not_eq_sym. rewrite Pos.add_comm. apply Pos.add_no_neutral.
 Qed.
 
 Lemma ppos_pmem_discr: forall r, pmem <> ppos r.
 Proof.
   intros. destruct r.
-  - unfold ppos. unfold pmem. apply not_eq_sym. rewrite Pos.add_comm. cutrewrite (3 = 2 + 1). rewrite Pos.add_assoc. apply Pos.add_no_neutral.
+  - unfold ppos. unfold pmem. apply not_eq_sym. rewrite Pos.add_comm. replace 3 with (2 + 1). rewrite Pos.add_assoc. apply Pos.add_no_neutral.
     reflexivity.
   - unfold ppos. unfold pmem. discriminate.
   - unfold ppos. unfold pmem. discriminate.
@@ -1227,12 +1232,12 @@ Proof.
     (* Pcbu *)
     + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmpu_for_btest _); simpl; auto. destruct o; simpl; auto.
       unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i.
-      ++ destruct (Val_cmpu_bool _ _ _); simpl; auto. destruct b.
+      ++ destruct (Val.mxcmpu_bool _ _ _); simpl; auto. destruct b.
          +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
              destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
              intros rr; destruct rr; Simpl.
          +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
-      ++ destruct (Val_cmplu_bool _ _ _); simpl; auto. destruct b.
+      ++ destruct (Val.mxcmplu_bool _ _ _); simpl; auto. destruct b.
          +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
              destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
              intros rr; destruct rr; Simpl.
@@ -1250,7 +1255,7 @@ Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw:
 Proof.
   intros; unfold estep.
   exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto.
-  cutrewrite (rsw # PC <- (rsw PC) = rsw); auto.
+  replace (rsw # PC <- (rsw PC)) with rsw; auto.
   apply extensionality. intros; destruct x; simpl; auto.
 Qed.
 
@@ -1461,8 +1466,8 @@ Proof.
   destruct H2 as (m2' & H2 & H4). discriminate. rewrite H2 in H3.
   destruct (exec_bblock ge fn p2 rs m); simpl in H3.
   * destruct H3 as (s' & H3 & H5 & H6). inv H3. inv MS'.
-    cutrewrite (rs0=rs1).
-    - cutrewrite (m0=m1); auto. congruence.
+    replace rs0 with rs1.
+    - replace m0 with m1; auto. congruence.
     - apply functional_extensionality. intros r.
       generalize (H0 r). intros Hr. congruence.
   * discriminate.
diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v
index 7167cebe..e218f4ef 100644
--- a/kvx/Asmblockgen.v
+++ b/kvx/Asmblockgen.v
@@ -19,7 +19,7 @@
 Require Archi.
 Require Import Coqlib Errors.
 Require Import AST Integers Floats Memdata.
-Require Import Op Locations Machblock Asmblock.
+Require Import Op Locations Machblock Asmvliw Asmblock.
 Require ExtValues.
 Require Import Chunks.
 
@@ -36,12 +36,6 @@ Import PArithCoercions.
 
 (** Extracting integer or float registers. *)
 
-Definition ireg_of (r: mreg) : res ireg :=
-  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.ireg_of") end.
-
-Definition freg_of (r: mreg) : res freg :=
-  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
-
 Inductive immed32 : Type :=
   | Imm32_single (imm: int).
 
diff --git a/kvx/lib/Asmblockgenproof0.v b/kvx/Asmblockgenproof0.v
index 1af59238..12bb863a 100644
--- a/kvx/lib/Asmblockgenproof0.v
+++ b/kvx/Asmblockgenproof0.v
@@ -36,7 +36,6 @@ Require Import Asmblock.
 Require Import Asmblockgen.
 Require Import Conventions1.
 Require Import Axioms.
-Require Import Machblockgenproof. (* FIXME: only use to import [is_tail_app] and [is_tail_app_inv] *)
 Require Import Asmblockprops.
 
 Module MB:=Machblock.
diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v
index 74b9b62b..c6ad70ab 100644
--- a/kvx/Asmblockgenproof1.v
+++ b/kvx/Asmblockgenproof1.v
@@ -256,7 +256,7 @@ Lemma transl_compu_correct:
   exists rs',
      exec_straight ge (transl_comp cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)  
-  /\ (Val_cmpu_bool cmp rs#r1 rs#r2 = Some b ->
+  /\ (Val.mxcmpu_bool cmp rs#r1 rs#r2 = Some b ->
        exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m
         = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
@@ -272,8 +272,8 @@ Proof.
       assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)).
       { rewrite Heqrs'. auto. }
       rewrite H0. rewrite <- H.
-      remember (Val_cmpu_bool cmp rs#r1 rs#r2) as cmpubool.
-      destruct cmp; simpl; unfold Val_cmpu;
+      remember (Val.mxcmpu_bool cmp rs#r1 rs#r2) as cmpubool.
+      destruct cmp; simpl; unfold Val.mxcmpu;
         rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto;
       destruct b0; simpl; auto.
     }
@@ -285,7 +285,7 @@ Lemma transl_compui_correct:
   exists rs',
      exec_straight ge (transl_compi cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)  
-  /\ (Val_cmpu_bool cmp rs#r1 (Vint n) = Some b ->
+  /\ (Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b ->
        exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m
         = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
@@ -301,8 +301,8 @@ Proof.
       assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 (Vint n))).
       { rewrite Heqrs'. auto. }
       rewrite H0. rewrite <- H.
-      remember (Val_cmpu_bool cmp rs#r1 (Vint n)) as cmpubool.
-      destruct cmp; simpl; unfold Val_cmpu;
+      remember (Val.mxcmpu_bool cmp rs#r1 (Vint n)) as cmpubool.
+      destruct cmp; simpl; unfold Val.mxcmpu;
         rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto;
       destruct b0; simpl; auto.
     }
@@ -656,7 +656,7 @@ Lemma transl_complu_correct:
   exists rs',
      exec_straight ge (transl_compl cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)  
-  /\ ( Val_cmplu_bool cmp rs#r1 rs#r2 = Some b ->
+  /\ ( Val.mxcmplu_bool cmp rs#r1 rs#r2 = Some b ->
        exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
         = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
@@ -672,9 +672,9 @@ Proof.
       assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)).
       { rewrite Heqrs'. auto. }
       rewrite H0. rewrite <- H.
-      remember (Val_cmplu_bool cmp rs#r1 rs#r2) as cmpbool.
+      remember (Val.mxcmplu_bool cmp rs#r1 rs#r2) as cmpbool.
       destruct cmp; simpl;
-      unfold compare_long, Val_cmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+      unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
       destruct b0; simpl; auto.
     }
     rewrite H0. simpl; auto.
@@ -685,7 +685,7 @@ Lemma transl_compilu_correct:
   exists rs',
      exec_straight ge (transl_compil cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)  
-  /\ ( Val_cmplu_bool cmp rs#r1 (Vlong n) = Some b ->
+  /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b ->
        exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
         = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
@@ -701,9 +701,9 @@ Proof.
       assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 (Vlong n))).
       { rewrite Heqrs'. auto. }
       rewrite H0. rewrite <- H.
-      remember (Val_cmplu_bool cmp rs#r1 (Vlong n)) as cmpbool.
+      remember (Val.mxcmplu_bool cmp rs#r1 (Vlong n)) as cmpbool.
       destruct cmp; simpl;
-      unfold compare_long, Val_cmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+      unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
       destruct b0; simpl; auto.
     }
     rewrite H0. simpl; auto.
@@ -715,7 +715,7 @@ Lemma transl_opt_compuimm_correct:
   exists rs', exists insn,
      exec_straight_opt (transl_opt_compuimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)  
-  /\ ( Val_cmpu_bool cmp rs#r1 (Vint n) = Some b ->
+  /\ ( Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b ->
        exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
 Proof.
@@ -791,7 +791,7 @@ Lemma transl_opt_compluimm_correct:
   exists rs', exists insn,
      exec_straight_opt (transl_opt_compluimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m
   /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
-  /\ ( Val_cmplu_bool cmp rs#r1 (Vlong n) = Some b ->
+  /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b ->
        exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b))
   .
 Proof.
@@ -859,7 +859,7 @@ Proof.
     destruct cmp; discriminate.
 Qed.
 
-Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct: core.
+Local Hint Resolve Val.mxcmpu_bool_correct Val.mxcmplu_bool_correct: core.
 
 Lemma transl_cbranch_correct_1:
   forall cond args lbl k c m ms b sp rs m' tbb,
@@ -988,7 +988,7 @@ Proof.
     split.
     * constructor. eexact A'.
     * split; auto.
-      { apply C'; auto. eapply Val_cmplu_bool_correct; eauto. }
+      { apply C'; auto. eapply Val.mxcmplu_bool_correct; eauto. }
 
 (* Ccompf *)
 - exploit (transl_compf_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
@@ -1078,7 +1078,7 @@ Lemma transl_cond_int32u_correct:
   forall cmp rd r1 r2 k rs m,
   exists rs',
      exec_straight ge (basics_to_code (transl_cond_int32u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
-  /\ rs'#rd = Val_cmpu cmp rs#r1 rs#r2
+  /\ rs'#rd = Val.mxcmpu cmp rs#r1 rs#r2
   /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
   intros. destruct cmp; simpl. 
@@ -1122,7 +1122,7 @@ Lemma transl_cond_int64u_correct:
   forall cmp rd r1 r2 k rs m,
   exists rs',
      exec_straight ge (basics_to_code (transl_cond_int64u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
-  /\ rs'#rd = Val_cmplu cmp rs#r1 rs#r2
+  /\ rs'#rd = Val.mxcmplu cmp rs#r1 rs#r2
   /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
   intros. destruct cmp; simpl. 
@@ -1163,7 +1163,7 @@ Proof.
   split; intros; Simpl.
 Qed.
 
-Local Hint Resolve Val_cmpu_correct Val_cmplu_correct: core.
+Local Hint Resolve Val.mxcmpu_correct Val.mxcmplu_correct: core.
 
 Lemma transl_condimm_int32u_correct:
   forall cmp rd r1 n k rs m,
@@ -1279,12 +1279,12 @@ Proof.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
   unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
-  cutrewrite (Cge = swap_comparison Cle); auto. rewrite Float32.cmp_swap.
+  replace (Cge) with (swap_comparison Cle); auto. rewrite Float32.cmp_swap.
   destruct (Float32.cmp _ _ _); auto.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
   unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
-  cutrewrite (Clt = swap_comparison Cgt); auto. rewrite Float32.cmp_swap.
+  replace (Clt) with (swap_comparison Cgt); auto. rewrite Float32.cmp_swap.
   destruct (Float32.cmp _ _ _); auto.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
@@ -1345,12 +1345,12 @@ Proof.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
   unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
-  cutrewrite (Cge = swap_comparison Cle); auto. rewrite Float.cmp_swap.
+  replace (Cge) with (swap_comparison Cle); auto. rewrite Float.cmp_swap.
   destruct (Float.cmp _ _ _); auto.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
   unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
-  cutrewrite (Clt = swap_comparison Cgt); auto. rewrite Float.cmp_swap.
+  replace (Clt) with (swap_comparison Cgt); auto. rewrite Float.cmp_swap.
   destruct (Float.cmp _ _ _); auto.
 - econstructor; split. apply exec_straight_one; [simpl; eauto].
   split; intros; Simpl.
@@ -1374,7 +1374,7 @@ Proof.
   exploit transl_cond_int32s_correct; eauto. simpl. intros (rs' & A & B & C). exists rs'; eauto.
 + (* cmpu *)
   exploit transl_cond_int32u_correct; eauto. simpl. intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite B; eapply Val_cmpu_correct.
+  exists rs'; repeat split; eauto. rewrite B; eapply Val.mxcmpu_correct.
 + (* cmpimm *)
   apply transl_condimm_int32s_correct; eauto with asmgen.
 + (* cmpuimm *)
@@ -1385,7 +1385,7 @@ Proof.
 + (* cmplu *)
   exploit transl_cond_int64u_correct; eauto. simpl. intros (rs' & A & B & C).
   exists rs'; repeat split; eauto. rewrite B, MKTOT; eauto.
-  eapply Val_cmplu_correct.
+  eapply Val.mxcmplu_correct.
 + (* cmplimm *)
   exploit transl_condimm_int64s_correct; eauto. instantiate (1 := x); eauto with asmgen. simpl. 
   intros (rs' & A & B & C).
diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v
index bc14b231..c3929be5 100644
--- a/kvx/Asmblockprops.v
+++ b/kvx/Asmblockprops.v
@@ -53,7 +53,7 @@ Qed.
 Lemma preg_of_not_SP:
   forall r, preg_of r <> SP.
 Proof.
-  intros. unfold preg_of; destruct r; simpl; congruence.
+  intros. unfold preg_of; destruct r; cbn; congruence.
 Qed.
 
 Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
@@ -233,7 +233,7 @@ Proof.
   destruct (ireg_eq rd2 ra); try discriminate.
 *)
   rewrite Pregmap.gso; try discriminate.
-  simpl in *.
+  cbn in *.
   destruct (Mem.loadv _ _ _); try discriminate.
   destruct (Mem.loadv _ _ _); try discriminate.
   destruct (Mem.loadv _ _ _); try discriminate.
@@ -264,7 +264,7 @@ Lemma exec_store_q_offset_pc_var:
   exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
 Proof.
   intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
-  simpl in *.
+  cbn in *.
   destruct (gpreg_q_expand _) as [s0 s1].
   destruct (Mem.storev _ _ _); try discriminate.
   destruct (Mem.storev _ _ _); try discriminate.
diff --git a/kvx/Asmexpand.ml b/kvx/Asmexpand.ml
index 5d4fd2f5..1e76a355 100644
--- a/kvx/Asmexpand.ml
+++ b/kvx/Asmexpand.ml
@@ -359,7 +359,7 @@ let expand_bswap32 d s = let open Asmvliw in
   emit (Psrliw(GPR32, s, _8)); emit Psemi;
   emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi;
   emit (Pslliw(GPR32, GPR32, _16)); emit Psemi;
-  emit (Porw(GPR16, GPR16, GPR31)); emit Psemi;
+  emit (Porw(GPR16, GPR16, GPR32)); emit Psemi;
   emit (Psrliw(GPR32, s, _16)); emit Psemi;
   emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi;
   emit (Pslliw(GPR32, GPR32, _8)); emit Psemi;
@@ -399,8 +399,14 @@ let expand_builtin_inline name args res = let open Asmvliw in
   (* Vararg stuff *)
   | "__builtin_va_start", [BA(IR a)], _ ->
      expand_builtin_va_start a
+  | "__builtin_kvx_clzw", [BA(IR a)], BR(IR res) ->
+     emit (Pclzw(res, a))
   | "__builtin_clzll", [BA(IR a)], BR(IR res) ->
      emit (Pclzll(res, a))
+  | "__builtin_kvx_ctzw", [BA(IR a)], BR(IR res) ->
+     emit (Pctzw(res, a))
+  | "__builtin_ctzll", [BA(IR a)], BR(IR res) ->
+     emit (Pctzll(res, a))
   | "__builtin_kvx_stsud", [BA(IR a1); BA(IR a2)], BR(IR res) ->
      emit (Pstsud(res, a1, a2))
   | "__builtin_kvx_get", [BA_int(n)], BR(IR res) ->
diff --git a/kvx/Asmgenproof.v b/kvx/Asmgenproof.v
index 9e35e268..636c105f 100644
--- a/kvx/Asmgenproof.v
+++ b/kvx/Asmgenproof.v
@@ -39,7 +39,7 @@ Proof.
   unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
   inversion_clear H. apply bind_inversion in H1. destruct H1.
   inversion_clear H. inversion H2. unfold time, Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
-  unfold match_prog; simpl.
+  unfold match_prog; cbn.
   exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
   exists x; split. apply Asmblockgenproof.transf_program_match; auto.
   exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto.
@@ -72,7 +72,7 @@ Let tge := Genv.globalenv tprog.
 Theorem transf_program_correct:
   forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
 Proof.
-  unfold match_prog in TRANSF. simpl in TRANSF.
+  unfold match_prog in TRANSF. cbn in TRANSF.
   inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
   eapply compose_forward_simulations. 
   exploit Machblockgenproof.transf_program_correct; eauto.
diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v
index 296963a7..8afe8d07 100644
--- a/kvx/Asmvliw.v
+++ b/kvx/Asmvliw.v
@@ -163,6 +163,30 @@ Module PregEq.
   Definition eq := preg_eq.
 End PregEq.
 
+(* FIXME - R16 and R32 are excluded *)
+Definition preg_of (r: mreg) : preg :=
+  match r with
+  | R0  => GPR0  | R1  => GPR1  | R2  => GPR2  | R3  => GPR3  | R4  => GPR4
+  | R5  => GPR5  | R6  => GPR6  | R7  => GPR7  | R8  => GPR8  | R9  => GPR9
+  | R10 => GPR10 | R11 => GPR11 (* | R12 => GPR12 | R13 => GPR13 | R14  => GPR14 *)
+  | R15 => GPR15 (* | R16 => GPR16 *) | R17 => GPR17 | R18 => GPR18 | R19  => GPR19
+  | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24  => GPR24
+  | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29  => GPR29
+  | R30 => GPR30 | R31 => GPR31 (* | R32 => GPR32 *) | R33 => GPR33 | R34  => GPR34
+  | R35 => GPR35 | R36 => GPR36 | R37 => GPR37 | R38 => GPR38 | R39  => GPR39
+  | R40 => GPR40 | R41 => GPR41 | R42 => GPR42 | R43 => GPR43 | R44  => GPR44
+  | R45 => GPR45 | R46 => GPR46 | R47 => GPR47 | R48 => GPR48 | R49  => GPR49
+  | R50 => GPR50 | R51 => GPR51 | R52 => GPR52 | R53 => GPR53 | R54  => GPR54
+  | R55 => GPR55 | R56 => GPR56 | R57 => GPR57 | R58 => GPR58 | R59  => GPR59
+  | R60 => GPR60 | R61 => GPR61 | R62 => GPR62 | R63 => GPR63
+  end.
+
+Definition ireg_of (r: mreg) : res ireg :=
+  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.ireg_of") end.
+
+Definition freg_of (r: mreg) : res freg :=
+  match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
+
 Module Pregmap := EMap(PregEq).
 
 (** ** Conventional names for stack pointer ([SP]), return address ([RA]), frame pointer ([FP]) and other temporaries used *)
@@ -829,58 +853,6 @@ Definition cmpu_for_btest (bt: btest) :=
   end.
 
 
-(* **** a few lemma on comparisons of unsigned (e.g. pointers) *)
-
-Definition Val_cmpu_bool cmp v1 v2: option bool :=
-  Val.cmpu_bool (fun _ _ => true) cmp v1 v2.
-
-Lemma Val_cmpu_bool_correct (m:mem) (cmp: comparison) (v1 v2: val) b:
-   (Val.cmpu_bool (Mem.valid_pointer m) cmp v1 v2) = Some b
-   -> (Val_cmpu_bool cmp v1 v2) = Some b.
-Proof.
-  intros; eapply Val.cmpu_bool_lessdef; (econstructor 1 || eauto).
-Qed.
-
-Definition Val_cmpu cmp v1 v2 := Val.of_optbool (Val_cmpu_bool cmp v1 v2).
-
-Lemma Val_cmpu_correct (m:mem) (cmp: comparison) (v1 v2: val):
-   Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp v1 v2)
-               (Val_cmpu cmp v1 v2).
-Proof.
-  unfold Val.cmpu, Val_cmpu.
-  remember (Val.cmpu_bool (Mem.valid_pointer m) cmp v1 v2) as ob.
-  destruct ob; simpl.
-  - erewrite Val_cmpu_bool_correct; eauto.
-    econstructor.
-  - econstructor.
-Qed.
-
-Definition Val_cmplu_bool (cmp: comparison) (v1 v2: val)
- := (Val.cmplu_bool (fun _ _ => true) cmp v1 v2).
-
-Lemma Val_cmplu_bool_correct (m:mem) (cmp: comparison) (v1 v2: val) b:
-   (Val.cmplu_bool (Mem.valid_pointer m) cmp v1 v2) = Some b
-   -> (Val_cmplu_bool cmp v1 v2) = Some b.
-Proof.
-  intros; eapply Val.cmplu_bool_lessdef; (econstructor 1 || eauto).
-Qed.
-
-Definition Val_cmplu cmp v1 v2 := Val.of_optbool (Val_cmplu_bool cmp v1 v2).
-
-Lemma Val_cmplu_correct (m:mem) (cmp: comparison) (v1 v2: val):
-   Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp v1 v2))
-               (Val_cmplu cmp v1 v2).
-Proof.
-  unfold Val.cmplu, Val_cmplu.
-  remember (Val.cmplu_bool (Mem.valid_pointer m) cmp v1 v2) as ob.
-  destruct ob as [b|]; simpl.
-  - erewrite Val_cmplu_bool_correct; eauto.
-    simpl. econstructor.
-  - econstructor.
-Qed.
-
-
-
 (** **** Comparing integers *)
 Definition compare_int (t: itest) (v1 v2: val): val :=
   match t with
@@ -890,12 +862,12 @@ Definition compare_int (t: itest) (v1 v2: val): val :=
   | ITge  => Val.cmp Cge v1 v2
   | ITle  => Val.cmp Cle v1 v2
   | ITgt  => Val.cmp Cgt v1 v2
-  | ITneu => Val_cmpu Cne v1 v2
-  | ITequ => Val_cmpu Ceq v1 v2
-  | ITltu => Val_cmpu Clt v1 v2
-  | ITgeu => Val_cmpu Cge v1 v2
-  | ITleu => Val_cmpu Cle v1 v2
-  | ITgtu => Val_cmpu Cgt v1 v2
+  | ITneu => Val.mxcmpu Cne v1 v2
+  | ITequ => Val.mxcmpu Ceq v1 v2
+  | ITltu => Val.mxcmpu Clt v1 v2
+  | ITgeu => Val.mxcmpu Cge v1 v2
+  | ITleu => Val.mxcmpu Cle v1 v2
+  | ITgtu => Val.mxcmpu Cgt v1 v2
   end.
 
 Definition compare_long (t: itest) (v1 v2: val): val :=
@@ -906,12 +878,12 @@ Definition compare_long (t: itest) (v1 v2: val): val :=
   | ITge  => Val.cmpl Cge v1 v2
   | ITle  => Val.cmpl Cle v1 v2
   | ITgt  => Val.cmpl Cgt v1 v2
-  | ITneu => Some (Val_cmplu Cne v1 v2)
-  | ITequ => Some (Val_cmplu Ceq v1 v2)
-  | ITltu => Some (Val_cmplu Clt v1 v2)
-  | ITgeu => Some (Val_cmplu Cge v1 v2)
-  | ITleu => Some (Val_cmplu Cle v1 v2)
-  | ITgtu => Some (Val_cmplu Cgt v1 v2)
+  | ITneu => Some (Val.mxcmplu Cne v1 v2)
+  | ITequ => Some (Val.mxcmplu Ceq v1 v2)
+  | ITltu => Some (Val.mxcmplu Clt v1 v2)
+  | ITgeu => Some (Val.mxcmplu Cge v1 v2)
+  | ITleu => Some (Val.mxcmplu Cle v1 v2)
+  | ITgtu => Some (Val.mxcmplu Cgt v1 v2)
   end in 
   match res with
   | Some v => v
@@ -1123,13 +1095,13 @@ Definition cmove bt v1 v2 v3 :=
 Definition cmoveu bt v1 v2 v3 :=
   match cmpu_for_btest bt with
   | (Some c, Int)  =>
-    match Val_cmpu_bool c v2 (Vint Int.zero) with
+    match Val.mxcmpu_bool c v2 (Vint Int.zero) with
     | None => Vundef
     | Some true => v3
     | Some false => v1
     end
   | (Some c, Long) =>
-    match Val_cmplu_bool c v2 (Vlong Int64.zero) with
+    match Val.mxcmplu_bool c v2 (Vlong Int64.zero) with
     | None => Vundef
     | Some true => v3
     | Some false => v1
@@ -1426,13 +1398,13 @@ Definition is_label (lbl: label) (bb: bblock) : bool :=
 Lemma is_label_correct_true lbl bb:
   List.In lbl (header bb) <-> is_label lbl bb = true. 
 Proof.
-  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+  unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
 Qed.
 
 Lemma is_label_correct_false lbl bb:
   ~(List.In lbl (header bb)) <-> is_label lbl bb = false. 
 Proof.
-  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+  unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
 Qed.
 
 
@@ -1505,8 +1477,8 @@ Definition parexec_control (f: function) (oc: option control) (rsr rsw: regset)
       end
     | Pcbu bt r l => 
       match cmpu_for_btest bt with
-      | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val_cmpu_bool c rsr#r (Vint (Int.repr 0)))
-      | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val_cmplu_bool c rsr#r (Vlong (Int64.repr 0)))
+      | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.mxcmpu_bool c rsr#r (Vint (Int.repr 0)))
+      | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.mxcmplu_bool c rsr#r (Vlong (Int64.repr 0)))
       | (None, _) => Stuck
       end
     (**r Pseudo-instructions *)
@@ -1548,24 +1520,6 @@ Definition det_parexec (f: function) (bundle: bblock) (rs: regset) (m: mem) rs'
   code.  *)
 
 
-(* FIXME - R16 and R32 are excluded *)
-Definition preg_of (r: mreg) : preg :=
-  match r with
-  | R0  => GPR0  | R1  => GPR1  | R2  => GPR2  | R3  => GPR3  | R4  => GPR4
-  | R5  => GPR5  | R6  => GPR6  | R7  => GPR7  | R8  => GPR8  | R9  => GPR9
-  | R10 => GPR10 | R11 => GPR11 (* | R12 => GPR12 | R13 => GPR13 | R14  => GPR14 *)
-  | R15 => GPR15 (* | R16 => GPR16 *) | R17 => GPR17 | R18 => GPR18 | R19  => GPR19
-  | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24  => GPR24
-  | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29  => GPR29
-  | R30 => GPR30 | R31 => GPR31 (* | R32 => GPR32 *) | R33 => GPR33 | R34  => GPR34
-  | R35 => GPR35 | R36 => GPR36 | R37 => GPR37 | R38 => GPR38 | R39  => GPR39
-  | R40 => GPR40 | R41 => GPR41 | R42 => GPR42 | R43 => GPR43 | R44  => GPR44
-  | R45 => GPR45 | R46 => GPR46 | R47 => GPR47 | R48 => GPR48 | R49  => GPR49
-  | R50 => GPR50 | R51 => GPR51 | R52 => GPR52 | R53 => GPR53 | R54  => GPR54
-  | R55 => GPR55 | R56 => GPR56 | R57 => GPR57 | R58 => GPR58 | R59  => GPR59
-  | R60 => GPR60 | R61 => GPR61 | R62 => GPR62 | R63 => GPR63
-  end.
-
 (** **** Undefine all registers except SP and callee-save registers *)
 
 Definition undef_caller_save_regs (rs: regset) : regset :=
@@ -1667,7 +1621,7 @@ Proof.
    constructor 1.
    - rewrite app_nil_r; auto.
    - unfold parexec_wio_bblock.
-     destruct (parexec_wio f _ _ _); simpl; auto.
+     destruct (parexec_wio f _ _ _); cbn; auto.
 Qed.
 
 
@@ -1739,7 +1693,7 @@ Ltac Det_WIO X :=
       exploit det_parexec_write_in_order; [ eapply H | idtac]; clear H; intro X
   | _ => idtac
   end.
-  intros; constructor; simpl.
+  intros; constructor; cbn.
 - (* determ *) intros s t1 s1 t2 s2 H H0. inv H; Det_WIO X1;
   inv H0; Det_WIO X2; Equalities.
   + split. constructor. auto. 
@@ -1754,7 +1708,7 @@ Ltac Det_WIO X :=
     exploit external_call_determ. eexact H3. eexact H8. intros [A B].
     split. auto. intros. destruct B; auto. subst. auto.
 - (* trace length *)
-  red; intros. inv H; simpl.
+  red; intros. inv H; cbn.
   omega.
   eapply external_call_trace_length; eauto.
   eapply external_call_trace_length; eauto.
diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v
index eeb578d0..441345bf 100644
--- a/kvx/Builtins1.v
+++ b/kvx/Builtins1.v
@@ -24,7 +24,6 @@ Inductive platform_builtin : Type :=
 | BI_fmax
 | BI_fminf
 | BI_fmaxf
-| BI_fabsf
 | BI_fma
 | BI_fmaf.
 
@@ -35,7 +34,6 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
   :: ("__builtin_fmax", BI_fmax)
   :: ("__builtin_fminf", BI_fminf)
   :: ("__builtin_fmaxf", BI_fmaxf)
-  :: ("__builtin_fabsf", BI_fabsf)
   :: ("__builtin_fma", BI_fma)
   :: ("__builtin_fmaf", BI_fmaf)
   :: nil.
@@ -46,8 +44,6 @@ Definition platform_builtin_sig (b: platform_builtin) : signature :=
       mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default
   | BI_fminf | BI_fmaxf =>
       mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default
-  | BI_fabsf =>
-      mksignature (Tsingle :: nil) Tsingle cc_default
   | BI_fma =>
       mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default
   | BI_fmaf =>
@@ -60,7 +56,6 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (pl
   | BI_fmax => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.max
   | BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min
   | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max
-  | BI_fabsf => mkbuiltin_n1t Tsingle Tsingle Float32.abs
   | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma
   | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
   end.
diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml
index fa2f4c60..7398e0f4 100644
--- a/kvx/CBuiltins.ml
+++ b/kvx/CBuiltins.ml
@@ -73,8 +73,10 @@ let builtins = {
       (* "__builtin_kvx_cbs", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
       (* "__builtin_kvx_cbsdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
       (* "__builtin_kvx_clz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
-      "__builtin_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+      "__builtin_kvx_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
       "__builtin_clzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+      "__builtin_kvx_ctzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+      "__builtin_ctzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
       (* "__builtin_kvx_clzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
       (* "__builtin_kvx_cmove", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *)
       (* "__builtin_kvx_ctz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
diff --git a/kvx/CSE2deps.v b/kvx/CSE2deps.v
index b4b80e2f..c0deacf0 100644
--- a/kvx/CSE2deps.v
+++ b/kvx/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/kvx/CSE2depsproof.v b/kvx/CSE2depsproof.v
index f283c8ac..a5f7b317 100644
--- a/kvx/CSE2depsproof.v
+++ b/kvx/CSE2depsproof.v
@@ -71,7 +71,7 @@ Section MEMORY_WRITE.
     unfold largest_size_chunk in *.
 
     rewrite ptrofs_modulus in *.
-    simpl in *.
+    cbn in *.
     inv ADDRR.
     inv ADDRW.
     destruct base; try discriminate.
@@ -123,17 +123,24 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
-  destruct args as [ | base [ | ]]. 1,3: discriminate.
-  destruct args' as [ | base' [ | ]]. 1,3: discriminate.
-  simpl in OVERLAP.
-  destruct (peq base base'). 2: discriminate.
-  subst base'.
-  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
-  2: discriminate.
-  simpl in *.
-  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+  - (* Aindexed / Aindexed *)
+    destruct args as [ | base [ | ]]. 1,3: discriminate.
+    destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+    cbn in OVERLAP.
+    destruct (peq base base'). 2: discriminate.
+    subst base'.
+    destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+    2: discriminate.
+    cbn in *.
+    eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  - (* Ainstack / Ainstack *)
+    destruct args. 2: discriminate.
+    destruct args'. 2: discriminate.
+    cbn in OVERLAP.
+    destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+    2: discriminate.
+    cbn in *.
+    eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/kvx/CombineOpproof.v b/kvx/CombineOpproof.v
index dafc90df..5dffc565 100644
--- a/kvx/CombineOpproof.v
+++ b/kvx/CombineOpproof.v
@@ -46,7 +46,7 @@ Qed.
 Ltac UseGetSound :=
   match goal with
   | [ H: get _ = Some _ |- _ ] =>
-      let x := fresh "EQ" in (generalize (get_op_sound _ _ _ H); intros x; simpl in x; FuncInv)
+      let x := fresh "EQ" in (generalize (get_op_sound _ _ _ H); intros x; cbn in x; FuncInv)
   end.
 
 Lemma combine_compimm_ne_0_sound:
@@ -58,7 +58,7 @@ Proof.
   intros until args. functional induction (combine_compimm_ne_0 get x); intros EQ; inv EQ.
   (* of cmp *)
   UseGetSound. rewrite <- H.
-  destruct (eval_condition cond (map valu args) m); simpl; auto. destruct b; auto.
+  destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto.
 Qed.
 
 Lemma combine_compimm_eq_0_sound:
@@ -71,7 +71,7 @@ Proof.
   (* of cmp *)
   UseGetSound. rewrite <- H.
   rewrite eval_negate_condition.
-  destruct (eval_condition c (map valu args) m); simpl; auto. destruct b; auto.
+  destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto.
 Qed.
 
 Lemma combine_compimm_eq_1_sound:
@@ -83,7 +83,7 @@ Proof.
   intros until args. functional induction (combine_compimm_eq_1 get x); intros EQ; inv EQ.
   (* of cmp *)
   UseGetSound. rewrite <- H.
-  destruct (eval_condition cond (map valu args) m); simpl; auto. destruct b; auto.
+  destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto.
 Qed.
 
 Lemma combine_compimm_ne_1_sound:
@@ -96,7 +96,7 @@ Proof.
   (* of cmp *)
   UseGetSound. rewrite <- H.
   rewrite eval_negate_condition.
-  destruct (eval_condition c (map valu args) m); simpl; auto. destruct b; auto.
+  destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto.
 Qed.
 
 Theorem combine_cond_sound:
@@ -106,21 +106,21 @@ Theorem combine_cond_sound:
 Proof.
   intros. functional inversion H; subst.
   (* compimm ne zero *)
-  - simpl; eapply combine_compimm_ne_0_sound; eauto.
+  - cbn; eapply combine_compimm_ne_0_sound; eauto.
   (* compimm ne one *)
-  - simpl; eapply combine_compimm_ne_1_sound; eauto.
+  - cbn; eapply combine_compimm_ne_1_sound; eauto.
   (* compimm eq zero *)
-  - simpl; eapply combine_compimm_eq_0_sound; eauto.
+  - cbn; eapply combine_compimm_eq_0_sound; eauto.
   (* compimm eq one *)
-  - simpl; eapply combine_compimm_eq_1_sound; eauto.
+  - cbn; eapply combine_compimm_eq_1_sound; eauto.
   (* compuimm ne zero *)
-  - simpl; eapply combine_compimm_ne_0_sound; eauto.
+  - cbn; eapply combine_compimm_ne_0_sound; eauto.
   (* compuimm ne one *)
-  - simpl; eapply combine_compimm_ne_1_sound; eauto.
+  - cbn; eapply combine_compimm_ne_1_sound; eauto.
   (* compuimm eq zero *)
-  - simpl; eapply combine_compimm_eq_0_sound; eauto.
+  - cbn; eapply combine_compimm_eq_0_sound; eauto.
   (* compuimm eq one *)
-  - simpl; eapply combine_compimm_eq_1_sound; eauto.
+  - cbn; eapply combine_compimm_eq_1_sound; eauto.
 Qed.
 
 Theorem combine_addr_sound:
@@ -130,10 +130,10 @@ Theorem combine_addr_sound:
 Proof.
   intros. functional inversion H; subst.
 - (* indexed - addimm *)
-  UseGetSound. simpl. rewrite <- H0. destruct v; auto. simpl; rewrite H7; simpl.
+  UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn.
   rewrite Ptrofs.add_assoc. auto.
 - (* indexed - addimml *)
-  UseGetSound. simpl. rewrite <- H0. destruct v; auto. simpl; rewrite H7; simpl.
+  UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn.
   rewrite Ptrofs.add_assoc. auto.
 Qed.
 
@@ -144,33 +144,33 @@ Theorem combine_op_sound:
 Proof.
   intros. functional inversion H; subst.
   (* addimm - addimm *)
-  - UseGetSound. FuncInv. simpl.
+  - UseGetSound. FuncInv. cbn.
     rewrite <- H0. rewrite Val.add_assoc. auto.
   (* andimm - andimm *)
-  - UseGetSound; simpl.
+  - UseGetSound; cbn.
     generalize (Int.eq_spec p m0); rewrite H7; intros.
-    rewrite <- H0. rewrite Val.and_assoc. simpl. fold p. rewrite H1. auto.
-  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.and_assoc. cbn. fold p. rewrite H1. auto.
+  - UseGetSound; cbn.
     rewrite <- H0. rewrite Val.and_assoc. auto.
   (* orimm - orimm *)
-  - UseGetSound. simpl. rewrite <- H0. rewrite Val.or_assoc. auto.
+  - UseGetSound. cbn. rewrite <- H0. rewrite Val.or_assoc. auto.
   (* xorimm - xorimm *)
-  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xor_assoc. auto.
+  - UseGetSound. cbn. rewrite <- H0. rewrite Val.xor_assoc. auto.
   (* addlimm - addlimm *)
-  - UseGetSound. FuncInv. simpl.
+  - UseGetSound. FuncInv. cbn.
     rewrite <- H0. rewrite Val.addl_assoc. auto.
   (* andlimm - andlimm *)
-  - UseGetSound; simpl.
+  - UseGetSound; cbn.
     generalize (Int64.eq_spec p m0); rewrite H7; intros.
-    rewrite <- H0. rewrite Val.andl_assoc. simpl. fold p. rewrite H1. auto.
-  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.andl_assoc. cbn. fold p. rewrite H1. auto.
+  - UseGetSound; cbn.
     rewrite <- H0. rewrite Val.andl_assoc. auto.
   (* orlimm - orlimm *)
-  - UseGetSound. simpl. rewrite <- H0. rewrite Val.orl_assoc. auto.
+  - UseGetSound. cbn. rewrite <- H0. rewrite Val.orl_assoc. auto.
   (* xorlimm - xorlimm *)
-  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xorl_assoc. auto.
+  - UseGetSound. cbn. rewrite <- H0. rewrite Val.xorl_assoc. auto.
   (* cmp *)
-  - simpl. decEq; decEq. eapply combine_cond_sound; eauto.
+  - cbn. decEq; decEq. eapply combine_cond_sound; eauto.
 Qed.
 
 End COMBINE.
diff --git a/kvx/ConstpropOpproof.v b/kvx/ConstpropOpproof.v
index 05bbdde1..ffd35bcc 100644
--- a/kvx/ConstpropOpproof.v
+++ b/kvx/ConstpropOpproof.v
@@ -105,7 +105,7 @@ Proof.
   + (* global *)
     inv H2. exists (Genv.symbol_address ge id ofs); auto.
   + (* stack *)
-    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.
+    inv H2. exists (Vptr sp ofs); split; auto. cbn. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma cond_strength_reduction_correct:
@@ -115,7 +115,7 @@ Lemma cond_strength_reduction_correct:
   eval_condition cond' e##args' m = eval_condition cond e##args m.
 Proof.
   intros until vl. unfold cond_strength_reduction.
-  case (cond_strength_reduction_match cond args vl); simpl; intros; InvApproxRegs; SimplVM.
+  case (cond_strength_reduction_match cond args vl); cbn; intros; InvApproxRegs; SimplVM.
 - apply Val.swap_cmp_bool.
 - auto.
 - apply Val.swap_cmpu_bool.
@@ -137,7 +137,7 @@ Proof.
   intros. unfold make_cmp_base.
   generalize (cond_strength_reduction_correct c args vl H).
   destruct (cond_strength_reduction c args vl) as [c' args']. intros EQ.
-  econstructor; split. simpl; eauto. rewrite EQ. auto.
+  econstructor; split. cbn; eauto. rewrite EQ. auto.
 Qed.
 
 Lemma make_cmp_correct:
@@ -154,43 +154,43 @@ Proof.
   unfold make_cmp. case (make_cmp_match c args vl); intros.
 - unfold make_cmp_imm_eq.
   destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
-+ simpl in H; inv H. InvBooleans. subst n.
-  exists (e#r1); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ cbn in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 + destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
-* simpl in H; inv H. InvBooleans. subst n.
-  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* cbn in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 * apply make_cmp_base_correct; auto.
 - unfold make_cmp_imm_ne.
   destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
-+ simpl in H; inv H. InvBooleans. subst n.
-  exists (e#r1); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ cbn in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 + destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
-* simpl in H; inv H. InvBooleans. subst n.
-  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* cbn in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 * apply make_cmp_base_correct; auto.
 - unfold make_cmp_imm_eq.
   destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
-+ simpl in H; inv H. InvBooleans. subst n.
-  exists (e#r1); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ cbn in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 + destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
-* simpl in H; inv H. InvBooleans. subst n.
-  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* cbn in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 * apply make_cmp_base_correct; auto.
 - unfold make_cmp_imm_ne.
   destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
-+ simpl in H; inv H. InvBooleans. subst n.
-  exists (e#r1); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
++ cbn in H; inv H. InvBooleans. subst n.
+  exists (e#r1); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 + destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
-* simpl in H; inv H. InvBooleans. subst n.
-  exists (Val.xor e#r1 (Vint Int.one)); split; auto. simpl.
-  exploit Y; eauto. intros [A | [A | A]]; rewrite A; simpl; auto.
+* cbn in H; inv H. InvBooleans. subst n.
+  exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+  exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
 * apply make_cmp_base_correct; auto.
 - apply make_cmp_base_correct; auto.
 Qed.
@@ -203,7 +203,7 @@ Proof.
   intros. unfold make_addimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
   subst. exists (e#r); split; auto.
-  destruct (e#r); simpl; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
+  destruct (e#r); cbn; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
   exists (Val.add e#r (Vint n)); split; auto.
 Qed.
 
@@ -215,10 +215,10 @@ Lemma make_shlimm_correct:
 Proof.
   intros; unfold make_shlimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shl_zero. auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shl_zero. auto.
   destruct (Int.ltu n Int.iwordsize).
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_shrimm_correct:
@@ -229,10 +229,10 @@ Lemma make_shrimm_correct:
 Proof.
   intros; unfold make_shrimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shr_zero. auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shr_zero. auto.
   destruct (Int.ltu n Int.iwordsize).
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_shruimm_correct:
@@ -243,10 +243,10 @@ Lemma make_shruimm_correct:
 Proof.
   intros; unfold make_shruimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.shru_zero. auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shru_zero. auto.
   destruct (Int.ltu n Int.iwordsize).
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_mulimm_correct:
@@ -257,12 +257,12 @@ Lemma make_mulimm_correct:
 Proof.
   intros; unfold make_mulimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (Vint Int.zero); split; auto. destruct (e#r1); simpl; auto. rewrite Int.mul_zero; auto.
+  exists (Vint Int.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_zero; auto.
   predSpec Int.eq Int.eq_spec n Int.one; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.mul_one; auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_one; auto.
   destruct (Int.is_power2 n) eqn:?; intros.
-  rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. simpl; eauto. auto.
-  econstructor; split; eauto. simpl. rewrite H; auto.
+  rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. cbn; eauto. auto.
+  econstructor; split; eauto. cbn. rewrite H; auto.
 Qed.
 
 Lemma make_divimm_correct:
@@ -275,11 +275,11 @@ Proof.
   intros; unfold make_divimm.
   predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
   destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
+    try (rewrite Val.divs_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto);
     inv H; auto.
   destruct (Int.is_power2 n) eqn:?.
   destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl.
+  exists v; split; auto. cbn.
   erewrite Val.divs_pow2; eauto. reflexivity. congruence.
   exists v; auto.
   exists v; auto.
@@ -295,10 +295,10 @@ Proof.
   intros; unfold make_divuimm.
   predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
   destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
+    try (rewrite Val.divu_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto);
     inv H; auto.
   destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
+  econstructor; split. cbn; eauto.
   rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
   exists v; auto.
 Qed.
@@ -312,7 +312,7 @@ Lemma make_moduimm_correct:
 Proof.
   intros; unfold make_moduimm.
   destruct (Int.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq. eapply Val.modu_pow2; eauto. congruence.
+  exists v; split; auto. cbn. decEq. eapply Val.modu_pow2; eauto. congruence.
   exists v; auto.
 Qed.
 
@@ -324,18 +324,18 @@ Lemma make_andimm_correct:
 Proof.
   intros; unfold make_andimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
-  subst n. exists (Vint Int.zero); split; auto. destruct (e#r); simpl; auto. rewrite Int.and_zero; auto.
+  subst n. exists (Vint Int.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_zero; auto.
   predSpec Int.eq Int.eq_spec n Int.mone; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.and_mone; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_mone; auto.
   destruct (match x with Uns _ k => Int.eq (Int.zero_ext k (Int.not n)) Int.zero
                        | _ => false end) eqn:UNS.
   destruct x; try congruence.
   exists (e#r); split; auto.
-  inv H; auto. simpl. replace (Int.and i n) with i; auto.
+  inv H; auto. cbn. replace (Int.and i n) with i; auto.
   generalize (Int.eq_spec (Int.zero_ext n0 (Int.not n)) Int.zero); rewrite UNS; intro EQ.
   Int.bit_solve. destruct (zlt i0 n0).
   replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)).
-  rewrite Int.bits_zero. simpl. rewrite andb_true_r. auto.
+  rewrite Int.bits_zero. cbn. rewrite andb_true_r. auto.
   rewrite <- EQ. rewrite Int.bits_zero_ext by omega. rewrite zlt_true by auto.
   rewrite Int.bits_not by auto. apply negb_involutive.
   rewrite H6 by auto. auto.
@@ -349,9 +349,9 @@ Lemma make_orimm_correct:
 Proof.
   intros; unfold make_orimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.or_zero; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_zero; auto.
   predSpec Int.eq Int.eq_spec n Int.mone; intros.
-  subst n. exists (Vint Int.mone); split; auto. destruct (e#r); simpl; auto. rewrite Int.or_mone; auto.
+  subst n. exists (Vint Int.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_mone; auto.
   econstructor; split; eauto. auto.
 Qed.
 
@@ -362,7 +362,7 @@ Lemma make_xorimm_correct:
 Proof.
   intros; unfold make_xorimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int.xor_zero; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.xor_zero; auto.
   predSpec Int.eq Int.eq_spec n Int.mone; intros.
   subst n. exists (Val.notint e#r); split; auto.
   econstructor; split; eauto. auto.
@@ -376,7 +376,7 @@ Proof.
   intros. unfold make_addlimm.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
   subst. exists (e#r); split; auto.
-  destruct (e#r); simpl; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
+  destruct (e#r); cbn; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
   exists (Val.addl e#r (Vlong n)); split; auto.
 Qed.
 
@@ -388,11 +388,11 @@ Lemma make_shllimm_correct:
 Proof.
   intros; unfold make_shllimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
   unfold Int64.shl'. rewrite Z.shiftl_0_r, Int64.repr_unsigned. auto.
   destruct (Int.ltu n Int64.iwordsize').
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_shrlimm_correct:
@@ -403,11 +403,11 @@ Lemma make_shrlimm_correct:
 Proof.
   intros; unfold make_shrlimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
   unfold Int64.shr'. rewrite Z.shiftr_0_r, Int64.repr_signed. auto.
   destruct (Int.ltu n Int64.iwordsize').
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_shrluimm_correct:
@@ -418,11 +418,11 @@ Lemma make_shrluimm_correct:
 Proof.
   intros; unfold make_shrluimm.
   predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
   unfold Int64.shru'. rewrite Z.shiftr_0_r, Int64.repr_unsigned. auto.
   destruct (Int.ltu n Int64.iwordsize').
-  econstructor; split. simpl. eauto. auto.
-  econstructor; split. simpl. eauto. rewrite H; auto.
+  econstructor; split. cbn. eauto. auto.
+  econstructor; split. cbn. eauto. rewrite H; auto.
 Qed.
 
 Lemma make_mullimm_correct:
@@ -433,15 +433,15 @@ Lemma make_mullimm_correct:
 Proof.
   intros; unfold make_mullimm.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. subst.
-  exists (Vlong Int64.zero); split; auto. destruct (e#r1); simpl; auto. rewrite Int64.mul_zero; auto.
+  exists (Vlong Int64.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_zero; auto.
   predSpec Int64.eq Int64.eq_spec n Int64.one; intros. subst.
-  exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int64.mul_one; auto.
+  exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_one; auto.
   destruct (Int64.is_power2' n) eqn:?; intros.
   exists (Val.shll e#r1 (Vint i)); split; auto.
-  destruct (e#r1); simpl; auto.
+  destruct (e#r1); cbn; auto.
   erewrite Int64.is_power2'_range by eauto.
   erewrite Int64.mul_pow2' by eauto. auto.
-  econstructor; split; eauto. simpl; rewrite H; auto.
+  econstructor; split; eauto. cbn; rewrite H; auto.
 Qed.
 
 Lemma make_divlimm_correct:
@@ -453,7 +453,7 @@ Lemma make_divlimm_correct:
 Proof.
   intros; unfold make_divlimm.
   destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto.
+  rewrite H0 in H. econstructor; split. cbn; eauto.
   erewrite Val.divls_pow2; eauto. auto.
   exists v; auto.
   exists v; auto.
@@ -468,9 +468,9 @@ Lemma make_divluimm_correct:
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
-  econstructor; split. simpl; eauto.
+  econstructor; split. cbn; eauto.
   rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
+  cbn.
   erewrite Int64.is_power2'_range by eauto.    
   erewrite Int64.divu_pow2' by eauto.  auto. 
   exists v; auto.
@@ -485,9 +485,9 @@ Lemma make_modluimm_correct:
 Proof.
   intros; unfold make_modluimm.
   destruct (Int64.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq.
+  exists v; split; auto. cbn. decEq.
   rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. 
-  simpl. erewrite Int64.modu_and by eauto. auto.
+  cbn. erewrite Int64.modu_and by eauto. auto.
   exists v; auto.
 Qed.
 
@@ -498,9 +498,9 @@ Lemma make_andlimm_correct:
 Proof.
   intros; unfold make_andlimm.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
-  subst n. exists (Vlong Int64.zero); split; auto. destruct (e#r); simpl; auto. rewrite Int64.and_zero; auto.
+  subst n. exists (Vlong Int64.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_zero; auto.
   predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.and_mone; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_mone; auto.
   econstructor; split; eauto. auto.
 Qed.
 
@@ -511,9 +511,9 @@ Lemma make_orlimm_correct:
 Proof.
   intros; unfold make_orlimm.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.or_zero; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_zero; auto.
   predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
-  subst n. exists (Vlong Int64.mone); split; auto. destruct (e#r); simpl; auto. rewrite Int64.or_mone; auto.
+  subst n. exists (Vlong Int64.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_mone; auto.
   econstructor; split; eauto. auto.
 Qed.
 
@@ -524,7 +524,7 @@ Lemma make_xorlimm_correct:
 Proof.
   intros; unfold make_xorlimm.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
-  subst n. exists (e#r); split; auto. destruct (e#r); simpl; auto. rewrite Int64.xor_zero; auto.
+  subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.xor_zero; auto.
   predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
   subst n. exists (Val.notl e#r); split; auto.
   econstructor; split; eauto. auto.
@@ -538,9 +538,9 @@ Lemma make_mulfimm_correct:
 Proof.
   intros; unfold make_mulfimm.
   destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
-  simpl. econstructor; split. eauto. rewrite H; subst n.
-  destruct (e#r1); simpl; auto. rewrite Float.mul2_add; auto.
-  simpl. econstructor; split; eauto.
+  cbn. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); cbn; auto. rewrite Float.mul2_add; auto.
+  cbn. econstructor; split; eauto.
 Qed.
 
 Lemma make_mulfimm_correct_2:
@@ -551,10 +551,10 @@ Lemma make_mulfimm_correct_2:
 Proof.
   intros; unfold make_mulfimm.
   destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
-  simpl. econstructor; split. eauto. rewrite H; subst n.
-  destruct (e#r2); simpl; auto. rewrite Float.mul2_add; auto.
+  cbn. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); cbn; auto. rewrite Float.mul2_add; auto.
   rewrite Float.mul_commut; auto.
-  simpl. econstructor; split; eauto.
+  cbn. econstructor; split; eauto.
 Qed.
 
 Lemma make_mulfsimm_correct:
@@ -565,9 +565,9 @@ Lemma make_mulfsimm_correct:
 Proof.
   intros; unfold make_mulfsimm.
   destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
-  simpl. econstructor; split. eauto. rewrite H; subst n.
-  destruct (e#r1); simpl; auto. rewrite Float32.mul2_add; auto.
-  simpl. econstructor; split; eauto.
+  cbn. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); cbn; auto. rewrite Float32.mul2_add; auto.
+  cbn. econstructor; split; eauto.
 Qed.
 
 Lemma make_mulfsimm_correct_2:
@@ -578,10 +578,10 @@ Lemma make_mulfsimm_correct_2:
 Proof.
   intros; unfold make_mulfsimm.
   destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
-  simpl. econstructor; split. eauto. rewrite H; subst n.
-  destruct (e#r2); simpl; auto. rewrite Float32.mul2_add; auto.
+  cbn. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); cbn; auto. rewrite Float32.mul2_add; auto.
   rewrite Float32.mul_commut; auto.
-  simpl. econstructor; split; eauto.
+  cbn. econstructor; split; eauto.
 Qed.
 
 Lemma make_cast8signed_correct:
@@ -594,8 +594,8 @@ Proof.
   exists e#r; split; auto.
   assert (V: vmatch bc e#r (Sgn Ptop 8)).
   { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
-  econstructor; split; simpl; eauto.
+  inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
+  econstructor; split; cbn; eauto.
 Qed.
 
 Lemma make_cast16signed_correct:
@@ -608,8 +608,8 @@ Proof.
   exists e#r; split; auto.
   assert (V: vmatch bc e#r (Sgn Ptop 16)).
   { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
-  econstructor; split; simpl; eauto.
+  inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
+  econstructor; split; cbn; eauto.
 Qed.
 
 Lemma op_strength_reduction_correct:
@@ -620,7 +620,7 @@ Lemma op_strength_reduction_correct:
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some w /\ Val.lessdef v w.
 Proof.
   intros until v; unfold op_strength_reduction;
-  case (op_strength_reduction_match op args vl); simpl; intros.
+  case (op_strength_reduction_match op args vl); cbn; intros.
 - (* cast8signed *)
   InvApproxRegs; SimplVM; inv H0. apply make_cast8signed_correct; auto.
 - (* cast16signed *)
@@ -733,15 +733,15 @@ Lemma addr_strength_reduction_correct:
   exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
 Proof.
   intros until res. unfold addr_strength_reduction.
-  destruct (addr_strength_reduction_match addr args vl); simpl;
+  destruct (addr_strength_reduction_match addr args vl); cbn;
   intros VL EA; InvApproxRegs; SimplVM; try (inv EA).
 - destruct (orb _ _).
 + exists (Val.offset_ptr e#r1 n); auto.
-+ simpl. rewrite Genv.shift_symbol_address. econstructor; split; eauto. 
-  inv H0; simpl; auto.
++ cbn. rewrite Genv.shift_symbol_address. econstructor; split; eauto. 
+  inv H0; cbn; auto.
 - rewrite Ptrofs.add_zero_l. econstructor; split; eauto.
   change (Vptr sp (Ptrofs.add n1 n)) with (Val.offset_ptr (Vptr sp n1) n). 
-  inv H0; simpl; auto.
+  inv H0; cbn; auto.
 - exists res; auto.
 Qed.
 
diff --git a/kvx/Conventions1.v b/kvx/Conventions1.v
index ab30ded9..0b2cf406 100644
--- a/kvx/Conventions1.v
+++ b/kvx/Conventions1.v
@@ -108,7 +108,7 @@ Lemma loc_result_type:
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
   intros. unfold proj_sig_res, loc_result, mreg_type.
-  destruct (sig_res sig); try destruct Archi.ptr64; simpl; trivial; destruct t; trivial.
+  destruct (sig_res sig); try destruct Archi.ptr64; cbn; trivial; destruct t; trivial.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -118,7 +118,7 @@ Lemma loc_result_caller_save:
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
   intros. unfold loc_result, is_callee_save;
-            destruct (sig_res s); simpl; auto; try destruct Archi.ptr64; simpl; auto; try destruct t; simpl; auto.
+            destruct (sig_res s); cbn; auto; try destruct Archi.ptr64; cbn; auto; try destruct t; cbn; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -296,9 +296,9 @@ Proof.
              OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)).
   { intros until f; intros OR OF OO; red; unfold one_arg; intros.
     destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H.
-  - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. 
+  - subst p; cbn. apply OR. eapply list_nth_z_in; eauto. 
   - eapply OF; eauto. 
-  - subst p; simpl. auto using align_divides, typealign_pos.
+  - subst p; cbn. auto using align_divides, typealign_pos.
   - eapply OF; [idtac|eauto].
     generalize (AL ofs ty OO) (SKK ty); omega.
   }
@@ -310,16 +310,16 @@ Proof.
     assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto).
     assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint)
                      :: f rn' (ofs' + 2))).
-    { red; simpl; intros. destruct H.
-    - subst p; simpl. 
+    { red; cbn; intros. destruct H.
+    - subst p; cbn. 
       repeat split; auto using Z.divide_1_l. omega.
     - eapply OF; [idtac|eauto]. omega.
     }
     destruct (list_nth_z regs rn') as [r1|] eqn:NTH1;
     destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2;
     try apply DFL.
-    red; simpl; intros; destruct H.
-  - subst p; simpl. split; apply OR; eauto using list_nth_z_in.  
+    red; cbn; intros; destruct H.
+  - subst p; cbn. split; apply OR; eauto using list_nth_z_in.  
   - eapply OF; [idtac|eauto]. auto.
   }
   assert (C: forall regs rn ofs ty f,
@@ -327,10 +327,10 @@ Proof.
   { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros.
     set (rn' := align rn 2) in *.
     destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H.
-  - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. 
+  - subst p; cbn. apply OR. eapply list_nth_z_in; eauto. 
   - eapply OF; eauto. 
-  - subst p; simpl. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l. 
-  - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); simpl; omega.
+  - subst p; cbn. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l. 
+  - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); cbn; omega.
   }
   assert (D: OKREGS param_regs).
   { red. decide_goal. }
@@ -339,8 +339,8 @@ Proof.
 
   cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)).
   unfold OK. eauto.
-  induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl.
-  - red; simpl; tauto.
+  induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; cbn.
+  - red; cbn; tauto.
   - destruct ty1.  
 + (* int *) apply A; auto.
 + (* float *) 
@@ -369,10 +369,10 @@ Remark fold_max_outgoing_above:
 Proof.
   assert (A: forall n l, max_outgoing_1 n l >= n).
   { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
-  induction l; simpl; intros. 
+  induction l; cbn; intros. 
   - omega.
   - eapply Zge_trans. eauto.
-    destruct a; simpl. apply A. eapply Zge_trans; eauto.
+    destruct a; cbn. apply A. eapply Zge_trans; eauto.
 Qed.
 
 Lemma size_arguments_above:
@@ -392,14 +392,14 @@ Proof.
   assert (B: forall p n,
              In (S Outgoing ofs ty) (regs_of_rpair p) ->
              ofs + typesize ty <= max_outgoing_2 n p).
-  { intros. destruct p; simpl in H; intuition; subst; simpl.
+  { intros. destruct p; cbn in H; intuition; subst; cbn.
   - xomega.
   - eapply Z.le_trans. 2: apply A. xomega.
   - xomega. }
   assert (C: forall l n,
              In (S Outgoing ofs ty) (regs_of_rpairs l) ->
              ofs + typesize ty <= fold_left max_outgoing_2 l n).
-  { induction l; simpl; intros.
+  { induction l; cbn; intros.
   - contradiction.
   - rewrite in_app_iff in H. destruct H.
   + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above.
diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml
new file mode 120000
index 00000000..ee2674bf
--- /dev/null
+++ b/kvx/ExpansionOracle.ml
@@ -0,0 +1 @@
+../aarch64/ExpansionOracle.ml
+\ No newline at end of file
diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v
index 3664c00a..a0c10ddd 100644
--- a/kvx/ExtValues.v
+++ b/kvx/ExtValues.v
@@ -62,10 +62,10 @@ Lemma shift1_4_of_z_correct :
     end.
 Proof.
   intro. unfold shift1_4_of_z.
-  destruct (Z.eq_dec _ _); simpl; try congruence.
-  destruct (Z.eq_dec _ _); simpl; try congruence.
-  destruct (Z.eq_dec _ _); simpl; try congruence.
-  destruct (Z.eq_dec _ _); simpl; try congruence.
+  destruct (Z.eq_dec _ _); cbn; try congruence.
+  destruct (Z.eq_dec _ _); cbn; try congruence.
+  destruct (Z.eq_dec _ _); cbn; try congruence.
+  destruct (Z.eq_dec _ _); cbn; try congruence.
   trivial.
 Qed.
 
@@ -215,19 +215,19 @@ Theorem divu_is_divlu: forall v1 v2 : val,
     end.
 Proof.
   intros.
-  destruct v1; simpl; trivial.
-  destruct v2; simpl; trivial.
+  destruct v1; cbn; trivial.
+  destruct v2; cbn; trivial.
   destruct i as [i_val i_range].
   destruct i0 as [i0_val i0_range].
-  simpl.
+  cbn.
   unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
-  simpl.
+  cbn.
   rewrite Int.unsigned_repr by (compute; split; discriminate).
   rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate).
   rewrite (unsigned64_repr i0_val) by assumption.
-  destruct (zeq i0_val 0) as [ | Hnot0]; simpl; trivial.
+  destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial.
   f_equal. f_equal.
-  unfold Int.divu, Int64.divu. simpl.
+  unfold Int.divu, Int64.divu. cbn.
   rewrite (unsigned64_repr i_val) by assumption.
   rewrite (unsigned64_repr i0_val) by assumption.
   unfold Int64.loword.
@@ -260,19 +260,19 @@ Theorem modu_is_modlu: forall v1 v2 : val,
     end.
 Proof.
   intros.
-  destruct v1; simpl; trivial.
-  destruct v2; simpl; trivial.
+  destruct v1; cbn; trivial.
+  destruct v2; cbn; trivial.
   destruct i as [i_val i_range].
   destruct i0 as [i0_val i0_range].
-  simpl.
+  cbn.
   unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
-  simpl.
+  cbn.
   rewrite Int.unsigned_repr by (compute; split; discriminate).
   rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate).
   rewrite (unsigned64_repr i0_val) by assumption.
-  destruct (zeq i0_val 0) as [ | Hnot0]; simpl; trivial.
+  destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial.
   f_equal. f_equal.
-  unfold Int.modu, Int64.modu. simpl.
+  unfold Int.modu, Int64.modu. cbn.
   rewrite (unsigned64_repr i_val) by assumption.
   rewrite (unsigned64_repr i0_val) by assumption.
   unfold Int64.loword.
@@ -347,19 +347,19 @@ Theorem divs_is_divls: forall v1 v2 : val,
     end.
 Proof.
   intros.
-  destruct v1; simpl; trivial.
-  destruct v2; simpl; trivial.
+  destruct v1; cbn; trivial.
+  destruct v2; cbn; trivial.
   destruct i as [i_val i_range].
   destruct i0 as [i0_val i0_range].
-  simpl.
+  cbn.
   unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
-  simpl.
+  cbn.
   replace (Int.unsigned (Int.repr 0)) with 0 in * by reflexivity.
-  destruct (zeq _ _) as [H0' | Hnot0]; simpl; trivial.
-  destruct (zeq i_val (Int.unsigned (Int.repr Int.min_signed))) as [Hmin | Hnotmin]; simpl.
+  destruct (zeq _ _) as [H0' | Hnot0]; cbn; trivial.
+  destruct (zeq i_val (Int.unsigned (Int.repr Int.min_signed))) as [Hmin | Hnotmin]; cbn.
   { subst.
     destruct (zeq i0_val (Int.unsigned Int.mone)) as [Hmone | Hnotmone]; trivial.
-    unfold Int.signed. simpl.
+    unfold Int.signed. cbn.
     replace (Int64.unsigned (Int64.repr 0)) with 0 in * by reflexivity.
     rewrite if_zlt_min_signed_half_modulus.
     replace (if
@@ -370,7 +370,7 @@ Proof.
              (Int64.unsigned (Int64.repr Int64.min_signed))
           then true
               else false) with false by reflexivity.
-    simpl.
+    cbn.
     rewrite orb_false_r.
     destruct (zlt i0_val Int.half_modulus) as [Hlt_half | Hge_half].
     {
@@ -380,7 +380,7 @@ Proof.
       unfold Val.loword.
       f_equal.
       unfold Int64.divs, Int.divs, Int64.loword.
-      unfold Int.signed, Int64.signed. simpl.      
+      unfold Int.signed, Int64.signed. cbn.      
       rewrite if_zlt_min_signed_half_modulus.
       change Int.half_modulus with 2147483648 in *.
       destruct (zlt _ _) as [discard|]; try omega. clear discard.
@@ -390,7 +390,7 @@ Proof.
       with 18446744071562067968.
       change Int64.half_modulus with 9223372036854775808.
       change Int64.modulus with 18446744073709551616.
-      simpl.
+      cbn.
       rewrite (Int64.unsigned_repr i0_val) by (change Int64.max_unsigned with 18446744073709551615; omega).
       destruct (zlt i0_val 9223372036854775808) as [discard |]; try omega.
       clear discard.
@@ -449,7 +449,7 @@ Lemma big_unsigned_signed:
 Proof.
   destruct x as [xval xrange].
   intro BIG.
-  unfold Int.signed, Int.unsigned in *. simpl in *.
+  unfold Int.signed, Int.unsigned in *. cbn in *.
   destruct (zlt _ _).
   omega.
   trivial.
@@ -499,10 +499,10 @@ Lemma divs_is_quot: forall v1 v2 : val,
     end.
 
 Proof.
-  destruct v1; destruct v2; simpl; trivial.
+  destruct v1; destruct v2; cbn; trivial.
   unfold Int.divs.
   rewrite signed_0_eqb.
-  destruct (Int.eq i0 Int.zero) eqn:Eeq0; simpl; trivial.
+  destruct (Int.eq i0 Int.zero) eqn:Eeq0; cbn; trivial.
   destruct (Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone) eqn:EXCEPTION.
   { replace (Int.signed i0) with (-1).
     replace (Int.signed i) with Int.min_signed.
@@ -523,7 +523,7 @@ Proof.
     unfold Int.eq in EXCEPTION.
     destruct (zeq _ _) in EXCEPTION; try discriminate.
     destruct (zeq _ _) as [Hmone | ]  in EXCEPTION; try discriminate.
-    destruct i0 as [i0val i0range]; unfold Int.signed in *; simpl in *.
+    destruct i0 as [i0val i0range]; unfold Int.signed in *; cbn in *.
     rewrite Hmone.
     reflexivity.
   }
@@ -651,7 +651,7 @@ Qed.
 Lemma sub_add_neg :
   forall x y, Val.sub x y = Val.add x (Val.neg y).
 Proof.
-  destruct x; destruct y; simpl; trivial.
+  destruct x; destruct y; cbn; trivial.
   f_equal.
   apply Int.sub_add_opp.
 Qed.
@@ -659,7 +659,7 @@ Qed.
 Lemma neg_mul_distr_r :
   forall x y, Val.neg (Val.mul x y) = Val.mul x (Val.neg y).
 Proof.
-  destruct x; destruct y; simpl; trivial.
+  destruct x; destruct y; cbn; trivial.
   f_equal.
   apply Int.neg_mul_distr_r.
 Qed.
@@ -668,7 +668,7 @@ Qed.
 Lemma sub_addl_negl :
   forall x y, Val.subl x y = Val.addl x (Val.negl y).
 Proof.
-  destruct x; destruct y; simpl; trivial.
+  destruct x; destruct y; cbn; trivial.
   + f_equal. apply Int64.sub_add_opp.
   + destruct (Archi.ptr64) eqn:ARCHI64; trivial.
     f_equal. rewrite Ptrofs.sub_add_opp.
@@ -681,15 +681,15 @@ Proof.
     rewrite Hagree2.
     reflexivity.
     exact (Ptrofs.agree64_of_int ARCHI64 i0).
-  +  destruct (Archi.ptr64) eqn:ARCHI64; simpl; trivial.
-     destruct (eq_block _ _); simpl; trivial.
+  +  destruct (Archi.ptr64) eqn:ARCHI64; cbn; trivial.
+     destruct (eq_block _ _); cbn; trivial.
 Qed.
  *)
 
 Lemma negl_mull_distr_r :
   forall x y, Val.negl (Val.mull x y) = Val.mull x (Val.negl y).
 Proof.
-  destruct x; destruct y; simpl; trivial.
+  destruct x; destruct y; cbn; trivial.
   f_equal.
   apply Int64.neg_mul_distr_r.
 Qed.
diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml
index 76956959..e3b18181 100644
--- a/kvx/Machregsaux.ml
+++ b/kvx/Machregsaux.ml
@@ -27,14 +27,6 @@ let _ =
 
 let is_scratch_register r = false
 
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
-
 let class_of_type = function
   | AST.Tint | AST.Tlong 
   | AST.Tfloat | AST.Tsingle -> 0
diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/kvx/Machregsaux.mli
+++ b/kvx/Machregsaux.mli
@@ -12,9 +12,6 @@
 
 (** Auxiliary functions on machine registers *)
 
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
 val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
 
 val class_of_type: AST.typ -> int
diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v
index 4c354d5a..f636336d 100644
--- a/kvx/NeedOp.v
+++ b/kvx/NeedOp.v
@@ -229,7 +229,7 @@ Lemma needs_of_condition0_sound:
 Proof.
   intros until arg2.
   intros Hcond Hagree.
-  apply eval_condition0_inj with (f := inject_id) (m1 := m1) (v1 := arg1); simpl; auto.
+  apply eval_condition0_inj with (f := inject_id) (m1 := m1) (v1 := arg1); cbn; auto.
   apply val_inject_lessdef. apply lessdef_vagree. assumption.
 Qed.
 
@@ -239,7 +239,7 @@ Lemma addl_sound:
   vagree (Val.addl v1 v2) (Val.addl w1 w2) x.
 Proof.
   unfold default; intros.
-  destruct x; simpl in *; trivial.
+  destruct x; cbn in *; trivial.
   - unfold Val.addl.
     destruct v1; destruct v2; trivial; destruct Archi.ptr64; trivial.
   - apply Val.addl_lessdef; trivial.
@@ -249,7 +249,7 @@ Lemma subl_lessdef:
   forall v1 v1' v2 v2',
   Val.lessdef v1 v1' -> Val.lessdef v2 v2' -> Val.lessdef (Val.subl v1 v2) (Val.subl v1' v2').
 Proof.
-  intros. inv H. inv H0. auto. destruct v1'; simpl; auto. simpl; auto.
+  intros. inv H. inv H0. auto. destruct v1'; cbn; auto. cbn; auto.
 Qed.
 
 Lemma subl_sound:
@@ -258,10 +258,10 @@ Lemma subl_sound:
   vagree (Val.subl v1 v2) (Val.subl w1 w2) x.
 Proof.
   unfold default; intros.
-  destruct x; simpl in *; trivial.
+  destruct x; cbn in *; trivial.
   - unfold Val.subl.
-    destruct v1; destruct v2; trivial; destruct Archi.ptr64; simpl; trivial.
-    destruct (eq_block _ _) ; simpl; trivial.
+    destruct v1; destruct v2; trivial; destruct Archi.ptr64; cbn; trivial.
+    destruct (eq_block _ _) ; cbn; trivial.
   - apply subl_lessdef; trivial.
 Qed.
 
@@ -272,7 +272,7 @@ Lemma mull_sound:
   vagree (Val.mull v1 v2) (Val.mull w1 w2) x.
 Proof.
   unfold default; intros.
-  destruct x; simpl in *; trivial.
+  destruct x; cbn in *; trivial.
   - unfold Val.mull.
     destruct v1; destruct v2; trivial.
   - unfold Val.mull.
@@ -284,7 +284,7 @@ Qed.
 
 Remark default_idem: forall nv, default (default nv) = default nv.
 Proof.
-  destruct nv; simpl; trivial.
+  destruct nv; cbn; trivial.
 Qed.
 
 Lemma vagree_triple_op_float :
@@ -298,14 +298,14 @@ Proof.
   induction nv;
   intros Hax Hby Hcz.
   - trivial.
-  - simpl in *. destruct a; simpl; trivial.
-    destruct b; simpl; trivial.
-    destruct c; simpl; trivial.
-  - simpl in *. destruct a; simpl; trivial.
-    destruct b; simpl; trivial.
-    destruct c; simpl; trivial.
+  - cbn in *. destruct a; cbn; trivial.
+    destruct b; cbn; trivial.
+    destruct c; cbn; trivial.
+  - cbn in *. destruct a; cbn; trivial.
+    destruct b; cbn; trivial.
+    destruct c; cbn; trivial.
     inv Hax. inv Hby. inv Hcz.
-    simpl.
+    cbn.
     constructor.
 Qed.
 
@@ -320,14 +320,14 @@ Proof.
   induction nv;
   intros Hax Hby Hcz.
   - trivial.
-  - simpl in *. destruct a; simpl; trivial.
-    destruct b; simpl; trivial.
-    destruct c; simpl; trivial.
-  - simpl in *. destruct a; simpl; trivial.
-    destruct b; simpl; trivial.
-    destruct c; simpl; trivial.
+  - cbn in *. destruct a; cbn; trivial.
+    destruct b; cbn; trivial.
+    destruct c; cbn; trivial.
+  - cbn in *. destruct a; cbn; trivial.
+    destruct b; cbn; trivial.
+    destruct c; cbn; trivial.
     inv Hax. inv Hby. inv Hcz.
-    simpl.
+    cbn.
     constructor.
 Qed.
 
@@ -343,7 +343,7 @@ Lemma needs_of_operation_sound:
   /\ vagree v v' nv.
 Proof.
   unfold needs_of_operation; intros; destruct op; try (eapply default_needs_of_operation_sound; eauto; fail);
-  simpl in *; FuncInv; InvAgree; TrivialExists.
+  cbn in *; FuncInv; InvAgree; TrivialExists.
 - apply sign_ext_sound; auto. compute; auto. 
 - apply sign_ext_sound; auto. compute; auto. 
 - apply add_sound; auto.
@@ -384,17 +384,17 @@ Proof.
 - destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
   erewrite needs_of_condition0_sound by eauto.
   apply select_sound; auto.
-  simpl; auto with na.
+  cbn; auto with na.
   (* select imm *)
 - destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
   { erewrite needs_of_condition0_sound by eauto.
   apply select_sound; auto with na. }
-  simpl; auto with na.
+  cbn; auto with na.
   (* select long imm *)
 - destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
   { erewrite needs_of_condition0_sound by eauto.
   apply select_sound; auto with na. }
-  simpl; auto with na.
+  cbn; auto with na.
 Qed.
 
 Lemma operation_is_redundant_sound:
@@ -404,7 +404,7 @@ Lemma operation_is_redundant_sound:
   vagree_list (arg1 :: args) (arg1' :: args') (needs_of_operation op nv) ->
   vagree v arg1' nv.
 Proof.
-  intros. destruct op; simpl in *; try discriminate; inv H1; FuncInv; subst.
+  intros. destruct op; cbn in *; try discriminate; inv H1; FuncInv; subst.
 - apply sign_ext_redundant_sound; auto. omega.
 - apply sign_ext_redundant_sound; auto. omega.
 - apply andimm_redundant_sound; auto.
diff --git a/kvx/Op.v b/kvx/Op.v
index 544bb081..4458adb3 100644
--- a/kvx/Op.v
+++ b/kvx/Op.v
@@ -508,9 +508,9 @@ Qed.
 Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
-      destruct x; simpl in H; FuncInv
+      destruct x; cbn in H; FuncInv
   | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
-      destruct v; simpl in H; FuncInv
+      destruct v; cbn in H; FuncInv
   | H: (if Archi.ptr64 then _ else _) = Some _ |- _ =>
       destruct Archi.ptr64 eqn:?; FuncInv
   | H: (Some _ = Some _) |- _ =>
@@ -727,27 +727,27 @@ Qed.
 Remark type_sub:
   forall v1 v2, Val.has_type (Val.sub v1 v2) Tint.
 Proof.
-  intros. unfold Val.has_type, Val.sub. destruct Archi.ptr64, v1, v2; simpl; auto.
+  intros. unfold Val.has_type, Val.sub. destruct Archi.ptr64, v1, v2; cbn; auto.
   destruct (eq_block _ _); auto.
 Qed.
 
 Remark type_subl:
   forall v1 v2, Val.has_type (Val.subl v1 v2) Tlong.
 Proof.
-  intros. unfold Val.has_type, Val.subl. destruct Archi.ptr64, v1, v2; simpl; auto.
+  intros. unfold Val.has_type, Val.subl. destruct Archi.ptr64, v1, v2; cbn; auto.
   destruct (eq_block _ _); auto.
 Qed.
 
 Remark type_shl:
   forall v1 v2, Val.has_type (Val.shl v1 v2) Tint.
 Proof.
-  destruct v1, v2; simpl; trivial; destruct (Int.ltu _ _); simpl; trivial.
+  destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial.
 Qed.
 
 Remark type_shll:
   forall v1 v2, Val.has_type (Val.shll v1 v2) Tlong.
 Proof.
-  destruct v1, v2; simpl; trivial; destruct (Int.ltu _ _); simpl; trivial.
+  destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial.
 Qed.
 
 Lemma type_of_operation_sound:
@@ -757,7 +757,7 @@ Lemma type_of_operation_sound:
   Val.has_type v (snd (type_of_operation op)).
 Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   intros.
-  destruct op; simpl; simpl in H0; FuncInv; subst; simpl.
+  destruct op; cbn; cbn in H0; FuncInv; subst; cbn.
   (* move *)
   - congruence.
   (* intconst, longconst, floatconst, singleconst *)
@@ -777,30 +777,30 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_add.
   (* addx, addximm *)
   - apply type_add.
-  - destruct v0; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   (* neg, sub *)
   - destruct v0...
   - apply type_sub.
   (* revsubimm, revsubx, revsubximm *)
   - destruct v0...
   - apply type_sub.
-  - destruct v0; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   (* mul, mulimm, mulhs, mulhu *)
   - destruct v0; destruct v1...
   - destruct v0...
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* div, divu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
+  - destruct v0; destruct v1; cbn in *; inv H0.
+    destruct (_ || _); inv H2...
+  - destruct v0; destruct v1; cbn in *; inv H0.
     destruct (Int.eq i0 Int.zero); inv H2...
   (* mod, modu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
+  - destruct v0; destruct v1; cbn in *; inv H0.
+    destruct (_ || _); inv H2...
+  - destruct v0; destruct v1; cbn in *; inv H0.
     destruct (Int.eq i0 Int.zero); inv H2...
   (* and, andimm *)
   - destruct v0; destruct v1...
@@ -829,18 +829,18 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0...
   (* shl, shlimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
   (* shr, shrimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
   (* shru, shruimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
   (* shrx *)
-  - destruct v0; simpl... destruct (Int.ltu n (Int.repr 31)); simpl; trivial.
+  - destruct v0; cbn... destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   (* shrimm *)
-  - destruct v0; simpl...   
+  - destruct v0; cbn...   
   (* madd *)
   - apply type_add.
   - apply type_add.
@@ -858,13 +858,13 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - apply type_addl.
   (* addxl addxlimm *)
   - apply type_addl.
-  - destruct v0; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   (* negl, subl *)
   - destruct v0...
   - apply type_subl.
-  - destruct v0; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   - destruct v0...
   - apply type_subl.
   (* mull, mullhs, mullhu *)
@@ -873,14 +873,14 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* divl, divlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
+  - destruct v0; destruct v1; cbn in *; inv H0.
+    destruct (_ || _); inv H2...
+  - destruct v0; destruct v1; cbn in *; inv H0.
     destruct (Int64.eq i0 Int64.zero); inv H2...
   (* modl, modlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
+  - destruct v0; destruct v1; cbn in *; inv H0.
+    destruct (_ || _); inv H2...
+  - destruct v0; destruct v1; cbn in *; inv H0.
     destruct (Int64.eq i0 Int64.zero); inv H2...
   (* andl, andlimm *)
   - destruct v0; destruct v1...
@@ -909,16 +909,16 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0...
   (* shll, shllimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
   (* shr, shrimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
   (* shru, shruimm *)
-  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
   (* shrxl *)
-  - destruct v0; simpl... destruct (Int.ltu n (Int.repr 63)); simpl; trivial.
+  - destruct v0; cbn... destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   (* maddl, maddlim *)
   - apply type_addl.
   - apply type_addl.
@@ -960,59 +960,59 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl... destruct (Float.to_int f); simpl; trivial.
-  - destruct v0; simpl... destruct (Float.to_intu f); simpl; trivial.
+  - destruct v0; cbn... destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn... destruct (Float.to_intu f); cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl...  destruct (Float32.to_int f); simpl; trivial.
-  - destruct v0; simpl...  destruct (Float32.to_intu f); simpl; trivial.
+  - destruct v0; cbn...  destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn...  destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl...
-  - destruct v0; simpl...
+  - destruct v0; cbn...
+  - destruct v0; cbn...
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl... destruct (Float.to_long f); simpl; trivial.
-  - destruct v0; simpl... destruct (Float.to_longu f); simpl; trivial.
+  - destruct v0; cbn... destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn... destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl...
-  - destruct v0; simpl...
+  - destruct v0; cbn...
+  - destruct v0; cbn...
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl... destruct (Float32.to_long f); simpl; trivial.
-  - destruct v0; simpl... destruct (Float32.to_longu f); simpl; trivial.
+  - destruct v0; cbn... destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn... destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl...
-  - destruct v0; simpl...
+  - destruct v0; cbn...
+  - destruct v0; cbn...
   (* cmp *)
   - destruct (eval_condition cond vl m)... destruct b...
  (* extfz *)
   - unfold extfz.
     destruct (is_bitfield _ _).
-    + destruct v0; simpl; trivial.
+    + destruct v0; cbn; trivial.
     + constructor.
  (* extfs *)
   - unfold extfs.
     destruct (is_bitfield _ _).
-    + destruct v0; simpl; trivial.
+    + destruct v0; cbn; trivial.
     + constructor.
  (* extfzl *)
   - unfold extfzl.
     destruct (is_bitfieldl _ _).
-    + destruct v0; simpl; trivial.
+    + destruct v0; cbn; trivial.
     + constructor.
  (* extfsl *)
   - unfold extfsl.
     destruct (is_bitfieldl _ _).
-    + destruct v0; simpl; trivial.
+    + destruct v0; cbn; trivial.
     + constructor.
  (* insf *)
   - unfold insf, bitfield_mask.
     destruct (is_bitfield _ _).
-    + destruct v0; destruct v1; simpl; trivial.
-      destruct (Int.ltu _ _); simpl; trivial.
+    + destruct v0; destruct v1; cbn; trivial.
+      destruct (Int.ltu _ _); cbn; trivial.
     + constructor.
  (* insf *)
   - unfold insfl, bitfield_mask.
     destruct (is_bitfieldl _ _).
-    + destruct v0; destruct v1; simpl; trivial.
-      destruct (Int.ltu _ _); simpl; trivial.
+    + destruct v0; destruct v1; cbn; trivial.
+      destruct (Int.ltu _ _); cbn; trivial.
     + constructor.
  (* Osel *)
   - unfold Val.select. destruct (eval_condition0 _ _ m).
@@ -1047,7 +1047,7 @@ Lemma is_trapping_op_sound:
     eval_operation genv sp op vl m <> None.
 Proof.
   unfold args_of_operation.
-  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
+  destruct op; destruct eq_operation; intros; cbn in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
@@ -1101,7 +1101,7 @@ Lemma eval_negate_condition:
   forall cond vl m,
   eval_condition (negate_condition cond) vl m = option_map negb (eval_condition cond vl m).
 Proof.
-  intros. destruct cond; simpl.
+  intros. destruct cond; cbn.
   repeat (destruct vl; auto). apply Val.negate_cmp_bool.
   repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
   repeat (destruct vl; auto). apply Val.negate_cmp_bool.
@@ -1147,7 +1147,7 @@ Lemma eval_shift_stack_addressing:
   eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
   eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl.
 Proof.
-  intros. destruct addr; simpl; auto. destruct vl; auto.
+  intros. destruct addr; cbn; auto. destruct vl; auto.
   rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
 Qed.
 
@@ -1156,7 +1156,7 @@ Lemma eval_shift_stack_operation:
   eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m =
   eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m.
 Proof.
-  intros. destruct op; simpl; auto. destruct vl; auto.
+  intros. destruct op; cbn; auto. destruct vl; auto.
   rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
 Qed.
 
@@ -1183,12 +1183,12 @@ Proof.
   assert (A: forall x n,
              Val.offset_ptr x (Ptrofs.add n (Ptrofs.repr delta)) =
              Val.add (Val.offset_ptr x n) (Vint (Int.repr delta))).
-  { intros; destruct x; simpl; auto. rewrite H1. 
+  { intros; destruct x; cbn; auto. rewrite H1. 
     rewrite Ptrofs.add_assoc. f_equal; f_equal; f_equal. symmetry; auto with ptrofs. }
-  destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
+  destruct addr; cbn in H; inv H; cbn in *; FuncInv; subst.
 - rewrite A; auto.
 - unfold Genv.symbol_address. destruct (Genv.find_symbol ge i); auto. 
-  simpl. rewrite H1. f_equal; f_equal; f_equal. symmetry; auto with ptrofs.
+  cbn. rewrite H1. f_equal; f_equal; f_equal. symmetry; auto with ptrofs.
 - rewrite A; auto.
 Qed.
 
@@ -1205,12 +1205,25 @@ Definition is_trivial_op (op: operation) : bool :=
 
 (** Operations that depend on the memory state. *)
 
+Definition cond_depends_on_memory (c: condition) : bool :=
+  match c with
+  | Ccompu _ | Ccompuimm _ _ => negb Archi.ptr64
+  | Ccomplu _ | Ccompluimm _ _  => Archi.ptr64
+  | _ => false
+  end.
+
+Lemma cond_depends_on_memory_correct:
+  forall c args m1 m2,
+  cond_depends_on_memory c = false ->
+  eval_condition c args m1 = eval_condition c args m2.
+Proof.
+  intros; destruct c; cbn; discriminate || reflexivity.
+Qed.
+
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp (Ccompu _) => negb Archi.ptr64
-  | Ocmp (Ccompuimm _ _) => negb Archi.ptr64
-  | Ocmp (Ccomplu _) => Archi.ptr64
-  | Ocmp (Ccompluimm _ _) => Archi.ptr64
+  | Ocmp (Ccompu _ | Ccompuimm _ _) => negb Archi.ptr64
+  | Ocmp (Ccomplu _ | Ccompluimm _ _) => Archi.ptr64
   
   | Osel (Ccompu0 _) _   | Oselimm (Ccompu0 _) _   | Osellimm (Ccompu0 _) _ => negb Archi.ptr64
   | Osel (Ccomplu0 _) _ | Oselimm (Ccomplu0 _) _ | Osellimm (Ccomplu0 _) _ => Archi.ptr64
@@ -1223,21 +1236,51 @@ Lemma op_depends_on_memory_correct:
   op_depends_on_memory op = false ->
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
-  intros until m2. destruct op; simpl; try congruence.
-  - destruct cond; simpl; try congruence;
+  intros until m2. destruct op; cbn; try congruence.
+  - destruct cond; cbn; try congruence;
     intros SF; auto; rewrite ? negb_false_iff in SF;
       unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
-  - destruct c0; simpl; try congruence;
+  - destruct c0; cbn; try congruence;
     intros SF; auto; rewrite ? negb_false_iff in SF;
       unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
-  - destruct c0; simpl; try congruence;
+  - destruct c0; cbn; try congruence;
     intros SF; auto; rewrite ? negb_false_iff in SF;
       unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
-  - destruct c0; simpl; try congruence;
+  - destruct c0; cbn; try congruence;
     intros SF; auto; rewrite ? negb_false_iff in SF;
       unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
 Qed.
 
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op; cbn; try congruence.
+  - intros MEM; destruct cond; cbn; try congruence;
+    repeat (destruct args; cbn; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; cbn; try congruence;
+    repeat (destruct args; cbn; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; cbn; try congruence;
+    repeat (destruct args; cbn; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intros MEM; destruct c0; cbn; try congruence;
+    repeat (destruct args; cbn; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
@@ -1348,19 +1391,19 @@ Lemma eval_condition_inj:
   eval_condition cond vl1 m1 = Some b ->
   eval_condition cond vl2 m2 = Some b.
 Proof.
-  intros. destruct cond; simpl in H0; FuncInv; InvInject; simpl; auto.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
+  intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
 - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
-- inv H3; simpl in H0; inv H0; auto.
+- inv H3; cbn in H0; inv H0; auto.
 - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
 - eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
-- inv H3; simpl in H0; inv H0; auto.
+- inv H3; cbn in H0; inv H0; auto.
 - eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
-- inv H3; inv H2; simpl in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
 Qed.
 
 Lemma eval_condition0_inj:
@@ -1369,10 +1412,10 @@ Lemma eval_condition0_inj:
   eval_condition0 cond v1 m1 = Some b ->
   eval_condition0 cond v2 m2 = Some b.
 Proof.
-  intros. destruct cond; simpl in H0; FuncInv; InvInject; simpl; auto.
-  - inv H; simpl in *; congruence.
+  intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto.
+  - inv H; cbn in *; congruence.
   - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
-  - inv H; simpl in *; congruence.
+  - inv H; cbn in *; congruence.
   - eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
 Qed.
 
@@ -1393,248 +1436,244 @@ Lemma eval_operation_inj:
   eval_operation ge1 sp1 op vl1 m1 = Some v1 ->
   exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2.
 Proof.
-  intros until v1; intros GL; intros. destruct op; simpl in H1; simpl; FuncInv; InvInject; TrivialExists.
+  intros until v1; intros GL; intros. destruct op; cbn in H1; cbn; FuncInv; InvInject; TrivialExists.
   (* addrsymbol *)
-  - apply GL; simpl; auto.
+  - apply GL; cbn; auto.
   (* addrstack *)
   - apply Val.offset_ptr_inject; auto. 
   (* castsigned *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* add, addimm *)
   - apply Val.add_inject; auto.
   - apply Val.add_inject; auto.
   (* addx, addximm *)
   - apply Val.add_inject; trivial.
-    inv H4; inv H2; simpl; try destruct (Int.ltu _ _); simpl; auto.
-  - inv H4; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+    inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   (* neg, sub *)
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
   - apply Val.sub_inject; auto.
   (* revsubimm, revsubx, revsubximm *)
-  - inv H4; simpl; trivial.
+  - inv H4; cbn; trivial.
   - apply Val.sub_inject; trivial.
-    inv H4; inv H2; simpl; try destruct (Int.ltu _ _); simpl; auto.
-  - inv H4; simpl; try destruct (Int.ltu _ _); simpl; auto.
+    inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto.
+  - inv H4; cbn; try destruct (Int.ltu _ _); cbn; auto.
   (* mul, mulimm, mulhs, mulhu *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
+    destruct (_ || _); inv H2.
     TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
     destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
   (* mod, modu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero
-                     || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
+    destruct (_ || _); inv H2.
     TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
     destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
   (* and, andimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* nand, nandimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* or, orimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* nor, norimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* xor, xorimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* nxor, nxorimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* not *)
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
   (* andn, andnimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* orn, ornimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* shl, shlimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
   (* shr, shrimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
   (* shru, shruimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl; auto.
-    destruct (Int.ltu n (Int.repr 31)); inv H; simpl; auto.
+  - inv H4; cbn; auto.
+    destruct (Int.ltu n (Int.repr 31)); inv H; cbn; auto.
   (* rorimm *)
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
   (* madd, maddim *)
-  - inv H2; inv H3; inv H4; simpl; auto.
-  - inv H2; inv H4; simpl; auto.
+  - inv H2; inv H3; inv H4; cbn; auto.
+  - inv H2; inv H4; cbn; auto.
   (* msub *)
   - apply Val.sub_inject; auto.
-    inv H3; inv H2; simpl; auto.
+    inv H3; inv H2; cbn; auto.
   (* makelong, highlong, lowlong *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* cast32 *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* addl, addlimm *)
   - apply Val.addl_inject; auto.
   - apply Val.addl_inject; auto.
   (* addxl, addxlimm *)
   - apply Val.addl_inject; auto.
-    inv H4; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
-  - inv H4; simpl; trivial.
-    destruct (Int.ltu _ _); simpl; trivial.
+    inv H4; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
+  - inv H4; cbn; trivial.
+    destruct (Int.ltu _ _); cbn; trivial.
   (* negl, subl *)
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
   - apply Val.subl_inject; auto.
-    inv H4; inv H2; simpl; trivial;
-    destruct (Int.ltu _ _); simpl; trivial.
-  - inv H4; simpl; trivial;
-      destruct (Int.ltu _ _); simpl; trivial.
-  - inv H4; simpl; auto.
+    inv H4; inv H2; cbn; trivial;
+    destruct (Int.ltu _ _); cbn; trivial.
+  - inv H4; cbn; trivial;
+      destruct (Int.ltu _ _); cbn; trivial.
+  - inv H4; cbn; auto.
   - apply Val.subl_inject; auto.
   (* mull, mullhs, mullhu *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
+    destruct (_ || _); inv H2.
     TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
     destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
   (* modl, modlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero
-                     || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
+    destruct (_ || _); inv H2.
     TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H3; cbn in H1; inv H1. cbn.
     destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
   (* andl, andlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* nandl, nandlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* orl, orlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* norl, norlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* xorl, xorlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* nxorl, nxorlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* notl *)
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
   (* andnl, andnlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* ornl, ornlimm *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; cbn; auto.
   (* shll, shllimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   (* shr, shrimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   (* shru, shruimm *)
-  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   (* shrx *)
-  - inv H4; simpl; auto.
-    destruct (Int.ltu n (Int.repr 63)); simpl; auto.
+  - inv H4; cbn; auto.
+    destruct (Int.ltu n (Int.repr 63)); cbn; auto.
 
   (* maddl, maddlimm *)
   - apply Val.addl_inject; auto.
-    inv H2; inv H3; inv H4; simpl; auto.
+    inv H2; inv H3; inv H4; cbn; auto.
   - apply Val.addl_inject; auto.
-    inv H4; inv H2; simpl; auto.
+    inv H4; inv H2; cbn; auto.
   (* msubl, msublimm *)
   - apply Val.subl_inject; auto.
-    inv H2; inv H3; inv H4; simpl; auto.
+    inv H2; inv H3; inv H4; cbn; auto.
     
   (* negf, absf *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* addf, subf *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* mulf, divf *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* minf, maxf *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* fmaddf, fmsubf *)
-  - inv H4; inv H3; inv H2; simpl; auto.
-  - inv H4; inv H3; inv H2; simpl; auto.
+  - inv H4; inv H3; inv H2; cbn; auto.
+  - inv H4; inv H3; inv H2; cbn; auto.
   (* negfs, absfs *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* addfs, subfs *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* mulfs, divfs *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* minfs, maxfs *)
-  - inv H4; inv H2; simpl; auto.
-  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; cbn; auto.
+  - inv H4; inv H2; cbn; auto.
   (* invfs *)
-  - inv H4; simpl; auto.    
+  - inv H4; cbn; auto.    
   (* fmaddfs, fmsubfs *)
-  - inv H4; inv H3; inv H2; simpl; auto.
-  - inv H4; inv H3; inv H2; simpl; auto.
+  - inv H4; inv H3; inv H2; cbn; auto.
+  - inv H4; inv H3; inv H2; cbn; auto.
   (* singleoffloat, floatofsingle *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl; auto. destruct (Float.to_int f0); simpl; auto.
-  - inv H4; simpl; auto. destruct (Float.to_intu f0); simpl; auto.
+  - inv H4; cbn; auto. destruct (Float.to_int f0); cbn; auto.
+  - inv H4; cbn; auto. destruct (Float.to_intu f0); cbn; auto.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl; auto. destruct (Float32.to_int f0); simpl; auto.
-  - inv H4; simpl; auto. destruct (Float32.to_intu f0); simpl; auto.
+  - inv H4; cbn; auto. destruct (Float32.to_int f0); cbn; auto.
+  - inv H4; cbn; auto. destruct (Float32.to_intu f0); cbn; auto.
   (* singleofint, singleofintu *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl; auto. destruct (Float.to_long f0); simpl; auto.
-  - inv H4; simpl; auto. destruct (Float.to_longu f0); simpl; auto.
+  - inv H4; cbn; auto. destruct (Float.to_long f0); cbn; auto.
+  - inv H4; cbn; auto. destruct (Float.to_longu f0); cbn; auto.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl; auto. destruct (Float32.to_long f0); simpl; auto.
-  - inv H4; simpl; auto. destruct (Float32.to_longu f0); simpl; auto.
+  - inv H4; cbn; auto. destruct (Float32.to_long f0); cbn; auto.
+  - inv H4; cbn; auto. destruct (Float32.to_longu f0); cbn; auto.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl; auto.
-  - inv H4; simpl; auto.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* cmp *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
-    destruct b; simpl; constructor.
-    simpl; constructor.
+    destruct b; cbn; constructor.
+    cbn; constructor.
 
  (* extfz *)
   - unfold extfz.
@@ -1664,16 +1703,16 @@ Proof.
   - unfold insf.
     destruct (is_bitfield _ _).
     + inv H4; inv H2; trivial.
-      simpl. destruct (Int.ltu _ _); trivial.
-      simpl. trivial.
+      cbn. destruct (Int.ltu _ _); trivial.
+      cbn. trivial.
     + trivial.
 
  (* insfl *)
   - unfold insfl.
     destruct (is_bitfieldl _ _).
     + inv H4; inv H2; trivial.
-      simpl. destruct (Int.ltu _ _); trivial.
-      simpl. trivial.
+      cbn. destruct (Int.ltu _ _); trivial.
+      cbn. trivial.
     + trivial.
 
  (* Osel *)
@@ -1711,13 +1750,13 @@ Lemma eval_addressing_inj:
   eval_addressing ge1 sp1 addr vl1 = Some v1 ->
   exists v2, eval_addressing ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
 Proof.
-  intros. destruct addr; simpl in H2; simpl; FuncInv; InvInject; TrivialExists.
+  intros. destruct addr; cbn in H2; cbn; FuncInv; InvInject; TrivialExists.
   - apply Val.addl_inject; trivial.
-    destruct v0; destruct v'0; simpl; trivial; destruct (Int.ltu _ _); simpl; trivial; inv H3.
+    destruct v0; destruct v'0; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial; inv H3.
     apply Val.inject_long.
   - apply Val.addl_inject; auto.
   - apply Val.offset_ptr_inject; auto.
-  - apply H; simpl; auto.
+  - apply H; cbn; auto.
   - apply Val.offset_ptr_inject; auto. 
 Qed.
 
@@ -1732,7 +1771,7 @@ Lemma eval_addressing_inj_none:
   eval_addressing ge2 sp2 addr vl2 = None.
 Proof.
   intros until vl2. intros Hglobal Hinjsp Hinjvl.
-  destruct addr; simpl in *.
+  destruct addr; cbn in *.
   1,2: inv Hinjvl; trivial;
        inv H0; trivial;
        inv H2; trivial;
@@ -1856,7 +1895,7 @@ Lemma eval_addressing_lessdef_none:
   eval_addressing genv sp addr vl2 = None.
 Proof.
   intros until vl2. intros Hlessdef Heval1.
-  destruct addr; simpl in *.
+  destruct addr; cbn in *.
   1, 2, 4, 5: inv Hlessdef; trivial;
   inv H0; trivial;
   inv H2; trivial;
@@ -1941,7 +1980,7 @@ Lemma eval_operation_inject:
   /\ Val.inject f v1 v2.
 Proof.
   intros.
-  rewrite eval_shift_stack_operation. simpl.
+  rewrite eval_shift_stack_operation. cbn.
   eapply eval_operation_inj with (sp1 := Vptr sp1 Ptrofs.zero) (m1 := m1); eauto.
   intros; eapply Mem.valid_pointer_inject_val; eauto.
   intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
diff --git a/kvx/OpWeights.ml b/kvx/OpWeights.ml
new file mode 100644
index 00000000..23c2e5d3
--- /dev/null
+++ b/kvx/OpWeights.ml
@@ -0,0 +1,115 @@
+open Op;;
+open PostpassSchedulingOracle;;
+open PrepassSchedulingOracleDeps;;
+
+module KV3 =
+  struct
+let resource_bounds = PostpassSchedulingOracle.resource_bounds;;
+let nr_non_pipelined_units = 0;;
+
+let rec nlist_rec x l = function
+  | 0 -> l
+  | n when n > 0 -> nlist_rec x (x :: l) (n-1)
+  | _ -> failwith "nlist_rec";;
+let nlist x n = nlist_rec x [] n;;
+
+let bogus_register = Machregs.R0;;
+let bogus_inputs n = nlist bogus_register n;;
+
+let insns_of_op (op : operation) (nargs : int) =
+  match Asmblockgen.transl_op op
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_op"
+  | Errors.OK insns -> insns;;
+
+let insn_of_op op nargs =
+  match insns_of_op op nargs with
+  | [] -> failwith "OpWeights.insn_of_op"
+  | h::_ -> h;;
+
+let insns_of_cond (cond : condition) (nargs : int) =
+  match Asmblockgen.transl_cond_op cond
+           Asmvliw.GPR0 (bogus_inputs nargs) [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_cond"
+  | Errors.OK insns -> insns;;
+
+let insn_of_cond cond nargs =
+  match insns_of_cond cond nargs with
+  | [] -> failwith "OpWeights.insn_of_cond"
+  | h::_ -> h;;
+
+let insns_of_load trap chunk addressing (nargs : int) =
+  match Asmblockgen.transl_load trap chunk addressing
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_load"
+  | Errors.OK insns -> insns;;
+
+let insn_of_load trap chunk addressing nargs =
+  match insns_of_load trap chunk addressing nargs with
+  | [] -> failwith "OpWeights.insn_of_load"
+  | h::_ -> h;;
+
+let insns_of_store chunk addressing (nargs : int) =
+  match Asmblockgen.transl_store chunk addressing
+          (bogus_inputs nargs) bogus_register [] with
+  | Errors.Error msg -> failwith "OpWeights.insns_of_store"
+  | Errors.OK insns -> insns;;
+
+let insn_of_store chunk addressing nargs =
+  match insns_of_store chunk addressing nargs with
+  | [] -> failwith "OpWeights.insn_of_store"
+  | h::_ -> h;;
+
+let latency_of_op (op : operation) (nargs : int) =
+  let insn = insn_of_op op nargs in
+  let record = basic_rec insn in
+  let latency = real_inst_to_latency record.inst in
+  latency;;
+
+let resources_of_op (op : operation) (nargs : int) =
+  let insn = insn_of_op op nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+  
+let non_pipelined_resources_of_op (op : operation) (nargs : int) = [| |]
+
+let resources_of_cond (cond : condition) (nargs : int) =
+  let insn = insn_of_cond cond nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+let latency_of_call _ _ = 6;;
+
+let resources_of_load trap chunk addressing nargs =
+  let insn = insn_of_load trap chunk addressing nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let resources_of_store chunk addressing nargs =
+  let insn = insn_of_store chunk addressing nargs in
+  let record = basic_rec insn in
+  rec_to_usage record;;
+
+let resources_of_call _ _ = resource_bounds;;
+let resources_of_builtin _ = resource_bounds;;
+  end;;
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "kv3" | "" ->
+     {
+       pipelined_resource_bounds = KV3.resource_bounds;
+       nr_non_pipelined_units = KV3.nr_non_pipelined_units;
+       latency_of_op = KV3.latency_of_op;
+       resources_of_op = KV3.resources_of_op;
+       non_pipelined_resources_of_op = KV3.non_pipelined_resources_of_op;
+       latency_of_load = KV3.latency_of_load;
+       resources_of_load = KV3.resources_of_load;
+       resources_of_store = KV3.resources_of_store;
+       resources_of_cond = KV3.resources_of_cond;
+       latency_of_call = KV3.latency_of_call;
+       resources_of_call = KV3.resources_of_call;
+       resources_of_builtin = KV3.resources_of_builtin
+     }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/kvx/Peephole.v b/kvx/Peephole.v
index 35f4bbd9..5adb823b 100644
--- a/kvx/Peephole.v
+++ b/kvx/Peephole.v
@@ -153,6 +153,6 @@ Program Definition optimize_bblock (bb : bblock) :=
      exit := exit bb |}.
 Next Obligation.
   destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf.
-  - rewrite Rwf. simpl. trivial.
+  - rewrite Rwf. cbn. trivial.
   - exact (correct bb).
 Qed.
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..912e9ffa
--- /dev/null
+++ b/kvx/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
new file mode 120000
index 00000000..1e955b85
--- /dev/null
+++ b/kvx/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracleDeps.ml
+\ No newline at end of file
diff --git a/kvx/RTLpathSE_simplify.v b/kvx/RTLpathSE_simplify.v
new file mode 120000
index 00000000..55bf0e52
--- /dev/null
+++ b/kvx/RTLpathSE_simplify.v
@@ -0,0 +1 @@
+../aarch64/RTLpathSE_simplify.v
+\ No newline at end of file
diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp
index 9e5d45a0..aa241c1e 100644
--- a/kvx/SelectOp.vp
+++ b/kvx/SelectOp.vp
@@ -103,8 +103,14 @@ Nondetfunction select0 (ty : typ) (cond0 : condition0) (e1 e2 e3: expr) :=
   | _, _, _ => (Eop (Osel cond0 ty) (e1 ::: e2 ::: e3 ::: Enil))
   end.
 
+Definition same_expr_pure (e1 e2: expr) :=
+  match e1, e2 with
+  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
+  | _, _ => false
+  end.
+
 Definition select (ty : typ) (cond : condition) (args : exprlist) (e1 e2: expr)  : option expr :=
-  Some(
+  Some (if same_expr_pure e1 e2 then e1 else
   match cond_to_condition0 cond args with
   | None => select0 ty (Ccomp0 Cne) e1 e2 (Eop (Ocmp cond) args)
   | Some(cond0, ec) => select0 ty cond0 e1 e2 ec
@@ -356,12 +362,6 @@ Nondetfunction orimm (n1: int) (e2: expr) :=
        | _ => Eop (Oorimm n1) (e2:::Enil)
   end.
 
-Definition same_expr_pure (e1 e2: expr) :=
-  match e1, e2 with
-  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
-  | _, _ => false
-  end.
-
 Nondetfunction or (e1: expr) (e2: expr) :=
   match e1, e2 with
   | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
@@ -704,7 +704,6 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr
   | BI_fmax => Some (Eop Omaxf args)
   | BI_fminf => Some (Eop Ominfs args)
   | BI_fmaxf => Some (Eop Omaxfs args)
-  | BI_fabsf => Some (Eop Oabsfs args)
   | BI_fma => gen_fma args
   | BI_fmaf => gen_fmaf args
   end.
diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v
index d1d0b95c..7a301929 100644
--- a/kvx/SelectOpproof.v
+++ b/kvx/SelectOpproof.v
@@ -1199,7 +1199,6 @@ Qed.
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
   red; intros until x. unfold cast8unsigned.
-
   rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
 Qed.
 
@@ -1548,6 +1547,15 @@ Proof.
   intros until b.
   intro Hop; injection Hop; clear Hop; intro; subst a.
   intros HeL He1 He2 HeC.
+  destruct same_expr_pure eqn:SAME.
+  {
+    destruct (eval_same_expr a1 a2 le v1 v2 SAME He1 He2) as [EQ1 EQ2].
+    subst a2. subst v2.
+    exists v1; split; trivial.
+    cbn.
+    rewrite if_same.
+    apply Val.lessdef_normalize.
+  }
   unfold cond_to_condition0.
   destruct (cond_to_condition0_match cond al).
   {
diff --git a/kvx/Stacklayout.v b/kvx/Stacklayout.v
index 740c0cf2..1c2e1d56 100644
--- a/kvx/Stacklayout.v
+++ b/kvx/Stacklayout.v
@@ -63,7 +63,7 @@ Lemma frame_env_separated:
        ** P.
 Proof.
 Local Opaque Z.add Z.mul sepconj range'.
-  intros; simpl.
+  intros; cbn.
   set (w := if Archi.ptr64 then 8 else 4).
   set (olink := align (4 * b.(bound_outgoing)) w).
   set (oretaddr := olink + w).
@@ -105,7 +105,7 @@ Lemma frame_env_range:
   let fe := make_env b in
   0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe.
 Proof.
-  intros; simpl.
+  intros; cbn.
   set (w := if Archi.ptr64 then 8 else 4).
   set (olink := align (4 * b.(bound_outgoing)) w).
   set (oretaddr := olink + w).
@@ -133,7 +133,7 @@ Lemma frame_env_aligned:
   /\ (align_chunk Mptr | fe_ofs_link fe)
   /\ (align_chunk Mptr | fe_ofs_retaddr fe).
 Proof.
-  intros; simpl.
+  intros; cbn.
   set (w := if Archi.ptr64 then 8 else 4).
   set (olink := align (4 * b.(bound_outgoing)) w).
   set (oretaddr := olink + w).
diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml
index dfafc137..5b6230ca 100644
--- a/kvx/TargetPrinter.ml
+++ b/kvx/TargetPrinter.ml
@@ -414,6 +414,9 @@ module Target (*: TARGET*) =
       | Psemi -> fprintf oc ";;\n"
 
       | Pclzll (rd, rs) -> fprintf oc "	clzd %a = %a\n" ireg rd ireg rs
+      | Pclzw (rd, rs) -> fprintf oc "	clzw %a = %a\n" ireg rd ireg rs
+      | Pctzll (rd, rs) -> fprintf oc "	ctzd %a = %a\n" ireg rd ireg rs
+      | Pctzw (rd, rs) -> fprintf oc "	ctzw %a = %a\n" ireg rd ireg rs
       | Pstsud (rd, rs1, rs2) -> fprintf oc "	stsud %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
 
 
diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v
index e634fdc0..87554258 100644
--- a/kvx/ValueAOp.v
+++ b/kvx/ValueAOp.v
@@ -16,87 +16,6 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op ExtValues ExtFloats RTL ValueDomain.
-
-Definition intoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_int f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intuoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_intu f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_int f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition intuofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_intu f with
-      | Some i => I i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_long f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longuoffloat_total (x: aval) :=
-  match x with
-  | F f =>
-      match Float.to_longu f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_long f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
-Definition longuofsingle_total (x: aval) :=
-  match x with
-  | FS f =>
-      match Float32.to_longu f with
-      | Some i => L i
-      | None => ntop
-      end
-  | _ => ntop1 x
-  end.
-
 Definition minf := binop_float ExtFloat.min.
 Definition maxf := binop_float ExtFloat.max.
 Definition minfs := binop_single ExtFloat32.min.
@@ -400,196 +319,6 @@ Hypothesis GENV: genv_match bc ge.
 Variable sp: block.
 Hypothesis STACK: bc sp = BCstack.
 
-Lemma intoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intoffloat v)) (intoffloat_total x).
-Proof.
-  unfold Val.intoffloat, intoffloat_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intoffloat_total_sound : va.
-
-Lemma intuoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intuoffloat v)) (intuoffloat_total x).
-Proof.
-  unfold Val.intoffloat, intoffloat_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intuoffloat_total_sound : va.
-
-Lemma intofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intofsingle v)) (intofsingle_total x).
-Proof.
-  unfold Val.intofsingle, intofsingle_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float32.to_int f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intofsingle_total_sound : va.
-
-Lemma intuofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.intuofsingle v)) (intuofsingle_total x).
-Proof.
-  unfold Val.intofsingle, intofsingle_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float32.to_intu f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve intuofsingle_total_sound : va.
-
-Lemma singleofint_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleofint v)) (singleofint x).
-Proof.
-  unfold Val.singleofint, singleofint; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleofint_total_sound : va.
-
-Lemma singleofintu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleofintu v)) (singleofintu x).
-Proof.
-  unfold Val.singleofintu, singleofintu; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleofintu_total_sound : va.
-
-Lemma longoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longoffloat v)) (longoffloat_total x).
-Proof.
-  unfold Val.longoffloat, longoffloat_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longoffloat_total_sound : va.
-
-Lemma longuoffloat_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longuoffloat v)) (longuoffloat_total x).
-Proof.
-  unfold Val.longoffloat, longoffloat_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longuoffloat_total_sound : va.
-
-Lemma longofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longofsingle v)) (longofsingle_total x).
-Proof.
-  unfold Val.longofsingle, longofsingle_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float32.to_long f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longofsingle_total_sound : va.
-
-Lemma longuofsingle_total_sound:
-  forall v x
-         (MATCH : vmatch bc v x),
-    vmatch bc (Val.maketotal (Val.longuofsingle v)) (longuofsingle_total x).
-Proof.
-  unfold Val.longofsingle, longofsingle_total. intros.
-  inv MATCH; simpl in *; try constructor.
-  all: destruct (Float32.to_longu f) as [i|] eqn:E; simpl; [auto with va | constructor].
-  unfold ntop1, provenance.
-  destruct (va_strict tt); constructor.
-Qed.
-
-Hint Resolve longuofsingle_total_sound : va.
-
-Lemma singleoflong_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleoflong v)) (singleoflong x).
-Proof.
-  unfold Val.singleoflong, singleoflong; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleoflong_total_sound : va.
-
-Lemma singleoflongu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.singleoflongu v)) (singleoflongu x).
-Proof.
-  unfold Val.singleoflongu, singleoflongu; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve singleoflongu_total_sound : va.
-
-Lemma floatoflong_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.floatoflong v)) (floatoflong x).
-Proof.
-  unfold Val.floatoflong, floatoflong; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve floatoflong_total_sound : va.
-
-Lemma floatoflongu_total_sound:
-  forall v x, vmatch bc v x ->
-              vmatch bc (Val.maketotal (Val.floatoflongu v)) (floatoflongu x).
-Proof.
-  unfold Val.floatoflongu, floatoflongu; intros.
-  inv H; simpl.
-  all: auto with va.
-  all: unfold ntop1, provenance.
-  all: try constructor.
-Qed.
-
-Hint Resolve floatoflongu_total_sound : va.
-
 Lemma minf_sound:
   forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minf v w) (minf x y).
 Proof.
@@ -620,7 +349,7 @@ Proof.
   intros v x;
   intro MATCH;
   inversion MATCH;
-  simpl;
+  cbn;
   constructor.
 Qed.
 
@@ -632,9 +361,9 @@ Lemma triple_op_float_sound:
 Proof.
   intros until z.
   intros Hax Hby Hcz.
-  inv Hax; simpl; try constructor;
-  inv Hby; simpl; try constructor;
-  inv Hcz; simpl; try constructor.
+  inv Hax; cbn; try constructor;
+  inv Hby; cbn; try constructor;
+  inv Hcz; cbn; try constructor.
 Qed.
 
 Lemma triple_op_single_sound:
@@ -645,9 +374,9 @@ Lemma triple_op_single_sound:
 Proof.
   intros until z.
   intros Hax Hby Hcz.
-  inv Hax; simpl; try constructor;
-  inv Hby; simpl; try constructor;
-  inv Hcz; simpl; try constructor.
+  inv Hax; cbn; try constructor;
+  inv Hby; cbn; try constructor;
+  inv Hcz; cbn; try constructor.
 Qed.
 
 Lemma fmaddf_sound :
@@ -691,9 +420,9 @@ Proof.
   intros until aargs; intros VM. inv VM.
   destruct cond; auto with va.
   inv H0.
-  destruct cond; simpl; eauto with va.
+  destruct cond; cbn; eauto with va.
   inv H2.
-  destruct cond; simpl; eauto with va.
+  destruct cond; cbn; eauto with va.
   destruct cond; auto with va.
 Qed.
 
@@ -703,7 +432,7 @@ Theorem eval_static_condition0_sound:
   cmatch (eval_condition0 cond varg m) (eval_static_condition0 cond aarg).
 Proof.
   intros until aarg; intro VM.
-  destruct cond; simpl; eauto with va.
+  destruct cond; cbn; eauto with va.
 Qed.
 
 Lemma symbol_address_sound:
@@ -812,19 +541,9 @@ Proof.
     + eauto with va.
     + destruct n; destruct shift; reflexivity.
   - (* shrx *)
-    inv H1; simpl; try constructor.
-    all: destruct Int.ltu; [simpl | constructor; fail].
+    inv H1; cbn; try constructor.
+    all: destruct Int.ltu; [cbn | constructor; fail].
     all: auto with va.
-  - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with
-    | Vlong n2 => Vlong (Int64.add n n2)
-    | Vptr b2 ofs2 =>
-        if Archi.ptr64
-        then Vptr b2 (Ptrofs.add ofs2 (Ptrofs.of_int64 n))
-        else Vundef
-    | _ => Vundef
-             end) with (Val.addl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))).
-    + eauto with va.
-    + destruct a1; destruct shift; reflexivity.
   - inv H1; constructor.
   - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with
     | Vlong n2 => Vlong (Int64.sub n n2)
@@ -832,10 +551,6 @@ Proof.
              end) with (Val.subl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))).
     + eauto with va.
     + destruct a1; destruct shift; reflexivity.
-  - (* shrxl *)
-    inv H1; simpl; try constructor.
-    all: destruct Int.ltu; [simpl | constructor; fail].
-    all: auto with va.
   - apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
 
   (* extfz *)
@@ -865,12 +580,12 @@ Proof.
   (* insf *)
   - unfold insf, eval_static_insf.
     destruct (is_bitfield _ _).
-    + inv H1; inv H0; simpl; try constructor; destruct (Int.ltu _ _); simpl; constructor.
+    + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor.
     + constructor.
   (* insfl *)
   - unfold insfl, eval_static_insfl.
     destruct (is_bitfieldl _ _).
-    + inv H1; inv H0; simpl; try constructor; destruct (Int.ltu _ _); simpl; constructor.
+    + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor.
     + constructor.
     (* select *)
   - apply select_sound; auto. eapply eval_static_condition0_sound; eauto.
diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml
index 66322efb..af65b28e 100644
--- a/lib/Camlcoq.ml
+++ b/lib/Camlcoq.ml
@@ -282,23 +282,96 @@ type atom = positive
 let atom_of_string = (Hashtbl.create 17 : (string, atom) Hashtbl.t)
 let string_of_atom = (Hashtbl.create 17 : (atom, string) Hashtbl.t)
 let next_atom = ref Coq_xH
+let use_canonical_atoms = ref false
+
+(* If [use_canonical_atoms] is false, strings are numbered from 1 up
+   in the order in which they are encountered.  This produces small
+   numbers, and is therefore efficient, but the number for a given
+   string may differ between the compilation of different units.
+
+   If [use_canonical_atoms] is true, strings are Huffman-encoded as bit
+   sequences, which are then encoded as positive numbers.  The same
+   string is always represented by the same number in all compilation
+   units.  However, the numbers are bigger than in the first
+   implementation.  Also, this places a hard limit on the number of
+   fresh identifiers that can be generated starting with
+   [first_unused_ident]. *)
+
+let rec append_bits_pos nbits n p =
+  if nbits <= 0 then p else
+  if n land 1 = 0
+  then Coq_xO (append_bits_pos (nbits - 1) (n lsr 1) p)
+  else Coq_xI (append_bits_pos (nbits - 1) (n lsr 1) p)
+
+(* The encoding of strings as bit sequences is optimized for C identifiers:
+   - numbers are encoded as a 6-bit integer between 0 and 9
+   - lowercase letters are encoded as a 6-bit integer between 10 and 35
+   - uppercase letters are encoded as a 6-bit integer between 36 and 61
+   - the underscore character is encoded as the 6-bit integer 62
+   - all other characters are encoded as 6 "one" bits followed by
+     the 8-bit encoding of the character. *)
+
+let append_char_pos c p =
+  match c with
+  | '0'..'9' -> append_bits_pos 6 (Char.code c - Char.code '0') p
+  | 'a'..'z' -> append_bits_pos 6 (Char.code c - Char.code 'a' + 10) p
+  | 'A'..'Z' -> append_bits_pos 6 (Char.code c - Char.code 'A' + 36) p
+  | '_'      -> append_bits_pos 6 62 p
+  | _        -> append_bits_pos 6 63 (append_bits_pos 8 (Char.code c) p)
+
+(* The empty string is represented as the positive "1", that is, [xH]. *)
+
+let pos_of_string s =
+  let rec encode i accu =
+    if i < 0 then accu else encode (i - 1) (append_char_pos s.[i] accu)
+  in encode (String.length s - 1) Coq_xH
+
+let fresh_atom () =
+  let a = !next_atom in
+  next_atom := Pos.succ !next_atom;
+  a
 
 let intern_string s =
   try
     Hashtbl.find atom_of_string s
   with Not_found ->
-    let a = !next_atom in
-    next_atom := Pos.succ !next_atom;
+    let a =
+      if !use_canonical_atoms then pos_of_string s else fresh_atom () in
     Hashtbl.add atom_of_string s a;
     Hashtbl.add string_of_atom a s;
     a
+
 let extern_atom a =
   try
     Hashtbl.find string_of_atom a
   with Not_found ->
     Printf.sprintf "$%d" (P.to_int a)
 
-let first_unused_ident () = !next_atom
+(* Ignoring the terminating "1" bit, canonical encodings of strings can
+   be viewed as lists of bits, formed by concatenation of 6-bit fragments
+   (for letters, numbers, and underscore) and 14-bit fragments (for other
+   characters).  Hence, not all positive numbers are canonical encodings:
+   only those whose log2 is of the form [6n + 14m].
+
+   Here are the first intervals of positive numbers corresponding to strings:
+   - [1, 1] for the empty string
+   - [2^6, 2^7-1] for one "compact" character
+   - [2^12, 2^13-1] for two "compact" characters
+   - [2^14, 2^14-1] for one "escaped" character
+
+   Hence, between 2^7 and 2^12 - 1, we have 3968 consecutive positive
+   numbers that cannot be the encoding of a string.  These are the positive
+   numbers we'll use as temporaries in the SimplExpr pass if canonical
+   atoms are in use.
+
+   If short atoms are used, we just number the temporaries consecutively
+   starting one above the last generated atom.
+*)
+
+let first_unused_ident () =
+  if !use_canonical_atoms
+  then P.of_int 128
+  else !next_atom
 
 (* Strings *)
 
diff --git a/driver/Commandline.ml b/lib/Commandline.ml
index 672ed834..672ed834 100644
--- a/driver/Commandline.ml
+++ b/lib/Commandline.ml
diff --git a/driver/Commandline.mli b/lib/Commandline.mli
index 8bb6f18f..8bb6f18f 100644
--- a/driver/Commandline.mli
+++ b/lib/Commandline.mli
diff --git a/lib/Coqlib.v b/lib/Coqlib.v
index 02c5d07f..7a7261a3 100644
--- a/lib/Coqlib.v
+++ b/lib/Coqlib.v
@@ -1053,6 +1053,41 @@ Proof.
   induction 1; intros. auto. apply IHis_tail. eapply is_tail_cons_left; eauto.
 Qed.
 
+Lemma is_tail_app A (l1: list A): forall l2, is_tail l2 (l1 ++ l2).
+Proof.
+  induction l1; cbn; auto with coqlib.
+Qed.
+Hint Resolve is_tail_app: coqlib.
+
+Lemma is_tail_app_inv A (l1: list A): forall l2 l3, is_tail (l1 ++ l2) l3 -> is_tail l2 l3.
+Proof.
+  induction l1; cbn; auto with coqlib.
+  intros l2 l3 H; inversion H; eauto with coqlib.
+Qed.
+Hint Resolve is_tail_app_inv: coqlib.
+
+Lemma is_tail_app_right A (l2 l1: list A): is_tail l1 (l2++l1).
+Proof.
+  intros; eauto with coqlib.
+Qed.
+
+Lemma is_tail_app_def A (l1 l2: list A):
+  is_tail l1 l2 -> exists l3, l2 = l3 ++ l1.
+Proof.
+  induction 1 as [|x l1 l2]; simpl.
+  - exists nil; simpl; auto.
+  - destruct IHis_tail as (l3 & EQ); rewrite EQ.
+    exists (x::l3); simpl; auto.
+Qed.
+
+Lemma is_tail_bound A (l1 l2: list A):
+  is_tail l1 l2 -> (length l1 <= length l2)%nat.
+Proof.
+  intros H; destruct (is_tail_app_def H) as (l3 & EQ).
+  subst; rewrite app_length.
+  omega.
+Qed.
+
 (** [list_forall2 P [x1 ... xN] [y1 ... yM]] holds iff [N = M] and
   [P xi yi] holds for all [i]. *)
 
@@ -1325,3 +1360,9 @@ Lemma nlist_forall2_imply:
 Proof.
   induction 1; simpl; intros; constructor; auto.
 Qed.
+
+Lemma if_same : forall {T : Type} (b : bool) (x : T),
+    (if b then x else x) = x.
+Proof.
+  destruct b; trivial.
+Qed.
diff --git a/lib/Floats.v b/lib/Floats.v
index 272efa52..ac67b88c 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -17,11 +17,11 @@
 (** Formalization of floating-point numbers, using the Flocq library. *)
 
 Require Import Coqlib Zbits Integers Axioms.
-(*From Flocq*)
-Require Import Binary Bits Core.
+From Flocq Require Import Binary Bits Core.
 Require Import IEEE754_extra.
 Require Import Program.
 Require Archi.
+Import ListNotations.
 
 Close Scope R_scope.
 Open Scope Z_scope.
diff --git a/lib/IEEE754_extra.v b/lib/IEEE754_extra.v
index c23149be..18313ec1 100644
--- a/lib/IEEE754_extra.v
+++ b/lib/IEEE754_extra.v
@@ -17,11 +17,11 @@
 (** Additional operations and proofs about IEEE-754 binary
     floating-point numbers, on top of the Flocq library. *)
 
+From Flocq Require Import Core Digits Operations Round Bracket Sterbenz
+                          Binary Round_odd.
 Require Import Psatz.
 Require Import Bool.
 Require Import Eqdep_dec.
-(*From Flocq *)
-Require Import Core Digits Operations Round Bracket Sterbenz Binary Round_odd.
 
 Local Open Scope Z_scope.
 
diff --git a/kvx/abstractbb/Impure/ImpConfig.v b/lib/Impure/ImpConfig.v
index dd9785b5..dd9785b5 100644
--- a/kvx/abstractbb/Impure/ImpConfig.v
+++ b/lib/Impure/ImpConfig.v
diff --git a/kvx/abstractbb/Impure/ImpCore.v b/lib/Impure/ImpCore.v
index 508b3f19..508b3f19 100644
--- a/kvx/abstractbb/Impure/ImpCore.v
+++ b/lib/Impure/ImpCore.v
diff --git a/kvx/abstractbb/Impure/ImpExtern.v b/lib/Impure/ImpExtern.v
index 8fb3cf3b..8fb3cf3b 100644
--- a/kvx/abstractbb/Impure/ImpExtern.v
+++ b/lib/Impure/ImpExtern.v
diff --git a/kvx/abstractbb/Impure/ImpHCons.v b/lib/Impure/ImpHCons.v
index 637116cc..637116cc 100644
--- a/kvx/abstractbb/Impure/ImpHCons.v
+++ b/lib/Impure/ImpHCons.v
diff --git a/kvx/abstractbb/Impure/ImpIO.v b/lib/Impure/ImpIO.v
index 6c02c395..6c02c395 100644
--- a/kvx/abstractbb/Impure/ImpIO.v
+++ b/lib/Impure/ImpIO.v
diff --git a/kvx/abstractbb/Impure/ImpLoops.v b/lib/Impure/ImpLoops.v
index 33376c19..33376c19 100644
--- a/kvx/abstractbb/Impure/ImpLoops.v
+++ b/lib/Impure/ImpLoops.v
diff --git a/kvx/abstractbb/Impure/ImpMonads.v b/lib/Impure/ImpMonads.v
index f01a2755..f01a2755 100644
--- a/kvx/abstractbb/Impure/ImpMonads.v
+++ b/lib/Impure/ImpMonads.v
diff --git a/kvx/abstractbb/Impure/ImpPrelude.v b/lib/Impure/ImpPrelude.v
index de4c7973..de4c7973 100644
--- a/kvx/abstractbb/Impure/ImpPrelude.v
+++ b/lib/Impure/ImpPrelude.v
diff --git a/kvx/abstractbb/Impure/LICENSE b/lib/Impure/LICENSE
index 65c5ca88..65c5ca88 100644
--- a/kvx/abstractbb/Impure/LICENSE
+++ b/lib/Impure/LICENSE
diff --git a/kvx/abstractbb/Impure/README.md b/lib/Impure/README.md
index 2b19d14a..2b19d14a 100644
--- a/kvx/abstractbb/Impure/README.md
+++ b/lib/Impure/README.md
diff --git a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.ml b/lib/Impure/ocaml/ImpHConsOracles.ml
index 2b66899b..68a33a91 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.ml
+++ b/lib/Impure/ocaml/ImpHConsOracles.ml
@@ -38,12 +38,18 @@ let xhCons (type a) (hp:a hashP) =
   let t = MyHashtbl.create 1000 in
   let logs = ref [] in
   {
-   hC = (fun (k:a hashinfo) ->
+    hC = (fun (k:a hashinfo) ->
+     (* DEBUG:
+     Printf.printf "*in %d -- look for hcodes= " (Obj.magic t);
+     List.iter (fun i -> Printf.printf "%d " i) k.hcodes;
+     print_newline();
+     *)
      match MyHashtbl.find_opt t k with
      | Some d -> d
-     | None -> (*print_string "+";*)
-        let d = hp.set_hid k.hdata (MyHashtbl.length t) in
-        MyHashtbl.add t {k with hdata = d } d; d);
+     | None ->
+         (* DEBUG: Printf.printf "*in %d -- new hid:%d" (Obj.magic t) (MyHashtbl.length t); print_newline(); *)
+         let d = hp.set_hid k.hdata (MyHashtbl.length t) in
+         MyHashtbl.add t {k with hdata = d } d; d);
    next_log = (fun info -> logs := (MyHashtbl.length t, info)::(!logs));
    next_hid = (fun () -> MyHashtbl.length t);
    remove = (fun (x:a hashinfo) -> MyHashtbl.remove t x);
diff --git a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.mli b/lib/Impure/ocaml/ImpHConsOracles.mli
index 5075d176..5075d176 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpHConsOracles.mli
+++ b/lib/Impure/ocaml/ImpHConsOracles.mli
diff --git a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.ml b/lib/Impure/ocaml/ImpIOOracles.ml
index 9e63c12d..9e63c12d 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.ml
+++ b/lib/Impure/ocaml/ImpIOOracles.ml
diff --git a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.mli b/lib/Impure/ocaml/ImpIOOracles.mli
index 6064286a..6064286a 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpIOOracles.mli
+++ b/lib/Impure/ocaml/ImpIOOracles.mli
diff --git a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.ml b/lib/Impure/ocaml/ImpLoopOracles.ml
index cb7625e5..cb7625e5 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.ml
+++ b/lib/Impure/ocaml/ImpLoopOracles.ml
diff --git a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.mli b/lib/Impure/ocaml/ImpLoopOracles.mli
index 194696a1..194696a1 100644
--- a/kvx/abstractbb/Impure/ocaml/ImpLoopOracles.mli
+++ b/lib/Impure/ocaml/ImpLoopOracles.mli
diff --git a/lib/IterList.v b/lib/IterList.v
new file mode 100644
index 00000000..bde47068
--- /dev/null
+++ b/lib/IterList.v
@@ -0,0 +1,111 @@
+Require Import Coqlib.
+
+(** TODO: are these def and lemma already defined in the standard library ?
+
+In this case, it should be better to reuse those of the standard library !
+
+*)
+
+Fixpoint iter {A} (n:nat) (f: A -> A) (x: A) {struct n}: A :=
+  match n with
+  | O => x
+  | S n0 => iter n0 f (f x)
+  end.
+
+Lemma iter_S A (n:nat) (f: A -> A): forall x, iter (S n) f x = f (iter n f x).
+Proof.
+  induction n; simpl; auto.
+  intros; erewrite <- IHn; simpl; auto.
+Qed.
+
+Lemma iter_plus A (n m:nat) (f: A -> A): forall x, iter (n+m) f x = iter m f (iter n f x).
+Proof.
+  induction n; simpl; auto.
+Qed.
+
+Definition iter_tail {A} (n:nat) (l: list A) := iter n (@tl A) l.
+
+Lemma iter_tail_S {A} (n:nat) (l: list A): iter_tail (S n) l = tl (iter_tail n l).
+Proof.
+  apply iter_S.
+Qed.
+
+Lemma iter_tail_plus A (n m:nat) (l: list A): iter_tail (n+m) l = iter_tail m (iter_tail n l).
+Proof.
+  apply iter_plus.
+Qed.
+
+Lemma iter_tail_length A l1: forall (l2: list A), iter_tail (length l1) (l1 ++ l2) = l2.
+Proof.
+  induction l1; auto.
+Qed.
+
+Lemma iter_tail_nil A n: @iter_tail A n nil = nil.
+Proof.
+  unfold iter_tail; induction n; simpl; auto.
+Qed.
+
+Lemma iter_tail_reach_nil A (l: list A): iter_tail (length l) l = nil.
+Proof.
+  rewrite (app_nil_end l) at 2.
+  rewrite iter_tail_length. 
+  auto.
+Qed.
+
+Lemma length_iter_tail {A} (n:nat): forall (l: list A), (n <= List.length l)%nat -> (List.length l = n + List.length (iter_tail n l))%nat.
+Proof.
+  unfold iter_tail; induction n; auto.
+  intros l; destruct l. { simpl; omega. }
+  intros; simpl. erewrite IHn; eauto.
+  simpl in *; omega.
+Qed.
+
+Lemma iter_tail_S_ex {A} (n:nat): forall (l: list A), (n < length l)%nat -> exists x, iter_tail n l = x::(iter_tail (S n) l).
+Proof.
+  unfold iter_tail; induction n; simpl.
+  - intros l; destruct l; simpl; omega || eauto.
+  - intros l H; destruct (IHn (tl l)) as (x & H1).
+    + destruct l; simpl in *; try omega.
+    + rewrite H1; eauto.
+Qed.
+
+Lemma iter_tail_inject1 {A} (n1 n2:nat) (l: list A): (n1 <= List.length l)%nat -> (n2 <= List.length l)%nat -> iter_tail n1 l = iter_tail n2 l -> n1=n2.
+Proof.
+  intros H1 H2 EQ; exploit (length_iter_tail n1 l); eauto.
+  rewrite EQ.
+  rewrite (length_iter_tail n2 l); eauto.
+  omega.
+Qed.
+
+Lemma iter_tail_nil_inject {A} (n:nat) (l: list A): iter_tail n l = nil -> (List.length l <= n)%nat.
+Proof.
+  destruct (le_lt_dec n (List.length l)); try omega.
+  intros; exploit (iter_tail_inject1 n (length l) l); try omega.
+  rewrite iter_tail_reach_nil. auto.
+Qed.
+
+Lemma list_length_z_nat (A: Type) (l: list A): list_length_z l = Z.of_nat (length l).
+Proof.
+  induction l; auto.
+  rewrite list_length_z_cons. simpl. rewrite Zpos_P_of_succ_nat. omega.
+Qed.
+
+Lemma list_length_nat_z (A: Type) (l: list A): length l = Z.to_nat (list_length_z l).
+Proof.
+  intros; rewrite list_length_z_nat, Nat2Z.id. auto.
+Qed.
+
+Lemma is_tail_list_nth_z A (l1 l2: list A):
+  is_tail l1 l2 -> list_nth_z l2 ((list_length_z l2) - (list_length_z l1)) = list_nth_z l1 0.
+Proof.
+  induction 1; simpl.
+  - replace (list_length_z c - list_length_z c) with 0; omega || auto.
+  - assert (X: list_length_z (i :: c2) > list_length_z c1).
+    { rewrite !list_length_z_nat, <- Nat2Z.inj_gt.
+      exploit is_tail_bound; simpl; eauto.
+      omega. }
+    destruct (zeq (list_length_z (i :: c2) - list_length_z c1) 0) as [Y|Y]; try omega.
+    replace (Z.pred (list_length_z (i :: c2) - list_length_z c1)) with (list_length_z c2 - list_length_z c1); auto.
+    rewrite list_length_z_cons.
+    omega.
+Qed.
diff --git a/lib/OptionMonad.v b/lib/OptionMonad.v
new file mode 100644
index 00000000..824a9c2f
--- /dev/null
+++ b/lib/OptionMonad.v
@@ -0,0 +1,49 @@
+(* Declare Scope option_monad_scope. *)
+
+Notation "'SOME' X <- A 'IN' B" := (match A with Some X => B | None => None end)
+         (at level 200, X ident, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Notation "'ASSERT' A 'IN' B" := (if A then B else None)
+         (at level 200, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Local Open Scope option_monad_scope.
+
+
+(** Simple tactics for option-monad *)
+
+Lemma destruct_SOME A B (P: option B -> Prop) (e: option A) (f: A -> option B):
+  (forall x, e = Some x -> P (f x)) -> (e = None -> P None) -> (P (SOME x <- e IN f x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Lemma destruct_ASSERT B (P: option B -> Prop) (e: bool) (x: option B):
+  (e = true -> P x) -> (e = false -> P None) -> (P (ASSERT e IN x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Ltac inversion_SOME x :=
+  try (eapply destruct_SOME; [ let x := fresh x in intro x | simpl; try congruence ]).
+
+Ltac inversion_ASSERT :=
+  try (eapply destruct_ASSERT; [ idtac | simpl; try congruence ]).
+
+Ltac simplify_someHyp :=
+  match goal with
+  | H: None = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ = None |- _  => inversion H; clear H; subst
+  | H: ?t = ?t |- _ => clear H
+  | H: Some _ = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ <> None |- _ => clear H
+  | H: None <> Some _ |- _ => clear H
+  | H: _ = Some _ |- _ => (try rewrite !H in * |- *); generalize H; clear H
+  end.
+
+Ltac simplify_someHyps := 
+  repeat (simplify_someHyp; simpl in * |- *).
+
+Ltac try_simplify_someHyps := 
+  try (intros; simplify_someHyps; eauto).
diff --git a/lib/Readconfig.mll b/lib/Readconfig.mll
index 7b98255e..8abcc407 100644
--- a/lib/Readconfig.mll
+++ b/lib/Readconfig.mll
@@ -20,7 +20,7 @@
 let key_val_tbl : (string, string list) Hashtbl.t = Hashtbl.create 17
 
 let key_val key =
-  try Some(Hashtbl.find key_val_tbl key) with Not_found -> None
+  Hashtbl.find_opt key_val_tbl key
 
 (* Auxiliaries for parsing *)
 
diff --git a/lib/UnionFind.v b/lib/UnionFind.v
index 20bb91cd..bd1b763b 100644
--- a/lib/UnionFind.v
+++ b/lib/UnionFind.v
@@ -124,6 +124,15 @@ Module Type UNIONFIND.
         pathlen uf x + pathlen uf b + 1
       else
         pathlen uf x.
+   Axiom pathlen_union:
+    forall uf a b x,
+    pathlen (union uf a b) x =
+      if elt_eq (repr uf a) (repr uf b) then
+        pathlen uf x
+     else if elt_eq (repr uf x) (repr uf a) then
+        (pathlen uf x)+1
+      else
+        (pathlen uf x).
   Axiom pathlen_gt_merge:
     forall uf a b x y,
     repr uf x = repr uf y ->
@@ -531,6 +540,7 @@ Qed.
 
 End PATHLEN.
 
+
 (* Path length and merge *)
 
 Lemma pathlen_merge:
@@ -549,16 +559,49 @@ Proof.
   set (uf' := identify uf (repr uf a) b (repr_res_none uf a) (not_eq_sym n)).
   pattern x. apply (well_founded_ind (mwf uf')); intros.
   rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
-  rewrite H; auto. simpl in G. rewrite M.gsspec in G.
-  destruct (M.elt_eq x0 (repr uf a)). rewrite e. rewrite repr_canonical. rewrite dec_eq_true.
-  inversion G. subst x'. rewrite dec_eq_false; auto.
-  replace (pathlen uf (repr uf a)) with 0. omega.
-  symmetry. apply pathlen_none. apply repr_res_none.
-  rewrite (repr_unroll uf x0), (pathlen_unroll uf x0); rewrite G.
-  destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
-  simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
-  rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
-  symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+  + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq x0 (repr uf a)).
+    - rewrite e, repr_canonical, dec_eq_true.
+      inversion G. subst x'. rewrite dec_eq_false; auto.
+      replace (pathlen uf (repr uf a)) with 0; try omega.
+      symmetry. apply pathlen_none. apply repr_res_none.
+    - rewrite (repr_unroll uf x0), (pathlen_unroll uf x0), G.
+      destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
+  + clear H; simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
+    rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
+    symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+Qed.
+
+Lemma pathlen_union:
+  forall uf a b x,
+  pathlen (union uf a b) x =
+    if M.elt_eq (repr uf a) (repr uf b) then
+      pathlen uf x
+    else if M.elt_eq (repr uf x) (repr uf a) then
+      (pathlen uf x)+1
+    else
+      (pathlen uf x).
+Proof.
+  intros. unfold union.
+  destruct (M.elt_eq (repr uf a) (repr uf b)).
+  auto.
+  set (uf' := identify uf _ _ _ _).
+  assert (LENa: pathlen uf (repr uf a) = 0).
+  { apply pathlen_none. apply repr_res_none. }
+  pattern x. apply (well_founded_ind (mwf uf')); intros.
+  rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
+  + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq x0 (repr uf a)).
+    - inversion G; clear G. subst.
+      rewrite !repr_canonical, dec_eq_true.
+      rewrite dec_eq_false; auto.
+      rewrite LENa. rewrite (pathlen_none uf (repr uf b)); try omega.
+      apply repr_res_none.
+    - rewrite (repr_unroll uf x0), G, ! (pathlen_some _ _ _ G).
+      destruct (M.elt_eq _ _); auto.
+  + clear H. simpl in G. rewrite M.gsspec in G.
+    destruct (M.elt_eq _ (repr uf a)); try discriminate.
+    rewrite (repr_none _ _ G), !(pathlen_none _ _ G), dec_eq_false; auto.
 Qed.
 
 Lemma pathlen_gt_merge:
diff --git a/lib/Zbits.v b/lib/Zbits.v
index 27586aff..6f3acaab 100644
--- a/lib/Zbits.v
+++ b/lib/Zbits.v
@@ -266,7 +266,7 @@ Qed.
 Remark Ztestbit_shiftin_base:
   forall b x, Z.testbit (Zshiftin b x) 0 = b.
 Proof.
-  intros. rewrite Ztestbit_shiftin. apply zeq_true. omega.
+  intros. rewrite Ztestbit_shiftin; reflexivity.
 Qed.
 
 Remark Ztestbit_shiftin_succ:
@@ -316,7 +316,7 @@ Qed.
 Remark Ztestbit_base:
   forall x, Z.testbit x 0 = Z.odd x.
 Proof.
-  intros. rewrite Ztestbit_eq. apply zeq_true. omega.
+  intros. rewrite Ztestbit_eq; reflexivity.
 Qed.
 
 Remark Ztestbit_succ:
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index 8f96dafc..5b9d67cc 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -16,9 +16,8 @@
 
 (** Architecture-dependent parameters for PowerPC *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := false.
 
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index 4fb38ff8..d9901960 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -200,12 +200,9 @@ Inductive instruction : Type :=
   | Pfadd: freg -> freg -> freg -> instruction                (**r float addition *)
   | Pfadds: freg -> freg -> freg -> instruction               (**r float addition *)
   | Pfcmpu: freg -> freg -> instruction                       (**r float comparison *)
-  | Pfcfi: freg -> ireg -> instruction                        (**r signed-int-to-float conversion (pseudo, PPC64) *)
   | Pfcfl: freg -> ireg -> instruction                        (**r signed-long-to-float conversion (pseudo, PPC64) *)
-  | Pfcfiu: freg -> ireg -> instruction                       (**r unsigned-int-to-float conversion (pseudo, PPC64) *)
   | Pfcfid: freg -> freg -> instruction                       (**r signed-long-to-float conversion (PPC64) *)
   | Pfcti: ireg -> freg -> instruction                        (**r float-to-signed-int conversion, round towards 0 (pseudo) *)
-  | Pfctiu: ireg -> freg -> instruction                       (**r float-to-unsigned-int conversion, round towards 0 (pseudo, PPC64) *)
   | Pfctid: ireg -> freg -> instruction                       (**r float-to-signed-int conversion, round towards 0 (pseudo, PPC64) *)
   | Pfctidz: freg -> freg -> instruction                      (**r float-to-signed-long conversion, round towards 0 (PPC64) *)
   | Pfctiw: freg -> freg -> instruction                       (**r float-to-signed-int conversion, round by default *)
@@ -825,16 +822,10 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#rd <- (Val.addfs rs#r1 rs#r2))) m
   | Pfcmpu r1 r2 =>
       Next (nextinstr (compare_float rs rs#r1 rs#r2)) m
-  | Pfcfi rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
   | Pfcfl rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatoflong rs#r1)))) m
-  | Pfcfiu rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofintu rs#r1)))) m
   | Pfcti rd r1 =>
       Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
-  | Pfctiu rd r1 =>
-      Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.intuoffloat rs#r1)))) m
   | Pfctid rd r1 =>
       Next (nextinstr (rs#FPR13 <- Vundef #rd <- (Val.maketotal (Val.longoffloat rs#r1)))) m
   | Pfdiv rd r1 r2 =>
@@ -1204,7 +1195,7 @@ Inductive step: state -> trace -> state -> Prop :=
       external_call ef ge vargs m t vres m' ->
       rs' = nextinstr
               (set_res res vres
-                (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+                (undef_regs (IR GPR0 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
       step (State rs m) t (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml
index 38f4bc75..1f32dd62 100644
--- a/powerpc/AsmToJSON.ml
+++ b/powerpc/AsmToJSON.ml
@@ -198,12 +198,9 @@ let pp_instructions pp ic =
   | Pfadd (fr1,fr2,fr3) -> instruction pp "Pfadd" [Freg fr1; Freg fr2; Freg fr3]
   | Pfadds (fr1,fr2,fr3) -> instruction pp "Pfadds" [Freg fr1; Freg fr2; Freg fr3]
   | Pfcmpu (fr1,fr2) -> instruction pp "Pfcmpu" [Freg fr1; Freg fr2]
-  | Pfcfi (ir,fr)
   | Pfcfl (ir,fr) -> assert false (* Should not occur *)
   | Pfcfid (fr1,fr2) -> instruction pp "Pfcfid" [Freg fr1; Freg fr2]
-  | Pfcfiu _ (* Should not occur *)
   | Pfcti _ (* Should not occur *)
-  | Pfctiu _ (* Should not occur *)
   | Pfctid _ -> assert false (* Should not occur *)
   | Pfctidz (fr1,fr2) -> instruction pp "Pfctidz" [Freg fr1; Freg fr2]
   | Pfctiw (fr1,fr2) -> instruction pp "Pfctiw" [Freg fr1; Freg fr2]
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
index ce88778c..cb6a659f 100644
--- a/powerpc/Asmexpand.ml
+++ b/powerpc/Asmexpand.ml
@@ -594,9 +594,7 @@ let expand_builtin_inline name args res =
       emit (Pfnmadd(res, a1, a2, a3))
   | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
       emit (Pfnmsub(res, a1, a2, a3))
-  | "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
-      emit (Pfabs(res, a1))
-  | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
       emit (Pfsqrt(res, a1))
   | "__builtin_frsqrte", [BA(FR a1)], BR(FR res) ->
       emit (Pfrsqrte(res, a1))
@@ -767,6 +765,8 @@ let expand_builtin_inline name args res =
       emit (Pori (GPR0, GPR0, Cint _0))
   (* atomic operations *)
   | "__builtin_atomic_exchange", [BA (IR a1); BA (IR a2); BA (IR a3)],_ ->
+      (* Register constraints imposed by Machregs.v *)
+      assert(a1 = GPR3 && a2 = GPR4 && a3 = GPR5);
       emit (Plwz (GPR10,Cint _0,a2));
       emit (Psync);
       let lbl = new_label() in
@@ -786,6 +786,8 @@ let expand_builtin_inline name args res =
       emit (Pisync);
       emit (Pstw (GPR0,Cint _0, a2))
   | "__builtin_sync_fetch_and_add", [BA (IR a1); BA(IR a2)], BR (IR res) ->
+      (* Register constraints imposed by Machregs.v *)
+      assert (a1 = GPR4 && a2 = GPR5 && res = GPR3);
       let lbl = new_label() in
       emit (Psync);
       emit (Plabel lbl);
@@ -795,6 +797,8 @@ let expand_builtin_inline name args res =
       emit (Pbf (CRbit_2, lbl));
       emit (Pisync);
   | "__builtin_atomic_compare_exchange", [BA (IR dst); BA(IR exp); BA (IR des)],  BR (IR res) ->
+      (* Register constraints imposed by Machregs.v *)
+      assert (dst = GPR4 && exp = GPR5 && des = GPR6 && res = GPR3);
       let lbls = new_label ()
       and lblneq = new_label ()
       and lblsucc = new_label () in
@@ -871,15 +875,6 @@ let expand_instruction instr =
         emit (Paddi(GPR1, GPR1, Cint(coqint_of_camlint sz)))
       else
         emit (Plwz(GPR1, Cint ofs, GPR1))
-  | Pfcfi(r1, r2) ->
-      assert (Archi.ppc64);
-      emit (Pextsw(GPR0, r2));
-      emit (Pstdu(GPR0, Cint _m8, GPR1));
-      emit (Pcfi_adjust _8);
-      emit (Plfd(r1, Cint _0, GPR1));
-      emit (Pfcfid(r1, r1));
-      emit (Paddi(GPR1, GPR1, Cint _8));
-      emit (Pcfi_adjust _m8)
   | Pfcfl(r1, r2) ->
       assert (Archi.ppc64);
       emit (Pstdu(r2, Cint _m8, GPR1));
@@ -888,15 +883,6 @@ let expand_instruction instr =
       emit (Pfcfid(r1, r1));
       emit (Paddi(GPR1, GPR1, Cint _8));
       emit (Pcfi_adjust _m8)
-  | Pfcfiu(r1, r2) ->
-      assert (Archi.ppc64);
-      emit (Prldicl(GPR0, r2, _0, _32));
-      emit (Pstdu(GPR0, Cint _m8, GPR1));
-      emit (Pcfi_adjust _8);
-      emit (Plfd(r1, Cint _0, GPR1));
-      emit (Pfcfid(r1, r1));
-      emit (Paddi(GPR1, GPR1, Cint _8));
-      emit (Pcfi_adjust _m8)
   | Pfcti(r1, r2) ->
       emit (Pfctiwz(FPR13, r2));
       emit (Pstfdu(FPR13, Cint _m8, GPR1));
@@ -904,14 +890,6 @@ let expand_instruction instr =
       emit (Plwz(r1, Cint _4, GPR1));
       emit (Paddi(GPR1, GPR1, Cint _8));
       emit (Pcfi_adjust _m8)
-  | Pfctiu(r1, r2) ->
-      assert (Archi.ppc64);
-      emit (Pfctidz(FPR13, r2));
-      emit (Pstfdu(FPR13, Cint _m8, GPR1));
-      emit (Pcfi_adjust _8);
-      emit (Plwz(r1, Cint _4, GPR1));
-      emit (Paddi(GPR1, GPR1, Cint _8));
-      emit (Pcfi_adjust _m8)
   | Pfctid(r1, r2) ->
       assert (Archi.ppc64);
       emit (Pfctidz(FPR13, r2));
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 29e2c028..d0c44f08 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -611,15 +611,6 @@ Definition transl_op
   | Ointoffloat, a1 :: nil =>
       do r1 <- freg_of a1; do r <- ireg_of res;
       OK (Pfcti r r1 :: k)
-  | Ointuoffloat, a1 :: nil =>
-      do r1 <- freg_of a1; do r <- ireg_of res;
-      OK (Pfctiu r r1 :: k)
-  | Ofloatofint, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- freg_of res;
-      OK (Pfcfi r r1 :: k)
-  | Ofloatofintu, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- freg_of res;
-      OK (Pfcfiu r r1 :: k)
   | Ofloatofwords, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- freg_of res;
       OK (Pfmake r r1 r2 :: k)
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 21d5ce48..93589a31 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -789,16 +789,18 @@ Opaque loadind.
   econstructor; eauto.
   instantiate (2 := tf); instantiate (1 := x).
   unfold nextinstr. rewrite Pregmap.gss.
-  rewrite set_res_other. rewrite undef_regs_other_2.
+  rewrite set_res_other. simpl. rewrite undef_regs_other_2.
+  rewrite Pregmap.gso by auto with asmgen.
   rewrite <- H1. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
   rewrite preg_notin_charact. intros. auto with asmgen.
   auto with asmgen.
   apply agree_nextinstr. eapply agree_set_res; auto.
-  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto.
+  eapply agree_undef_regs; eauto.
+  intros. simpl. rewrite undef_regs_other_2; auto. apply Pregmap.gso. auto with asmgen.
   congruence.
   intros. Simpl. rewrite set_res_other by auto.
-  rewrite undef_regs_other_2; auto with asmgen.
+  simpl. rewrite undef_regs_other_2; auto with asmgen.
 
 - (* Mgoto *)
   assert (f0 = f) by congruence. subst f0.
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index 1b797999..850e95c7 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -1500,18 +1500,6 @@ Opaque Val.add.
 - replace v with (Val.maketotal (Val.intoffloat (rs x))).
   TranslOpSimpl.
   rewrite H1; auto.
-  (* Ointuoffloat *)
-- replace v with (Val.maketotal (Val.intuoffloat (rs x))).
-  TranslOpSimpl.
-  rewrite H1; auto.
-  (* Ofloatofint *)
-- replace v with (Val.maketotal (Val.floatofint (rs x))).
-  TranslOpSimpl.
-  rewrite H1; auto.
-  (* Ofloatofintu *)
-- replace v with (Val.maketotal (Val.floatofintu (rs x))).
-  TranslOpSimpl.
-  rewrite H1; auto.
   (* Ocmp *)
 - destruct (transl_cond_op_correct c0 args res k rs m c) as [rs' [A [B C]]]; auto.
   exists rs'; auto with asmgen.
diff --git a/powerpc/Builtins1.v b/powerpc/Builtins1.v
index 53c83d7e..9d7aadd9 100644
--- a/powerpc/Builtins1.v
+++ b/powerpc/Builtins1.v
@@ -19,15 +19,55 @@ Require Import String Coqlib.
 Require Import AST Integers Floats Values.
 Require Import Builtins0.
 
-Inductive platform_builtin : Type := .
+Inductive platform_builtin : Type :=
+  | BI_isel
+  | BI_uisel
+  | BI_isel64
+  | BI_uisel64
+  | BI_bsel
+  | BI_mulhw
+  | BI_mulhwu
+  | BI_mulhd
+  | BI_mulhdu.
 
 Local Open Scope string_scope.
 
 Definition platform_builtin_table : list (string * platform_builtin) :=
-  nil.
+     ("__builtin_isel", BI_isel)
+  :: ("__builtin_uisel", BI_uisel)
+  :: ("__builtin_isel64", BI_isel64)
+  :: ("__builtin_uisel64", BI_uisel64)
+  :: ("__builtin_bsel", BI_bsel)
+  :: ("__builtin_mulhw", BI_mulhw)
+  :: ("__builtin_mulhwu", BI_mulhwu)
+  :: ("__builtin_mulhd", BI_mulhd)
+  :: ("__builtin_mulhdu", BI_mulhdu)
+  :: nil.
 
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
-  match b with end.
+  match b with
+  | BI_isel | BI_uisel | BI_bsel =>
+     mksignature (Tint :: Tint :: Tint :: nil) Tint cc_default
+  | BI_isel64 | BI_uisel64 =>
+     mksignature (Tint :: Tlong :: Tlong :: nil) Tlong cc_default
+  | BI_mulhw | BI_mulhwu =>
+     mksignature (Tint :: Tint :: nil) Tint cc_default
+  | BI_mulhd | BI_mulhdu =>
+     mksignature (Tlong :: Tlong :: nil) Tlong cc_default
+  end.
 
 Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
-  match b with end.
+  match b with
+  | BI_isel | BI_uisel | BI_bsel =>
+    mkbuiltin_n3t Tint Tint Tint Tint (fun c n1 n2 => if Int.eq c Int.zero then n2 else n1)
+  | BI_isel64 | BI_uisel64 =>
+    mkbuiltin_n3t Tint Tlong Tlong Tlong (fun c n1 n2 => if Int.eq c Int.zero then n2 else n1)
+  | BI_mulhw =>
+    mkbuiltin_n2t Tint Tint Tint Int.mulhs
+  | BI_mulhwu =>
+    mkbuiltin_n2t Tint Tint Tint Int.mulhu
+  | BI_mulhd =>
+    mkbuiltin_n2t Tlong Tlong Tlong Int64.mulhs
+  | BI_mulhdu =>
+    mkbuiltin_n2t Tlong Tlong Tlong Int64.mulhu
+  end.
diff --git a/powerpc/CBuiltins.ml b/powerpc/CBuiltins.ml
index e29a41f1..e0826877 100644
--- a/powerpc/CBuiltins.ml
+++ b/powerpc/CBuiltins.ml
@@ -28,18 +28,6 @@ let builtins = {
       (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false);
     "__builtin_mulhwu",
       (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false);
-    "__builtin_clz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_clzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_clzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
-    "__builtin_ctz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_ctzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_ctzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     "__builtin_cmpb",
       (TInt (IUInt, []),  [TInt(IUInt, []);TInt(IUInt, [])], false);
     (* Integer arithmetic in 32/64-bit hybrid mode *)
diff --git a/powerpc/CSE2deps.v b/powerpc/CSE2deps.v
index d48dabf3..4592f408 100644
--- a/powerpc/CSE2deps.v
+++ b/powerpc/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/powerpc/CSE2depsproof.v b/powerpc/CSE2depsproof.v
index 123341da..ede09dd6 100644
--- a/powerpc/CSE2depsproof.v
+++ b/powerpc/CSE2depsproof.v
@@ -111,6 +111,66 @@ Section MEMORY_WRITE.
   Qed.
   End INDEXED_AWAY.
 End MEMORY_WRITE.
+
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
 End SOUNDNESS.
 
 
@@ -131,7 +191,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -141,7 +201,14 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml
new file mode 120000
index 00000000..ee2674bf
--- /dev/null
+++ b/powerpc/ExpansionOracle.ml
@@ -0,0 +1 @@
+../aarch64/ExpansionOracle.ml
+\ No newline at end of file
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index e7c8758b..9967bbae 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -166,7 +166,7 @@ Definition destroyed_by_op (op: operation): list mreg :=
   | Ofloatconst _ => R12 :: nil
   | Osingleconst _ => R12 :: nil
   | Olongconst _ => R12 :: nil
-  | Ointoffloat | Ointuoffloat => F13 :: nil
+  | Ointoffloat => F13 :: nil
   | Olongoffloat => F13 :: nil
   | Oaddlimm _ => R12 :: nil
   | Oandlimm _ => R12 :: nil
@@ -232,7 +232,7 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list
     | EF_builtin id sg =>
       if string_dec id "__builtin_atomic_exchange" then ((Some R3)::(Some R4)::(Some R5)::nil,nil)
       else if string_dec id "__builtin_sync_fetch_and_add" then ((Some R4)::(Some R5)::nil,(Some R3)::nil)
-      else if string_dec id "___builtin_atomic_compare_exchange" then ((Some R4)::(Some R5)::(Some R6)::nil, (Some R3):: nil)
+      else if string_dec id "__builtin_atomic_compare_exchange" then ((Some R4)::(Some R5)::(Some R6)::nil, (Some R3):: nil)
       else (nil, nil)
     | _ => (nil, nil)
   end.
diff --git a/powerpc/Machregsaux.ml b/powerpc/Machregsaux.ml
index 0b0d4548..d17382ad 100644
--- a/powerpc/Machregsaux.ml
+++ b/powerpc/Machregsaux.ml
@@ -12,27 +12,7 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register s = s = "R0" || s = "r0"
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r =
-  List.mem r Conventions1.int_callee_save_regs
-  || List.mem r Conventions1.float_callee_save_regs
                                               
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
diff --git a/powerpc/Machregsaux.mli b/powerpc/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/powerpc/Machregsaux.mli
+++ b/powerpc/Machregsaux.mli
@@ -12,9 +12,6 @@
 
 (** Auxiliary functions on machine registers *)
 
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
 val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
 
 val class_of_type: AST.typ -> int
diff --git a/powerpc/NeedOp.v b/powerpc/NeedOp.v
index 5ea09bd8..74ee6b85 100644
--- a/powerpc/NeedOp.v
+++ b/powerpc/NeedOp.v
@@ -61,7 +61,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Onegfs | Oabsfs => op1 (default nv)
   | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
   | Osingleoffloat | Ofloatofsingle => op1 (default nv)
-  | Ointoffloat | Ointuoffloat | Ofloatofint | Ofloatofintu => op1 (default nv)
+  | Ointoffloat => op1 (default nv)
   | Ofloatofwords | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocmp c => needs_of_condition c
diff --git a/powerpc/Op.v b/powerpc/Op.v
index a0ee5bb8..505b7545 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -105,7 +105,7 @@ Inductive operation : Type :=
   | Osubl: operation                    (**r [rd = r1 - r2] *)
   | Onegl: operation                    (**r [rd = - r1] *)
   | Omull: operation                    (**r [rd = r1 * r2] *)
-  | Omullhs: operation                   (**r [rd = high part of r1 * r2, signed] *)
+  | Omullhs: operation                  (**r [rd = high part of r1 * r2, signed] *)
   | Omullhu: operation                  (**r [rd = high part of r1 * r2, unsigned] *)
   | Odivl: operation                    (**r [rd = r1 / r2] (signed) *)
   | Odivlu: operation                   (**r [rd = r1 / r2] (unsigned) *)
@@ -141,9 +141,6 @@ Inductive operation : Type :=
   | Ofloatofsingle: operation           (**r [rd] is [r1] extended to double-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
-  | Ointuoffloat: operation             (**r [rd = unsigned_int_of_float(r1)] (PPC64 only) *)
-  | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] (PPC64 only) *)
-  | Ofloatofintu: operation             (**r [rd = float_of_unsigned_int(r1)] (PPC64 only *)
   | Ofloatofwords: operation            (**r [rd = float_of_words(r1,r2)] *)
 (*c Manipulating 64-bit integers: *)
   | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
@@ -299,9 +296,6 @@ Definition eval_operation
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
   | Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2)
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some(Val.loword v1)
@@ -449,9 +443,6 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
   | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
-  | Ointuoffloat => (Tfloat :: nil, Tint)
-  | Ofloatofint => (Tint :: nil, Tfloat)
-  | Ofloatofintu => (Tint :: nil, Tfloat)
   | Ofloatofwords => (Tint :: Tint :: nil, Tfloat)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
@@ -570,9 +561,6 @@ Proof with (try exact I; try reflexivity).
   destruct v0...
   destruct v0...
   destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
-  destruct v0; simpl in H0; inv H0...
-  destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0...
@@ -585,8 +573,7 @@ Definition is_trapping_op (op : operation) :=
   match op with
   | Odiv | Odivl | Odivu | Odivlu
   | Oshrximm _ | Oshrxlimm _
-  | Ointoffloat | Ointuoffloat
-  | Ofloatofint | Ofloatofintu
+  | Ointoffloat     
   | Olongoffloat
   | Ofloatoflong => true
   | _ => false
@@ -761,7 +748,7 @@ Definition is_trivial_op (op: operation) : bool :=
 
 (** Operations that depend on the memory state. *)
 
-Definition condition_depends_on_memory (c: condition) : bool :=
+Definition cond_depends_on_memory (c: condition) : bool :=
   match c with
   | Ccompu _ => true
   | Ccompuimm _ _ => true
@@ -772,14 +759,14 @@ Definition condition_depends_on_memory (c: condition) : bool :=
 
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp c => condition_depends_on_memory c
-  | Osel c ty => condition_depends_on_memory c
+  | Ocmp c => cond_depends_on_memory c
+  | Osel c ty => cond_depends_on_memory c
   | _ => false
   end.
 
-Lemma condition_depends_on_memory_correct:
+Lemma cond_depends_on_memory_correct:
   forall c args m1 m2,
-  condition_depends_on_memory c = false ->
+  cond_depends_on_memory c = false ->
   eval_condition c args m1 = eval_condition c args m2.
 Proof.
   intros. destruct c; simpl; auto; discriminate.
@@ -791,12 +778,36 @@ Lemma op_depends_on_memory_correct:
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
   intros until m2. destruct op; simpl; try congruence; intros C.
-- f_equal; f_equal; apply condition_depends_on_memory_correct; auto.
+- f_equal; f_equal; apply cond_depends_on_memory_correct; auto.
 - destruct args; auto. destruct args; auto.
-  rewrite (condition_depends_on_memory_correct c args m1 m2 C).
+  rewrite (cond_depends_on_memory_correct c args m1 m2 C).
   auto.
 Qed.
 
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct c; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_operation (op: operation) : list ident :=
@@ -1029,10 +1040,6 @@ Proof.
   inv H4; simpl; auto.
   inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-  exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1; simpl. TrivialExists.
-  inv H4; simpl in H1; inv H1; simpl. TrivialExists.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..9885fd52
--- /dev/null
+++ b/powerpc/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../x86/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml
index 8d7f17ab..77791827 100644
--- a/powerpc/PrintOp.ml
+++ b/powerpc/PrintOp.ml
@@ -42,6 +42,14 @@ let print_condition reg pp = function
       fprintf pp "%a & 0x%lx == 0" reg r1 (camlint_of_coqint n)
   | (Cmasknotzero n, [r1]) ->
       fprintf pp "%a & 0x%lx != 0" reg r1 (camlint_of_coqint n)
+  | (Ccompl c, [r1;r2]) ->
+      fprintf pp "%a %sls %a" reg r1 (comparison_name c) reg r2
+  | (Ccomplu c, [r1;r2]) ->
+      fprintf pp "%a %slu %a" reg r1 (comparison_name c) reg r2
+  | (Ccomplimm(c, n), [r1]) ->
+      fprintf pp "%a %sls %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
+  | (Ccompluimm(c, n), [r1]) ->
+      fprintf pp "%a %slu %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
   | _ ->
       fprintf pp "<bad condition>"
 
diff --git a/powerpc/RTLpathSE_simplify.v b/powerpc/RTLpathSE_simplify.v
new file mode 120000
index 00000000..55bf0e52
--- /dev/null
+++ b/powerpc/RTLpathSE_simplify.v
@@ -0,0 +1 @@
+../aarch64/RTLpathSE_simplify.v
+\ No newline at end of file
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 52f4f855..fe8b5453 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -468,7 +468,7 @@ Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 
 Definition intuoffloat (e: expr) :=
   if Archi.ppc64 then
-    Eop Ointuoffloat (e ::: Enil)
+    Eop Olowlong (Eop Olongoffloat (e ::: Enil) ::: Enil)
   else
     Elet e
     (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
@@ -482,7 +482,8 @@ Nondetfunction floatofintu (e: expr) :=
       Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
       if Archi.ppc64 then
-        Eop Ofloatofintu (e ::: Enil)  else
+        Eop Ofloatoflong (Eop Ocast32unsigned (e ::: Enil) ::: Enil)
+      else
         subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: e ::: Enil))
              (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Int.zero)) Enil)
   end.
@@ -493,7 +494,8 @@ Nondetfunction floatofint (e: expr) :=
       Eop (Ofloatconst (Float.of_int n)) Enil
   | _ =>
       if Archi.ppc64 then
-        Eop Ofloatofint (e ::: Enil) else
+        Eop Ofloatoflong (Eop Ocast32signed (e ::: Enil) ::: Enil)
+      else
         subf (Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil
                                  ::: addimm Float.ox8000_0000 e ::: Enil))
              (Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Float.ox8000_0000)) Enil)
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 8135bad6..ed81c83f 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -855,8 +855,13 @@ Proof.
   destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
   destruct Archi.ppc64.
-  econstructor. constructor; eauto. constructor. simpl; rewrite Heqo; auto.
-  set (im := Int.repr Int.half_modulus).
+- apply Float.to_intu_to_long in Heqo.
+  econstructor. constructor. econstructor. econstructor; eauto. constructor.
+  simpl; rewrite Heqo; simpl; eauto. constructor.
+  simpl. unfold Int64.loword. rewrite Int64.unsigned_repr, Int.repr_unsigned. auto.
+  assert (Int.modulus < Int64.max_unsigned) by (compute; auto).
+  generalize (Int.unsigned_range n). omega.  
+- set (im := Int.repr Int.half_modulus).
   set (fm := Float.of_intu im).
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
     constructor. auto.
@@ -893,11 +898,12 @@ Theorem eval_floatofint:
 Proof.
   intros until y. unfold floatofint. destruct (floatofint_match a); intros.
   InvEval. TrivialExists.
-  destruct Archi.ppc64.
-  TrivialExists.
   rename e0 into a. destruct x; simpl in H0; inv H0.
   exists (Vfloat (Float.of_int i)); split; auto.
-  set (t1 := addimm Float.ox8000_0000 a).
+  destruct Archi.ppc64.
+- rewrite Float.of_int_of_long.
+  EvalOp. constructor. EvalOp. simpl; eauto. constructor. auto.
+- set (t1 := addimm Float.ox8000_0000 a).
   set (t2 := Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: t1 ::: Enil)).
   set (t3 := Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Float.ox8000_0000)) Enil).
   exploit (eval_addimm Float.ox8000_0000 le a). eauto. fold t1.
@@ -917,12 +923,12 @@ Theorem eval_floatofintu:
 Proof.
   intros until y. unfold floatofintu. destruct (floatofintu_match a); intros.
   InvEval. TrivialExists.
-  destruct Archi.ppc64.
-  TrivialExists.
   rename e0 into a. destruct x; simpl in H0; inv H0.
   exists (Vfloat (Float.of_intu i)); split; auto.
-  unfold floatofintu.
-  set (t2 := Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: a ::: Enil)).
+  destruct Archi.ppc64.
+- rewrite Float.of_intu_of_long.
+  EvalOp. constructor. EvalOp. simpl; eauto. constructor. auto.
+- set (t2 := Eop Ofloatofwords (Eop (Ointconst Float.ox4330_0000) Enil ::: a ::: Enil)).
   set (t3 := Eop (Ofloatconst (Float.from_words Float.ox4330_0000 Int.zero)) Enil).
   exploit (eval_subf le t2).
   unfold t2. EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor.
diff --git a/powerpc/TargetPrinter.ml b/powerpc/TargetPrinter.ml
index 3ea03786..554bfe09 100644
--- a/powerpc/TargetPrinter.ml
+++ b/powerpc/TargetPrinter.ml
@@ -557,22 +557,16 @@ module Target (System : SYSTEM):TARGET =
           fprintf oc "	fadds	%a, %a, %a\n" freg r1 freg r2 freg r3
       | Pfcmpu(r1, r2) ->
           fprintf oc "	fcmpu	%a, %a, %a\n" creg 0 freg r1 freg r2
-      | Pfcfi(r1, r2) ->
-          assert false
       | Pfcfl(r1, r2) ->
           assert false
       | Pfcfid(r1, r2) ->
           fprintf oc "	fcfid	%a, %a\n" freg r1 freg r2
-      | Pfcfiu(r1, r2) ->
-          assert false
       | Pfcti(r1, r2) ->
           assert false
       | Pfctid(r1, r2) ->
           assert false
       | Pfctidz(r1, r2) ->
           fprintf oc "	fctidz	%a, %a\n" freg r1 freg r2
-      | Pfctiu(r1, r2) ->
-          assert false
       | Pfctiw(r1, r2) ->
           fprintf oc "	fctiw	%a, %a\n" freg r1 freg r2
       | Pfctiwz(r1, r2) ->
diff --git a/powerpc/ValueAOp.v b/powerpc/ValueAOp.v
index a270d857..c81f1a6c 100644
--- a/powerpc/ValueAOp.v
+++ b/powerpc/ValueAOp.v
@@ -133,9 +133,6 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
-  | Ofloatofint, v1::nil => floatofint v1
-  | Ofloatofintu, v1::nil => floatofintu v1
   | Ofloatofwords, v1::v2::nil => floatofwords v1 v2
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
diff --git a/riscV/Archi.v b/riscV/Archi.v
index 9bdaad99..1bb80e89 100644
--- a/riscV/Archi.v
+++ b/riscV/Archi.v
@@ -16,9 +16,8 @@
 
 (** Architecture-dependent parameters for RISC-V *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Parameter ptr64 : bool.
 
diff --git a/riscV/Asm.v b/riscV/Asm.v
index dc410a3b..a16f57b5 100644
--- a/riscV/Asm.v
+++ b/riscV/Asm.v
@@ -30,6 +30,7 @@ Require Import Smallstep.
 Require Import Locations.
 Require Stacklayout.
 Require Import Conventions.
+Require ExtValues.
 
 (** * Abstract syntax *)
 
@@ -62,10 +63,10 @@ Inductive freg: Type :=
   | F24: freg | F25: freg | F26: freg | F27: freg
   | F28: freg | F29: freg | F30: freg | F31: freg.
 
-Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
+Definition ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
 
-Lemma ireg0_eq: forall (x y: ireg0), {x=y} + {x<>y}.
+Definition ireg0_eq: forall (x y: ireg0), {x=y} + {x<>y}.
 Proof. decide equality. apply ireg_eq. Defined.
 
 Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
@@ -255,8 +256,10 @@ Inductive instruction : Type :=
 
   (* floating point register move *)
   | Pfmv     (rd: freg) (rs: freg)                  (**r move *)
-  | Pfmvxs   (rd: ireg) (rs: freg)                  (**r move FP single to integer register *)
-  | Pfmvxd   (rd: ireg) (rs: freg)                  (**r move FP double to integer register *)
+  | Pfmvxs   (rd: ireg) (rs: freg)                  (**r bitwise move FP single to integer register *)
+  | Pfmvxd   (rd: ireg) (rs: freg)                  (**r bitwise move FP double to integer register *)
+  | Pfmvsx   (rd: freg) (rs: ireg)                  (**r bitwise move integer register to FP single *)
+  | Pfmvdx   (rd: freg) (rs: ireg)                  (**r bitwise move integer register to FP double*)
 
   (* 32-bit (single-precision) floating point *)
   | Pfls     (rd: freg) (ra: ireg) (ofs: offset)    (**r load float *)
@@ -345,6 +348,7 @@ Inductive instruction : Type :=
   | Pbtbl   (r: ireg)  (tbl: list label)            (**r N-way branch through a jump table *)
   | Pbuiltin: external_function -> list (builtin_arg preg)
               -> builtin_res preg -> instruction    (**r built-in function (pseudo) *)
+  | Pselectl (rd: ireg) (rb: ireg0) (rt: ireg0) (rf: ireg0)
   | Pnop : instruction.                             (**r nop instruction *)
 
 
@@ -918,6 +922,17 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#d <- (Val.floatofsingle rs#s))) m
   | Pfcvtsd d s =>
       Next (nextinstr (rs#d <- (Val.singleoffloat rs#s))) m
+          
+  | Pfmvxs d s =>
+      Next (nextinstr (rs#d <- (ExtValues.bits_of_single rs#s))) m
+  | Pfmvxd d s =>
+      Next (nextinstr (rs#d <- (ExtValues.bits_of_float rs#s))) m
+          
+  | Pfmvsx d s =>
+      Next (nextinstr (rs#d <- (ExtValues.single_of_bits rs#s))) m
+  | Pfmvdx d s =>
+      Next (nextinstr (rs#d <- (ExtValues.float_of_bits rs#s))) m
+
 
 (** Pseudo-instructions *)
   | Pallocframe sz pos =>
@@ -940,6 +955,10 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
           | _ => Stuck
           end
       end
+  | Pselectl rd rb rt rf =>
+    Next (nextinstr (rs#rd <- (ExtValues.select01_long
+                                 (rs###rb) (rs###rt) (rs###rf)))
+                     #X31 <- Vundef) m
   | Plabel lbl =>
       Next (nextinstr rs) m
   | Ploadsymbol rd s ofs =>
@@ -963,14 +982,12 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       end
   | Pbuiltin ef args res =>
       Stuck (**r treated specially below *)
+  | Pnop => Next (nextinstr rs) m (**r Pnop is used by an oracle during expansion *)
 
   (** The following instructions and directives are not generated directly by Asmgen,
       so we do not model them. *)
   | Pfence
 
-  | Pfmvxs _ _
-  | Pfmvxd _ _
-
   | Pfmins _ _ _
   | Pfmaxs _ _ _
   | Pfsqrts _ _
@@ -986,7 +1003,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pfmsubd _ _ _ _
   | Pfnmaddd _ _ _ _
   | Pfnmsubd _ _ _ _
-  | Pnop
     => Stuck
   end.
 
diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml
index 7e36abf8..3f9d3359 100644
--- a/riscV/Asmexpand.ml
+++ b/riscV/Asmexpand.ml
@@ -23,6 +23,7 @@ open Asm
 open Asmexpandaux
 open AST
 open Camlcoq
+open Asmgen
 open! Integers
 
 exception Error of string
@@ -44,11 +45,13 @@ let align n a = (n + a - 1) land (-a)
 (* Emit instruction sequences that set or offset a register by a constant. *)
 
 let expand_loadimm32 dst n =
-  List.iter emit (Asmgen.loadimm32 dst n [])
+  match make_immed32 n with
+  | Imm32_single imm -> emit (Paddiw (dst, X0, imm))
+  | Imm32_pair (hi, lo) -> List.iter emit (load_hilo32 dst hi lo [])
 let expand_addptrofs dst src n =
-  List.iter emit (Asmgen.addptrofs dst src n [])
+  List.iter emit (addptrofs dst src n [])
 let expand_storeind_ptr src base ofs =
-  List.iter emit (Asmgen.storeind_ptr src base ofs [])
+  List.iter emit (storeind_ptr src base ofs [])
 
 (* Built-ins.  They come in two flavors:
    - annotation statements: take their arguments in registers or stack
@@ -394,6 +397,90 @@ let expand_bswap64 d s =
   emit (Psrlil(X31, X s, coqint_of_camlint 56l));
   emit (Porl(d, X X1, X X31))
 
+(* Count leading zeros.  Algorithm 5-7 from Hacker's Delight,
+   re-rolled as a loop to produce more compact code. *)
+
+let expand_clz ~sixtyfour ~splitlong =
+  (* Input:  X in X5 or (X5, X6) if splitlong
+     Result: N in X7
+     Temporaries: S in X8, Y in X9 *)
+  let lbl1 = new_label() in
+  let lbl2 = new_label() in
+  (* N := bitsize of X's type (32 or 64) *)
+  expand_loadimm32 X7 (coqint_of_camlint
+                         (if sixtyfour || splitlong then 64l else 32l));
+  (* S := initial shift amount (16 or 32) *)                         
+  expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
+  if splitlong then begin
+    (* if (Xhigh == 0) goto lbl1 *)
+    emit (Pbeqw(X X6, X0, lbl1));
+    (* N := 32 *)
+    expand_loadimm32 X7 (coqint_of_camlint 32l);
+    (* X := Xhigh *)
+    emit (Pmv(X5, X6))
+  end;
+  (* lbl1: *)
+  emit (Plabel lbl1);
+  (* Y := X >> S *)
+  emit (if sixtyfour then Psrll(X9, X X5, X X8) else Psrlw(X9, X X5, X X8));
+  (* if (Y == 0) goto lbl2 *)
+  emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
+  (* N := N - S *)
+  emit (Psubw(X7, X X7, X X8));
+  (* X := Y *)
+  emit (Pmv(X5, X9));
+  (* lbl2: *)
+  emit (Plabel lbl2);
+  (* S := S / 2 *)
+  emit (Psrliw(X8, X X8, _1));
+  (* if (S != 0) goto lbl1; *)
+  emit (Pbnew(X X8, X0, lbl1));
+  (* N := N - X *)
+  emit (Psubw(X7, X X7, X X5))
+
+(* Count trailing zeros.  Algorithm 5-14 from Hacker's Delight,
+   re-rolled as a loop to produce more compact code. *)
+
+let expand_ctz ~sixtyfour ~splitlong =
+  (* Input:  X in X6 or (X5, X6) if splitlong
+     Result: N in X7
+     Temporaries: S in X8, Y in X9 *)
+  let lbl1 = new_label() in
+  let lbl2 = new_label() in
+  (* N := bitsize of X's type (32 or 64) *)
+  expand_loadimm32 X7 (coqint_of_camlint
+                         (if sixtyfour || splitlong then 64l else 32l));
+  (* S := initial shift amount (16 or 32) *)                         
+  expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
+  if splitlong then begin
+    (* if (Xlow == 0) goto lbl1 *)
+    emit (Pbeqw(X X5, X0, lbl1));
+    (* N := 32 *)
+    expand_loadimm32 X7 (coqint_of_camlint 32l);
+    (* X := Xlow *)
+    emit (Pmv(X6, X5))
+  end;
+  (* lbl1: *)
+  emit (Plabel lbl1);
+  (* Y := X >> S *)
+  emit (if sixtyfour then Pslll(X9, X X6, X X8) else Psllw(X9, X X6, X X8));
+  (* if (Y == 0) goto lbl2 *)
+  emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
+  (* N := N - S *)
+  emit (Psubw(X7, X X7, X X8));
+  (* X := Y *)
+  emit (Pmv(X6, X9));
+  (* lbl2: *)
+  emit (Plabel lbl2);
+  (* S := S / 2 *)
+  emit (Psrliw(X8, X X8, _1));
+  (* if (S != 0) goto lbl1; *)
+  emit (Pbnew(X X8, X0, lbl1));
+  (* N := N - most significant bit of X *)
+  emit (if sixtyfour then Psrlil(X6, X X6, coqint_of_camlint 63l)
+                     else Psrliw(X6, X X6, coqint_of_camlint 31l));
+  emit (Psubw(X7, X X7, X X6))
+
 (* Handling of compiler-inlined builtins *)
 
 let expand_builtin_inline name args res =
@@ -418,10 +505,33 @@ let expand_builtin_inline name args res =
      assert (ah = X6 && al = X5 && rh = X5 && rl = X6);
      expand_bswap32 X5 X5;
      expand_bswap32 X6 X6
+  (* Count zeros *)
+  | "__builtin_clz", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:false ~splitlong:false
+  | "__builtin_clzl", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:Archi.ptr64 ~splitlong:false
+  | "__builtin_clzll", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:true ~splitlong:false
+  | "__builtin_clzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
+     assert (al = X5 && ah = X6 && res = X7);
+     expand_clz ~sixtyfour:false ~splitlong:true
+  | "__builtin_ctz", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:false ~splitlong:false
+  | "__builtin_ctzl", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:Archi.ptr64 ~splitlong:false
+  | "__builtin_ctzll", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:true ~splitlong:false
+  | "__builtin_ctzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
+     assert (al = X5 && ah = X6 && res = X7);
+     expand_ctz ~sixtyfour:false ~splitlong:true
   (* Float arithmetic *)
-  | "__builtin_fabs",  [BA(FR a1)], BR(FR res) ->
-     emit (Pfabsd(res, a1))
-  | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
      emit (Pfsqrtd(res, a1))
   | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
       emit (Pfmaddd(res, a1, a2, a3))
@@ -475,9 +585,49 @@ let expand_builtin_inline name args res =
      raise (Error ("unrecognized builtin " ^ name))
 
 (* Expansion of instructions *)
-
+                                 
 let expand_instruction instr =
   match instr with
+  | Pselectl(rd, rb, rt, rf) ->
+     if not Archi.ptr64
+     then failwith "Pselectl not available on RV32, only on RV64"
+     else
+       if ireg0_eq rt rf then
+         begin
+           if ireg0_eq (X rd) rt then
+             begin
+             end
+           else
+             begin
+               emit (Paddl(rd, X0, rt))
+             end
+         end
+       else
+         if (ireg0_eq (X rd) rt) then
+           begin
+             emit (Psubl(X31, X0, rb));
+             emit (Pandl(X31, X X31, rt));
+             emit (Paddil(rd, rb, Int64.mone));
+             emit (Pandl(rd, X rd, rf));
+             emit (Porl(rd, X rd, X X31))
+           end
+         else
+           if (ireg0_eq (X rd) rf) then
+             begin
+               emit (Paddil(X31, rb, Int64.mone));
+               emit (Pandl(X31, X X31, rf));
+               emit (Psubl(rd, X0, rb));
+               emit (Pandl(rd, X rd, rt));
+               emit (Porl(rd, X rd, X X31))
+             end  
+           else
+             begin
+               emit (Psubl(X31, X0, rb));
+               emit (Paddil(rd, rb, Int64.mone));
+               emit (Pandl(X31, X X31, rt));
+               emit (Pandl(rd, X rd, rf));
+               emit (Porl(rd, X rd, X X31))
+             end
   | Pallocframe (sz, ofs) ->
       let sg = get_current_function_sig() in
       emit (Pmv (X30, X2));
diff --git a/riscV/Asmgen.v b/riscV/Asmgen.v
index b431d63d..da6c0101 100644
--- a/riscV/Asmgen.v
+++ b/riscV/Asmgen.v
@@ -86,12 +86,6 @@ Definition make_immed64 (val: int64) :=
 Definition load_hilo32 (r: ireg) (hi lo: int) k :=
   if Int.eq lo Int.zero then Pluiw r hi :: k
   else Pluiw r hi :: Paddiw r r lo :: k.
-  
-Definition loadimm32 (r: ireg) (n: int) (k: code) :=
-  match make_immed32 n with
-  | Imm32_single imm => Paddiw r X0 imm :: k
-  | Imm32_pair hi lo => load_hilo32 r hi lo k
-  end.
 
 Definition opimm32 (op: ireg -> ireg0 -> ireg0 -> instruction)
                    (opimm: ireg -> ireg0 -> int -> instruction)
@@ -102,23 +96,11 @@ Definition opimm32 (op: ireg -> ireg0 -> ireg0 -> instruction)
   end.
 
 Definition addimm32 := opimm32 Paddw Paddiw.
-Definition andimm32 := opimm32 Pandw Pandiw.
-Definition orimm32  := opimm32 Porw  Poriw.
-Definition xorimm32 := opimm32 Pxorw Pxoriw.
-Definition sltimm32 := opimm32 Psltw Psltiw.
-Definition sltuimm32 := opimm32 Psltuw Psltiuw.
   
 Definition load_hilo64 (r: ireg) (hi lo: int64) k :=
   if Int64.eq lo Int64.zero then Pluil r hi :: k
   else Pluil r hi :: Paddil r r lo :: k.
 
-Definition loadimm64 (r: ireg) (n: int64) (k: code) :=
-  match make_immed64 n with
-  | Imm64_single imm => Paddil r X0 imm :: k
-  | Imm64_pair hi lo => load_hilo64 r hi lo k
-  | Imm64_large imm  => Ploadli r imm :: k
-  end.
-
 Definition opimm64 (op: ireg -> ireg0 -> ireg0 -> instruction)
                    (opimm: ireg -> ireg0 -> int64 -> instruction)
                    (rd rs: ireg) (n: int64) (k: code) :=
@@ -129,11 +111,6 @@ Definition opimm64 (op: ireg -> ireg0 -> ireg0 -> instruction)
   end.
 
 Definition addimm64 := opimm64 Paddl Paddil.
-Definition andimm64 := opimm64 Pandl Pandil.
-Definition orimm64  := opimm64 Porl  Poril.
-Definition xorimm64 := opimm64 Pxorl  Pxoril.
-Definition sltimm64 := opimm64 Psltl Psltil.
-Definition sltuimm64 := opimm64 Psltul Psltiul.
 
 Definition addptrofs (rd rs: ireg) (n: ptrofs) (k: code) :=
   if Ptrofs.eq_dec n Ptrofs.zero then
@@ -143,257 +120,95 @@ Definition addptrofs (rd rs: ireg) (n: ptrofs) (k: code) :=
     then addimm64 rd rs (Ptrofs.to_int64 n) k
     else addimm32 rd rs (Ptrofs.to_int n) k.
   
-(** Translation of conditional branches. *)
-
-Definition transl_cbranch_int32s (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
-  match cmp with
-  | Ceq => Pbeqw r1 r2 lbl
-  | Cne => Pbnew r1 r2 lbl
-  | Clt => Pbltw r1 r2 lbl
-  | Cle => Pbgew r2 r1 lbl
-  | Cgt => Pbltw r2 r1 lbl
-  | Cge => Pbgew r1 r2 lbl
-  end.
+(** Functions to select a special register according to the op "oreg" argument from RTL *)
 
-Definition transl_cbranch_int32u (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
-  match cmp with
-  | Ceq => Pbeqw  r1 r2 lbl
-  | Cne => Pbnew  r1 r2 lbl
-  | Clt => Pbltuw r1 r2 lbl
-  | Cle => Pbgeuw r2 r1 lbl
-  | Cgt => Pbltuw r2 r1 lbl
-  | Cge => Pbgeuw r1 r2 lbl
-  end.
+Definition apply_bin_oreg_ireg0 (optR: option oreg) (r1 r2: ireg0): (ireg0 * ireg0) :=
+  match optR with
+  | None => (r1, r2)
+  | Some X0_L => (X0, r1)
+  | Some X0_R => (r1, X0)
+  end. 
 
-Definition transl_cbranch_int64s (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
-  match cmp with
-  | Ceq => Pbeql r1 r2 lbl
-  | Cne => Pbnel r1 r2 lbl
-  | Clt => Pbltl r1 r2 lbl
-  | Cle => Pbgel r2 r1 lbl
-  | Cgt => Pbltl r2 r1 lbl
-  | Cge => Pbgel r1 r2 lbl
-  end.
-
-Definition transl_cbranch_int64u (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
-  match cmp with
-  | Ceq => Pbeql  r1 r2 lbl
-  | Cne => Pbnel  r1 r2 lbl
-  | Clt => Pbltul r1 r2 lbl
-  | Cle => Pbgeul r2 r1 lbl
-  | Cgt => Pbltul r2 r1 lbl
-  | Cge => Pbgeul r1 r2 lbl
-  end.
+Definition get_oreg (optR: option oreg) (r: ireg0) :=
+  match optR with
+  | Some X0_L | Some X0_R => X0
+  | _ => r
+  end. 
 
-Definition transl_cond_float (cmp: comparison) (rd: ireg) (fs1 fs2: freg) :=
-  match cmp with
-  | Ceq => (Pfeqd rd fs1 fs2, true)
-  | Cne => (Pfeqd rd fs1 fs2, false)
-  | Clt => (Pfltd rd fs1 fs2, true)
-  | Cle => (Pfled rd fs1 fs2, true)
-  | Cgt => (Pfltd rd fs2 fs1, true)
-  | Cge => (Pfled rd fs2 fs1, true)
-  end.
-  
-Definition transl_cond_single (cmp: comparison) (rd: ireg) (fs1 fs2: freg) :=
-  match cmp with
-  | Ceq => (Pfeqs rd fs1 fs2, true)
-  | Cne => (Pfeqs rd fs1 fs2, false)
-  | Clt => (Pflts rd fs1 fs2, true)
-  | Cle => (Pfles rd fs1 fs2, true)
-  | Cgt => (Pflts rd fs2 fs1, true)
-  | Cge => (Pfles rd fs2 fs1, true)
-  end.
-  
 Definition transl_cbranch
            (cond: condition) (args: list mreg) (lbl: label) (k: code) :=
   match cond, args with
-  | Ccomp c, a1 :: a2 :: nil =>
+  | CEbeqw optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cbranch_int32s c r1 r2 lbl :: k)
-  | Ccompu c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeqw r1' r2' lbl :: k)
+  | CEbnew optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cbranch_int32u c r1 r2 lbl :: k)
-  | Ccompimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if Int.eq n Int.zero then
-            transl_cbranch_int32s c r1 X0 lbl :: k
-          else
-            loadimm32 X31 n (transl_cbranch_int32s c r1 X31 lbl :: k))
-  | Ccompuimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if Int.eq n Int.zero then
-            transl_cbranch_int32u c r1 X0 lbl :: k
-          else
-            loadimm32 X31 n (transl_cbranch_int32u c r1 X31 lbl :: k))
-  | Ccompl c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnew r1' r2' lbl :: k)
+  | CEbequw optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cbranch_int64s c r1 r2 lbl :: k)
-  | Ccomplu c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeqw r1' r2' lbl :: k)
+  | CEbneuw optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cbranch_int64u c r1 r2 lbl :: k)
-  | Ccomplimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if Int64.eq n Int64.zero then
-            transl_cbranch_int64s c r1 X0 lbl :: k
-          else
-            loadimm64 X31 n (transl_cbranch_int64s c r1 X31 lbl :: k))
-  | Ccompluimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (if Int64.eq n Int64.zero then
-            transl_cbranch_int64u c r1 X0 lbl :: k
-          else
-            loadimm64 X31 n (transl_cbranch_int64u c r1 X31 lbl :: k))
-  | Ccompf c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_float c X31 r1 r2 in
-      OK (insn :: (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) :: k)
-  | Cnotcompf c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_float c X31 r1 r2 in
-      OK (insn :: (if normal then Pbeqw X31 X0 lbl else Pbnew X31 X0 lbl) :: k)
-  | Ccompfs c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_single c X31 r1 r2 in
-      OK (insn :: (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) :: k)
-  | Cnotcompfs c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_single c X31 r1 r2 in
-      OK (insn :: (if normal then Pbeqw X31 X0 lbl else Pbnew X31 X0 lbl) :: k)
-  | _, _ =>
-      Error(msg "Asmgen.transl_cond_branch")
-  end.
-
-(** Translation of a condition operator.  The generated code sets the
-  [rd] target register to 0 or 1 depending on the truth value of the
-  condition. *)
-
-Definition transl_cond_int32s (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
-  match cmp with
-  | Ceq => Pseqw rd r1 r2 :: k
-  | Cne => Psnew rd r1 r2 :: k
-  | Clt => Psltw rd r1 r2 :: k
-  | Cle => Psltw rd r2 r1 :: Pxoriw rd rd Int.one :: k
-  | Cgt => Psltw rd r2 r1 :: k
-  | Cge => Psltw rd r1 r2 :: Pxoriw rd rd Int.one :: k
-  end.
-
-Definition transl_cond_int32u (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
-  match cmp with
-  | Ceq => Pseqw rd r1 r2 :: k
-  | Cne => Psnew rd r1 r2 :: k
-  | Clt => Psltuw rd r1 r2 :: k
-  | Cle => Psltuw rd r2 r1 :: Pxoriw rd rd Int.one :: k
-  | Cgt => Psltuw rd r2 r1 :: k
-  | Cge => Psltuw rd r1 r2 :: Pxoriw rd rd Int.one :: k
-  end.
-
-Definition transl_cond_int64s (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
-  match cmp with
-  | Ceq => Pseql rd r1 r2 :: k
-  | Cne => Psnel rd r1 r2 :: k
-  | Clt => Psltl rd r1 r2 :: k
-  | Cle => Psltl rd r2 r1 :: Pxoriw rd rd Int.one :: k
-  | Cgt => Psltl rd r2 r1 :: k
-  | Cge => Psltl rd r1 r2 :: Pxoriw rd rd Int.one :: k
-  end.
-
-Definition transl_cond_int64u (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
-  match cmp with
-  | Ceq => Pseql rd r1 r2 :: k
-  | Cne => Psnel rd r1 r2 :: k
-  | Clt => Psltul rd r1 r2 :: k
-  | Cle => Psltul rd r2 r1 :: Pxoriw rd rd Int.one :: k
-  | Cgt => Psltul rd r2 r1 :: k
-  | Cge => Psltul rd r1 r2 :: Pxoriw rd rd Int.one :: k
-  end.
-
-Definition transl_condimm_int32s (cmp: comparison) (rd: ireg) (r1: ireg) (n: int) (k: code) :=
-  if Int.eq n Int.zero then transl_cond_int32s cmp rd r1 X0 k else
-  match cmp with
-  | Ceq | Cne => xorimm32 rd r1 n (transl_cond_int32s cmp rd rd X0 k)
-  | Clt => sltimm32 rd r1 n k
-  | Cle => if Int.eq n (Int.repr Int.max_signed)
-           then loadimm32 rd Int.one k
-           else sltimm32 rd r1 (Int.add n Int.one) k
-  | _   => loadimm32 X31 n (transl_cond_int32s cmp rd r1 X31 k)
-  end.
-
-Definition transl_condimm_int32u (cmp: comparison) (rd: ireg) (r1: ireg) (n: int) (k: code) :=
-  if Int.eq n Int.zero then transl_cond_int32u cmp rd r1 X0 k else
-  match cmp with
-  | Clt => sltuimm32 rd r1 n k
-  | _   => loadimm32 X31 n (transl_cond_int32u cmp rd r1 X31 k)
-  end.
-
-Definition transl_condimm_int64s (cmp: comparison) (rd: ireg) (r1: ireg) (n: int64) (k: code) :=
-  if Int64.eq n Int64.zero then transl_cond_int64s cmp rd r1 X0 k else
-  match cmp with
-  | Ceq | Cne => xorimm64 rd r1 n (transl_cond_int64s cmp rd rd X0 k)
-  | Clt => sltimm64 rd r1 n k
-  | Cle => if Int64.eq n (Int64.repr Int64.max_signed)
-           then loadimm32 rd Int.one k
-           else sltimm64 rd r1 (Int64.add n Int64.one) k
-  | _   => loadimm64 X31 n (transl_cond_int64s cmp rd r1 X31 k)
-  end.
-
-Definition transl_condimm_int64u (cmp: comparison) (rd: ireg) (r1: ireg) (n: int64) (k: code) :=
-  if Int64.eq n Int64.zero then transl_cond_int64u cmp rd r1 X0 k else
-  match cmp with
-  | Clt => sltuimm64 rd r1 n k
-  | _   => loadimm64 X31 n (transl_cond_int64u cmp rd r1 X31 k)
-  end.
-
-Definition transl_cond_op
-           (cond: condition) (rd: ireg) (args: list mreg) (k: code) :=
-  match cond, args with
-  | Ccomp c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnew r1' r2' lbl :: k)
+  | CEbltw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltw r1' r2' lbl :: k)
+  | CEbltuw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltuw r1' r2' lbl :: k)
+  | CEbgew optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgew r1' r2' lbl :: k)
+  | CEbgeuw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgeuw r1' r2' lbl :: k)
+  | CEbeql optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeql r1' r2' lbl :: k)
+  | CEbnel optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnel r1' r2' lbl :: k)
+  | CEbequl optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeql r1' r2' lbl :: k)
+  | CEbneul optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnel r1' r2' lbl :: k)
+  | CEbltl optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cond_int32s c rd r1 r2 k)
-  | Ccompu c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltl r1' r2' lbl :: k)
+  | CEbltul optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cond_int32u c rd r1 r2 k)
-  | Ccompimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (transl_condimm_int32s c rd r1 n k)
-  | Ccompuimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (transl_condimm_int32u c rd r1 n k)
-  | Ccompl c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltul r1' r2' lbl :: k)
+  | CEbgel optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cond_int64s c rd r1 r2 k)
-  | Ccomplu c, a1 :: a2 :: nil =>
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgel r1' r2' lbl :: k)
+  | CEbgeul optR, a1 :: a2 :: nil =>
       do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (transl_cond_int64u c rd r1 r2 k)
-  | Ccomplimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (transl_condimm_int64s c rd r1 n k)
-  | Ccompluimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1;
-      OK (transl_condimm_int64u c rd r1 n k)
-  | Ccompf c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_float c rd r1 r2 in
-      OK (insn :: if normal then k else Pxoriw rd rd Int.one :: k)
-  | Cnotcompf c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_float c rd r1 r2 in
-      OK (insn :: if normal then Pxoriw rd rd Int.one :: k else k)
-  | Ccompfs c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_single c rd r1 r2 in
-      OK (insn :: if normal then k else Pxoriw rd rd Int.one :: k)
-  | Cnotcompfs c, f1 :: f2 :: nil =>
-      do r1 <- freg_of f1; do r2 <- freg_of f2;
-      let (insn, normal) := transl_cond_single c rd r1 r2 in
-      OK (insn :: if normal then Pxoriw rd rd Int.one :: k else k)
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgeul r1' r2' lbl :: k)
   | _, _ =>
-      Error(msg "Asmgen.transl_cond_op")
+      Error(msg "Asmgen.transl_cond_branch")
   end.
 
 (** Translation of the arithmetic operation [r <- op(args)].
   The corresponding instructions are prepended to [k]. *)
-
+  
 Definition transl_op
               (op: operation) (args: list mreg) (res: mreg) (k: code) :=
   match op, args with
@@ -403,22 +218,6 @@ Definition transl_op
       | FR r, FR a => OK (Pfmv r a :: k)
       |  _  ,  _   => Error(msg "Asmgen.Omove")
       end
-  | Ointconst n, nil =>
-      do rd <- ireg_of res;
-      OK (loadimm32 rd n k)
-  | Olongconst n, nil =>
-      do rd <- ireg_of res;
-      OK (loadimm64 rd n k)
-  | Ofloatconst f, nil =>
-      do rd <- freg_of res;
-      OK (if Float.eq_dec f Float.zero
-          then Pfcvtdw rd X0 :: k
-          else Ploadfi rd f :: k)
-  | Osingleconst f, nil =>
-      do rd <- freg_of res;
-      OK (if Float32.eq_dec f Float32.zero
-          then Pfcvtsw rd X0 :: k
-          else Ploadsi rd f :: k)
   | Oaddrsymbol s ofs, nil =>
       do rd <- ireg_of res;
       OK (if Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)
@@ -428,18 +227,9 @@ Definition transl_op
       do rd <- ireg_of res;
       OK (addptrofs rd SP n k)
 
-  | Ocast8signed, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (Pslliw rd rs (Int.repr 24) :: Psraiw rd rd (Int.repr 24) :: k)
-  | Ocast16signed, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- ireg_of a1;
-      OK (Pslliw rd rs (Int.repr 16) :: Psraiw rd rd (Int.repr 16) :: k)
   | Oadd, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Paddw rd rs1 rs2 :: k)
-  | Oaddimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (addimm32 rd rs n k)
   | Oneg, a1 :: nil =>
       do rd  <- ireg_of res; do rs <- ireg_of a1;
       OK (Psubw rd X0 rs :: k)
@@ -470,21 +260,12 @@ Definition transl_op
   | Oand, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Pandw rd rs1 rs2 :: k)
-  | Oandimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (andimm32 rd rs n k)
   | Oor, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Porw rd rs1 rs2 :: k)
-  | Oorimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (orimm32 rd rs n k)
   | Oxor, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Pxorw rd rs1 rs2 :: k)
-  | Oxorimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (xorimm32 rd rs n k)
   | Oshl, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Psllw rd rs1 rs2 :: k)
@@ -503,19 +284,6 @@ Definition transl_op
   | Oshruimm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
       OK (Psrliw rd rs n :: k)
-  | Oshrximm n, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- ireg_of a1;
-        OK (if Int.eq n Int.zero
-            then Pmv rd rs :: k
-            else if Int.eq n Int.one
-                 then Psrliw X31 rs (Int.repr 31) ::
-                      Paddw X31 rs X31 ::
-                      Psraiw rd X31 Int.one :: k
-                 else Psraiw X31 rs (Int.repr 31) ::
-                      Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
-                      Paddw X31 rs X31 ::
-                      Psraiw rd X31 n :: k)  
-
   (* [Omakelong], [Ohighlong]  should not occur *)
   | Olowlong, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
@@ -524,16 +292,9 @@ Definition transl_op
       do rd <- ireg_of res; do rs <- ireg_of a1;
       assertion (ireg_eq rd rs);
       OK (Pcvtw2l rd :: k)
-  | Ocast32unsigned, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- ireg_of a1;
-      assertion (ireg_eq rd rs);
-      OK (Pcvtw2l rd :: Psllil rd rd (Int.repr 32) :: Psrlil rd rd (Int.repr 32) :: k)
   | Oaddl, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Paddl rd rs1 rs2 :: k)
-  | Oaddlimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (addimm64 rd rs n k)
   | Onegl, a1 :: nil =>
       do rd  <- ireg_of res; do rs <- ireg_of a1;
       OK (Psubl rd X0 rs :: k)
@@ -564,21 +325,12 @@ Definition transl_op
   | Oandl, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Pandl rd rs1 rs2 :: k)
-  | Oandlimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (andimm64 rd rs n k)
   | Oorl, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Porl rd rs1 rs2 :: k)
-  | Oorlimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (orimm64 rd rs n k)
   | Oxorl, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Pxorl rd rs1 rs2 :: k)
-  | Oxorlimm n, a1 :: nil =>
-      do rd  <- ireg_of res; do rs <- ireg_of a1;
-      OK (xorimm64 rd rs n k)
   | Oshll, a1 :: a2 :: nil =>
       do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
       OK (Pslll rd rs1 rs2 :: k)
@@ -597,19 +349,6 @@ Definition transl_op
   | Oshrluimm n, a1 :: nil =>
       do rd <- ireg_of res; do rs <- ireg_of a1;
       OK (Psrlil rd rs n :: k)
-  | Oshrxlimm n, a1 :: nil =>
-      do rd <- ireg_of res; do rs <- ireg_of a1;
-        OK (if Int.eq n Int.zero
-            then Pmv rd rs :: k
-            else if Int.eq n Int.one
-                 then Psrlil X31 rs (Int.repr 63) ::
-                      Paddl X31 rs X31 ::
-                      Psrail rd X31 Int.one :: k
-                 else Psrail X31 rs (Int.repr 63) ::
-                      Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
-                      Paddl X31 rs X31 ::
-                      Psrail rd X31 n :: k)  
-
   | Onegf, a1 :: nil =>
       do rd <- freg_of res; do rs <- freg_of a1;
       OK (Pfnegd rd rs :: k)
@@ -705,10 +444,202 @@ Definition transl_op
       do rd <- freg_of res; do rs <- ireg_of a1;
       OK (Pfcvtslu rd rs :: k)
 
-  | Ocmp cmp, _ =>
+  (* Instructions expanded in RTL *)
+  | OEseqw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseqw rd rs1' rs2' :: k)
+  | OEsnew optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnew rd rs1' rs2' :: k)
+  | OEsequw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseqw rd rs1' rs2' :: k)
+  | OEsneuw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnew rd rs1' rs2' :: k)
+  | OEsltw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltw rd rs1' rs2' :: k)
+  | OEsltuw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltuw rd rs1' rs2' :: k)
+  | OEsltiw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiw rd rs n :: k)
+  | OEsltiuw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiuw rd rs n :: k)
+  | OExoriw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pxoriw rd rs n :: k)
+  | OEluiw n, nil =>
+      do rd <- ireg_of res;
+      OK (Pluiw rd n :: k)
+  | OEaddiw optR n, nil =>
+      do rd <- ireg_of res;
+      let rs := get_oreg optR X0 in
+      OK (Paddiw rd rs n :: k)
+  | OEaddiw optR n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      let rs' := get_oreg optR rs in
+      OK (Paddiw rd rs' n :: k)
+  | OEandiw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pandiw rd rs n :: k)
+  | OEoriw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Poriw rd rs n :: k)
+  | OEseql optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseql rd rs1' rs2' :: k)
+  | OEsnel optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnel rd rs1' rs2' :: k)
+  | OEsequl optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseql rd rs1' rs2' :: k)
+  | OEsneul optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnel rd rs1' rs2' :: k)
+  | OEsltl optR, a1 :: a2 :: nil =>
       do rd <- ireg_of res;
-      transl_cond_op cmp rd args k
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltl rd rs1' rs2' :: k)
+  | OEsltul optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltul rd rs1' rs2' :: k)
+  | OEsltil n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltil rd rs n :: k)
+  | OEsltiul n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiul rd rs n :: k)
+  | OExoril n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pxoril rd rs n :: k)
+  | OEluil n, nil =>
+      do rd <- ireg_of res;
+      OK (Pluil rd n :: k)
+  | OEaddil optR n, nil =>
+      do rd <- ireg_of res;
+      let rs := get_oreg optR X0 in
+      OK (Paddil rd rs n :: k)
+  | OEaddil optR n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      let rs' := get_oreg optR rs in
+      OK (Paddil rd rs' n :: k)
+  | OEandil n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pandil rd rs n :: k)
+  | OEoril n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Poril rd rs n :: k)
+  | OEloadli n, nil =>
+      do rd <- ireg_of res;
+      OK (Ploadli rd n :: k)
+  | OEfeqd, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfeqd rd r1 r2 :: k)
+  | OEfltd, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfltd rd r1 r2 :: k)
+  | OEfled, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfled rd r1 r2 :: k)
+  | OEfeqs, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfeqs rd r1 r2 :: k)
+  | OEflts, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pflts rd r1 r2 :: k)
+  | OEfles, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfles rd r1 r2 :: k)
+  | OEmayundef _, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do r2 <- ireg_of a2;
+      if ireg_eq rd r2 then
+        OK (Pnop :: k)
+      else
+        OK (Pmv rd r2 :: k)
 
+  | Obits_of_single, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfmvxs rd rs :: k)
+  | Obits_of_float, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfmvxd rd rs :: k)
+  | Osingle_of_bits, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfmvsx rd rs :: k)
+  | Ofloat_of_bits, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfmvdx rd rs :: k)
+  | Oselectl, b::t::f::nil =>
+      do rd <- ireg_of res;
+      do rb <- ireg_of b;
+      do rt <- ireg_of t;
+      do rf <- ireg_of f;
+      OK (Pselectl rd rb rt rf :: k)
   | _, _ =>
       Error(msg "Asmgen.transl_op")
   end.
diff --git a/riscV/Asmgenproof.v b/riscV/Asmgenproof.v
index 8e9f022c..4af8352c 100644
--- a/riscV/Asmgenproof.v
+++ b/riscV/Asmgenproof.v
@@ -115,14 +115,6 @@ Qed.
 
 Section TRANSL_LABEL.
 
-Remark loadimm32_label:
-  forall r n k, tail_nolabel k (loadimm32 r n k).
-Proof.
-  intros; unfold loadimm32. destruct (make_immed32 n); TailNoLabel.
-  unfold load_hilo32. destruct (Int.eq lo Int.zero); TailNoLabel.
-Qed.
-Hint Resolve loadimm32_label: labels.
-
 Remark opimm32_label:
   forall op opimm r1 r2 n k,
   (forall r1 r2 r3, nolabel (op r1 r2 r3)) ->
@@ -134,14 +126,6 @@ Proof.
 Qed.
 Hint Resolve opimm32_label: labels.
 
-Remark loadimm64_label:
-  forall r n k, tail_nolabel k (loadimm64 r n k).
-Proof.
-  intros; unfold loadimm64. destruct (make_immed64 n); TailNoLabel.
-  unfold load_hilo64. destruct (Int64.eq lo Int64.zero); TailNoLabel.
-Qed.
-Hint Resolve loadimm64_label: labels.
-
 Remark opimm64_label:
   forall op opimm r1 r2 n k,
   (forall r1 r2 r3, nolabel (op r1 r2 r3)) ->
@@ -161,112 +145,12 @@ Proof.
 Qed.
 Hint Resolve addptrofs_label: labels.
 
-Remark transl_cond_float_nolabel:
-  forall c r1 r2 r3 insn normal,
-  transl_cond_float c r1 r2 r3 = (insn, normal) -> nolabel insn.
-Proof.
-  unfold transl_cond_float; intros. destruct c; inv H; exact I.
-Qed.
-
-Remark transl_cond_single_nolabel:
-  forall c r1 r2 r3 insn normal,
-  transl_cond_single c r1 r2 r3 = (insn, normal) -> nolabel insn.
-Proof.
-  unfold transl_cond_single; intros. destruct c; inv H; exact I.
-Qed.
-
 Remark transl_cbranch_label:
   forall cond args lbl k c,
   transl_cbranch cond args lbl k = OK c -> tail_nolabel k c.
 Proof.
   intros. unfold transl_cbranch in H; destruct cond; TailNoLabel.
-- destruct c0; simpl; TailNoLabel.
-- destruct c0; simpl; TailNoLabel.
-- destruct (Int.eq n Int.zero).
-  destruct c0; simpl; TailNoLabel.
-  apply tail_nolabel_trans with (transl_cbranch_int32s c0 x X31 lbl :: k).
-  auto with labels. destruct c0; simpl; TailNoLabel.
-- destruct (Int.eq n Int.zero).
-  destruct c0; simpl; TailNoLabel.
-  apply tail_nolabel_trans with (transl_cbranch_int32u c0 x X31 lbl :: k).
-  auto with labels. destruct c0; simpl; TailNoLabel.
-- destruct c0; simpl; TailNoLabel.
-- destruct c0; simpl; TailNoLabel.
-- destruct (Int64.eq n Int64.zero).
-  destruct c0; simpl; TailNoLabel.
-  apply tail_nolabel_trans with (transl_cbranch_int64s c0 x X31 lbl :: k).
-  auto with labels. destruct c0; simpl; TailNoLabel.
-- destruct (Int64.eq n Int64.zero).
-  destruct c0; simpl; TailNoLabel.
-  apply tail_nolabel_trans with (transl_cbranch_int64u c0 x X31 lbl :: k).
-  auto with labels. destruct c0; simpl; TailNoLabel.
-- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-Qed.
-
-Remark transl_cond_op_label:
-  forall cond args r k c,
-  transl_cond_op cond r args k = OK c -> tail_nolabel k c.
-Proof.
-  intros. unfold transl_cond_op in H; destruct cond; TailNoLabel.
-- destruct c0; simpl; TailNoLabel.
-- destruct c0; simpl; TailNoLabel. 
-- unfold transl_condimm_int32s.
-  destruct (Int.eq n Int.zero).
-+ destruct c0; simpl; TailNoLabel.
-+ destruct c0; simpl.
-* eapply tail_nolabel_trans; [apply opimm32_label; intros; exact I | TailNoLabel].
-* eapply tail_nolabel_trans; [apply opimm32_label; intros; exact I | TailNoLabel].
-* apply opimm32_label; intros; exact I.
-* destruct (Int.eq n (Int.repr Int.max_signed)). apply loadimm32_label. apply opimm32_label; intros; exact I.
-* eapply tail_nolabel_trans. apply loadimm32_label. TailNoLabel.
-* eapply tail_nolabel_trans. apply loadimm32_label. TailNoLabel.
-- unfold transl_condimm_int32u.
-  destruct (Int.eq n Int.zero).
-+ destruct c0; simpl; TailNoLabel.
-+ destruct c0; simpl; 
-  try (eapply tail_nolabel_trans; [apply loadimm32_label | TailNoLabel]).
-  apply opimm32_label; intros; exact I.
-- destruct c0; simpl; TailNoLabel.
-- destruct c0; simpl; TailNoLabel. 
-- unfold transl_condimm_int64s.
-  destruct (Int64.eq n Int64.zero).
-+ destruct c0; simpl; TailNoLabel.
-+ destruct c0; simpl.
-* eapply tail_nolabel_trans; [apply opimm64_label; intros; exact I | TailNoLabel].
-* eapply tail_nolabel_trans; [apply opimm64_label; intros; exact I | TailNoLabel].
-* apply opimm64_label; intros; exact I.
-* destruct (Int64.eq n (Int64.repr Int64.max_signed)). apply loadimm32_label. apply opimm64_label; intros; exact I.
-* eapply tail_nolabel_trans. apply loadimm64_label. TailNoLabel.
-* eapply tail_nolabel_trans. apply loadimm64_label. TailNoLabel.
-- unfold transl_condimm_int64u.
-  destruct (Int64.eq n Int64.zero).
-+ destruct c0; simpl; TailNoLabel.
-+ destruct c0; simpl; 
-  try (eapply tail_nolabel_trans; [apply loadimm64_label | TailNoLabel]).
-  apply opimm64_label; intros; exact I.
-- destruct (transl_cond_float c0 r x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_float c0 r x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_single c0 r x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
-  destruct normal; TailNoLabel.
-- destruct (transl_cond_single c0 r x x0) as [insn normal] eqn:F; inv EQ2.
-  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
-  destruct normal; TailNoLabel.
+  all: destruct optR as [[]|]; simpl in *; TailNoLabel.
 Qed.
 
 Remark transl_op_label:
@@ -274,24 +158,12 @@ Remark transl_op_label:
   transl_op op args r k = OK c -> tail_nolabel k c.
 Proof.
 Opaque Int.eq.
-  unfold transl_op; intros; destruct op; TailNoLabel.
+  unfold transl_op; intros; destruct op; TailNoLabel;
+  try (destruct optR as [[]|]; simpl in *; TailNoLabel).
 - destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
-- destruct (Float.eq_dec n Float.zero); TailNoLabel.
-- destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
 - destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
 + eapply tail_nolabel_trans; [|apply addptrofs_label]. TailNoLabel.
 + TailNoLabel. 
-- apply opimm32_label; intros; exact I.
-- apply opimm32_label; intros; exact I.
-- apply opimm32_label; intros; exact I.
-- apply opimm32_label; intros; exact I.
-- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
-- apply opimm64_label; intros; exact I.
-- apply opimm64_label; intros; exact I.
-- apply opimm64_label; intros; exact I.
-- apply opimm64_label; intros; exact I.
-- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
-- eapply transl_cond_op_label; eauto.
 Qed.
 
 Remark indexed_memory_access_label:
diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v
index 8678a5dc..faa066b0 100644
--- a/riscV/Asmgenproof1.v
+++ b/riscV/Asmgenproof1.v
@@ -129,22 +129,6 @@ Proof.
   intros; Simpl.
 Qed.
 
-Lemma loadimm32_correct:
-  forall rd n k rs m,
-  exists rs',
-     exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
-  /\ rs'#rd = Vint n
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold loadimm32; intros. generalize (make_immed32_sound n); intros E.
-  destruct (make_immed32 n). 
-- subst imm. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split. rewrite Int.add_zero_l; Simpl. 
-  intros; Simpl.
-- rewrite E. apply load_hilo32_correct.
-Qed.
-
 Lemma opimm32_correct:
   forall (op: ireg -> ireg0 -> ireg0 -> instruction)
          (opi: ireg -> ireg0 -> int -> instruction)
@@ -195,27 +179,6 @@ Proof.
   intros; Simpl.
 Qed.
 
-Lemma loadimm64_correct:
-  forall rd n k rs m,
-  exists rs',
-     exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
-  /\ rs'#rd = Vlong n
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  unfold loadimm64; intros. generalize (make_immed64_sound n); intros E.
-  destruct (make_immed64 n). 
-- subst imm. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split. rewrite Int64.add_zero_l; Simpl. 
-  intros; Simpl.
-- exploit load_hilo64_correct; eauto. intros (rs' & A & B & C).
-  rewrite E. exists rs'; eauto.
-- subst imm. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simpl. 
-  intros; Simpl.
-Qed.
-
 Lemma opimm64_correct:
   forall (op: ireg -> ireg0 -> ireg0 -> instruction)
          (opi: ireg -> ireg0 -> int64 -> instruction)
@@ -290,102 +253,6 @@ Proof.
   rewrite H0 in B. inv B. auto.
 Qed.
 
-(** Translation of conditional branches *)
-
-Lemma transl_cbranch_int32s_correct:
-  forall cmp r1 r2 lbl (rs: regset) m b,
-  Val.cmp_bool cmp rs##r1 rs##r2 = Some b ->
-  exec_instr ge fn (transl_cbranch_int32s cmp r1 r2 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct cmp; simpl; rewrite ? H.
-- destruct rs##r1; simpl in H; try discriminate. destruct rs##r2; inv H.
-  simpl; auto.
-- destruct rs##r1; simpl in H; try discriminate. destruct rs##r2; inv H.
-  simpl; auto.
-- auto.
-- rewrite <- Val.swap_cmp_bool. simpl. rewrite H; auto.
-- rewrite <- Val.swap_cmp_bool. simpl. rewrite H; auto.
-- auto.
-Qed.
-
-Lemma transl_cbranch_int32u_correct:
-  forall cmp r1 r2 lbl (rs: regset) m b,
-  Val.cmpu_bool (Mem.valid_pointer m) cmp rs##r1 rs##r2 = Some b ->
-  exec_instr ge fn (transl_cbranch_int32u cmp r1 r2 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct cmp; simpl; rewrite ? H; auto.
-- rewrite <- Val.swap_cmpu_bool. simpl. rewrite H; auto.
-- rewrite <- Val.swap_cmpu_bool. simpl. rewrite H; auto.
-Qed.
-
-Lemma transl_cbranch_int64s_correct:
-  forall cmp r1 r2 lbl (rs: regset) m b,
-  Val.cmpl_bool cmp rs###r1 rs###r2 = Some b ->
-  exec_instr ge fn (transl_cbranch_int64s cmp r1 r2 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct cmp; simpl; rewrite ? H.
-- destruct rs###r1; simpl in H; try discriminate. destruct rs###r2; inv H.
-  simpl; auto.
-- destruct rs###r1; simpl in H; try discriminate. destruct rs###r2; inv H.
-  simpl; auto.
-- auto.
-- rewrite <- Val.swap_cmpl_bool. simpl. rewrite H; auto.
-- rewrite <- Val.swap_cmpl_bool. simpl. rewrite H; auto.
-- auto.
-Qed.
-
-Lemma transl_cbranch_int64u_correct:
-  forall cmp r1 r2 lbl (rs: regset) m b,
-  Val.cmplu_bool (Mem.valid_pointer m) cmp rs###r1 rs###r2 = Some b ->
-  exec_instr ge fn (transl_cbranch_int64u cmp r1 r2 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct cmp; simpl; rewrite ? H; auto.
-- rewrite <- Val.swap_cmplu_bool. simpl. rewrite H; auto.
-- rewrite <- Val.swap_cmplu_bool. simpl. rewrite H; auto.
-Qed.
-
-Lemma transl_cond_float_correct:
-  forall (rs: regset) m cmp rd r1 r2 insn normal v,
-  transl_cond_float cmp rd r1 r2 = (insn, normal) ->
-  v = (if normal then Val.cmpf cmp rs#r1 rs#r2 else Val.notbool (Val.cmpf cmp rs#r1 rs#r2)) ->
-  exec_instr ge fn insn rs m = Next (nextinstr (rs#rd <- v)) m.
-Proof.
-  intros. destruct cmp; simpl in H; inv H; auto. 
-- rewrite Val.negate_cmpf_eq. auto.
-- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpf, Val.cmpf_bool.
-  rewrite <- Float.cmp_swap. auto.
-- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpf, Val.cmpf_bool.
-  rewrite <- Float.cmp_swap. auto.
-Qed.
-
-Lemma transl_cond_single_correct:
-  forall (rs: regset) m cmp rd r1 r2 insn normal v,
-  transl_cond_single cmp rd r1 r2 = (insn, normal) ->
-  v = (if normal then Val.cmpfs cmp rs#r1 rs#r2 else Val.notbool (Val.cmpfs cmp rs#r1 rs#r2)) ->
-  exec_instr ge fn insn rs m = Next (nextinstr (rs#rd <- v)) m.
-Proof.
-  intros. destruct cmp; simpl in H; inv H; auto. 
-- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
-  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq f0 f); auto.
-- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
-  rewrite <- Float32.cmp_swap. auto.
-- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
-  rewrite <- Float32.cmp_swap. auto.
-Qed.
-
-Remark branch_on_X31:
-  forall normal lbl (rs: regset) m b,
-  rs#X31 = Val.of_bool (eqb normal b) -> 
-  exec_instr ge fn (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct normal; simpl; rewrite H; simpl; destruct b; reflexivity. 
-Qed.
-
 Ltac ArgsInv :=
   repeat (match goal with
   | [ H: Error _ = OK _ |- _ ] => discriminate
@@ -417,82 +284,46 @@ Proof.
   { apply eval_condition_lessdef with (map ms args) m; auto. eapply preg_vals; eauto. }
   clear EVAL MEXT AG.
   destruct cond; simpl in TRANSL; ArgsInv.
-- exists rs, (transl_cbranch_int32s c0 x x0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int32s_correct; auto.
-- exists rs, (transl_cbranch_int32u c0 x x0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int32u_correct; auto.
-- predSpec Int.eq Int.eq_spec n Int.zero.
-+ subst n. exists rs, (transl_cbranch_int32s c0 x X0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int32s_correct; auto.
-+ exploit (loadimm32_correct X31 n); eauto. intros (rs' & A & B & C).
-  exists rs', (transl_cbranch_int32s c0 x X31 lbl).
-  split. constructor; eexact A. split; auto.
-  apply transl_cbranch_int32s_correct; auto.
-  simpl; rewrite B, C; eauto with asmgen.
-- predSpec Int.eq Int.eq_spec n Int.zero.
-+ subst n. exists rs, (transl_cbranch_int32u c0 x X0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int32u_correct; auto.
-+ exploit (loadimm32_correct X31 n); eauto. intros (rs' & A & B & C).
-  exists rs', (transl_cbranch_int32u c0 x X31 lbl).
-  split. constructor; eexact A. split; auto.
-  apply transl_cbranch_int32u_correct; auto.
-  simpl; rewrite B, C; eauto with asmgen.
-- exists rs, (transl_cbranch_int64s c0 x x0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int64s_correct; auto.
-- exists rs, (transl_cbranch_int64u c0 x x0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int64u_correct; auto.
-- predSpec Int64.eq Int64.eq_spec n Int64.zero.
-+ subst n. exists rs, (transl_cbranch_int64s c0 x X0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int64s_correct; auto.
-+ exploit (loadimm64_correct X31 n); eauto. intros (rs' & A & B & C).
-  exists rs', (transl_cbranch_int64s c0 x X31 lbl).
-  split. constructor; eexact A. split; auto.
-  apply transl_cbranch_int64s_correct; auto.
-  simpl; rewrite B, C; eauto with asmgen.
-- predSpec Int64.eq Int64.eq_spec n Int64.zero.
-+ subst n. exists rs, (transl_cbranch_int64u c0 x X0 lbl).
-  intuition auto. constructor. apply transl_cbranch_int64u_correct; auto.
-+ exploit (loadimm64_correct X31 n); eauto. intros (rs' & A & B & C).
-  exists rs', (transl_cbranch_int64u c0 x X31 lbl).
-  split. constructor; eexact A. split; auto.
-  apply transl_cbranch_int64u_correct; auto.
-  simpl; rewrite B, C; eauto with asmgen.
-- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
-  set (v := if normal then Val.cmpf c0 rs#x rs#x0 else Val.notbool (Val.cmpf c0 rs#x rs#x0)).
-  assert (V: v = Val.of_bool (eqb normal b)).
-  { unfold v, Val.cmpf. rewrite EVAL'. destruct normal, b; reflexivity. }
-  econstructor; econstructor.
-  split. constructor. apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
-  split. rewrite V; destruct normal, b; reflexivity.
-  intros; Simpl.
-- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
-  assert (EVAL'': Val.cmpf_bool c0 (rs x) (rs x0) = Some (negb b)).
-  { destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; inv EVAL'; auto. }
-  set (v := if normal then Val.cmpf c0 rs#x rs#x0 else Val.notbool (Val.cmpf c0 rs#x rs#x0)).
-  assert (V: v = Val.of_bool (xorb normal b)).
-  { unfold v, Val.cmpf. rewrite EVAL''. destruct normal, b; reflexivity. }
-  econstructor; econstructor.
-  split. constructor. apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
-  split. rewrite V; destruct normal, b; reflexivity.
-  intros; Simpl.
-- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
-  set (v := if normal then Val.cmpfs c0 rs#x rs#x0 else Val.notbool (Val.cmpfs c0 rs#x rs#x0)).
-  assert (V: v = Val.of_bool (eqb normal b)).
-  { unfold v, Val.cmpfs. rewrite EVAL'. destruct normal, b; reflexivity. }
-  econstructor; econstructor.
-  split. constructor. apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
-  split. rewrite V; destruct normal, b; reflexivity.
-  intros; Simpl.
-- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
-  assert (EVAL'': Val.cmpfs_bool c0 (rs x) (rs x0) = Some (negb b)).
-  { destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; inv EVAL'; auto. }
-  set (v := if normal then Val.cmpfs c0 rs#x rs#x0 else Val.notbool (Val.cmpfs c0 rs#x rs#x0)).
-  assert (V: v = Val.of_bool (xorb normal b)).
-  { unfold v, Val.cmpfs. rewrite EVAL''. destruct normal, b; reflexivity. }
-  econstructor; econstructor.
-  split. constructor. apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
-  split. rewrite V; destruct normal, b; reflexivity.
-  intros; Simpl.
+  all:
+    destruct optR as [[]|];
+    unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+    unfold zero32, Op.zero32 in *;
+    unfold zero64, Op.zero64 in *; inv EQ2;
+    try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+    try (eexists; eexists; eauto; split; constructor;
+    simpl in *; try rewrite EVAL'; auto; fail).
+  all:
+    destruct (rs x) eqn:EQRS; simpl in *; try congruence;
+    eexists; eexists; eauto; split; constructor; auto;
+    simpl in *; rewrite EQRS.
+  - assert (HB: (Int.eq Int.zero i) = b) by congruence;
+    rewrite HB; destruct b; simpl; auto.
+  - assert (HB: (Int.eq i Int.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - destruct (rs x0); try congruence.
+    assert (HB: (Int.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - assert (HB: negb (Int.eq Int.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  - assert (HB: negb (Int.eq i Int.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - destruct (rs x0); try congruence.
+    assert (HB: negb (Int.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - assert (HB: (Int64.eq Int64.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  - assert (HB: (Int64.eq i Int64.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - destruct (rs x0); try congruence.
+    assert (HB: (Int64.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - assert (HB: negb (Int64.eq Int64.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  - assert (HB: negb (Int64.eq i Int64.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  - destruct (rs x0); try congruence.
+    assert (HB: negb (Int64.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
 Qed.
 
 Lemma transl_cbranch_correct_true:
@@ -526,417 +357,6 @@ Proof.
   intros; Simpl. 
 Qed.
 
-(** Translation of condition operators *)
-
-Lemma transl_cond_int32s_correct:
-  forall cmp rd r1 r2 k rs m,
-  exists rs',
-     exec_straight ge fn (transl_cond_int32s cmp rd r1 r2 k) rs m k rs' m
-  /\ Val.lessdef (Val.cmp cmp rs##r1 rs##r2) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
-Proof.
-  intros. destruct cmp; simpl. 
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. destruct (rs##r1); auto. destruct (rs##r2); auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. destruct (rs##r1); auto. destruct (rs##r2); auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmp. rewrite <- Val.swap_cmp_bool.
-  simpl. rewrite (Val.negate_cmp_bool Clt). 
-  destruct (Val.cmp_bool Clt rs##r2 rs##r1) as [[]|]; auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. unfold Val.cmp. rewrite <- Val.swap_cmp_bool. auto.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmp. rewrite (Val.negate_cmp_bool Clt). 
-  destruct (Val.cmp_bool Clt rs##r1 rs##r2) as [[]|]; auto.
-Qed.
-
-Lemma transl_cond_int32u_correct:
-  forall cmp rd r1 r2 k rs m,
-  exists rs',
-     exec_straight ge fn (transl_cond_int32u cmp rd r1 r2 k) rs m k rs' m
-  /\ rs'#rd = Val.cmpu (Mem.valid_pointer m) cmp rs##r1 rs##r2
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
-Proof.
-  intros. destruct cmp; simpl. 
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmpu. rewrite <- Val.swap_cmpu_bool.
-  simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cle). 
-  destruct (Val.cmpu_bool (Mem.valid_pointer m) Cle rs##r1 rs##r2) as [[]|]; auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. unfold Val.cmpu. rewrite <- Val.swap_cmpu_bool. auto.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmpu. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Clt). 
-  destruct (Val.cmpu_bool (Mem.valid_pointer m) Clt rs##r1 rs##r2) as [[]|]; auto.
-Qed.
-
-Lemma transl_cond_int64s_correct:
-  forall cmp rd r1 r2 k rs m,
-  exists rs',
-     exec_straight ge fn (transl_cond_int64s cmp rd r1 r2 k) rs m k rs' m
-  /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs###r1 rs###r2)) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
-Proof.
-  intros. destruct cmp; simpl. 
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. destruct (rs###r1); auto. destruct (rs###r2); auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. destruct (rs###r1); auto. destruct (rs###r2); auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmpl. rewrite <- Val.swap_cmpl_bool.
-  simpl. rewrite (Val.negate_cmpl_bool Clt). 
-  destruct (Val.cmpl_bool Clt rs###r2 rs###r1) as [[]|]; auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. unfold Val.cmpl. rewrite <- Val.swap_cmpl_bool. auto.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmpl. rewrite (Val.negate_cmpl_bool Clt). 
-  destruct (Val.cmpl_bool Clt rs###r1 rs###r2) as [[]|]; auto.
-Qed.
-
-Lemma transl_cond_int64u_correct:
-  forall cmp rd r1 r2 k rs m,
-  exists rs',
-     exec_straight ge fn (transl_cond_int64u cmp rd r1 r2 k) rs m k rs' m
-  /\ rs'#rd = Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs###r1 rs###r2)
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
-Proof.
-  intros. destruct cmp; simpl. 
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmplu. rewrite <- Val.swap_cmplu_bool.
-  simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cle). 
-  destruct (Val.cmplu_bool (Mem.valid_pointer m) Cle rs###r1 rs###r2) as [[]|]; auto.
-- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
-  split; intros; Simpl. unfold Val.cmplu. rewrite <- Val.swap_cmplu_bool. auto.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold Val.cmplu. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Clt). 
-  destruct (Val.cmplu_bool (Mem.valid_pointer m) Clt rs###r1 rs###r2) as [[]|]; auto.
-Qed.
-
-Lemma transl_condimm_int32s_correct:
-  forall cmp rd r1 n k rs m,
-  r1 <> X31 ->
-  exists rs',
-     exec_straight ge fn (transl_condimm_int32s cmp rd r1 n k) rs m k rs' m
-  /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  intros. unfold transl_condimm_int32s.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-- subst n. exploit transl_cond_int32s_correct. intros (rs' & A & B & C).
-  exists rs'; eauto.
-- assert (DFL:
-    exists rs',
-      exec_straight ge fn (loadimm32 X31 n (transl_cond_int32s cmp rd r1 X31 k)) rs m k rs' m
-   /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd
-   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
-  { exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
-    exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-    exists rs2; split. 
-    eapply exec_straight_trans. eexact A1. eexact A2. 
-    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. auto.
-    intros; transitivity (rs1 r); auto. }
-  destruct cmp.
-+ unfold xorimm32. 
-  exploit (opimm32_correct Pxorw Pxoriw Val.xor); eauto. intros (rs1 & A1 & B1 & C1).
-  exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-  exists rs2; split. 
-  eapply exec_straight_trans. eexact A1. eexact A2. 
-  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
-  unfold Val.cmp in B2; simpl in B2; rewrite Int.xor_is_zero in B2. exact B2.
-  intros; transitivity (rs1 r); auto.
-+ unfold xorimm32. 
-  exploit (opimm32_correct Pxorw Pxoriw Val.xor); eauto. intros (rs1 & A1 & B1 & C1).
-  exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-  exists rs2; split. 
-  eapply exec_straight_trans. eexact A1. eexact A2. 
-  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
-  unfold Val.cmp in B2; simpl in B2; rewrite Int.xor_is_zero in B2. exact B2.
-  intros; transitivity (rs1 r); auto.
-+ exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
-+ predSpec Int.eq Int.eq_spec n (Int.repr Int.max_signed).
-* subst n. exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. 
-  unfold Val.cmp; destruct (rs#r1); simpl; auto. rewrite B1. 
-  unfold Int.lt. rewrite zlt_false. auto. 
-  change (Int.signed (Int.repr Int.max_signed)) with Int.max_signed.
-  generalize (Int.signed_range i); omega.
-* exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. 
-  rewrite B1. unfold Val.cmp; simpl; destruct (rs#r1); simpl; auto.
-  unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1).
-  destruct (zlt (Int.signed n) (Int.signed i)).
-  rewrite zlt_false by omega. auto.
-  rewrite zlt_true by omega. auto.
-  rewrite Int.add_signed. symmetry; apply Int.signed_repr. 
-  assert (Int.signed n <> Int.max_signed).
-  { red; intros E. elim H1. rewrite <- (Int.repr_signed n). rewrite E. auto. }
-  generalize (Int.signed_range n); omega.
-+ apply DFL.
-+ apply DFL.
-Qed.
-
-Lemma transl_condimm_int32u_correct:
-  forall cmp rd r1 n k rs m,
-  r1 <> X31 ->
-  exists rs',
-     exec_straight ge fn (transl_condimm_int32u cmp rd r1 n k) rs m k rs' m
-  /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  intros. unfold transl_condimm_int32u.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-- subst n. exploit transl_cond_int32u_correct. intros (rs' & A & B & C).
-  exists rs'; split. eexact A. split; auto. rewrite B; auto.
-- assert (DFL:
-    exists rs',
-      exec_straight ge fn (loadimm32 X31 n (transl_cond_int32u cmp rd r1 X31 k)) rs m k rs' m
-   /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd
-   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
-  { exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
-    exploit transl_cond_int32u_correct; eauto. intros (rs2 & A2 & B2 & C2).
-    exists rs2; split. 
-    eapply exec_straight_trans. eexact A1. eexact A2. 
-    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. rewrite B2; auto.
-    intros; transitivity (rs1 r); auto. }
-  destruct cmp.
-+ apply DFL.
-+ apply DFL.
-+ exploit (opimm32_correct Psltuw Psltiuw (Val.cmpu (Mem.valid_pointer m) Clt) m); eauto.
-  intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
-+ apply DFL.
-+ apply DFL.
-+ apply DFL.
-Qed.
-
-Lemma transl_condimm_int64s_correct:
-  forall cmp rd r1 n k rs m,
-  r1 <> X31 ->
-  exists rs',
-     exec_straight ge fn (transl_condimm_int64s cmp rd r1 n k) rs m k rs' m
-  /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  intros. unfold transl_condimm_int64s.
-  predSpec Int64.eq Int64.eq_spec n Int64.zero.
-- subst n. exploit transl_cond_int64s_correct. intros (rs' & A & B & C).
-  exists rs'; eauto.
-- assert (DFL:
-    exists rs',
-      exec_straight ge fn (loadimm64 X31 n (transl_cond_int64s cmp rd r1 X31 k)) rs m k rs' m
-   /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd
-   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
-  { exploit loadimm64_correct; eauto. intros (rs1 & A1 & B1 & C1).
-    exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-    exists rs2; split. 
-    eapply exec_straight_trans. eexact A1. eexact A2. 
-    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. auto.
-    intros; transitivity (rs1 r); auto. }
-  destruct cmp.
-+ unfold xorimm64. 
-  exploit (opimm64_correct Pxorl Pxoril Val.xorl); eauto. intros (rs1 & A1 & B1 & C1).
-  exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-  exists rs2; split. 
-  eapply exec_straight_trans. eexact A1. eexact A2. 
-  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
-  unfold Val.cmpl in B2; simpl in B2; rewrite Int64.xor_is_zero in B2. exact B2.
-  intros; transitivity (rs1 r); auto.
-+ unfold xorimm64. 
-  exploit (opimm64_correct Pxorl Pxoril Val.xorl); eauto. intros (rs1 & A1 & B1 & C1).
-  exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
-  exists rs2; split. 
-  eapply exec_straight_trans. eexact A1. eexact A2. 
-  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
-  unfold Val.cmpl in B2; simpl in B2; rewrite Int64.xor_is_zero in B2. exact B2.
-  intros; transitivity (rs1 r); auto.
-+ exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
-+ predSpec Int64.eq Int64.eq_spec n (Int64.repr Int64.max_signed).
-* subst n. exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. 
-  unfold Val.cmpl; destruct (rs#r1); simpl; auto. rewrite B1. 
-  unfold Int64.lt. rewrite zlt_false. auto. 
-  change (Int64.signed (Int64.repr Int64.max_signed)) with Int64.max_signed.
-  generalize (Int64.signed_range i); omega.
-* exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. 
-  rewrite B1. unfold Val.cmpl; simpl; destruct (rs#r1); simpl; auto.
-  unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1).
-  destruct (zlt (Int64.signed n) (Int64.signed i)).
-  rewrite zlt_false by omega. auto.
-  rewrite zlt_true by omega. auto.
-  rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. 
-  assert (Int64.signed n <> Int64.max_signed).
-  { red; intros E. elim H1. rewrite <- (Int64.repr_signed n). rewrite E. auto. }
-  generalize (Int64.signed_range n); omega.
-+ apply DFL.
-+ apply DFL.
-Qed.
-
-Lemma transl_condimm_int64u_correct:
-  forall cmp rd r1 n k rs m,
-  r1 <> X31 ->
-  exists rs',
-     exec_straight ge fn (transl_condimm_int64u cmp rd r1 n k) rs m k rs' m
-  /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  intros. unfold transl_condimm_int64u.
-  predSpec Int64.eq Int64.eq_spec n Int64.zero.
-- subst n. exploit transl_cond_int64u_correct. intros (rs' & A & B & C).
-  exists rs'; split. eexact A. split; auto. rewrite B; auto.
-- assert (DFL:
-    exists rs',
-      exec_straight ge fn (loadimm64 X31 n (transl_cond_int64u cmp rd r1 X31 k)) rs m k rs' m
-   /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd
-   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
-  { exploit loadimm64_correct; eauto. intros (rs1 & A1 & B1 & C1).
-    exploit transl_cond_int64u_correct; eauto. intros (rs2 & A2 & B2 & C2).
-    exists rs2; split. 
-    eapply exec_straight_trans. eexact A1. eexact A2. 
-    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. rewrite B2; auto.
-    intros; transitivity (rs1 r); auto. }
-  destruct cmp.
-+ apply DFL.
-+ apply DFL.
-+ exploit (opimm64_correct Psltul Psltiul (fun v1 v2 => Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt v1 v2)) m); eauto.
-  intros (rs1 & A1 & B1 & C1).
-  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
-+ apply DFL.
-+ apply DFL.
-+ apply DFL.
-Qed.
-
-Lemma transl_cond_op_correct:
-  forall cond rd args k c rs m,
-  transl_cond_op cond rd args k = OK c ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)) rs'#rd
-  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
-Proof.
-  assert (MKTOT: forall ob, Val.of_optbool ob = Val.maketotal (option_map Val.of_bool ob)).
-  { destruct ob as [[]|]; reflexivity. }
-  intros until m; intros TR.
-  destruct cond; simpl in TR; ArgsInv.
-+ (* cmp *)
-  exploit transl_cond_int32s_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
-+ (* cmpu *)
-  exploit transl_cond_int32u_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite B; auto.
-+ (* cmpimm *)
-  apply transl_condimm_int32s_correct; eauto with asmgen.
-+ (* cmpuimm *)
-  apply transl_condimm_int32u_correct; eauto with asmgen.
-+ (* cmpl *)
-  exploit transl_cond_int64s_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
-+ (* cmplu *)
-  exploit transl_cond_int64u_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite B, MKTOT; eauto.
-+ (* cmplimm *)
-  exploit transl_condimm_int64s_correct; eauto. instantiate (1 := x); eauto with asmgen. 
-  intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
-+ (* cmpluimm *)
-  exploit transl_condimm_int64u_correct; eauto. instantiate (1 := x); eauto with asmgen. 
-  intros (rs' & A & B & C).
-  exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
-+ (* cmpf *)
-  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
-  fold (Val.cmpf c0 (rs x) (rs x0)).
-  set (v := Val.cmpf c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
-  split; intros; Simpl.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_float_correct with (v := Val.notbool v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpf *)
-  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
-  rewrite Val.notbool_negb_3. fold (Val.cmpf c0 (rs x) (rs x0)).
-  set (v := Val.cmpf c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_float_correct with (v := v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_float_correct with (v := Val.notbool v); eauto. auto.
-  split; intros; Simpl.
-+ (* cmpfs *)
-  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
-  fold (Val.cmpfs c0 (rs x) (rs x0)).
-  set (v := Val.cmpfs c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
-  split; intros; Simpl.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_single_correct with (v := Val.notbool v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpfs *)
-  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
-  rewrite Val.notbool_negb_3. fold (Val.cmpfs c0 (rs x) (rs x0)).
-  set (v := Val.cmpfs c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_single_correct with (v := v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_single_correct with (v := Val.notbool v); eauto. auto.
-  split; intros; Simpl.
-Qed.
-
-(** Some arithmetic properties. *)
-
-Remark cast32unsigned_from_cast32signed:
-  forall i, Int64.repr (Int.unsigned i) = Int64.zero_ext 32 (Int64.repr (Int.signed i)).
-Proof.
-  intros. apply Int64.same_bits_eq; intros. 
-  rewrite Int64.bits_zero_ext, !Int64.testbit_repr by tauto.
-  rewrite Int.bits_signed by tauto. fold (Int.testbit i i0).
-  change Int.zwordsize with 32.
-  destruct (zlt i0 32). auto. apply Int.bits_above. auto.
-Qed.
-
 (* Translation of arithmetic operations *)
 
 Ltac SimplEval H :=
@@ -964,139 +384,72 @@ Proof.
 Opaque Int.eq.
   intros until c; intros TR EV.
   unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
-- (* move *)
-  destruct (preg_of res), (preg_of m0); inv TR; TranslOpSimpl.
-- (* intconst *)
-  exploit loadimm32_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* longconst *)
-  exploit loadimm64_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* floatconst *)
-  destruct (Float.eq_dec n Float.zero).
-+ subst n. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-+ econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-- (* singleconst *)
-  destruct (Float32.eq_dec n Float32.zero).
-+ subst n. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-+ econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-- (* addrsymbol *)
-  destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
-+ set (rs1 := nextinstr (rs#x <- (Genv.symbol_address ge id Ptrofs.zero))).
-  exploit (addptrofs_correct x x ofs k rs1 m); eauto with asmgen. 
-  intros (rs2 & A & B & C).
-  exists rs2; split. 
-  apply exec_straight_step with rs1 m; auto.
-  split. replace ofs with (Ptrofs.add Ptrofs.zero ofs) by (apply Ptrofs.add_zero_l). 
-  rewrite Genv.shift_symbol_address.
-  replace (rs1 x) with (Genv.symbol_address ge id Ptrofs.zero) in B by (unfold rs1; Simpl).
-  exact B.
-  intros. rewrite C by eauto with asmgen. unfold rs1; Simpl.  
-+ TranslOpSimpl.
-- (* stackoffset *)
-  exploit addptrofs_correct. instantiate (1 := X2); auto with asmgen. intros (rs' & A & B & C).
-  exists rs'; split; eauto. auto with asmgen.
-- (* cast8signed *)
-  econstructor; split.
-  eapply exec_straight_two. simpl;eauto. simpl;eauto. auto. auto.
-  split; intros; Simpl.
-  assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
-  destruct (rs x0); auto; simpl. rewrite A; simpl. rewrite A. 
-  apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
-- (* cast16signed *)
-  econstructor; split.
-  eapply exec_straight_two. simpl;eauto. simpl;eauto. auto. auto.
+  (* move *)
+  { destruct (preg_of res), (preg_of m0); inv TR; TranslOpSimpl. }
+  (* addrsymbol *)
+  { destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
+    + set (rs1 := nextinstr (rs#x <- (Genv.symbol_address ge id Ptrofs.zero))).
+      exploit (addptrofs_correct x x ofs k rs1 m); eauto with asmgen. 
+      intros (rs2 & A & B & C).
+      exists rs2; split. 
+      apply exec_straight_step with rs1 m; auto.
+      split. replace ofs with (Ptrofs.add Ptrofs.zero ofs) by (apply Ptrofs.add_zero_l). 
+      rewrite Genv.shift_symbol_address.
+      replace (rs1 x) with (Genv.symbol_address ge id Ptrofs.zero) in B by (unfold rs1; Simpl).
+      exact B.
+      intros. rewrite C by eauto with asmgen. unfold rs1; Simpl.  
+    + TranslOpSimpl. }
+  (* stackoffset *)
+  { exploit addptrofs_correct. instantiate (1 := X2); auto with asmgen. intros (rs' & A & B & C).
+  exists rs'; split; eauto. auto with asmgen. }
+  (* Expanded instructions from RTL *)
+  9,10,19,20:
+    econstructor; split; try apply exec_straight_one; simpl; eauto;
+    split; intros; Simpl; try destruct (rs x0);
+    try rewrite Int64.add_commut;
+    try rewrite Int.add_commut; auto;
+    try rewrite Int64.and_commut;
+    try rewrite Int.and_commut; auto;
+    try rewrite Int64.or_commut;
+    try rewrite Int.or_commut; auto.
+  1-16:
+    destruct optR as [[]|]; try discriminate;
+    unfold apply_bin_oreg_ireg0, apply_bin_oreg in *; try inv EQ3; try inv EQ2;
+    try destruct (Int.eq _ _) eqn:A; try inv H0;
+    try destruct (Int64.eq _ _) eqn:A; try inv H1;
+    econstructor; split; try apply exec_straight_one; simpl; eauto;
+    split; intros; Simpl;
+    try apply Int.same_if_eq in A; subst;
+    try apply Int64.same_if_eq in A; subst;
+    unfold get_sp;
+    try destruct (rs x0); auto;
+    try destruct (rs x1); auto;
+    try destruct (rs X2); auto;
+    try destruct Archi.ptr64 eqn:B;
+    try fold (Val.add (Vint Int.zero) (get_sp (rs X2)));
+    try fold (Val.addl (Vlong Int64.zero) (get_sp (rs X2)));
+    try rewrite Val.add_commut; auto;
+    try rewrite Val.addl_commut; auto;
+    try rewrite Int.add_commut; auto;
+    try rewrite Int64.add_commut; auto;
+    replace (Ptrofs.of_int Int.zero) with (Ptrofs.zero) by auto;
+    replace (Ptrofs.of_int64 Int64.zero) with (Ptrofs.zero) by auto;
+    try rewrite Ptrofs.add_zero; auto.
+  (* mayundef *)
+  { destruct (ireg_eq x x0); inv EQ2;
+    econstructor; split;
+    try apply exec_straight_one; simpl; eauto;
+    split; unfold eval_may_undef;
+    destruct mu eqn:EQMU; simpl; intros; Simpl; auto.
+    all:
+      destruct (rs (preg_of m0)) eqn:EQM0; simpl; auto;
+      destruct (rs x0); simpl; auto; Simpl;
+      try destruct (Int.ltu _ _); simpl;
+      Simpl; auto. }
+  (* select *)
+  { econstructor; split. apply exec_straight_one. simpl; eauto. auto.
   split; intros; Simpl.
-  assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
-  destruct (rs x0); auto; simpl. rewrite A; simpl. rewrite A. 
-  apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
-- (* addimm *)
-  exploit (opimm32_correct Paddw Paddiw Val.add); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen. 
-- (* andimm *)
-  exploit (opimm32_correct Pandw Pandiw Val.and); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* orimm *)
-  exploit (opimm32_correct Porw Poriw Val.or); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* xorimm *)
-  exploit (opimm32_correct Pxorw Pxoriw Val.xor); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* shrximm *)
-  clear H. exploit Val.shrx_shr_3; eauto. intros E; subst v; clear EV.
-  destruct (Int.eq n Int.zero).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-+ destruct (Int.eq n Int.one).
-  * econstructor; split.
-    eapply exec_straight_step. simpl; reflexivity. auto.
-    eapply exec_straight_step. simpl; reflexivity. auto.
-    apply exec_straight_one. simpl; reflexivity. auto.
-    split; intros; Simpl.
-  * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
-    econstructor; split.
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    apply exec_straight_one. simpl; reflexivity. auto. 
-    split; intros; Simpl.
-- (* longofintu *)
-  econstructor; split.
-  eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
-  split; intros; Simpl. destruct (rs x0); auto. simpl. 
-  assert (A: Int.ltu (Int.repr 32) Int64.iwordsize' = true) by auto.
-  rewrite A; simpl. rewrite A. apply Val.lessdef_same. f_equal.
-  rewrite cast32unsigned_from_cast32signed. apply Int64.zero_ext_shru_shl. compute; auto.
-- (* addlimm *)
-  exploit (opimm64_correct Paddl Paddil Val.addl); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen. 
-- (* andimm *)
-  exploit (opimm64_correct Pandl Pandil Val.andl); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* orimm *)
-  exploit (opimm64_correct Porl Poril Val.orl); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* xorimm *)
-  exploit (opimm64_correct Pxorl Pxoril Val.xorl); auto. instantiate (1 := x0); eauto with asmgen.
-  intros (rs' & A & B & C).
-  exists rs'; split; eauto. rewrite B; auto with asmgen.
-- (* shrxlimm *)
-  clear H. exploit Val.shrxl_shrl_3; eauto. intros E; subst v; clear EV.
-  destruct (Int.eq n Int.zero).
-+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split; intros; Simpl. 
-+ destruct (Int.eq n Int.one).
-  * econstructor; split.
-    eapply exec_straight_step. simpl; reflexivity. auto.
-    eapply exec_straight_step. simpl; reflexivity. auto.
-    apply exec_straight_one. simpl; reflexivity. auto.
-    split; intros; Simpl.
-
-  * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
-    econstructor; split.
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    eapply exec_straight_step. simpl; reflexivity. auto. 
-    apply exec_straight_one. simpl; reflexivity. auto. 
-    split; intros; Simpl.
-- (* cond *)
-  exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; split. eexact A. eauto with asmgen.
+  apply Val.lessdef_normalize. }
 Qed.
 
 (** Memory accesses *)
@@ -1404,6 +757,3 @@ Proof.
 Qed.
 
 End CONSTRUCTORS.
-
-
- 
diff --git a/riscV/Builtins1.v b/riscV/Builtins1.v
index 53c83d7e..47bacffa 100644
--- a/riscV/Builtins1.v
+++ b/riscV/Builtins1.v
@@ -18,16 +18,35 @@
 Require Import String Coqlib.
 Require Import AST Integers Floats Values.
 Require Import Builtins0.
+Require ExtValues.
 
-Inductive platform_builtin : Type := .
+Inductive platform_builtin : Type :=
+| BI_bits_of_float
+| BI_bits_of_double
+| BI_float_of_bits
+| BI_double_of_bits.
 
 Local Open Scope string_scope.
 
 Definition platform_builtin_table : list (string * platform_builtin) :=
-  nil.
+     ("__builtin_bits_of_float", BI_bits_of_float)
+  :: ("__builtin_bits_of_double", BI_bits_of_double)
+  :: ("__builtin_float_of_bits", BI_float_of_bits)
+  :: ("__builtin_double_of_bits", BI_double_of_bits)
+  :: nil.
 
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
-  match b with end.
+  match b with
+  | BI_bits_of_float => mksignature (Tsingle :: nil) Tint cc_default
+  | BI_bits_of_double => mksignature (Tfloat :: nil) Tlong cc_default
+  | BI_float_of_bits => mksignature (Tint :: nil) Tsingle cc_default
+  | BI_double_of_bits => mksignature (Tlong :: nil) Tfloat cc_default
+  end.
 
 Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
-  match b with end.
+  match b with
+  | BI_bits_of_float => mkbuiltin_n1t Tsingle Tint Float32.to_bits
+  | BI_bits_of_double => mkbuiltin_n1t Tfloat Tlong Float.to_bits
+  | BI_float_of_bits => mkbuiltin_n1t Tint Tsingle Float32.of_bits
+  | BI_double_of_bits => mkbuiltin_n1t Tlong Tfloat Float.of_bits
+  end.
diff --git a/riscV/CBuiltins.ml b/riscV/CBuiltins.ml
index a2087cb7..00b44fd5 100644
--- a/riscV/CBuiltins.ml
+++ b/riscV/CBuiltins.ml
@@ -46,6 +46,14 @@ let builtins = {
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
     "__builtin_fmin",
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_bits_of_double",
+      (TInt(IULong, []), [TFloat(FDouble, [])], false);
+    "__builtin_bits_of_float",
+      (TInt(IUInt, []), [TFloat(FFloat, [])], false);
+    "__builtin_double_of_bits",
+      (TFloat(FDouble, []), [TInt(IULong, [])], false);
+    "__builtin_float_of_bits",
+      (TFloat(FFloat, []), [TInt(IUInt, [])], false);
   ]
 }
 
diff --git a/riscV/CSE2deps.v b/riscV/CSE2deps.v
index b4b80e2f..c0deacf0 100644
--- a/riscV/CSE2deps.v
+++ b/riscV/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     (base :: nil), (base' :: nil) =>
     if peq base base'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
-    else true  | _, _, _, _ => true
+    else true
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+  | _, _, _, _ => true
   end.
diff --git a/riscV/CSE2depsproof.v b/riscV/CSE2depsproof.v
index f283c8ac..cf9e62b1 100644
--- a/riscV/CSE2depsproof.v
+++ b/riscV/CSE2depsproof.v
@@ -123,7 +123,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -133,7 +133,15 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
-  }
+
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/riscV/ConstpropOpproof.v b/riscV/ConstpropOpproof.v
index 765aa035..26a50317 100644
--- a/riscV/ConstpropOpproof.v
+++ b/riscV/ConstpropOpproof.v
@@ -265,52 +265,84 @@ Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_moduimm_correct:
   forall n r1 r2 v,
-  Val.modu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_moduimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_moduimm.
   destruct (Int.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq. eapply Val.modu_pow2; eauto. congruence.
-  exists v; auto.
+  { destruct (Val.modu e # r1 e # r2) eqn:MODU; cbn in H.
+    { subst v0.
+      exists v; split; auto.
+      cbn. decEq. eapply Val.modu_pow2; eauto. congruence.
+    }
+    subst v.
+    eexists; split; auto.
+    cbn. reflexivity.
+  }
+  exists v; split; auto.
+  cbn.
+  congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -444,48 +476,82 @@ Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.    
-  erewrite Int64.divu_pow2' by eauto.  auto. 
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_modluimm_correct:
   forall n r1 r2 v,
-  Val.modlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_modluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_modluimm.
   destruct (Int64.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. 
-  simpl. erewrite Int64.modu_and by eauto. auto.
-  exists v; auto.
+  {
+  econstructor; split. simpl; eauto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.modu_and by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -633,14 +699,17 @@ Proof.
 - (* mul 2*)
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
 - (* divs *)
-  assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
+  assert (e#r2 = Vint n2). { clear H0. InvApproxRegs; SimplVM; auto. }
+                           apply make_divimm_correct; auto.
+  congruence.
 - (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divuimm_correct; auto.
+  congruence.
 - (* modu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_moduimm_correct; auto.
+  congruence.
 - (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
 - (* and 2 *)
@@ -680,12 +749,15 @@ Proof.
 - (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divlimm_correct; auto.
+  congruence.
 - (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divluimm_correct; auto.
+  congruence.
 - (* modlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_modluimm_correct; auto.
+  congruence.
 - (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
 - (* andl 2 *)
diff --git a/riscV/ExpansionOracle.ml b/riscV/ExpansionOracle.ml
new file mode 100644
index 00000000..4f67b9af
--- /dev/null
+++ b/riscV/ExpansionOracle.ml
@@ -0,0 +1,1066 @@
+(* *************************************************************)
+(*                                                             *)
+(*             The Compcert verified compiler                  *)
+(*                                                             *)
+(*           Léo Gourdin        UGA, VERIMAG                   *)
+(*                                                             *)
+(*  Copyright VERIMAG. All rights reserved.                    *)
+(*  This file is distributed under the terms of the INRIA      *)
+(*  Non-Commercial License Agreement.                          *)
+(*                                                             *)
+(* *************************************************************)
+
+open RTLpathLivegenaux
+open RTLpathCommon
+open Datatypes
+open Maps
+open RTL
+open Op
+open Asmgen
+open RTLpath
+open! Integers
+open Camlcoq
+open Option
+open AST
+open Printf
+
+(** Mini CSE (a dynamic numbering is applied during expansion. 
+    The CSE algorithm is inspired by the "static" one used in backend/CSE.v *)
+
+let exp_debug = false
+
+(** Managing virtual registers and node index *)
+
+let reg = ref 1
+
+let node = ref 1
+
+let p2i r = P.to_int r
+
+let r2p () = P.of_int !reg
+
+let n2p () = P.of_int !node
+
+let r2pi () =
+  reg := !reg + 1;
+  r2p ()
+
+let n2pi () =
+  node := !node + 1;
+  n2p ()
+
+(** Below are the types for rhs and equations *)
+
+type rhs = Sop of operation * int list | Smove
+
+type seq = Seq of int * rhs
+
+(** This is a mini abstraction to have a simpler representation during expansion
+    - Snop will be converted to Inop
+    - (Sr r) is inserted if the value was found in register r
+    - (Sexp dest rhs args succ) represent an instruction
+      (succesor may not be defined at this point, hence the use of type option)
+    - (Sfinalcond cond args succ1 succ2 info) represents a condition (which must
+      always be the last instruction in expansion list *)
+
+type expl =
+  | Snop of P.t
+  | Sr of P.t
+  | Sexp of P.t * rhs * P.t list * node option
+  | Sfinalcond of condition * P.t list * node * node * bool option
+
+(** Record used during the "dynamic" value numbering *)
+
+type numb = {
+  mutable nnext : int;  (** Next unusued value number *)
+  mutable seqs : seq list;  (** equations *)
+  mutable nreg : (P.t, int) Hashtbl.t;  (** mapping registers to values *)
+  mutable nval : (int, P.t list) Hashtbl.t;
+      (** reverse mapping values to registers containing it *)
+}
+
+let print_list_pos l =
+  if exp_debug then eprintf "[";
+  List.iter (fun i -> if exp_debug then eprintf "%d;" (p2i i)) l;
+  if exp_debug then eprintf "]\n"
+
+let empty_numbering () =
+  { nnext = 1; seqs = []; nreg = Hashtbl.create 100; nval = Hashtbl.create 100 }
+
+let rec get_nvalues vn = function
+  | [] -> []
+  | r :: rs ->
+      let v =
+        match Hashtbl.find_opt !vn.nreg r with
+        | Some v ->
+            if exp_debug then eprintf "getnval r=%d |-> v=%d\n" (p2i r) v;
+            v
+        | None ->
+            let n = !vn.nnext in
+            if exp_debug then eprintf "getnval r=%d |-> v=%d\n" (p2i r) n;
+            !vn.nnext <- !vn.nnext + 1;
+            Hashtbl.replace !vn.nreg r n;
+            Hashtbl.replace !vn.nval n [ r ];
+            n
+      in
+      let vs = get_nvalues vn rs in
+      v :: vs
+
+let get_nval_ornil vn v =
+  match Hashtbl.find_opt !vn.nval v with None -> [] | Some l -> l
+
+let forget_reg vn rd =
+  match Hashtbl.find_opt !vn.nreg rd with
+  | Some v ->
+      if exp_debug then eprintf "forget_reg: r=%d |-> v=%d\n" (p2i rd) v;
+      let old_regs = get_nval_ornil vn v in
+      if exp_debug then eprintf "forget_reg: old_regs are:\n";
+      print_list_pos old_regs;
+      Hashtbl.replace !vn.nval v
+        (List.filter (fun n -> not (P.eq n rd)) old_regs)
+  | None ->
+      if exp_debug then eprintf "forget_reg: no mapping for r=%d\n" (p2i rd)
+
+let update_reg vn rd v =
+  if exp_debug then eprintf "update_reg: update v=%d with r=%d\n" v (p2i rd);
+  forget_reg vn rd;
+  let old_regs = get_nval_ornil vn v in
+  Hashtbl.replace !vn.nval v (rd :: old_regs)
+
+let rec find_valnum_rhs rh = function
+  | [] -> None
+  | Seq (v, rh') :: tl -> if rh = rh' then Some v else find_valnum_rhs rh tl
+
+let set_unknown vn rd =
+  if exp_debug then eprintf "set_unknown: rd=%d\n" (p2i rd);
+  forget_reg vn rd;
+  Hashtbl.remove !vn.nreg rd
+
+let set_res_unknown vn res = match res with BR r -> set_unknown vn r | _ -> ()
+
+let addrhs vn rd rh =
+  match find_valnum_rhs rh !vn.seqs with
+  | Some vres ->
+      if exp_debug then eprintf "addrhs: Some v=%d\n" vres;
+      Hashtbl.replace !vn.nreg rd vres;
+      update_reg vn rd vres
+  | None ->
+      let n = !vn.nnext in
+      if exp_debug then eprintf "addrhs: None v=%d\n" n;
+      !vn.nnext <- !vn.nnext + 1;
+      !vn.seqs <- Seq (n, rh) :: !vn.seqs;
+      update_reg vn rd n;
+      Hashtbl.replace !vn.nreg rd n
+
+let addsop vn v op rd =
+  if exp_debug then eprintf "addsop\n";
+  if op = Omove then (
+    update_reg vn rd (List.hd v);
+    Hashtbl.replace !vn.nreg rd (List.hd v))
+  else addrhs vn rd (Sop (op, v))
+
+let rec kill_mem_operations = function
+  | (Seq (v, Sop (op, vl)) as eq) :: tl ->
+      if op_depends_on_memory op then kill_mem_operations tl
+      else eq :: kill_mem_operations tl
+  | [] -> []
+  | eq :: tl -> eq :: kill_mem_operations tl
+
+let reg_valnum vn v =
+  if exp_debug then eprintf "reg_valnum: trying to find a mapping for v=%d\n" v;
+  match Hashtbl.find !vn.nval v with
+  | [] -> None
+  | r :: rs ->
+      if exp_debug then eprintf "reg_valnum: found a mapping r=%d\n" (p2i r);
+      Some r
+
+let rec reg_valnums vn = function
+  | [] -> Some []
+  | v :: vs -> (
+      match (reg_valnum vn v, reg_valnums vn vs) with
+      | Some r, Some rs -> Some (r :: rs)
+      | _, _ -> None)
+
+let find_rhs vn rh =
+  match find_valnum_rhs rh !vn.seqs with
+  | None -> None
+  | Some vres -> reg_valnum vn vres
+
+(** Functions to perform the dynamic reduction during CSE *)
+
+let extract_arg l =
+  if List.length l > 0 then
+    match List.hd l with
+    | Sr r -> (r, List.tl l)
+    | Sexp (rd, _, _, _) -> (rd, l)
+    | _ -> failwith "extract_arg: final instruction arg can not be extracted"
+  else failwith "extract_arg: trying to extract on an empty list"
+
+let extract_final vn fl fdest succ =
+  if List.length fl > 0 then
+    match List.hd fl with
+    | Sr r ->
+        if not (P.eq r fdest) then (
+          let v = get_nvalues vn [ r ] in
+          addsop vn v Omove fdest;
+          Sexp (fdest, Smove, [ r ], Some succ) :: List.tl fl)
+        else Snop succ :: List.tl fl
+    | Sexp (rd, rh, args, None) ->
+        assert (rd = fdest);
+        Sexp (fdest, rh, args, Some succ) :: List.tl fl
+    | _ -> fl
+  else failwith "extract_final: trying to extract on an empty list"
+
+let addinst vn op args rd =
+  let v = get_nvalues vn args in
+  let rh = Sop (op, v) in
+  match find_rhs vn rh with
+  | Some r ->
+      if exp_debug then eprintf "addinst: rhs found with r=%d\n" (p2i r);
+      Sr r
+  | None ->
+      addsop vn v op rd;
+      Sexp (rd, rh, args, None)
+
+(** Expansion functions *)
+
+type immt =
+  | Addiw
+  | Addil
+  | Andiw
+  | Andil
+  | Oriw
+  | Oril
+  | Xoriw
+  | Xoril
+  | Sltiw
+  | Sltiuw
+  | Sltil
+  | Sltiul
+
+let load_hilo32 vn dest hi lo =
+  let op1 = OEluiw hi in
+  if Int.eq lo Int.zero then [ addinst vn op1 [] dest ]
+  else
+    let r = r2pi () in
+    let op2 = OEaddiw (None, lo) in
+    let i1 = addinst vn op1 [] r in
+    let r', l = extract_arg [ i1 ] in
+    let i2 = addinst vn op2 [ r' ] dest in
+    i2 :: l
+
+let load_hilo64 vn dest hi lo =
+  let op1 = OEluil hi in
+  if Int64.eq lo Int64.zero then [ addinst vn op1 [] dest ]
+  else
+    let r = r2pi () in
+    let op2 = OEaddil (None, lo) in
+    let i1 = addinst vn op1 [] r in
+    let r', l = extract_arg [ i1 ] in
+    let i2 = addinst vn op2 [ r' ] dest in
+    i2 :: l
+
+let loadimm32 vn dest n =
+  match make_immed32 n with
+  | Imm32_single imm ->
+      let op1 = OEaddiw (Some X0_R, imm) in
+      [ addinst vn op1 [] dest ]
+  | Imm32_pair (hi, lo) -> load_hilo32 vn dest hi lo
+
+let loadimm64 vn dest n =
+  match make_immed64 n with
+  | Imm64_single imm ->
+      let op1 = OEaddil (Some X0_R, imm) in
+      [ addinst vn op1 [] dest ]
+  | Imm64_pair (hi, lo) -> load_hilo64 vn dest hi lo
+  | Imm64_large imm ->
+      let op1 = OEloadli imm in
+      [ addinst vn op1 [] dest ]
+
+let get_opimm optR imm = function
+  | Addiw -> OEaddiw (optR, imm)
+  | Andiw -> OEandiw imm
+  | Oriw -> OEoriw imm
+  | Xoriw -> OExoriw imm
+  | Sltiw -> OEsltiw imm
+  | Sltiuw -> OEsltiuw imm
+  | Addil -> OEaddil (optR, imm)
+  | Andil -> OEandil imm
+  | Oril -> OEoril imm
+  | Xoril -> OExoril imm
+  | Sltil -> OEsltil imm
+  | Sltiul -> OEsltiul imm
+
+let opimm32 vn a1 dest n optR op opimm =
+  match make_immed32 n with
+  | Imm32_single imm -> [ addinst vn (get_opimm optR imm opimm) [ a1 ] dest ]
+  | Imm32_pair (hi, lo) ->
+      let r = r2pi () in
+      let l = load_hilo32 vn r hi lo in
+      let r', l' = extract_arg l in
+      let i = addinst vn op [ a1; r' ] dest in
+      i :: l'
+
+let opimm64 vn a1 dest n optR op opimm =
+  match make_immed64 n with
+  | Imm64_single imm -> [ addinst vn (get_opimm optR imm opimm) [ a1 ] dest ]
+  | Imm64_pair (hi, lo) ->
+      let r = r2pi () in
+      let l = load_hilo64 vn r hi lo in
+      let r', l' = extract_arg l in
+      let i = addinst vn op [ a1; r' ] dest in
+      i :: l'
+  | Imm64_large imm ->
+      let r = r2pi () in
+      let op1 = OEloadli imm in
+      let i1 = addinst vn op1 [] r in
+      let r', l' = extract_arg [ i1 ] in
+      let i2 = addinst vn op [ a1; r' ] dest in
+      i2 :: l'
+
+let addimm32 vn a1 dest n optR = opimm32 vn a1 dest n optR Oadd Addiw
+
+let andimm32 vn a1 dest n = opimm32 vn a1 dest n None Oand Andiw
+
+let orimm32 vn a1 dest n = opimm32 vn a1 dest n None Oor Oriw
+
+let xorimm32 vn a1 dest n = opimm32 vn a1 dest n None Oxor Xoriw
+
+let sltimm32 vn a1 dest n = opimm32 vn a1 dest n None (OEsltw None) Sltiw
+
+let sltuimm32 vn a1 dest n = opimm32 vn a1 dest n None (OEsltuw None) Sltiuw
+
+let addimm64 vn a1 dest n optR = opimm64 vn a1 dest n optR Oaddl Addil
+
+let andimm64 vn a1 dest n = opimm64 vn a1 dest n None Oandl Andil
+
+let orimm64 vn a1 dest n = opimm64 vn a1 dest n None Oorl Oril
+
+let xorimm64 vn a1 dest n = opimm64 vn a1 dest n None Oxorl Xoril
+
+let sltimm64 vn a1 dest n = opimm64 vn a1 dest n None (OEsltl None) Sltil
+
+let sltuimm64 vn a1 dest n = opimm64 vn a1 dest n None (OEsltul None) Sltiul
+
+let is_inv_cmp = function Cle | Cgt -> true | _ -> false
+
+let make_optR is_x0 is_inv =
+  if is_x0 then if is_inv then Some X0_L else Some X0_R else None
+
+let cbranch_int32s is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbeqw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbnew optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgew optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgew optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int32u is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbequw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbneuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgeuw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltuw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgeuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int64s is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbeql optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbnel optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltl optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgel optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltl optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgel optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int64u is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbequl optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbneul optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltul optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgeul optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltul optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgeul optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cond_int32s vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEseqw optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsnew optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltw optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltw optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltw optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltw optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int32u vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEsequw optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsneuw optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltuw optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltuw optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltuw optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltuw optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int64s vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEseql optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsnel optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltl optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltl optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltl optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltl optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int64u vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEsequl optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsneul optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltul optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltul optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltul optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltul optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let is_normal_cmp = function Cne -> false | _ -> true
+
+let cond_float vn cmp f1 f2 dest =
+  match cmp with
+  | Ceq -> [ addinst vn OEfeqd [ f1; f2 ] dest ]
+  | Cne -> [ addinst vn OEfeqd [ f1; f2 ] dest ]
+  | Clt -> [ addinst vn OEfltd [ f1; f2 ] dest ]
+  | Cle -> [ addinst vn OEfled [ f1; f2 ] dest ]
+  | Cgt -> [ addinst vn OEfltd [ f2; f1 ] dest ]
+  | Cge -> [ addinst vn OEfled [ f2; f1 ] dest ]
+
+let cond_single vn cmp f1 f2 dest =
+  match cmp with
+  | Ceq -> [ addinst vn OEfeqs [ f1; f2 ] dest ]
+  | Cne -> [ addinst vn OEfeqs [ f1; f2 ] dest ]
+  | Clt -> [ addinst vn OEflts [ f1; f2 ] dest ]
+  | Cle -> [ addinst vn OEfles [ f1; f2 ] dest ]
+  | Cgt -> [ addinst vn OEflts [ f2; f1 ] dest ]
+  | Cge -> [ addinst vn OEfles [ f2; f1 ] dest ]
+
+let expanse_cbranchimm_int32s vn cmp a1 n info succ1 succ2 =
+  if Int.eq n Int.zero then cbranch_int32s true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm32 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int32s false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int32u vn cmp a1 n info succ1 succ2 =
+  if Int.eq n Int.zero then cbranch_int32u true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm32 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int32u false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int64s vn cmp a1 n info succ1 succ2 =
+  if Int64.eq n Int64.zero then
+    cbranch_int64s true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm64 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int64s false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int64u vn cmp a1 n info succ1 succ2 =
+  if Int64.eq n Int64.zero then
+    cbranch_int64u true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm64 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int64u false cmp a1 r' info succ1 succ2 l'
+
+let expanse_condimm_int32s vn cmp a1 n dest =
+  if Int.eq n Int.zero then cond_int32s vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Ceq | Cne ->
+        let r = r2pi () in
+        let l = xorimm32 vn a1 r n in
+        let r', l' = extract_arg l in
+        cond_int32s vn true cmp r' r' dest @ l'
+    | Clt -> sltimm32 vn a1 dest n
+    | Cle ->
+        if Int.eq n (Int.repr Int.max_signed) then
+          let l = loadimm32 vn dest Int.one in
+          let r, l' = extract_arg l in
+          addinst vn (OEmayundef MUint) [ a1; r ] dest :: l'
+        else sltimm32 vn a1 dest (Int.add n Int.one)
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm32 vn r n in
+        let r', l' = extract_arg l in
+        cond_int32s vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int32u vn cmp a1 n dest =
+  if Int.eq n Int.zero then cond_int32u vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Clt -> sltuimm32 vn a1 dest n
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm32 vn r n in
+        let r', l' = extract_arg l in
+        cond_int32u vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int64s vn cmp a1 n dest =
+  if Int64.eq n Int64.zero then cond_int64s vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Ceq | Cne ->
+        let r = r2pi () in
+        let l = xorimm64 vn a1 r n in
+        let r', l' = extract_arg l in
+        cond_int64s vn true cmp r' r' dest @ l'
+    | Clt -> sltimm64 vn a1 dest n
+    | Cle ->
+        if Int64.eq n (Int64.repr Int64.max_signed) then
+          let l = loadimm32 vn dest Int.one in
+          let r, l' = extract_arg l in
+          addinst vn (OEmayundef MUlong) [ a1; r ] dest :: l'
+        else sltimm64 vn a1 dest (Int64.add n Int64.one)
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm64 vn r n in
+        let r', l' = extract_arg l in
+        cond_int64s vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int64u vn cmp a1 n dest =
+  if Int64.eq n Int64.zero then cond_int64u vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Clt -> sltuimm64 vn a1 dest n
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm64 vn r n in
+        let r', l' = extract_arg l in
+        cond_int64u vn false cmp a1 r' dest @ l'
+
+let expanse_cond_fp vn cnot fn_cond cmp f1 f2 dest =
+  let normal = is_normal_cmp cmp in
+  let normal' = if cnot then not normal else normal in
+  let insn = fn_cond vn cmp f1 f2 dest in
+  if normal' then insn
+  else
+    let r', l = extract_arg insn in
+    addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let expanse_cbranch_fp vn cnot fn_cond cmp f1 f2 info succ1 succ2 =
+  let r = r2pi () in
+  let normal = is_normal_cmp cmp in
+  let normal' = if cnot then not normal else normal in
+  let insn = fn_cond vn cmp f1 f2 r in
+  let r', l = extract_arg insn in
+  if normal' then
+    Sfinalcond (CEbnew (Some X0_R), [ r'; r' ], succ1, succ2, info) :: l
+  else Sfinalcond (CEbeqw (Some X0_R), [ r'; r' ], succ1, succ2, info) :: l
+
+(** Form a list containing both sources and destination regs of an instruction *)
+
+let get_regindent = function Coq_inr _ -> [] | Coq_inl r -> [ r ]
+
+let get_regs_inst = function
+  | Inop _ -> []
+  | Iop (_, args, dest, _) -> dest :: args
+  | Iload (_, _, _, args, dest, _) -> dest :: args
+  | Istore (_, _, args, src, _) -> src :: args
+  | Icall (_, t, args, dest, _) -> dest :: (get_regindent t @ args)
+  | Itailcall (_, t, args) -> get_regindent t @ args
+  | Ibuiltin (_, args, dest, _) ->
+      AST.params_of_builtin_res dest @ AST.params_of_builtin_args args
+  | Icond (_, args, _, _, _) -> args
+  | Ijumptable (arg, _) -> [ arg ]
+  | Ireturn (Some r) -> [ r ]
+  | _ -> []
+
+(** Modify pathmap according to the size of the expansion list *)
+
+let write_pathmap initial esize pm' =
+  if exp_debug then
+    eprintf "write_pathmap: initial=%d, esize=%d\n" (p2i initial) esize;
+  let path = get_some @@ PTree.get initial !pm' in
+  let npsize = Camlcoq.Nat.of_int (esize + Camlcoq.Nat.to_int path.psize) in
+  let path' =
+    {
+      psize = npsize;
+      input_regs = path.input_regs;
+      pre_output_regs = path.pre_output_regs;
+      output_regs = path.output_regs;
+    }
+  in
+  pm' := PTree.set initial path' !pm'
+
+(** Write a single instruction in the tree and update order *)
+
+let write_inst target_node inst code' new_order =
+  code' := PTree.set (P.of_int target_node) inst !code';
+  new_order := P.of_int target_node :: !new_order
+
+(** Return olds args if the CSE numbering is empty *)
+
+let get_arguments vn vals args =
+  match reg_valnums vn vals with Some args' -> args' | None -> args
+
+(** Update the code tree with the expansion list *)
+
+let rec write_tree vn exp initial current code' new_order fturn =
+  if exp_debug then eprintf "wt: node is %d\n" !node;
+  let target_node, next_node =
+    if fturn then (P.to_int initial, current) else (current, current - 1)
+  in
+  match exp with
+  | Sr r :: _ ->
+      failwith "write_tree: there are still some symbolic values in the list"
+  | Sexp (rd, Sop (op, vals), args, None) :: k ->
+      let args = get_arguments vn vals args in
+      let inst = Iop (op, args, rd, P.of_int next_node) in
+      write_inst target_node inst code' new_order;
+      write_tree vn k initial next_node code' new_order false
+  | [ Snop succ ] ->
+      let inst = Inop succ in
+      write_inst target_node inst code' new_order
+  | [ Sexp (rd, Sop (op, vals), args, Some succ) ] ->
+      let args = get_arguments vn vals args in
+      let inst = Iop (op, args, rd, succ) in
+      write_inst target_node inst code' new_order
+  | [ Sexp (rd, Smove, args, Some succ) ] ->
+      let inst = Iop (Omove, args, rd, succ) in
+      write_inst target_node inst code' new_order
+  | [ Sfinalcond (cond, args, succ1, succ2, info) ] ->
+      let inst = Icond (cond, args, succ1, succ2, info) in
+      write_inst target_node inst code' new_order
+  | [] -> ()
+  | _ -> failwith "write_tree: invalid list"
+
+(** Main expansion function - TODO gourdinl to split? *)
+let expanse (sb : superblock) code pm =
+  if exp_debug then eprintf "#### New superblock for expansion oracle\n";
+  let new_order = ref [] in
+  let liveins = ref sb.liveins in
+  let exp = ref [] in
+  let was_branch = ref false in
+  let was_exp = ref false in
+  let code' = ref code in
+  let pm' = ref pm in
+  let vn = ref (empty_numbering ()) in
+  Array.iter
+    (fun n ->
+      was_branch := false;
+      was_exp := false;
+      let inst = get_some @@ PTree.get n code in
+      if exp_debug then eprintf "We are checking node %d\n" (p2i n);
+      (match inst with
+       (* Expansion of conditions - Ocmp *)
+       | Iop (Ocmp (Ccomp c), a1 :: a2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccomp\n";
+           exp := cond_int32s vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompu c), a1 :: a2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompu\n";
+           exp := cond_int32u vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompimm (c, imm)), a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompimm\n";
+           exp := expanse_condimm_int32s vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompuimm (c, imm)), a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompuimm\n";
+           exp := expanse_condimm_int32u vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompl c), a1 :: a2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompl\n";
+           exp := cond_int64s vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccomplu c), a1 :: a2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccomplu\n";
+           exp := cond_int64u vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccomplimm (c, imm)), a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccomplimm\n";
+           exp := expanse_condimm_int64s vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompluimm (c, imm)), a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompluimm\n";
+           exp := expanse_condimm_int64u vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompf c), f1 :: f2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompf\n";
+           exp := expanse_cond_fp vn false cond_float c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Cnotcompf c), f1 :: f2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Cnotcompf\n";
+           exp := expanse_cond_fp vn true cond_float c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompfs c), f1 :: f2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ccompfs\n";
+           exp := expanse_cond_fp vn false cond_single c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Cnotcompfs c), f1 :: f2 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Cnotcompfs\n";
+           exp := expanse_cond_fp vn true cond_single c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       (* Expansion of branches - Ccomp *)
+       | Icond (Ccomp c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccomp\n";
+           exp := cbranch_int32s false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompu c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompu\n";
+           exp := cbranch_int32u false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompimm\n";
+           exp := expanse_cbranchimm_int32s vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompuimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompuimm\n";
+           exp := expanse_cbranchimm_int32u vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompl c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompl\n";
+           exp := cbranch_int64s false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccomplu c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccomplu\n";
+           exp := cbranch_int64u false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccomplimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccomplimm\n";
+           exp := expanse_cbranchimm_int64s vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompluimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompluimm\n";
+           exp := expanse_cbranchimm_int64u vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompf c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompf\n";
+           exp :=
+             expanse_cbranch_fp vn false cond_float c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Cnotcompf c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Cnotcompf\n";
+           exp := expanse_cbranch_fp vn true cond_float c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompfs c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Ccompfs\n";
+           exp :=
+             expanse_cbranch_fp vn false cond_single c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Cnotcompfs c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           if exp_debug then eprintf "Icond/Cnotcompfs\n";
+           exp :=
+             expanse_cbranch_fp vn true cond_single c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | _ -> ());
+      (if not !was_exp then
+       match inst with
+       | Iop (Ofloatconst f, nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ofloatconst\n";
+           let r = r2pi () in
+           let l = loadimm64 vn r (Floats.Float.to_bits f) in
+           let r', l' = extract_arg l in
+           exp := addinst vn Ofloat_of_bits [ r' ] dest :: l';
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Osingleconst f, nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Osingleconst\n";
+           let r = r2pi () in
+           let l = loadimm32 vn r (Floats.Float32.to_bits f) in
+           let r', l' = extract_arg l in
+           exp := addinst vn Osingle_of_bits [ r' ] dest :: l';
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ointconst n, nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ointconst\n";
+           exp := loadimm32 vn dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Olongconst n, nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Olongconst\n";
+           exp := loadimm64 vn dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oaddimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oaddimm\n";
+           exp := addimm32 vn a1 dest n None;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oaddlimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oaddlimm\n";
+           exp := addimm64 vn a1 dest n None;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oandimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oandimm\n";
+           exp := andimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oandlimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oandlimm\n";
+           exp := andimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oorimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oorimm\n";
+           exp := orimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oorlimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oorlimm\n";
+           exp := orimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oxorimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oxorimm\n";
+           exp := xorimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oxorlimm n, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Oxorlimm\n";
+           exp := xorimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast8signed, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/cast8signed\n";
+           let op = Oshlimm (Int.repr (Z.of_sint 24)) in
+           let r = r2pi () in
+           let i1 = addinst vn op [ a1 ] r in
+           let r', l = extract_arg [ i1 ] in
+           exp :=
+             addinst vn (Oshrimm (Int.repr (Z.of_sint 24))) [ r' ] dest :: l;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast16signed, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/cast16signed\n";
+           let op = Oshlimm (Int.repr (Z.of_sint 16)) in
+           let r = r2pi () in
+           let i1 = addinst vn op [ a1 ] r in
+           let r', l = extract_arg [ i1 ] in
+           exp :=
+             addinst vn (Oshrimm (Int.repr (Z.of_sint 16))) [ r' ] dest :: l;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast32unsigned, a1 :: nil, dest, succ) ->
+           if exp_debug then eprintf "Iop/Ocast32unsigned\n";
+           let r1 = r2pi () in
+           let r2 = r2pi () in
+           let op1 = Ocast32signed in
+           let i1 = addinst vn op1 [ a1 ] r1 in
+           let r1', l1 = extract_arg [ i1 ] in
+
+           let op2 = Oshllimm (Int.repr (Z.of_sint 32)) in
+           let i2 = addinst vn op2 [ r1' ] r2 in
+           let r2', l2 = extract_arg (i2 :: l1) in
+
+           let op3 = Oshrluimm (Int.repr (Z.of_sint 32)) in
+           exp := addinst vn op3 [ r2' ] dest :: l2;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oshrximm n, a1 :: nil, dest, succ) ->
+           if Int.eq n Int.zero then (
+             if exp_debug then eprintf "Iop/Oshrximm1\n";
+             exp := [ addinst vn (OEmayundef (MUshrx n)) [ a1; a1 ] dest ])
+           else if Int.eq n Int.one then (
+             if exp_debug then eprintf "Iop/Oshrximm2\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let op1 = Oshruimm (Int.repr (Z.of_sint 31)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oadd in
+             let i2 = addinst vn op2 [ a1; r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oshrimm Int.one in
+             let i3 = addinst vn op3 [ r2' ] dest in
+             let r3, l3 = extract_arg (i3 :: l2) in
+             exp := addinst vn (OEmayundef (MUshrx n)) [ r3; r3 ] dest :: l3)
+           else (
+             if exp_debug then eprintf "Iop/Oshrximm3\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let r3 = r2pi () in
+             let op1 = Oshrimm (Int.repr (Z.of_sint 31)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oshruimm (Int.sub Int.iwordsize n) in
+             let i2 = addinst vn op2 [ r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oadd in
+             let i3 = addinst vn op3 [ a1; r2' ] r3 in
+             let r3', l3 = extract_arg (i3 :: l2) in
+
+             let op4 = Oshrimm n in
+             let i4 = addinst vn op4 [ r3' ] dest in
+             let r4, l4 = extract_arg (i4 :: l3) in
+             exp := addinst vn (OEmayundef (MUshrx n)) [ r4; r4 ] dest :: l4);
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oshrxlimm n, a1 :: nil, dest, succ) ->
+           if Int.eq n Int.zero then (
+             if exp_debug then eprintf "Iop/Oshrxlimm1\n";
+             exp := [ addinst vn (OEmayundef (MUshrxl n)) [ a1; a1 ] dest ])
+           else if Int.eq n Int.one then (
+             if exp_debug then eprintf "Iop/Oshrxlimm2\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let op1 = Oshrluimm (Int.repr (Z.of_sint 63)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oaddl in
+             let i2 = addinst vn op2 [ a1; r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oshrlimm Int.one in
+             let i3 = addinst vn op3 [ r2' ] dest in
+             let r3, l3 = extract_arg (i3 :: l2) in
+             exp := addinst vn (OEmayundef (MUshrxl n)) [ r3; r3 ] dest :: l3)
+           else (
+             if exp_debug then eprintf "Iop/Oshrxlimm3\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let r3 = r2pi () in
+             let op1 = Oshrlimm (Int.repr (Z.of_sint 63)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oshrluimm (Int.sub Int64.iwordsize' n) in
+             let i2 = addinst vn op2 [ r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oaddl in
+             let i3 = addinst vn op3 [ a1; r2' ] r3 in
+             let r3', l3 = extract_arg (i3 :: l2) in
+
+             let op4 = Oshrlimm n in
+             let i4 = addinst vn op4 [ r3' ] dest in
+             let r4, l4 = extract_arg (i4 :: l3) in
+             exp := addinst vn (OEmayundef (MUshrxl n)) [ r4; r4 ] dest :: l4);
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | _ -> ());
+      (* Update the CSE numbering *)
+      (if not !was_exp then
+       match inst with
+       | Iop (op, args, dest, succ) ->
+           let v = get_nvalues vn args in
+           addsop vn v op dest
+       | Iload (_, _, _, _, dst, _) -> set_unknown vn dst
+       | Istore (chk, addr, args, src, s) ->
+           !vn.seqs <- kill_mem_operations !vn.seqs
+       | Icall (_, _, _, _, _) | Itailcall (_, _, _) | Ibuiltin (_, _, _, _) ->
+           vn := empty_numbering ()
+       | _ -> ());
+      (* Update code, liveins, pathmap, and order of the superblock for one expansion *)
+      if !was_exp then (
+        (if !was_branch && List.length !exp > 1 then
+         let lives = PTree.get n !liveins in
+         match lives with
+         | Some lives ->
+             let new_branch_pc = P.of_int (!node + 1) in
+             liveins := PTree.set new_branch_pc lives !liveins;
+             liveins := PTree.remove n !liveins
+         | _ -> ());
+        node := !node + List.length !exp - 1;
+        write_pathmap sb.instructions.(0) (List.length !exp - 1) pm';
+        write_tree vn (List.rev !exp) n !node code' new_order true)
+      else new_order := n :: !new_order)
+    sb.instructions;
+  sb.instructions <- Array.of_list (List.rev !new_order);
+  sb.liveins <- !liveins;
+  (!code', !pm')
+
+(** Compute the last used node and reg indexs *)
+
+let rec find_last_node_reg = function
+  | [] -> ()
+  | (pc, i) :: k ->
+      let rec traverse_list var = function
+        | [] -> ()
+        | e :: t ->
+            let e' = p2i e in
+            if e' > !var then var := e';
+            traverse_list var t
+      in
+      traverse_list node [ pc ];
+      traverse_list reg (get_regs_inst i);
+      find_last_node_reg k
diff --git a/riscV/ExtValues.v b/riscV/ExtValues.v
new file mode 100644
index 00000000..edf359ef
--- /dev/null
+++ b/riscV/ExtValues.v
@@ -0,0 +1,123 @@
+Require Import Coqlib.
+Require Import Integers.
+Require Import Values.
+Require Import Floats.
+Require Import Memory.
+Require Import Lia.
+    
+Definition bits_of_float x :=
+  match x with
+  | Vfloat f => Vlong (Float.to_bits f)
+  | _ => Vundef
+  end.
+
+Definition bits_of_single x :=
+  match x with
+  | Vsingle f => Vint (Float32.to_bits f)
+  | _ => Vundef
+  end.
+
+Definition float_of_bits x :=
+  match x with
+  | Vlong f => Vfloat (Float.of_bits f)
+  | _ => Vundef
+  end.
+
+Definition single_of_bits x :=
+  match x with
+  | Vint f => Vsingle (Float32.of_bits f)
+  | _ => Vundef
+  end.
+
+Definition bitwise_select_long b vtrue vfalse :=
+  Int64.or (Int64.and (Int64.neg b) vtrue)
+           (Int64.and (Int64.sub b Int64.one) vfalse).
+
+Lemma bitwise_select_long_true :
+  forall vtrue vfalse,
+    bitwise_select_long Int64.one vtrue vfalse = vtrue.
+Proof.
+  intros. unfold bitwise_select_long. cbn.
+  change (Int64.neg Int64.one) with Int64.mone.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_mone.
+  rewrite Int64.sub_idem.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_zero.
+  apply Int64.or_zero.
+Qed.
+
+Lemma bitwise_select_long_false :
+  forall vtrue vfalse,
+    bitwise_select_long Int64.zero vtrue vfalse = vfalse.
+Proof.
+  intros. unfold bitwise_select_long. cbn.
+  rewrite Int64.neg_zero.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_zero.
+  rewrite Int64.sub_zero_r.
+  change (Int64.neg Int64.one) with Int64.mone.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_mone.
+  rewrite Int64.or_commut.
+  apply Int64.or_zero.
+Qed.
+
+Definition select01_long (vb : val) (vtrue : val) (vfalse : val) : val :=
+  match vb with
+  | (Vint b) =>
+    if Int.eq b Int.one
+    then vtrue
+    else if Int.eq b Int.zero
+         then vfalse
+         else Vundef
+  | _ => Vundef
+  end.
+
+Lemma normalize_select01:
+  forall x y z, Val.normalize (select01_long x y z) AST.Tlong = select01_long x (Val.normalize y AST.Tlong) (Val.normalize z AST.Tlong).
+Proof.
+  unfold select01_long.
+  intros.
+  destruct x; cbn; trivial.
+  destruct (Int.eq i Int.one); trivial.
+  destruct (Int.eq i Int.zero); trivial.
+Qed.
+
+Lemma select01_long_true:
+  forall vt vf,
+    select01_long Vtrue vt vf = vt.
+Proof.
+  intros. unfold select01_long. cbn.
+  rewrite Int.eq_true. reflexivity.
+Qed.
+
+Lemma select01_long_false:
+  forall vt vf,
+    select01_long Vfalse vt vf = vf.
+Proof.
+  intros. unfold select01_long. cbn.
+  rewrite Int.eq_true.
+  rewrite Int.eq_false. reflexivity.
+  cbv. discriminate.
+Qed.
+
+Lemma float_bits_normalize:
+  forall v1,
+  ExtValues.float_of_bits (Val.normalize (ExtValues.bits_of_float v1) AST.Tlong) =
+  Val.normalize v1 AST.Tfloat.
+Proof.
+  destruct v1; cbn; trivial.
+  f_equal.
+  apply Float.of_to_bits.
+Qed.
+
+Lemma single_bits_normalize:
+  forall v1,
+  ExtValues.single_of_bits (Val.normalize (ExtValues.bits_of_single v1) AST.Tint) =
+  Val.normalize v1 AST.Tsingle.
+Proof.
+  destruct v1; cbn; trivial.
+  f_equal.
+  apply Float32.of_to_bits.
+Qed.
diff --git a/riscV/Machregs.v b/riscV/Machregs.v
index d8bb4a4b..d469e594 100644
--- a/riscV/Machregs.v
+++ b/riscV/Machregs.v
@@ -194,6 +194,17 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_inline_asm txt sg clob => destroyed_by_clobber clob
   | EF_memcpy sz al => R5 :: R6 :: R7 :: F0 :: nil
+  | EF_builtin name sg =>
+      if string_dec name "__builtin_clz"
+      || string_dec name "__builtin_clzl"
+      || string_dec name "__builtin_clzll" then
+        R5 :: R8 :: R9 :: nil
+      else if string_dec name "__builtin_ctz"
+      || string_dec name "__builtin_ctzl"
+      || string_dec name "__builtin_ctzll" then
+        R6 :: R8 :: R9 :: nil
+      else
+        nil
   | _ => nil
   end.
 
@@ -213,6 +224,20 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(
   | EF_builtin name sg =>
       if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then
         (Some R6 :: Some R5 :: nil, Some R5 :: Some R6 :: nil)
+      else if string_dec name "__builtin_clz"
+           || string_dec name "__builtin_clzl" then
+        (Some R5 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_clzll" then
+        if Archi.ptr64
+        then (Some R5 :: nil, Some R7 :: nil)
+        else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_ctz"
+           || string_dec name "__builtin_ctzl" then
+        (Some R6 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_ctzll" then
+        if Archi.ptr64
+        then (Some R6 :: nil, Some R7 :: nil)
+        else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
       else
         (nil, nil)
   | _ =>
diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml
index 07097eaf..840943e7 100644
--- a/riscV/Machregsaux.ml
+++ b/riscV/Machregsaux.ml
@@ -12,25 +12,7 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register r = false
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
                                               
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/riscV/Machregsaux.mli
+++ b/riscV/Machregsaux.mli
@@ -12,9 +12,6 @@
 
 (** Auxiliary functions on machine registers *)
 
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
 val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
 
 val class_of_type: AST.typ -> int
diff --git a/riscV/NeedOp.v b/riscV/NeedOp.v
index 117bbcb4..7d66cbb8 100644
--- a/riscV/NeedOp.v
+++ b/riscV/NeedOp.v
@@ -87,6 +87,45 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
   | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv)
   | Ocmp c => needs_of_condition c
+  | OEseqw _ => op2 (default nv)
+  | OEsnew _ => op2 (default nv)
+  | OEsequw _ => op2 (default nv)
+  | OEsneuw _ => op2 (default nv)
+  | OEsltw _ => op2 (default nv)
+  | OEsltuw _ => op2 (default nv)
+  | OEsltiw _ => op1 (default nv)
+  | OEsltiuw _ => op1 (default nv)
+  | OExoriw _ => op1 (bitwise nv)
+  | OEluiw _ => op1 (default nv)
+  | OEaddiw _ _ => op1 (default nv)
+  | OEandiw n => op1 (andimm nv n)
+  | OEoriw n => op1 (orimm nv n)
+  | OEseql _ => op2 (default nv)
+  | OEsnel _ => op2 (default nv)
+  | OEsequl _ => op2 (default nv)
+  | OEsneul _ => op2 (default nv)
+  | OEsltl _ => op2 (default nv)
+  | OEsltul _ => op2 (default nv)
+  | OEsltil _ => op1 (default nv)
+  | OEsltiul _ => op1 (default nv)
+  | OExoril _ => op1 (default nv)
+  | OEluil _ => op1 (default nv)
+  | OEaddil _ _ => op1 (default nv)
+  | OEandil _ => op1 (default nv)
+  | OEoril _ => op1 (default nv)
+  | OEloadli _ => op1 (default nv)
+  | OEmayundef _ => op2 (default nv)
+  | OEfeqd => op2 (default nv)
+  | OEfltd => op2 (default nv)
+  | OEfled => op2 (default nv)
+  | OEfeqs => op2 (default nv)
+  | OEflts => op2 (default nv)
+  | OEfles => op2 (default nv)
+  | Obits_of_single => op1 (default nv)
+  | Obits_of_float => op1 (default nv)
+  | Osingle_of_bits => op1 (default nv)
+  | Ofloat_of_bits => op1 (default nv)
+  | Oselectl => All :: nv :: nv :: nil
   end.
 
 Definition operation_is_redundant (op: operation) (nv: nval): bool :=
@@ -154,6 +193,27 @@ Proof.
 - apply shlimm_sound; auto.
 - apply shrimm_sound; auto.
 - apply shruimm_sound; auto.
+- fold (Val.and (Vint n) v0);
+  fold (Val.and (Vint n) v2);
+  rewrite (Val.and_commut (Vint n) v0);
+  rewrite (Val.and_commut (Vint n) v2);
+  apply andimm_sound; auto.
+- fold (Val.or (Vint n) v0);
+  fold (Val.or (Vint n) v2);
+  rewrite (Val.or_commut (Vint n) v0);
+  rewrite (Val.or_commut (Vint n) v2);
+  apply orimm_sound; auto.
+- apply xor_sound; auto with na.
+- (* selectl *)
+  unfold ExtValues.select01_long.
+  destruct v0; auto with na.
+  assert (Val.lessdef (Vint i) v4) as LESSDEF by auto with na.
+  inv LESSDEF.
+  destruct (Int.eq i Int.one).
+  { apply normalize_sound; auto. }
+  destruct (Int.eq i Int.zero).
+  { apply normalize_sound; auto. }
+  cbn. auto with na.
 Qed.
 
 Lemma operation_is_redundant_sound:
diff --git a/riscV/Op.v b/riscV/Op.v
index 14d07e0b..9f94828f 100644
--- a/riscV/Op.v
+++ b/riscV/Op.v
@@ -32,11 +32,18 @@
 Require Import BoolEqual Coqlib.
 Require Import AST Integers Floats.
 Require Import Values Memory Globalenvs Events.
+Require ExtValues.
 
 Set Implicit Arguments.
 
 (** Conditions (boolean-valued operators). *)
 
+(** Type to modelize the use of a special register in arith operations *)
+
+Inductive oreg: Type :=
+  | X0_L: oreg
+  | X0_R: oreg.
+
 Inductive condition : Type :=
   | Ccomp (c: comparison)       (**r signed integer comparison *)
   | Ccompu (c: comparison)      (**r unsigned integer comparison *)
@@ -49,7 +56,32 @@ Inductive condition : Type :=
   | Ccompf (c: comparison)      (**r 64-bit floating-point comparison *)
   | Cnotcompf (c: comparison)   (**r negation of a floating-point comparison *)
   | Ccompfs (c: comparison)     (**r 32-bit floating-point comparison *)
-  | Cnotcompfs (c: comparison). (**r negation of a floating-point comparison *)
+  | Cnotcompfs (c: comparison)  (**r negation of a floating-point comparison *)
+  (* Expansed branches *)
+  | CEbeqw (optR: option oreg)                      (**r branch-if-equal signed *)
+  | CEbnew (optR: option oreg)                      (**r branch-if-not-equal signed *)
+  | CEbequw (optR: option oreg)                     (**r branch-if-equal unsigned *)
+  | CEbneuw (optR: option oreg)                     (**r branch-if-not-equal unsigned *)
+  | CEbltw (optR: option oreg)                      (**r branch-if-less signed *)
+  | CEbltuw (optR: option oreg)                     (**r branch-if-less unsigned *)
+  | CEbgew (optR: option oreg)                      (**r branch-if-greater-or-equal signed *)
+  | CEbgeuw (optR: option oreg)                     (**r branch-if-greater-or-equal unsigned *)
+  | CEbeql (optR: option oreg)                      (**r branch-if-equal signed *)
+  | CEbnel (optR: option oreg)                      (**r branch-if-not-equal signed *)
+  | CEbequl (optR: option oreg)                     (**r branch-if-equal unsigned *)
+  | CEbneul (optR: option oreg)                     (**r branch-if-not-equal unsigned *)
+  | CEbltl (optR: option oreg)                      (**r branch-if-less signed *)
+  | CEbltul (optR: option oreg)                     (**r branch-if-less unsigned *)
+  | CEbgel (optR: option oreg)                      (**r branch-if-greater-or-equal signed *)
+  | CEbgeul (optR: option oreg).                    (**r branch-if-greater-or-equal unsigned *)
+
+(* This type will define the eval function of a OEmayundef operation. *)
+
+Inductive mayundef: Type :=
+  | MUint: mayundef
+  | MUlong: mayundef
+  | MUshrx: int -> mayundef
+  | MUshrxl: int -> mayundef.
 
 (** Arithmetic and logical operations.  In the descriptions, [rd] is the
   result of the operation and [r1], [r2], etc, are the arguments. *)
@@ -152,7 +184,47 @@ Inductive operation : Type :=
   | Osingleoflong            (**r [rd = float32_of_signed_long(r1)] *)
   | Osingleoflongu           (**r [rd = float32_of_unsigned_int(r1)] *)
 (*c Boolean tests: *)
-  | Ocmp (cond: condition).  (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+  | Ocmp (cond: condition)   (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+  (* Expansed conditions *)
+  | OEseqw (optR: option oreg)                  (**r [rd <- rs1 == rs2] signed *)
+  | OEsnew (optR: option oreg)                  (**r [rd <- rs1 != rs2] signed *)
+  | OEsequw (optR: option oreg)                 (**r [rd <- rs1 == rs2] unsigned *)
+  | OEsneuw (optR: option oreg)                 (**r [rd <- rs1 != rs2] unsigned *)
+  | OEsltw (optR: option oreg)                  (**r set-less-than *)
+  | OEsltuw (optR: option oreg)                 (**r set-less-than unsigned *)
+  | OEsltiw (n: int)                            (**r set-less-than immediate *)
+  | OEsltiuw (n: int)                           (**r set-less-than unsigned immediate *)
+  | OEaddiw (optR: option oreg) (n: int)        (**r add immediate *)
+  | OEandiw (n: int)                            (**r and immediate *)
+  | OEoriw (n: int)                             (**r or immediate *)
+  | OExoriw (n: int)                            (**r xor immediate *)
+  | OEluiw (n: int)                             (**r load upper-immediate *)
+  | OEseql (optR: option oreg)                  (**r [rd <- rs1 == rs2] signed *)
+  | OEsnel (optR: option oreg)                  (**r [rd <- rs1 != rs2] signed *)
+  | OEsequl (optR: option oreg)                 (**r [rd <- rs1 == rs2] unsigned *)
+  | OEsneul (optR: option oreg)                 (**r [rd <- rs1 != rs2] unsigned *)
+  | OEsltl (optR: option oreg)                  (**r set-less-than *)
+  | OEsltul (optR: option oreg)                 (**r set-less-than unsigned *)
+  | OEsltil (n: int64)                          (**r set-less-than immediate *)
+  | OEsltiul (n: int64)                         (**r set-less-than unsigned immediate *)
+  | OEaddil (optR: option oreg) (n: int64)      (**r add immediate *)
+  | OEandil (n: int64)                          (**r and immediate *)
+  | OEoril (n: int64)                           (**r or immediate *)
+  | OExoril (n: int64)                          (**r xor immediate *)
+  | OEluil (n: int64)                           (**r load upper-immediate *)
+  | OEloadli (n: int64)                         (**r load an immediate int64 *)
+  | OEmayundef (mu: mayundef)
+  | OEfeqd                                      (**r compare equal *)
+  | OEfltd                                      (**r compare less-than *)
+  | OEfled                                      (**r compare less-than/equal *)
+  | OEfeqs                                      (**r compare equal *)
+  | OEflts                                      (**r compare less-than *)
+  | OEfles                                      (**r compare less-than/equal *)
+  | Obits_of_single
+  | Obits_of_float
+  | Osingle_of_bits
+  | Ofloat_of_bits
+  | Oselectl.
 
 (** Addressing modes.  [r1], [r2], etc, are the arguments to the
   addressing. *)
@@ -164,11 +236,15 @@ Inductive addressing: Type :=
 
 (** Comparison functions (used in modules [CSE] and [Allocation]). *)
 
+Definition oreg_eq: forall (x y: oreg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
 Definition eq_condition (x y: condition) : {x=y} + {x<>y}.
 Proof.
-  generalize Int.eq_dec Int64.eq_dec; intro.
+  generalize Int.eq_dec Int64.eq_dec bool_dec oreg_eq; intros.
   assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
   decide equality.
+  all: destruct optR, optR0; decide equality.
 Defined.
 
 Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}.
@@ -179,8 +255,9 @@ Defined.
 
 Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
 Proof.
-  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition; intros.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition bool_dec Val.eq oreg_eq; intros.
   decide equality.
+  all: try destruct optR, optR0; try decide equality.
 Defined.
 
 (* Alternate definition: 
@@ -197,6 +274,44 @@ Defined.
 
 Global Opaque eq_condition eq_addressing eq_operation.
 
+(** Generic function to evaluate an instruction according to the given specific register *)
+  
+Definition zero32 := (Vint Int.zero).
+Definition zero64 := (Vlong Int64.zero).
+  
+Definition apply_bin_oreg {B} (optR: option oreg) (sem: val -> val -> B) (v1 v2 vz: val): B :=
+  match optR with
+  | None => sem v1 v2
+  | Some X0_L => sem vz v1
+  | Some X0_R => sem v1 vz
+  end.
+
+(** Mayundef evaluation according to the above defined type *)
+
+Definition eval_may_undef (mu: mayundef) (v1 v2: val): val :=
+  match mu with
+  | MUint => match v1, v2 with
+             | Vint _, Vint _ => v2
+             | _, _ => Vundef
+             end
+  | MUlong => match v1, v2 with
+              | Vlong _, Vint _ => v2
+              | _, _ => Vundef
+              end
+  | MUshrx i =>
+      match v1, v2 with
+      | Vint _, Vint _ =>
+          if Int.ltu i (Int.repr 31) then v2 else Vundef
+      | _, _ => Vundef
+      end
+  | MUshrxl i =>
+      match v1, v2 with
+      | Vlong _, Vlong _ =>
+          if Int.ltu i (Int.repr 63) then v2 else Vundef
+      | _, _ => Vundef
+      end
+  end.
+
 (** * Evaluation functions *)
 
 (** Evaluation of conditions, operators and addressing modes applied
@@ -218,9 +333,34 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
   | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
   | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
+  (* Expansed branches *)
+  | CEbeqw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Ceq) v1 v2 zero32
+  | CEbnew optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Cne) v1 v2 zero32
+  | CEbequw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Ceq) v1 v2 zero32
+  | CEbneuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Cne) v1 v2 zero32
+  | CEbltw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Clt) v1 v2 zero32
+  | CEbltuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Clt) v1 v2 zero32
+  | CEbgew optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Cge) v1 v2 zero32
+  | CEbgeuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Cge) v1 v2 zero32
+  | CEbeql optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Ceq) v1 v2 zero64
+  | CEbnel optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Cne) v1 v2 zero64
+  | CEbequl optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Ceq) v1 v2 zero64
+  | CEbneul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Cne) v1 v2 zero64
+  | CEbltl optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Clt) v1 v2 zero64
+  | CEbltul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Clt) v1 v2 zero64
+  | CEbgel optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Cge) v1 v2 zero64
+  | CEbgeul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Cge) v1 v2 zero64
   | _, _ => None
   end.
 
+(** Assert sp is a pointer *)
+
+Definition get_sp sp :=
+  match sp with
+  | Vptr _ _ => sp
+  | _ => Vundef
+  end.
+
 Definition eval_operation
     (F V: Type) (genv: Genv.t F V) (sp: val)
     (op: operation) (vl: list val) (m: mem): option val :=
@@ -241,10 +381,10 @@ Definition eval_operation
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2)
   | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2)
-  | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
-  | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
-  | Omod, v1 :: v2 :: nil => Val.mods v1 v2
-  | Omodu, v1 :: v2 :: nil => Val.modu v1 v2
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
+  | Omod, v1 :: v2 :: nil => Some (Val.maketotal (Val.mods v1 v2))
+  | Omodu, v1 :: v2 :: nil => Some (Val.maketotal (Val.modu v1 v2))
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
   | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
   | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2)
@@ -257,7 +397,7 @@ Definition eval_operation
   | Oshrimm n, v1 :: nil => Some (Val.shr v1 (Vint n))
   | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
   | Oshruimm n, v1 :: nil => Some (Val.shru v1 (Vint n))
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
   | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some (Val.loword v1)
   | Ohighlong, v1::nil => Some (Val.hiword v1)
@@ -270,10 +410,10 @@ Definition eval_operation
   | Omull, v1::v2::nil => Some (Val.mull v1 v2)
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1::v2::nil => Val.divls v1 v2
-  | Odivlu, v1::v2::nil => Val.divlu v1 v2
-  | Omodl, v1::v2::nil => Val.modls v1 v2
-  | Omodlu, v1::v2::nil => Val.modlu v1 v2
+  | Odivl, v1::v2::nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1::v2::nil => Some (Val.maketotal (Val.divlu v1 v2))
+  | Omodl, v1::v2::nil => Some (Val.maketotal (Val.modls v1 v2))
+  | Omodlu, v1::v2::nil => Some (Val.maketotal (Val.modlu v1 v2))
   | Oandl, v1::v2::nil => Some(Val.andl v1 v2)
   | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n))
   | Oorl, v1::v2::nil => Some(Val.orl v1 v2)
@@ -286,7 +426,7 @@ Definition eval_operation
   | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n))
   | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2)
   | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n))
-  | Oshrxlimm n, v1::nil => Val.shrxl v1 (Vint n)
+  | Oshrxlimm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
   | Onegf, v1::nil => Some (Val.negf v1)
   | Oabsf, v1::nil => Some (Val.absf v1)
   | Oaddf, v1::v2::nil => Some (Val.addf v1 v2)
@@ -301,23 +441,65 @@ Definition eval_operation
   | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
   | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ointuoffloat, v1::nil => Val.intuoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ofloatofintu, v1::nil => Val.floatofintu v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Ointuofsingle, v1::nil => Val.intuofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Osingleofintu, v1::nil => Val.singleofintu v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Olonguoffloat, v1::nil => Val.longuoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Ofloatoflongu, v1::nil => Val.floatoflongu v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Olonguofsingle, v1::nil => Val.longuofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Osingleoflongu, v1::nil => Val.singleoflongu v1
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
+  | Obits_of_single, v1::nil => Some (ExtValues.bits_of_single v1)
+  | Obits_of_float, v1::nil => Some (ExtValues.bits_of_float v1)
+  | Osingle_of_bits, v1::nil => Some (ExtValues.single_of_bits v1)
+  | Ofloat_of_bits, v1::nil => Some (ExtValues.float_of_bits v1)
   | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
+  (* Expansed conditions *)
+  | OEseqw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Ceq) v1 v2 zero32)
+  | OEsnew optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Cne) v1 v2 zero32)
+  | OEsequw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Ceq) v1 v2 zero32)
+  | OEsneuw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Cne) v1 v2 zero32)
+  | OEsltw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Clt) v1 v2 zero32)
+  | OEsltuw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Clt) v1 v2 zero32)
+  | OEsltiw n, v1::nil => Some (Val.cmp Clt v1 (Vint n))
+  | OEsltiuw n, v1::nil => Some (Val.cmpu (Mem.valid_pointer m) Clt v1 (Vint n))
+  | OExoriw n, v1::nil => Some (Val.xor v1 (Vint n))
+  | OEluiw n, nil => Some (Val.shl (Vint n) (Vint (Int.repr 12)))
+  | OEaddiw optR n, nil => Some (apply_bin_oreg optR Val.add (Vint n) Vundef zero32)
+  | OEaddiw optR n, v1::nil => Some (apply_bin_oreg optR Val.add v1 (Vint n) Vundef)
+  | OEandiw n, v1::nil => Some (Val.and (Vint n) v1)
+  | OEoriw n, v1::nil => Some (Val.or (Vint n) v1)
+  | OEseql optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Ceq) v1 v2 zero64))
+  | OEsnel optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Cne) v1 v2 zero64))
+  | OEsequl optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Ceq) v1 v2 zero64))
+  | OEsneul optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Cne) v1 v2 zero64))
+  | OEsltl optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Clt) v1 v2 zero64))
+  | OEsltul optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Clt) v1 v2 zero64))
+  | OEsltil n, v1::nil => Some (Val.maketotal (Val.cmpl Clt v1 (Vlong n)))
+  | OEsltiul n, v1::nil => Some (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt v1 (Vlong n)))
+  | OExoril n, v1::nil => Some (Val.xorl v1 (Vlong n))
+  | OEluil n, nil => Some (Vlong (Int64.sign_ext 32 (Int64.shl n (Int64.repr 12))))
+  | OEaddil optR n, nil => Some (apply_bin_oreg optR Val.addl (Vlong n) Vundef zero64)
+  | OEaddil optR n, v1::nil => Some (apply_bin_oreg optR Val.addl v1 (Vlong n) Vundef)
+  | OEandil n, v1::nil => Some (Val.andl (Vlong n) v1)
+  | OEoril n, v1::nil => Some (Val.orl (Vlong n) v1)
+  | OEloadli n, nil => Some (Vlong n)
+  | OEmayundef mu, v1 :: v2 :: nil => Some (eval_may_undef mu v1 v2)
+  | OEfeqd, v1::v2::nil => Some (Val.cmpf Ceq v1 v2)
+  | OEfltd, v1::v2::nil => Some (Val.cmpf Clt v1 v2)
+  | OEfled, v1::v2::nil => Some (Val.cmpf Cle v1 v2)
+  | OEfeqs, v1::v2::nil => Some (Val.cmpfs Ceq v1 v2)
+  | OEflts, v1::v2::nil => Some (Val.cmpfs Clt v1 v2)
+  | OEfles, v1::v2::nil => Some (Val.cmpfs Cle v1 v2)
+  | Oselectl, vb::vt::vf::nil => Some (Val.normalize (ExtValues.select01_long vb vt vf) Tlong)
   | _, _ => None
   end.
 
@@ -348,9 +530,9 @@ Qed.
 Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
-      destruct x; simpl in H; FuncInv
+      destruct x; cbn in H; FuncInv
   | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
-      destruct v; simpl in H; FuncInv
+      destruct v; cbn in H; FuncInv
   | H: (if Archi.ptr64 then _ else _) = Some _ |- _ =>
       destruct Archi.ptr64 eqn:?; FuncInv
   | H: (Some _ = Some _) |- _ =>
@@ -377,6 +559,31 @@ Definition type_of_condition (c: condition) : list typ :=
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
   | Ccompfs _ => Tsingle :: Tsingle :: nil
   | Cnotcompfs _ => Tsingle :: Tsingle :: nil
+  | CEbeqw _ => Tint :: Tint :: nil
+  | CEbnew _ => Tint :: Tint :: nil
+  | CEbequw _ => Tint :: Tint :: nil
+  | CEbneuw _ => Tint :: Tint :: nil
+  | CEbltw _ => Tint :: Tint :: nil
+  | CEbltuw _ => Tint :: Tint :: nil
+  | CEbgew _ => Tint :: Tint :: nil
+  | CEbgeuw _ => Tint :: Tint :: nil
+  | CEbeql _ => Tlong :: Tlong :: nil
+  | CEbnel _ => Tlong :: Tlong :: nil
+  | CEbequl _ => Tlong :: Tlong :: nil
+  | CEbneul _ => Tlong :: Tlong :: nil
+  | CEbltl _ => Tlong :: Tlong :: nil
+  | CEbltul _ => Tlong :: Tlong :: nil
+  | CEbgel _ => Tlong :: Tlong :: nil
+  | CEbgeul _ => Tlong :: Tlong :: nil
+  end.
+
+(** The type of mayundef and addsp is dynamic *)
+
+Definition type_of_mayundef mu :=
+  match mu with
+  | MUint | MUshrx _ => (Tint :: Tint :: nil, Tint)
+  | MUlong => (Tlong :: Tint :: nil, Tint)
+  | MUshrxl _ => (Tlong :: Tlong :: nil, Tlong)
   end.
 
 Definition type_of_operation (op: operation) : list typ * typ :=
@@ -474,6 +681,47 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoflong => (Tlong :: nil, Tsingle)
   | Osingleoflongu => (Tlong :: nil, Tsingle)
   | Ocmp c => (type_of_condition c, Tint)
+  | OEseqw _ => (Tint :: Tint :: nil, Tint)
+  | OEsnew _ => (Tint :: Tint :: nil, Tint)
+  | OEsequw _ => (Tint :: Tint :: nil, Tint)
+  | OEsneuw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltuw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltiw _ => (Tint :: nil, Tint)
+  | OEsltiuw _ => (Tint :: nil, Tint)
+  | OExoriw _ => (Tint :: nil, Tint)
+  | OEluiw _ => (nil, Tint)
+  | OEaddiw None _ => (Tint :: nil, Tint)
+  | OEaddiw (Some _) _ => (nil, Tint)
+  | OEandiw _ => (Tint :: nil, Tint)
+  | OEoriw _ => (Tint :: nil, Tint)
+  | OEseql _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsnel _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsequl _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsneul _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltl _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltul _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltil _ => (Tlong :: nil, Tint)
+  | OEsltiul _ => (Tlong :: nil, Tint)
+  | OEandil _ => (Tlong :: nil, Tlong)
+  | OEoril _ => (Tlong :: nil, Tlong)
+  | OExoril _ => (Tlong :: nil, Tlong)
+  | OEluil _ => (nil, Tlong)
+  | OEaddil None _ => (Tlong :: nil, Tlong)
+  | OEaddil (Some _) _ => (nil, Tlong)
+  | OEloadli _ => (nil, Tlong)
+  | OEmayundef mu => type_of_mayundef mu
+  | OEfeqd => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfltd => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfled => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfeqs => (Tsingle :: Tsingle :: nil, Tint)
+  | OEflts => (Tsingle :: Tsingle :: nil, Tint)
+  | OEfles => (Tsingle :: Tsingle :: nil, Tint)
+  | Obits_of_single => (Tsingle :: nil, Tint)
+  | Obits_of_float => (Tfloat :: nil, Tlong)
+  | Osingle_of_bits => (Tint :: nil, Tsingle)
+  | Ofloat_of_bits => (Tlong :: nil, Tfloat)
+  | Oselectl => (Tint :: Tlong :: Tlong :: nil, Tlong)
   end.
 
 Definition type_of_addressing (addr: addressing) : list typ :=
@@ -504,6 +752,14 @@ Proof.
   intros. unfold Val.has_type, Val.addl. destruct Archi.ptr64, v1, v2; auto.
 Qed.
 
+Remark type_mayundef:
+  forall mu v1 v2, Val.has_type (eval_may_undef mu v1 v2) (snd (type_of_mayundef mu)).
+Proof.
+  intros. unfold eval_may_undef.
+  destruct mu eqn:EQMU, v1, v2; simpl; auto.
+  all: destruct Int.ltu; simpl; auto.
+Qed.
+
 Lemma type_of_operation_sound:
   forall op vl sp v m,
   op <> Omove ->
@@ -513,7 +769,7 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   intros.
   destruct op; simpl; simpl in H0; FuncInv; subst; simpl.
   (* move *)
-  - congruence.
+  - simpl in H; congruence.
   (* intconst, longconst, floatconst, singleconst *)
   - exact I.
   - exact I.
@@ -539,15 +795,17 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* div, divu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
   (* mod, modu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
   (* and, andimm *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -567,7 +825,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
   (* shrx *)
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
   (* makelong, lowlong, highlong *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -588,15 +847,19 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1...
   - destruct v0; destruct v1...
   (* divl, divlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* modl, modlu *)
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  - destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero); inv H2...
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
   (* andl, andlimm *)
   - destruct v0; destruct v1...
   - destruct v0...
@@ -616,7 +879,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
   - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
   (* shrxl *)
-  - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
   (* negf, absf *)
   - destruct v0...
   - destruct v0...
@@ -639,50 +903,151 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   - destruct v0...
   - destruct v0...
   (* intoffloat, intuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_intu f); cbn; trivial.
   (* floatofint, floatofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* intofsingle, intuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_intu f); cbn; trivial.
   (* singleofint, singleofintu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longoffloat, longuoffloat *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_longu f); cbn; trivial.
   (* floatoflong, floatoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* longofsingle, longuofsingle *)
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_longu f); cbn; trivial.
   (* singleoflong, singleoflongu *)
-  - destruct v0; simpl in H0; inv H0...
-  - destruct v0; simpl in H0; inv H0...
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
   (* cmp *)
   - destruct (eval_condition cond vl m)... destruct b...
+  (* OEseqw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsnew *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsequw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsneuw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsltw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsltuw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsltiw *)
+  - unfold Val.cmp; destruct Val.cmp_bool...
+    all: destruct b...
+  (* OEsltiuw *)
+  - unfold Val.cmpu; destruct Val.cmpu_bool... destruct b...
+  (* OEaddiw *)
+  - destruct optR as [[]|]; simpl in *; trivial.
+  - destruct optR as [[]|]; simpl in *; trivial;
+    apply type_add.
+  (* OEandiw *)
+  - destruct v0...
+  (* OEoriw *)
+  - destruct v0...
+  (* OExoriw *)
+  - destruct v0...
+  (* OEluiw *)
+  - destruct (Int.ltu _ _); cbn; trivial.
+  (* OEseql *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsnel *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsequl *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsneul *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsltl *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsltul *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsltil *)
+  - unfold Val.cmpl; destruct Val.cmpl_bool...
+    all: destruct b...
+  (* OEsltiul *)
+  - unfold Val.cmplu; destruct Val.cmplu_bool... destruct b...
+  (* OEaddil *)
+  - destruct optR as [[]|]; simpl in *; trivial.
+  - destruct optR as [[]|]; simpl in *; trivial;
+    apply type_addl.
+  (* OEandil *)
+  - destruct v0...
+  (* OEoril *)
+  - destruct v0...
+  (* OExoril *)
+  - destruct v0...
+  (* OEluil *)
+  - simpl; trivial.
+  (* OEloadli *)
+  - trivial.
+  (* OEmayundef *)
+  - apply type_mayundef.
+  (* OEfeqd *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfltd *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfled *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfeqs *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* OEflts *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* OEfles *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* Bits_of_single, float *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* single, float of bits *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* selectl *)
+  - destruct v0; cbn; trivial.
+    destruct Int.eq; cbn.
+    apply Val.normalize_type.
+    destruct Int.eq; cbn; trivial.
+    apply Val.normalize_type.
 Qed.
 
-
+(* This should not be simplified to "false" because it breaks proofs elsewhere. *)
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivl | Odivu | Odivlu
-  | Omod | Omodl | Omodu | Omodlu
-  | Oshrximm _ | Oshrxlimm _
-  | Ointoffloat | Ointuoffloat
-  | Ointofsingle | Ointuofsingle
-  | Olongoffloat | Olonguoffloat
-  | Olongofsingle | Olonguofsingle
-  | Osingleofint | Osingleofintu
-  | Osingleoflong | Osingleoflongu
-  | Ofloatofint | Ofloatofintu
-  | Ofloatoflong | Ofloatoflongu => true
+  | Omove => false
   | _ => false
   end.
-                
 
 Definition args_of_operation op :=
   if eq_operation op Omove
@@ -696,11 +1061,14 @@ Lemma is_trapping_op_sound:
     eval_operation genv sp op vl m <> None.
 Proof.
   unfold args_of_operation.
-  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
+  destruct op eqn:E; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
   all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+  all: try destruct optR as [[]|]; simpl in H0; try discriminate.
+  all: try destruct Archi.ptr64; simpl in *; try discriminate.
+  all: try destruct mu; simpl in *; try discriminate.
 Qed.
 End SOUNDNESS.
 
@@ -744,6 +1112,22 @@ Definition negate_condition (cond: condition): condition :=
   | Cnotcompf c => Ccompf c
   | Ccompfs c => Cnotcompfs c
   | Cnotcompfs c => Ccompfs c
+  | CEbeqw optR => CEbnew optR
+  | CEbnew optR => CEbeqw optR
+  | CEbequw optR => CEbneuw optR
+  | CEbneuw optR => CEbequw optR
+  | CEbltw optR => CEbgew optR
+  | CEbltuw optR => CEbgeuw optR
+  | CEbgew optR => CEbltw optR
+  | CEbgeuw optR => CEbltuw optR
+  | CEbeql optR => CEbnel optR
+  | CEbnel optR => CEbeql optR
+  | CEbequl optR => CEbneul optR
+  | CEbneul optR => CEbequl optR
+  | CEbltl optR => CEbgel optR
+  | CEbltul optR => CEbgeul optR
+  | CEbgel optR => CEbltl optR
+  | CEbgeul optR => CEbltul optR
   end.
 
 Lemma eval_negate_condition:
@@ -763,6 +1147,39 @@ Proof.
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
   repeat (destruct vl; auto).
   repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
+
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
 Qed.
 
 (** Shifting stack-relative references.  This is used in [Stacking]. *)
@@ -788,7 +1205,8 @@ Qed.
 Lemma type_shift_stack_operation:
   forall delta op, type_of_operation (shift_stack_operation delta op) = type_of_operation op.
 Proof.
-  intros. destruct op; auto.
+  intros. destruct op; auto;
+  try destruct optR as [[]|]; simpl; auto.
 Qed.
 
 Lemma eval_shift_stack_addressing:
@@ -805,7 +1223,7 @@ Lemma eval_shift_stack_operation:
   eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m =
   eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m.
 Proof.
-  intros. destruct op; simpl; auto. destruct vl; auto.
+  intros. destruct op eqn:E; simpl; auto; destruct vl; auto.
   rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
 Qed.
 
@@ -853,23 +1271,87 @@ Definition is_trivial_op (op: operation) : bool :=
 
 (** Operations that depend on the memory state. *)
 
+Definition cond_depends_on_memory (cond : condition) : bool :=
+  match cond with
+  | Ccompu _ => negb Archi.ptr64
+  | Ccompuimm _ _ => negb Archi.ptr64
+  | Ccomplu _ => Archi.ptr64
+  | Ccompluimm _ _ => Archi.ptr64
+  | CEbequw _ => negb Archi.ptr64
+  | CEbneuw _ => negb Archi.ptr64
+  | CEbltuw _ => negb Archi.ptr64
+  | CEbgeuw _ => negb Archi.ptr64
+  | CEbequl _ => Archi.ptr64
+  | CEbneul _ => Archi.ptr64
+  | CEbltul _ => Archi.ptr64
+  | CEbgeul _ => Archi.ptr64
+  | _ => false
+  end.
+
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp (Ccompu _) => negb Archi.ptr64
-  | Ocmp (Ccompuimm _ _) => negb Archi.ptr64
-  | Ocmp (Ccomplu _) => Archi.ptr64
-  | Ocmp (Ccompluimm _ _) => Archi.ptr64
+  | Ocmp cmp => cond_depends_on_memory cmp
+  | OEsequw _ => negb Archi.ptr64
+  | OEsneuw _ => negb Archi.ptr64
+  | OEsltiuw _ => negb Archi.ptr64
+  | OEsltuw _ => negb Archi.ptr64
+  | OEsequl _ => Archi.ptr64
+  | OEsneul _ => Archi.ptr64
+  | OEsltul _ => Archi.ptr64
+  | OEsltiul _ => Archi.ptr64
   | _ => false
   end.
 
+Lemma cond_depends_on_memory_correct:
+  forall cond args m1 m2,
+  cond_depends_on_memory cond = false ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2.
+  destruct cond; cbn; try congruence.
+  all: unfold Val.cmpu_bool, Val.cmplu_bool.
+  all: destruct Archi.ptr64; cbn; intro SF; try discriminate.
+  all: reflexivity.
+Qed.
+
 Lemma op_depends_on_memory_correct:
   forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
   op_depends_on_memory op = false ->
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
   intros until m2. destruct op; simpl; try congruence.
-  destruct cond; simpl; intros SF; auto; rewrite ? negb_false_iff in SF;
-  unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+  intro DEPEND.
+  f_equal. f_equal. apply cond_depends_on_memory_correct; trivial.
+  all: intros; repeat (destruct args; auto);
+       unfold Val.cmpu, Val.cmpu_bool, Val.cmplu, Val.cmplu_bool;
+       try destruct optR as [[]|]; simpl;
+       destruct v; try destruct v0; simpl; auto;
+       try apply negb_false_iff in H; try rewrite H; auto.
+Qed.
+
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+       try destruct optR as [[]|]; simpl;
+       try destruct v, v0; try rewrite !MEM; auto;
+       try erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+ 
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op; simpl; try congruence.
+  intro MEM; erewrite cond_valid_pointer_eq; eauto.
+  all: intros MEM; repeat (destruct args; simpl; try congruence);
+       try destruct optR as [[]|]; simpl; try destruct v, v0; try rewrite !MEM; auto;
+       unfold Val.cmpu, Val.cmplu;
+       erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
 Qed.
 
 (** Global variables mentioned in an operation or addressing mode *)
@@ -976,6 +1458,90 @@ Ltac InvInject :=
   | _ => idtac
   end.
 
+Lemma eval_cmpu_bool_inj': forall b c v v' v0 v0',
+  Val.inject f v v' ->
+  Val.inject f v0 v0' ->
+  Val.cmpu_bool (Mem.valid_pointer m1) c v v0 = Some b ->
+  Val.cmpu_bool (Mem.valid_pointer m2) c v' v0' = Some b.
+Proof.
+  intros.
+  eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+Qed.
+
+Lemma eval_cmpu_bool_inj: forall c v v' v0 v'0,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.cmpu (Mem.valid_pointer m1) c v v0)
+  (Val.cmpu (Mem.valid_pointer m2) c v' v'0).
+Proof.
+  intros until v'0. intros HV1 HV2.
+  unfold Val.cmpu; 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto.
+  exploit eval_cmpu_bool_inj'. eapply HV1. eapply HV2. eapply Heqo.
+  intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmpu_bool_inj_opt: forall c v v' v0 v'0 optR,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m1) c) v v0 zero32)
+  (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m2) c) v' v'0 zero32).
+Proof.
+  intros until optR. intros HV1 HV2.
+  destruct optR as [[]|]; simpl; unfold zero32, Val.cmpu; 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto;
+  assert (HVI: Val.inject f (Vint Int.zero) (Vint Int.zero)) by apply Val.inject_int.
+  + exploit eval_cmpu_bool_inj'. eapply HVI. eapply HV1. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmpu_bool_inj'. eapply HV1. eapply HVI. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmpu_bool_inj'. eapply HV1. instantiate (1:=v'0).
+    eauto. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmplu_bool_inj': forall b c v v' v0 v0',
+  Val.inject f v v' ->
+  Val.inject f v0 v0' ->
+  Val.cmplu_bool (Mem.valid_pointer m1) c v v0 = Some b ->
+  Val.cmplu_bool (Mem.valid_pointer m2) c v' v0' = Some b.
+Proof.
+  intros.
+  eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+Qed.
+
+Lemma eval_cmplu_bool_inj: forall c v v' v0 v'0,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.maketotal (Val.cmplu (Mem.valid_pointer m1) c v v0))
+  (Val.maketotal (Val.cmplu (Mem.valid_pointer m2) c v' v'0)).
+Proof.
+  intros until v'0. intros HV1 HV2.
+  unfold Val.cmplu; 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto.
+  exploit eval_cmplu_bool_inj'. eapply HV1. eapply HV2. eapply Heqo.
+  intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmplu_bool_inj_opt: forall c v v' v0 v'0 optR,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m1) c) v v0 zero64))
+  (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m2) c) v' v'0 zero64)).
+Proof.
+  intros until optR. intros HV1 HV2.
+  destruct optR as [[]|]; simpl; unfold zero64, Val.cmplu; 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto;
+  assert (HVI: Val.inject f (Vlong Int64.zero) (Vlong Int64.zero)) by apply Val.inject_long.
+  + exploit eval_cmplu_bool_inj'. eapply HVI. eapply HV1. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmplu_bool_inj'. eapply HV1. eapply HVI. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmplu_bool_inj'. eapply HV1. instantiate (1:=v'0).
+    eauto. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
 Lemma eval_condition_inj:
   forall cond vl1 vl2 b,
   Val.inject_list f vl1 vl2 ->
@@ -983,6 +1549,9 @@ Lemma eval_condition_inj:
   eval_condition cond vl2 m2 = Some b.
 Proof.
   intros. destruct cond; simpl in H0; FuncInv; InvInject; simpl; auto.
+  all: assert (HVI32: Val.inject f (Vint Int.zero) (Vint Int.zero)) by apply Val.inject_int;
+       assert (HVI64: Val.inject f (Vlong Int64.zero) (Vlong Int64.zero)) by apply Val.inject_long;
+       try unfold zero32, zero64.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
 - inv H3; simpl in H0; inv H0; auto.
@@ -995,6 +1564,38 @@ Proof.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
 Qed.
 
 Ltac TrivialExists :=
@@ -1033,19 +1634,29 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* div, divu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int.eq i0 Int.zero
-              || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
   (* mod, modu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int.eq i0 Int.zero
-                     || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
   (* and, andimm *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1065,8 +1676,10 @@ Proof.
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 31)); cbn.
+    apply Val.inject_int.
+    apply Val.val_inject_undef.
   (* makelong, highlong, lowlong *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1085,19 +1698,31 @@ Proof.
   - inv H4; inv H2; simpl; auto.
   - inv H4; inv H2; simpl; auto.
   (* divl, divlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int64.eq i0 Int64.zero
-              || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
   (* modl, modlu *)
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
     destruct (Int64.eq i0 Int64.zero
-                     || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
-    TrivialExists.
-  - inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
   (* andl, andlimm *)
   - inv H4; inv H2; simpl; auto.
   - inv H4; simpl; auto.
@@ -1117,8 +1742,10 @@ Proof.
   - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
   - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
   (* shrx *)
-  - inv H4; simpl in H1; try discriminate. simpl.
-    destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 63)); cbn.
+    apply Val.inject_long.
+    apply Val.val_inject_undef.
   (* negf, absf *)
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
@@ -1141,42 +1768,145 @@ Proof.
   - inv H4; simpl; auto.
   - inv H4; simpl; auto.
   (* intoffloat, intuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_intu f0); cbn; auto.
   (* floatofint, floatofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* intofsingle, intuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
-    exists (Vint i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_intu f0); cbn; auto.
   (* singleofint, singleofintu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longoffloat, longuoffloat *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_longu f0); cbn; auto.
   (* floatoflong, floatoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* longofsingle, longuofsingle *)
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
-  - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
-    exists (Vlong i); auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_longu f0); cbn; auto.
   (* singleoflong, singleoflongu *)
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
   (* cmp *)
   - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
     exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
     destruct b; simpl; constructor.
     simpl; constructor.
+  (* OEseqw *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsnew *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsequw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsneuw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsltw *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.lt _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsltuw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsltiw *)
+  - inv H4; simpl; cbn; auto; try destruct (Int.lt _ _); apply Val.inject_int.
+  (* OEsltiuw *)
+  - apply eval_cmpu_bool_inj; auto.
+  (* OEaddiw *)
+  - destruct optR as [[]|]; auto; simpl.
+    rewrite Int.add_zero_l; auto.
+    rewrite Int.add_commut, Int.add_zero_l; auto.
+  - destruct optR as [[]|]; auto; simpl;
+    eapply Val.add_inject; auto.
+  (* OEandiw *)
+  - inv H4; cbn; auto.
+  (* OEoriw *)
+  - inv H4; cbn; auto.
+  (* OExoriw *)
+  - inv H4; simpl; auto.
+  (* OEluiw *)
+  - destruct (Int.ltu _ _); auto.
+  (* OEseql *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsnel *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsequl *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsneul *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsltl *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.lt _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsltul *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsltil *)
+  - inv H4; simpl; cbn; auto; try destruct (Int64.lt _ _); apply Val.inject_int.
+  (* OEsltiul *)
+  - apply eval_cmplu_bool_inj; auto.
+  (* OEaddil *)
+  - destruct optR as [[]|]; auto; simpl.
+    rewrite Int64.add_zero_l; auto.
+    rewrite Int64.add_commut, Int64.add_zero_l; auto.
+  - destruct optR as [[]|]; auto; simpl;
+    eapply Val.addl_inject; auto.
+  (* OEandil *)
+  - inv H4; cbn; auto.
+  (* OEoril *)
+  - inv H4; cbn; auto.
+  (* OExoril *)
+  - inv H4; simpl; auto.
+  (* OEmayundef *)
+  - destruct mu; inv H4; inv H2; simpl; auto;
+    try destruct (Int.ltu _ _); simpl; auto.
+    all: eapply Val.inject_ptr; eauto.
+  (* OEfeqd *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfltd *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfled *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfeqs *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEflts *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfles *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* Bits_of_single, double *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* single, double of bits *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* selectl *)
+  - inv H4; trivial. cbn.
+    destruct (Int.eq i Int.one).
+    + auto using Val.normalize_inject.
+    + destruct (Int.eq i Int.zero); cbn; auto using Val.normalize_inject.
 Qed.
 
 Lemma eval_addressing_inj:
@@ -1434,4 +2164,4 @@ Definition builtin_arg_ok
   match ba with
   | (BA _ | BA_splitlong (BA _) (BA _)) => true
   | _ => builtin_arg_ok_1 ba c
-  end.  
+  end.
diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml
new file mode 100644
index 00000000..0a1d9ad4
--- /dev/null
+++ b/riscV/OpWeights.ml
@@ -0,0 +1,168 @@
+open Op
+open PrepassSchedulingOracleDeps
+
+module Rocket = struct
+  (* Attempt at modeling the Rocket core *)
+
+  let resource_bounds = [| 1 |]
+
+  let nr_non_pipelined_units = 1
+
+  (* divider *)
+
+  let latency_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> 4
+    | Onegf -> 1 (*r [rd = - r1] *)
+    | Oabsf (*r [rd = abs(r1)] *)
+    | Oaddf (*r [rd = r1 + r2] *)
+    | Osubf (*r [rd = r1 - r2] *)
+    | Omulf ->
+        6 (*r [rd = r1 * r2] *)
+    | Onegfs -> 1 (*r [rd = - r1] *)
+    | Oabsfs (*r [rd = abs(r1)] *)
+    | Oaddfs (*r [rd = r1 + r2] *)
+    | Osubfs (*r [rd = r1 - r2] *)
+    | Omulfs ->
+        4 (*r [rd = r1 * r2] *)
+    | Osingleoffloat (*r [rd] is [r1] truncated to single-precision float *)
+    | Ofloatofsingle (*r [rd] is [r1] extended to double-precision float *)
+    (*c Conversions between int and float: *)
+    | Ofloatconst _ | Osingleconst _
+    | Ointoffloat (*r [rd = signed_int_of_float64(r1)] *)
+    | Ointuoffloat (*r [rd = unsigned_int_of_float64(r1)] *)
+    | Ofloatofint (*r [rd = float64_of_signed_int(r1)] *)
+    | Ofloatofintu (*r [rd = float64_of_unsigned_int(r1)] *)
+    | Ointofsingle (*r [rd = signed_int_of_float32(r1)] *)
+    | Ointuofsingle (*r [rd = unsigned_int_of_float32(r1)] *)
+    | Osingleofint (*r [rd = float32_of_signed_int(r1)] *)
+    | Osingleofintu (*r [rd = float32_of_unsigned_int(r1)] *)
+    | Olongoffloat (*r [rd = signed_long_of_float64(r1)] *)
+    | Olonguoffloat (*r [rd = unsigned_long_of_float64(r1)] *)
+    | Ofloatoflong (*r [rd = float64_of_signed_long(r1)] *)
+    | Ofloatoflongu (*r [rd = float64_of_unsigned_long(r1)] *)
+    | Olongofsingle (*r [rd = signed_long_of_float32(r1)] *)
+    | Olonguofsingle (*r [rd = unsigned_long_of_float32(r1)] *)
+    | Osingleoflong (*r [rd = float32_of_signed_long(r1)] *)
+    | Osingleoflongu ->
+        2 (*r [rd = float32_of_unsigned_int(r1)] *)
+    | OEfeqd | OEfltd | OEfeqs | OEflts | OEfles | OEfled | Obits_of_single
+    | Obits_of_float | Osingle_of_bits | Ofloat_of_bits ->
+        2
+    | OEloadli _ -> 2
+    | Odiv | Odivu | Odivl | Odivlu -> 16
+    | Odivfs -> 35
+    | Odivf -> 50
+    | Ocmp cond -> (
+        match cond with
+        | Ccomp _ | Ccompu _ | Ccompimm _ | Ccompuimm _ | Ccompl _ | Ccomplu _
+        | Ccomplimm _ | Ccompluimm _ | CEbeqw _ | CEbnew _ | CEbequw _
+        | CEbneuw _ | CEbltw _ | CEbltuw _ | CEbgew _ | CEbgeuw _ | CEbeql _
+        | CEbnel _ | CEbequl _ | CEbneul _ | CEbltl _ | CEbltul _ | CEbgel _
+        | CEbgeul _ ->
+            1
+        | Ccompf _ | Cnotcompf _ -> 2
+        | Ccompfs _ | Cnotcompfs _ -> 2)
+    | OEmayundef _ -> 0
+    | _ -> 1
+
+  let resources_of_op (op : operation) (nargs : int) = resource_bounds
+
+  let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Odiv | Odivu -> [| 29 |]
+    | Odivfs -> [| 20 |]
+    | Odivl | Odivlu | Odivf -> [| 50 |]
+    | _ -> [| -1 |]
+
+  let resources_of_cond (cond : condition) (nargs : int) = resource_bounds
+
+  let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3
+
+  let latency_of_call _ _ = 6
+
+  let resources_of_load trap chunk addressing nargs = resource_bounds
+
+  let resources_of_store chunk addressing nargs = resource_bounds
+
+  let resources_of_call _ _ = resource_bounds
+
+  let resources_of_builtin _ = resource_bounds
+end
+
+module SweRV_EH1 = struct
+  (* Attempt at modeling SweRV EH1
+     [| issues ; LSU ; multiplier |] *)
+  let resource_bounds = [| 2; 1; 1 |]
+
+  let nr_non_pipelined_units = 1
+
+  (* divider *)
+
+  let latency_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> 3
+    | Odiv | Odivu | Odivl | Odivlu -> 16
+    | _ -> 1
+
+  let resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> [| 1; 0; 1 |]
+    | Odiv | Odivu | Odivl | Odivlu -> [| 0; 0; 0 |]
+    | _ -> [| 1; 0; 0 |]
+
+  let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Odiv | Odivu -> [| 29 |]
+    | Odivfs -> [| 20 |]
+    | Odivl | Odivlu | Odivf -> [| 50 |]
+    | _ -> [| -1 |]
+
+  let resources_of_cond (cond : condition) (nargs : int) = [| 1; 0; 0 |]
+
+  let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3
+
+  let latency_of_call _ _ = 6
+
+  let resources_of_load trap chunk addressing nargs = [| 1; 1; 0 |]
+
+  let resources_of_store chunk addressing nargs = [| 1; 1; 0 |]
+
+  let resources_of_call _ _ = resource_bounds
+
+  let resources_of_builtin _ = resource_bounds
+end
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "rocket" | "" ->
+      {
+        pipelined_resource_bounds = Rocket.resource_bounds;
+        nr_non_pipelined_units = Rocket.nr_non_pipelined_units;
+        latency_of_op = Rocket.latency_of_op;
+        resources_of_op = Rocket.resources_of_op;
+        non_pipelined_resources_of_op = Rocket.non_pipelined_resources_of_op;
+        latency_of_load = Rocket.latency_of_load;
+        resources_of_load = Rocket.resources_of_load;
+        resources_of_store = Rocket.resources_of_store;
+        resources_of_cond = Rocket.resources_of_cond;
+        latency_of_call = Rocket.latency_of_call;
+        resources_of_call = Rocket.resources_of_call;
+        resources_of_builtin = Rocket.resources_of_builtin;
+      }
+  | "SweRV_EH1" | "EH1" ->
+      {
+        pipelined_resource_bounds = SweRV_EH1.resource_bounds;
+        nr_non_pipelined_units = SweRV_EH1.nr_non_pipelined_units;
+        latency_of_op = SweRV_EH1.latency_of_op;
+        resources_of_op = SweRV_EH1.resources_of_op;
+        non_pipelined_resources_of_op = SweRV_EH1.non_pipelined_resources_of_op;
+        latency_of_load = SweRV_EH1.latency_of_load;
+        resources_of_load = SweRV_EH1.resources_of_load;
+        resources_of_store = SweRV_EH1.resources_of_store;
+        resources_of_cond = SweRV_EH1.resources_of_cond;
+        latency_of_call = SweRV_EH1.latency_of_call;
+        resources_of_call = SweRV_EH1.resources_of_call;
+        resources_of_builtin = SweRV_EH1.resources_of_builtin;
+      }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx)
diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..912e9ffa
--- /dev/null
+++ b/riscV/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracle.ml
+\ No newline at end of file
diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml
new file mode 120000
index 00000000..1e955b85
--- /dev/null
+++ b/riscV/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracleDeps.ml
+\ No newline at end of file
diff --git a/riscV/PrintOp.ml b/riscV/PrintOp.ml
index 9ec474b3..0d47192a 100644
--- a/riscV/PrintOp.ml
+++ b/riscV/PrintOp.ml
@@ -30,6 +30,21 @@ let comparison_name = function
   | Cgt -> ">"
   | Cge -> ">="
 
+let mu_name pp = function
+  | MUint -> fprintf pp "MUint"
+  | MUlong -> fprintf pp "MUlong"
+  | MUshrx i -> fprintf pp "MUshrx(%ld)" (camlint_of_coqint i)
+  | MUshrxl i -> fprintf pp "MUshrxl(%ld)" (camlint_of_coqint i)
+
+let get_optR_s c reg pp r1 r2 = function
+  | None -> fprintf pp "(%a %s %a)" reg r1 (comparison_name c) reg r2
+  | Some X0_L -> fprintf pp "(X0 %s %a)" (comparison_name c) reg r1
+  | Some X0_R -> fprintf pp "(%a %s X0)" reg r1 (comparison_name c)
+
+let get_optR_a pp = function
+  | None -> failwith "PrintOp: None in get_optR_a instruction (problem with RTL expansions?)"
+  | Some X0_L | Some X0_R -> fprintf pp "X0"
+
 let print_condition reg pp = function
   | (Ccomp c, [r1;r2]) ->
       fprintf pp "%a %ss %a" reg r1 (comparison_name c) reg r2
@@ -55,15 +70,47 @@ let print_condition reg pp = function
       fprintf pp "%a %sfs %a" reg r1 (comparison_name c) reg r2
   | (Cnotcompfs c, [r1;r2]) ->
       fprintf pp "%a not(%sfs) %a" reg r1 (comparison_name c) reg r2
+  | (CEbeqw optR, [r1;r2]) ->
+      fprintf pp "CEbeqw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbnew optR, [r1;r2]) ->
+      fprintf pp "CEbnew"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbequw optR, [r1;r2]) ->
+      fprintf pp "CEbequw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbneuw optR, [r1;r2]) ->
+      fprintf pp "CEbneuw"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbltw optR, [r1;r2]) ->
+      fprintf pp "CEbltw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbltuw optR, [r1;r2]) ->
+      fprintf pp "CEbltuw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbgew optR, [r1;r2]) ->
+      fprintf pp "CEbgew"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbgeuw optR, [r1;r2]) ->
+      fprintf pp "CEbgeuw"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbeql optR, [r1;r2]) ->
+      fprintf pp "CEbeql"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbnel optR, [r1;r2]) ->
+      fprintf pp "CEbnel"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbequl optR, [r1;r2]) ->
+      fprintf pp "CEbequl"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbneul optR, [r1;r2]) ->
+      fprintf pp "CEbneul"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbltl optR, [r1;r2]) ->
+      fprintf pp "CEbltl"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbltul optR, [r1;r2]) ->
+      fprintf pp "CEbltul"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbgel optR, [r1;r2]) ->
+      fprintf pp "CEbgel"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbgeul optR, [r1;r2]) ->
+      fprintf pp "CEbgeul"; (get_optR_s Cge reg pp r1 r2 optR)
   | _ ->
       fprintf pp "<bad condition>"
 
 let print_operation reg pp = function
   | Omove, [r1] -> reg pp r1
-  | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
-  | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
-  | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n)
-  | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n)
+  | Ointconst n, [] -> fprintf pp "Ointconst(%ld)" (camlint_of_coqint n)
+  | Olongconst n, [] -> fprintf pp "Olongconst(%LdL)" (camlint64_of_coqint n)
+  | Ofloatconst n, [] -> fprintf pp "Ofloatconst(%F)" (camlfloat_of_coqfloat n)
+  | Osingleconst n, [] -> fprintf pp "Osingleconst(%Ff)" (camlfloat_of_coqfloat32 n)
   | Oaddrsymbol(id, ofs), [] ->
       fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
   | Oaddrstack ofs, [] ->
@@ -156,6 +203,47 @@ let print_operation reg pp = function
   | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
   | Osingleoflongu, [r1] -> fprintf pp "singleoflongu(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
+  | OEseqw optR, [r1;r2] -> fprintf pp "OEseqw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsnew optR, [r1;r2] -> fprintf pp "OEsnew"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsequw optR, [r1;r2] -> fprintf pp "OEsequw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsneuw optR, [r1;r2] -> fprintf pp "OEsneuw"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsltw optR, [r1;r2] -> fprintf pp "OEsltw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltuw optR, [r1;r2] -> fprintf pp "OEsltuw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltiw n, [r1] -> fprintf pp "OEsltiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEsltiuw n, [r1] -> fprintf pp "OEsltiuw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OExoriw n, [r1] -> fprintf pp "OExoriw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEluiw n, _ -> fprintf pp "OEluiw(%ld)" (camlint_of_coqint n)
+  | OEaddiw (optR, n), [] -> fprintf pp "OEaddiw(%a,%ld)" get_optR_a optR (camlint_of_coqint n)
+  | OEaddiw (optR, n), [r1] -> fprintf pp "OEaddiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEandiw n, [r1] -> fprintf pp "OEandiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEoriw n, [r1] -> fprintf pp "OEoriw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEseql optR, [r1;r2] -> fprintf pp "OEseql"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsnel optR, [r1;r2] -> fprintf pp "OEsnel"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsequl optR, [r1;r2] -> fprintf pp "OEsequl"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsneul optR, [r1;r2] -> fprintf pp "OEsneul"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsltl optR, [r1;r2] -> fprintf pp "OEsltl"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltul optR, [r1;r2] -> fprintf pp "OEsltul"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltil n, [r1] -> fprintf pp "OEsltil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEsltiul n, [r1] -> fprintf pp "OEsltiul(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OExoril n, [r1] -> fprintf pp "OExoril(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEluil n, _ -> fprintf pp "OEluil(%ld)" (camlint_of_coqint n)
+  | OEaddil (optR, n), [] -> fprintf pp "OEaddil(%a,%ld)" get_optR_a optR (camlint_of_coqint n)
+  | OEaddil (optR, n), [r1] -> fprintf pp "OEaddil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEandil n, [r1] -> fprintf pp "OEandil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEoril n, [r1] -> fprintf pp "OEoril(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEloadli n, _ -> fprintf pp "OEloadli(%ld)" (camlint_of_coqint n)
+  | OEmayundef mu, [r1;r2] -> fprintf pp "OEmayundef (%a,%a,%a)" mu_name mu reg r1 reg r2
+  | OEfeqd, [r1;r2] -> fprintf pp "OEfeqd(%a,%s,%a)" reg r1 (comparison_name Ceq) reg r2
+  | OEfltd, [r1;r2] -> fprintf pp "OEfltd(%a,%s,%a)" reg r1 (comparison_name Clt) reg r2
+  | OEfled, [r1;r2] -> fprintf pp "OEfled(%a,%s,%a)" reg r1 (comparison_name Cle) reg r2
+  | OEfeqs, [r1;r2] -> fprintf pp "OEfeqs(%a,%s,%a)" reg r1 (comparison_name Ceq) reg r2
+  | OEflts, [r1;r2] -> fprintf pp "OEflts(%a,%s,%a)" reg r1 (comparison_name Clt) reg r2
+  | OEfles, [r1;r2] -> fprintf pp "OEfles(%a,%s,%a)" reg r1 (comparison_name Cle) reg r2
+  | Obits_of_single, [r1] -> fprintf pp "bits_of_single(%a)" reg r1
+  | Obits_of_float, [r1] -> fprintf pp "bits_of_float(%a)" reg r1
+  | Osingle_of_bits, [r1] -> fprintf pp "single_of_bits(%a)" reg r1
+  | Ofloat_of_bits, [r1] -> fprintf pp "float_of_bits(%a)" reg r1
+  | Oselectl, [rb;rt;rf] -> fprintf pp "selectl(b:%a, t:%a, f:%a)" reg rb reg rt reg rf
   | _ -> fprintf pp "<bad operator>"
 
 let print_addressing reg pp = function
diff --git a/riscV/RTLpathSE_simplify.v b/riscV/RTLpathSE_simplify.v
new file mode 100644
index 00000000..7aca1772
--- /dev/null
+++ b/riscV/RTLpathSE_simplify.v
@@ -0,0 +1,2102 @@
+Require Import Coqlib Floats Values Memory.
+Require Import Integers.
+Require Import Op Registers.
+Require Import RTLpathSE_theory.
+Require Import RTLpathSE_simu_specs.
+Require Import Asmgen Asmgenproof1.
+Require Import Lia.
+
+(** Useful functions for conditions/branches expansion *)
+
+Definition is_inv_cmp_int (cmp: comparison) : bool :=
+  match cmp with | Cle | Cgt => true | _ => false end.
+
+Definition is_inv_cmp_float (cmp: comparison) : bool :=
+  match cmp with | Cge | Cgt => true | _ => false end.
+
+Definition make_optR (is_x0 is_inv: bool) : option oreg :=
+  if is_x0 then
+    (if is_inv then Some (X0_L)
+    else Some (X0_R))
+  else None.
+
+(** Functions to manage lists of "fake" values *)
+
+Definition make_lhsv_cmp (is_inv: bool) (hv1 hv2: hsval) : list_hsval :=
+  let (hvfirst, hvsec) := if is_inv then (hv1, hv2) else (hv2, hv1) in
+  let lhsv := fScons hvfirst fSnil in
+  fScons hvsec lhsv.
+
+Definition make_lhsv_single (hvs: hsval) : list_hsval :=
+  fScons hvs fSnil.
+
+(** * Expansion functions *)
+
+(** ** Immediate loads *)
+
+Definition load_hilo32 (hi lo: int) :=
+  if Int.eq lo Int.zero then
+    fSop (OEluiw hi) fSnil
+  else
+    let hvs := fSop (OEluiw hi) fSnil in
+    let hl := make_lhsv_single hvs in
+    fSop (OEaddiw None lo) hl.
+
+Definition load_hilo64 (hi lo: int64) :=
+  if Int64.eq lo Int64.zero then
+    fSop (OEluil hi) fSnil
+  else
+    let hvs := fSop (OEluil hi) fSnil in
+    let hl := make_lhsv_single hvs in
+    fSop (OEaddil None lo) hl.
+
+Definition loadimm32 (n: int) :=
+  match make_immed32 n with
+  | Imm32_single imm =>
+      fSop (OEaddiw (Some X0_R) imm) fSnil
+  | Imm32_pair hi lo => load_hilo32 hi lo
+  end.
+
+Definition loadimm64 (n: int64) :=
+  match make_immed64 n with
+  | Imm64_single imm =>
+      fSop (OEaddil (Some X0_R) imm) fSnil
+  | Imm64_pair hi lo => load_hilo64 hi lo
+  | Imm64_large imm => fSop (OEloadli imm) fSnil
+  end.
+
+Definition opimm32 (hv1: hsval) (n: int) (op: operation) (opimm: int -> operation) :=
+  match make_immed32 n with
+  | Imm32_single imm =>
+      let hl := make_lhsv_single hv1 in
+      fSop (opimm imm) hl
+  | Imm32_pair hi lo =>
+      let hvs := load_hilo32 hi lo in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  end.
+
+Definition opimm64 (hv1: hsval) (n: int64) (op: operation) (opimm: int64 -> operation) :=
+  match make_immed64 n with
+  | Imm64_single imm =>
+      let hl := make_lhsv_single hv1 in
+      fSop (opimm imm) hl
+  | Imm64_pair hi lo =>
+      let hvs := load_hilo64 hi lo in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  | Imm64_large imm =>
+      let hvs := fSop (OEloadli imm) fSnil in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  end.
+
+Definition addimm32 (hv1: hsval) (n: int) (or: option oreg) := opimm32 hv1 n Oadd (OEaddiw or).
+Definition andimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oand OEandiw.
+Definition orimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oor OEoriw.
+Definition xorimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oxor OExoriw.
+Definition sltimm32 (hv1: hsval) (n: int) := opimm32 hv1 n (OEsltw None) OEsltiw.
+Definition sltuimm32 (hv1: hsval) (n: int) := opimm32 hv1 n (OEsltuw None) OEsltiuw.
+Definition addimm64 (hv1: hsval) (n: int64) (or: option oreg) := opimm64 hv1 n Oaddl (OEaddil or).
+Definition andimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oandl OEandil.
+Definition orimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oorl OEoril.
+Definition xorimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oxorl OExoril.
+Definition sltimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n (OEsltl None) OEsltil.
+Definition sltuimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n (OEsltul None) OEsltiul.
+
+(** ** Comparisons intructions *)
+
+Definition cond_int32s (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEseqw optR) lhsv
+  | Cne => fSop (OEsnew optR) lhsv
+  | Clt | Cgt => fSop (OEsltw optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltw optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int32u (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEsequw optR) lhsv
+  | Cne => fSop (OEsneuw optR) lhsv
+  | Clt | Cgt => fSop (OEsltuw optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltuw optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int64s (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEseql optR) lhsv
+  | Cne => fSop (OEsnel optR) lhsv
+  | Clt | Cgt => fSop (OEsltl optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltl optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int64u (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEsequl optR) lhsv
+  | Cne => fSop (OEsneul optR) lhsv
+  | Clt | Cgt => fSop (OEsltul optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltul optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition expanse_condimm_int32s (cmp: comparison) (hv1: hsval) (n: int) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int.eq n Int.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int32s cmp hl optR
+  else
+    match cmp with
+    | Ceq | Cne =>
+        let optR := make_optR true is_inv in
+        let hvs := xorimm32 hv1 n in
+        let hl := make_lhsv_cmp false hvs hvs in
+        cond_int32s cmp hl optR
+    | Clt => sltimm32 hv1 n
+    | Cle =>
+        if Int.eq n (Int.repr Int.max_signed) then
+          let hvs := loadimm32 Int.one in
+          let hl := make_lhsv_cmp false hv1 hvs in
+          fSop (OEmayundef MUint) hl
+        else sltimm32 hv1 (Int.add n Int.one)
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm32 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int32s cmp hl optR
+    end.
+
+Definition expanse_condimm_int32u (cmp: comparison) (hv1: hsval) (n: int) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int.eq n Int.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int32u cmp hl optR
+  else
+    match cmp with
+    | Clt => sltuimm32 hv1 n
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm32 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int32u cmp hl optR
+    end.
+
+Definition expanse_condimm_int64s (cmp: comparison) (hv1: hsval) (n: int64) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int64.eq n Int64.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int64s cmp hl optR
+  else
+    match cmp with
+    | Ceq | Cne =>
+        let optR := make_optR true is_inv in
+        let hvs := xorimm64 hv1 n in
+        let hl := make_lhsv_cmp false hvs hvs in
+        cond_int64s cmp hl optR
+    | Clt => sltimm64 hv1 n
+    | Cle =>
+        if Int64.eq n (Int64.repr Int64.max_signed) then
+          let hvs := loadimm32 Int.one in
+          let hl := make_lhsv_cmp false hv1 hvs in
+          fSop (OEmayundef MUlong) hl
+        else sltimm64 hv1 (Int64.add n Int64.one)
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm64 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int64s cmp hl optR
+    end.
+
+Definition expanse_condimm_int64u (cmp: comparison) (hv1: hsval) (n: int64) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int64.eq n Int64.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int64u cmp hl optR
+  else
+    match cmp with
+    | Clt => sltuimm64 hv1 n
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm64 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int64u cmp hl optR
+    end.
+
+Definition cond_float (cmp: comparison) (lhsv: list_hsval) :=
+  match cmp with
+  | Ceq | Cne => fSop OEfeqd lhsv
+  | Clt | Cgt => fSop OEfltd lhsv
+  | Cle | Cge => fSop OEfled lhsv
+  end.
+
+Definition cond_single (cmp: comparison) (lhsv: list_hsval) :=
+  match cmp with
+  | Ceq | Cne => fSop OEfeqs lhsv
+  | Clt | Cgt => fSop OEflts lhsv
+  | Cle | Cge => fSop OEfles lhsv
+  end.
+
+Definition is_normal_cmp cmp :=
+  match cmp with | Cne => false | _ => true end.
+
+Definition expanse_cond_fp (cnot: bool) fn_cond cmp (lhsv: list_hsval) :=
+  let normal := is_normal_cmp cmp in
+  let normal' := if cnot then negb normal else normal in
+  let hvs := fn_cond cmp lhsv in
+  let hl := make_lhsv_single hvs in
+  if normal' then hvs else fSop (OExoriw Int.one) hl.
+
+(** ** Branches instructions *)
+
+Definition transl_cbranch_int32s (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbeqw optR
+  | Cne => CEbnew optR
+  | Clt => CEbltw optR
+  | Cle => CEbgew optR
+  | Cgt => CEbltw optR
+  | Cge => CEbgew optR
+  end.
+
+Definition transl_cbranch_int32u (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbequw optR
+  | Cne => CEbneuw optR
+  | Clt => CEbltuw optR
+  | Cle => CEbgeuw optR
+  | Cgt => CEbltuw optR
+  | Cge => CEbgeuw optR
+  end.
+
+Definition transl_cbranch_int64s (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbeql optR
+  | Cne => CEbnel optR
+  | Clt => CEbltl optR
+  | Cle => CEbgel optR
+  | Cgt => CEbltl optR
+  | Cge => CEbgel optR
+  end.
+
+Definition transl_cbranch_int64u (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbequl optR
+  | Cne => CEbneul optR
+  | Clt => CEbltul optR
+  | Cle => CEbgeul optR
+  | Cgt => CEbltul optR
+  | Cge => CEbgeul optR
+  end.
+
+Definition expanse_cbranch_fp (cnot: bool) fn_cond cmp (lhsv: list_hsval) : (condition * list_hsval) :=
+  let normal := is_normal_cmp cmp in
+  let normal' := if cnot then negb normal else normal in
+  let hvs := fn_cond cmp lhsv in
+  let hl := make_lhsv_cmp false hvs hvs in
+  if normal' then ((CEbnew (Some X0_R)), hl) else ((CEbeqw (Some X0_R)), hl).
+
+(** * Target simplifications using "fake" values *)
+
+Definition target_op_simplify (op: operation) (lr: list reg) (hst: hsistate_local): option hsval :=
+  match op, lr with
+  | Ocmp (Ccomp c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int32s c lhsv optR)
+  | Ocmp (Ccompu c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int32u c lhsv optR)
+  | Ocmp (Ccompimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int32s c hv1 imm)
+  | Ocmp (Ccompuimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int32u c hv1 imm)
+  | Ocmp (Ccompl c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int64s c lhsv optR)
+  | Ocmp (Ccomplu c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int64u c lhsv optR)
+  | Ocmp (Ccomplimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int64s c hv1 imm)
+  | Ocmp (Ccompluimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int64u c hv1 imm)
+  | Ocmp (Ccompf c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp false cond_float c lhsv)
+  | Ocmp (Cnotcompf c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp true cond_float c lhsv)
+  | Ocmp (Ccompfs c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp false cond_single c lhsv)
+  | Ocmp (Cnotcompfs c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp true cond_single c lhsv)
+  | Ofloatconst f, nil =>
+      let hvs := loadimm64 (Float.to_bits f) in
+      let hl := make_lhsv_single hvs in
+      Some (fSop (Ofloat_of_bits) hl)
+  | Osingleconst f, nil =>
+      let hvs := loadimm32 (Float32.to_bits f) in
+      let hl := make_lhsv_single hvs in
+      Some (fSop (Osingle_of_bits) hl)
+  | Ointconst n, nil =>
+      Some (loadimm32 n)
+  | Olongconst n, nil =>
+      Some (loadimm64 n)
+  | Oaddimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (addimm32 hv1 n None)
+  | Oaddlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (addimm64 hv1 n None)
+  | Oandimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (andimm32 hv1 n)
+  | Oandlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (andimm64 hv1 n)
+  | Oorimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (orimm32 hv1 n)
+  | Oorlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (orimm64 hv1 n)
+  | Oxorimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (xorimm32 hv1 n)
+  | Oxorlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (xorimm64 hv1 n)
+  | Ocast8signed, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let hvs := fSop (Oshlimm (Int.repr 24)) hl in
+      let hl' := make_lhsv_single hvs in
+      Some (fSop (Oshrimm (Int.repr 24)) hl')
+  | Ocast16signed, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let hvs := fSop (Oshlimm (Int.repr 16)) hl in
+      let hl' := make_lhsv_single hvs in
+      Some (fSop (Oshrimm (Int.repr 16)) hl')
+  | Ocast32unsigned, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let cast32s_s := fSop Ocast32signed hl in
+      let cast32s_l := make_lhsv_single cast32s_s in
+      let sllil_s := fSop (Oshllimm (Int.repr 32)) cast32s_l in
+      let sllil_l := make_lhsv_single sllil_s in
+      Some (fSop (Oshrluimm (Int.repr 32)) sllil_l)
+  | Oshrximm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      if Int.eq n Int.zero then
+        let lhl := make_lhsv_cmp false hv1 hv1 in
+        Some (fSop (OEmayundef (MUshrx n)) lhl)
+      else
+        if Int.eq n Int.one then
+          let srliw_s := fSop (Oshruimm (Int.repr 31)) hl in
+          let srliw_l := make_lhsv_cmp false hv1 srliw_s in
+          let addw_s := fSop Oadd srliw_l in
+          let addw_l := make_lhsv_single addw_s in
+          let sraiw_s := fSop (Oshrimm Int.one) addw_l in
+          let sraiw_l := make_lhsv_cmp false sraiw_s sraiw_s in
+          Some (fSop (OEmayundef (MUshrx n)) sraiw_l)
+        else 
+          let sraiw_s := fSop (Oshrimm (Int.repr 31)) hl in
+          let sraiw_l := make_lhsv_single sraiw_s in
+          let srliw_s := fSop (Oshruimm (Int.sub Int.iwordsize n)) sraiw_l in
+          let srliw_l := make_lhsv_cmp false hv1 srliw_s in
+          let addw_s := fSop Oadd srliw_l in
+          let addw_l := make_lhsv_single addw_s in
+          let sraiw_s' := fSop (Oshrimm n) addw_l in
+          let sraiw_l' := make_lhsv_cmp false sraiw_s' sraiw_s' in
+          Some (fSop (OEmayundef (MUshrx n)) sraiw_l')
+  | Oshrxlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      if Int.eq n Int.zero then
+        let lhl := make_lhsv_cmp false hv1 hv1 in
+        Some (fSop (OEmayundef (MUshrxl n)) lhl)
+      else
+        if Int.eq n Int.one then
+          let srlil_s := fSop (Oshrluimm (Int.repr 63)) hl in
+          let srlil_l := make_lhsv_cmp false hv1 srlil_s in
+          let addl_s := fSop Oaddl srlil_l in
+          let addl_l := make_lhsv_single addl_s in
+          let srail_s := fSop (Oshrlimm Int.one) addl_l in
+          let srail_l := make_lhsv_cmp false srail_s srail_s in
+          Some (fSop (OEmayundef (MUshrxl n)) srail_l)
+        else
+          let srail_s := fSop (Oshrlimm (Int.repr 63)) hl in
+          let srail_l := make_lhsv_single srail_s in
+          let srlil_s := fSop (Oshrluimm (Int.sub Int64.iwordsize' n)) srail_l in
+          let srlil_l := make_lhsv_cmp false hv1 srlil_s in
+          let addl_s := fSop Oaddl srlil_l in
+          let addl_l := make_lhsv_single addl_s in
+          let srail_s' := fSop (Oshrlimm n) addl_l in
+          let srail_l' := make_lhsv_cmp false srail_s' srail_s' in
+          Some (fSop (OEmayundef (MUshrxl n)) srail_l')
+  | _, _ => None
+  end.
+
+Definition target_cbranch_expanse (prev: hsistate_local) (cond: condition) (args: list reg) : option (condition * list_hsval) :=
+  match cond, args with
+  | (Ccomp c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int32s c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccompu c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int32u c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccompimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int.eq n Int.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int32s c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm32 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int32s c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompuimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int.eq n Int.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int32u c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm32 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int32u c (make_optR false is_inv) in
+         Some (cond, lhsv))
+  | (Ccompl c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int64s c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccomplu c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int64u c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccomplimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int64.eq n Int64.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int64s c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm64 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int64s c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompluimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int64.eq n Int64.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int64u c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm64 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int64u c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompf c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp false cond_float c lhsv)
+  | (Cnotcompf c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp true cond_float c lhsv)
+  | (Ccompfs c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp false cond_single c lhsv)
+  | (Cnotcompfs c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp true cond_single c lhsv)
+  | _, _ => None
+   end.
+
+(** * Auxiliary lemmas on comparisons *)
+
+(** ** Signed ints *)
+
+Lemma xor_neg_ltle_cmp: forall v1 v2,
+  Some (Val.xor (Val.cmp Clt v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmp_bool Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  unfold Val.cmp; simpl;
+  try rewrite Int.eq_sym;
+  try destruct (Int.eq _ _); try destruct (Int.lt _ _) eqn:ELT ; simpl;
+  try rewrite Int.xor_one_one; try rewrite Int.xor_zero_one;
+  auto.
+Qed.
+
+(** ** Unsigned ints *)
+
+Lemma xor_neg_ltle_cmpu: forall mptr v1 v2,
+  Some (Val.xor (Val.cmpu (Mem.valid_pointer mptr) Clt v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer mptr) Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  unfold Val.cmpu; simpl;
+  try rewrite Int.eq_sym;
+  try destruct (Int.eq _ _); try destruct (Int.ltu _ _) eqn:ELT ; simpl;
+  try rewrite Int.xor_one_one; try rewrite Int.xor_zero_one;
+  auto.
+  1,2:
+    unfold Val.cmpu, Val.cmpu_bool;
+    destruct Archi.ptr64; try destruct (_ && _); try destruct (_ || _);
+    try destruct (eq_block _ _); auto.
+  unfold Val.cmpu, Val.cmpu_bool; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+Remark ltu_12_wordsize:
+  Int.ltu (Int.repr 12) Int.iwordsize = true.
+Proof.
+  unfold Int.iwordsize, Int.zwordsize. simpl.
+  unfold Int.ltu. apply zlt_true.
+  rewrite !Int.unsigned_repr; try cbn; try omega.
+Qed.
+
+(** ** Signed longs *)
+
+Lemma xor_neg_ltle_cmpl: forall v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmpl Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpl_bool Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.lt _ _); auto.
+Qed.
+
+Lemma xor_neg_ltge_cmpl: forall v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmpl Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpl_bool Cge v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.lt _ _); auto.
+Qed.
+
+Lemma xorl_zero_eq_cmpl: forall c v1 v2,
+  c = Ceq \/ c = Cne ->
+  Some
+    (Val.maketotal
+     (option_map Val.of_bool
+       (Val.cmpl_bool c (Val.xorl v1 v2) (Vlong Int64.zero)))) =
+  Some (Val.of_optbool (Val.cmpl_bool c v1 v2)).
+Proof.
+  intros. destruct c; inv H; try discriminate;
+  destruct v1, v2; simpl; auto;
+  destruct (Int64.eq i i0) eqn:EQ0.
+  1,3:
+    apply Int64.same_if_eq in EQ0; subst;
+    rewrite Int64.xor_idem;
+    rewrite Int64.eq_true; trivial.
+  1,2:
+    destruct (Int64.eq (Int64.xor i i0) Int64.zero) eqn:EQ1; simpl; try congruence;
+    rewrite Int64.xor_is_zero in EQ1; congruence.
+Qed.
+
+Lemma cmp_ltle_add_one: forall v n,
+  Int.eq n (Int.repr Int.max_signed) = false ->
+  Some (Val.of_optbool (Val.cmp_bool Clt v (Vint (Int.add n Int.one)))) =
+  Some (Val.of_optbool (Val.cmp_bool Cle v (Vint n))).
+Proof.
+  intros v n EQMAX. unfold Val.cmp_bool; destruct v; simpl; auto.
+  unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1).
+  destruct (zlt (Int.signed n) (Int.signed i)).
+  rewrite zlt_false by omega. auto.
+  rewrite zlt_true by omega. auto.
+  rewrite Int.add_signed. symmetry; apply Int.signed_repr. 
+  specialize (Int.eq_spec n (Int.repr Int.max_signed)).
+  rewrite EQMAX; simpl; intros.
+  assert (Int.signed n <> Int.max_signed).
+  { red; intros E. elim H. rewrite <- (Int.repr_signed n). rewrite E. auto. }
+  generalize (Int.signed_range n); omega.
+Qed.
+
+Lemma cmpl_ltle_add_one: forall v n,
+  Int64.eq n (Int64.repr Int64.max_signed) = false ->
+  Some (Val.of_optbool (Val.cmpl_bool Clt v (Vlong (Int64.add n Int64.one)))) =
+  Some (Val.of_optbool (Val.cmpl_bool Cle v (Vlong n))).
+Proof.
+  intros v n EQMAX. unfold Val.cmpl_bool; destruct v; simpl; auto.
+  unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1).
+  destruct (zlt (Int64.signed n) (Int64.signed i)).
+  rewrite zlt_false by omega. auto.
+  rewrite zlt_true by omega. auto.
+  rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. 
+  specialize (Int64.eq_spec n (Int64.repr Int64.max_signed)).
+  rewrite EQMAX; simpl; intros.
+  assert (Int64.signed n <> Int64.max_signed).
+  { red; intros E. elim H. rewrite <- (Int64.repr_signed n). rewrite E. auto. }
+  generalize (Int64.signed_range n); omega.
+Qed.
+
+Remark lt_maxsgn_false_int: forall i,
+  Int.lt (Int.repr Int.max_signed) i = false.
+Proof.
+  intros; unfold Int.lt.
+  specialize  Int.signed_range with i; intros.
+  rewrite zlt_false; auto. destruct H.
+  rewrite Int.signed_repr; try (cbn; lia).
+  apply Z.le_ge. trivial.
+Qed.
+
+Remark lt_maxsgn_false_long: forall i,
+  Int64.lt (Int64.repr Int64.max_signed) i = false.
+Proof.
+  intros; unfold Int64.lt.
+  specialize  Int64.signed_range with i; intros.
+  rewrite zlt_false; auto. destruct H.
+  rewrite Int64.signed_repr; try (cbn; lia).
+  apply Z.le_ge. trivial.
+Qed.
+
+(** ** Unsigned longs *)
+
+Lemma xor_neg_ltle_cmplu: forall mptr v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmplu (Mem.valid_pointer mptr) Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer mptr) Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.ltu _ _); auto.
+  1,2: unfold Val.cmplu; simpl; auto;
+  destruct (Archi.ptr64); simpl;
+  try destruct (eq_block _ _); simpl;
+  try destruct (_ && _); simpl;
+  try destruct (Ptrofs.cmpu _ _);
+  try destruct cmp; simpl; auto.
+  unfold Val.cmplu; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+Lemma xor_neg_ltge_cmplu: forall mptr v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmplu (Mem.valid_pointer mptr) Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer mptr) Cge v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.ltu _ _); auto.
+  1,2: unfold Val.cmplu; simpl; auto;
+  destruct (Archi.ptr64); simpl;
+  try destruct (eq_block _ _); simpl;
+  try destruct (_ && _); simpl;
+  try destruct (Ptrofs.cmpu _ _);
+  try destruct cmp; simpl; auto.
+  unfold Val.cmplu; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+(** ** Floats *)
+
+Lemma xor_neg_eqne_cmpf: forall v1 v2,
+  Some (Val.xor (Val.cmpf Ceq v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpf_bool Cne v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence;
+  unfold Val.cmpf; simpl.
+  rewrite Float.cmp_ne_eq.
+  destruct (Float.cmp _ _ _); simpl; auto.
+Qed.
+
+(** ** Singles *)
+
+Lemma xor_neg_eqne_cmpfs: forall v1 v2,
+  Some (Val.xor (Val.cmpfs Ceq v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpfs_bool Cne v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence;
+  unfold Val.cmpfs; simpl.
+  rewrite Float32.cmp_ne_eq.
+  destruct (Float32.cmp _ _ _); simpl; auto.
+Qed.
+
+(** ** More useful lemmas *)
+
+Lemma xor_neg_optb: forall v,
+  Some (Val.xor (Val.of_optbool (option_map negb v))
+    (Vint Int.one)) = Some (Val.of_optbool v).
+Proof.
+  intros.
+  destruct v; simpl; trivial.
+  destruct b; simpl; auto.
+Qed.
+
+Lemma xor_neg_optb': forall v,
+  Some (Val.xor (Val.of_optbool v) (Vint Int.one)) =
+  Some (Val.of_optbool (option_map negb v)).
+Proof.
+  intros.
+  destruct v; simpl; trivial.
+  destruct b; simpl; auto.
+Qed.
+
+Lemma optbool_mktotal: forall v,
+  Val.maketotal (option_map Val.of_bool v) =
+  Val.of_optbool v.
+Proof.
+  intros.
+  destruct v; simpl; auto.
+Qed.
+
+(* TODO gourdinl move to common/Values ? *)
+Theorem swap_cmpf_bool:
+  forall c x y,
+  Val.cmpf_bool (swap_comparison c) x y = Val.cmpf_bool c y x.
+Proof.
+  destruct x; destruct y; simpl; auto. rewrite Float.cmp_swap. auto.
+Qed.
+
+Theorem swap_cmpfs_bool:
+  forall c x y,
+  Val.cmpfs_bool (swap_comparison c) x y = Val.cmpfs_bool c y x.
+Proof.
+  destruct x; destruct y; simpl; auto. rewrite Float32.cmp_swap. auto.
+Qed.
+
+Remark cast32unsigned_from_cast32signed:
+  forall i, Int64.repr (Int.unsigned i) = Int64.zero_ext 32 (Int64.repr (Int.signed i)).
+Proof.
+  intros. apply Int64.same_bits_eq; intros. 
+  rewrite Int64.bits_zero_ext, !Int64.testbit_repr by tauto.
+  rewrite Int.bits_signed by tauto. fold (Int.testbit i i0).
+  change Int.zwordsize with 32.
+  destruct (zlt i0 32). auto. apply Int.bits_above. auto.
+Qed.
+
+(** * Intermediates lemmas on each expanded instruction *)
+
+Lemma simplify_ccomp_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int32s c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmp_bool c v v0)).
+Proof.
+  intros.
+  unfold cond_int32s in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmp.
+  - apply xor_neg_ltle_cmp.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmp_bool; trivial.
+  - replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmp_bool.
+    rewrite xor_neg_optb; trivial.
+Qed.
+
+Lemma simplify_ccompu_correct ge sp hst st c r r0 rs0 m m0 v v0: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int32u c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer m) c v v0)).
+Proof.
+  intros.
+  erewrite (cmpu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)).
+  2: eauto.
+  unfold cond_int32u in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpu.
+  - apply xor_neg_ltle_cmpu.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmpu_bool; trivial.
+  - replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmpu_bool.
+    rewrite xor_neg_optb; trivial.
+Qed.
+
+Lemma simplify_ccompimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int32s c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmp_bool c v (Vint n))).
+Proof.
+  intros.
+  unfold expanse_condimm_int32s, cond_int32s in *; destruct c;
+  intros; destruct (Int.eq n Int.zero) eqn:EQIMM; simpl;
+  try apply Int.same_if_eq in EQIMM; subst;
+  unfold loadimm32, sltimm32, xorimm32, opimm32, load_hilo32;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmp, zero32.
+  all:
+    try apply xor_neg_ltle_cmp; 
+    try apply xor_neg_ltge_cmp; trivial.
+  4: 
+    try destruct (Int.eq n (Int.repr Int.max_signed)) eqn:EQMAX; subst;
+    try apply Int.same_if_eq in EQMAX; subst; simpl.
+  4:
+    intros; try (specialize make_immed32_sound with (Int.one);
+    destruct (make_immed32 Int.one) eqn:EQMKI_A1); intros; simpl.
+  6:
+    intros; try (specialize make_immed32_sound with (Int.add n Int.one);
+    destruct (make_immed32 (Int.add n Int.one)) eqn:EQMKI_A2); intros; simpl.
+  1,2,3,8,9:
+    intros; try (specialize make_immed32_sound with (n);
+    destruct (make_immed32 n) eqn:EQMKI); intros; simpl.
+  all: 
+    try destruct (Int.eq lo Int.zero) eqn:EQLO32;
+    try apply Int.same_if_eq in EQLO32; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    try rewrite (Int.add_commut _ Int.zero), Int.add_zero_l in H; subst;
+    unfold Val.cmp, eval_may_undef, zero32, Val.add; simpl;
+    destruct v; auto.
+  all:
+    try rewrite ltu_12_wordsize;
+    try rewrite <- H;
+    try (apply cmp_ltle_add_one; auto);
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int.add_commut;
+    try rewrite <- H; try rewrite cmp_ltle_add_one;
+    try rewrite Int.add_zero_l;
+    try (
+    simpl; trivial;
+    try rewrite Int.xor_is_zero;
+    try destruct (Int.lt _ _) eqn:EQLT; trivial;
+    try rewrite lt_maxsgn_false_int in EQLT;
+    simpl; trivial; try discriminate; fail).
+Qed.
+
+Lemma simplify_ccompuimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int32u c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer m) c v (Vint n))).
+Proof.
+  intros.
+  assert (HMEM: Val.cmpu_bool (Mem.valid_pointer m) c v (Vint n) =
+          Val.cmpu_bool (Mem.valid_pointer m0) c v (Vint n)).
+  erewrite (cmpu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)); eauto.
+  unfold expanse_condimm_int32u, cond_int32u in *; destruct c;
+  intros; destruct (Int.eq n Int.zero) eqn:EQIMM; simpl;
+  try apply Int.same_if_eq in EQIMM; subst;
+  unfold loadimm32, sltuimm32, opimm32, load_hilo32;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1; trivial;
+  try rewrite xor_neg_ltle_cmpu;
+  unfold Val.cmpu, zero32.
+  all:
+    try (specialize make_immed32_sound with n;
+    destruct (make_immed32 n) eqn:EQMKI);
+    try destruct (Int.eq lo Int.zero) eqn:EQLO;
+    try apply Int.same_if_eq in EQLO; subst;
+    intros; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    rewrite HMEM;
+    unfold eval_may_undef, Val.cmpu;
+    destruct v; simpl; auto;
+    try rewrite EQIMM; try destruct (Archi.ptr64) eqn:EQARCH; simpl;
+    try rewrite ltu_12_wordsize; trivial;
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int.add_zero_l;
+    try destruct (Int.ltu _ _) eqn:EQLTU; simpl;
+    try rewrite EQLTU; simpl; try rewrite EQIMM;
+    try rewrite EQARCH; trivial.
+Qed.
+
+Lemma simplify_ccompl_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int64s c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpl_bool c v v0)).
+Proof.
+  intros.
+  unfold cond_int64s in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpl.
+  1,2,3: rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltle_cmpl.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmpl_bool; trivial.
+    rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltge_cmpl.
+Qed.
+
+Lemma simplify_ccomplu_correct ge sp hst st c r r0 rs0 m m0 v v0: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int64u c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) c v v0)).
+Proof.
+  intros.
+  erewrite (cmplu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)).
+  2: eauto.
+  unfold cond_int64u in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmplu.
+  1,2,3: rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltle_cmplu.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmplu_bool; trivial.
+    rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltge_cmplu.
+Qed.
+
+Lemma simplify_ccomplimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int64s c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpl_bool c v (Vlong n))).
+Proof.
+  intros.
+  unfold expanse_condimm_int64s, cond_int64s in *; destruct c;
+  intros; destruct (Int64.eq n Int64.zero) eqn:EQIMM; simpl;
+  try apply Int64.same_if_eq in EQIMM; subst;
+  unfold loadimm32, loadimm64, sltimm64, xorimm64, opimm64, load_hilo32, load_hilo64;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmpl, zero64.
+  all:
+    try apply xor_neg_ltle_cmpl; 
+    try apply xor_neg_ltge_cmpl;
+    try rewrite optbool_mktotal; trivial.
+  4: 
+    try destruct (Int64.eq n (Int64.repr Int64.max_signed)) eqn:EQMAX; subst;
+    try apply Int64.same_if_eq in EQMAX; subst; simpl.
+  4:
+    intros; try (specialize make_immed32_sound with (Int.one);
+    destruct (make_immed32 Int.one) eqn:EQMKI_A1); intros; simpl.
+  6:
+    intros; try (specialize make_immed64_sound with (Int64.add n Int64.one);
+    destruct (make_immed64 (Int64.add n Int64.one)) eqn:EQMKI_A2); intros; simpl.
+  1,2,3,9,10:
+    intros; try (specialize make_immed64_sound with (n);
+    destruct (make_immed64 n) eqn:EQMKI); intros; simpl.
+  all: 
+    try destruct (Int.eq lo Int.zero) eqn:EQLO32;
+    try apply Int.same_if_eq in EQLO32; subst;
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO64;
+    try apply Int64.same_if_eq in EQLO64; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    try rewrite (Int64.add_commut _ Int64.zero), Int64.add_zero_l in H; subst;
+    try fold (Val.cmpl Clt v (Vlong imm));
+    try rewrite xor_neg_ltge_cmpl; trivial;
+    try rewrite xor_neg_ltle_cmpl; trivial;
+    unfold Val.cmpl, Val.addl;
+    try rewrite xorl_zero_eq_cmpl; trivial;
+    try rewrite optbool_mktotal; trivial;
+    unfold eval_may_undef, zero32, Val.add; simpl;
+    destruct v; auto.
+  1,2,3,4,5,6,7,8,9,10,11,12:
+      try rewrite <- optbool_mktotal; trivial;
+      try rewrite Int64.add_commut, Int64.add_zero_l in *;
+      try fold (Val.cmpl Clt (Vlong i) (Vlong imm));
+      try fold (Val.cmpl Clt (Vlong i) (Vlong (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12)))));
+      try fold (Val.cmpl Clt (Vlong i) (Vlong (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)));
+      try rewrite xor_neg_ltge_cmpl; trivial;
+      try rewrite xor_neg_ltle_cmpl; trivial.
+  6:
+    rewrite <- H;
+    try apply cmpl_ltle_add_one; auto.
+  all:
+    try rewrite <- H;
+    try apply cmpl_ltle_add_one; auto;
+    try rewrite <- cmpl_ltle_add_one; auto;
+    try rewrite ltu_12_wordsize;
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int64.add_commut, Int64.add_zero_l in *;
+    try rewrite Int64.add_zero_l;
+    simpl; try rewrite lt_maxsgn_false_long;
+    try (rewrite <- H; trivial; fail);
+    simpl; trivial.
+Qed.
+
+Lemma simplify_ccompluimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int64u c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) c v (Vlong n))).
+Proof.
+  intros.
+  assert (HMEM: Val.cmplu_bool (Mem.valid_pointer m) c v (Vlong n) =
+          Val.cmplu_bool (Mem.valid_pointer m0) c v (Vlong n)).
+  erewrite (cmplu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)); eauto.
+  unfold expanse_condimm_int64u, cond_int64u in *; destruct c;
+  intros; destruct (Int64.eq n Int64.zero) eqn:EQIMM; simpl;
+  unfold loadimm64, sltuimm64, opimm64, load_hilo64;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmplu, zero64.
+  (* Simplify make immediate and decompose subcases *)
+  all:
+    try (specialize make_immed64_sound with n;
+    destruct (make_immed64 n) eqn:EQMKI);
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    rewrite HMEM.
+  (* Ceq, Cne, Clt = itself *)
+  all: intros; try apply Int64.same_if_eq in EQIMM; subst; trivial.
+  (* Cle = xor (Clt) *)
+  all: try apply xor_neg_ltle_cmplu; trivial.
+  (* Others subcases with swap/negation *)
+  all:
+    unfold Val.cmplu, eval_may_undef, zero64, Val.addl;
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l in *; trivial;
+    try rewrite Int64.add_zero_l;
+    try (rewrite <- xor_neg_ltle_cmplu; unfold Val.cmplu;
+    trivial; fail);
+    try (replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmplu_bool; trivial; fail);
+    try (replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmplu_bool; rewrite xor_neg_optb; trivial; fail);
+    try rewrite optbool_mktotal; trivial.
+  all:
+    try destruct v; simpl; auto;
+    try destruct (Archi.ptr64); simpl;
+    try rewrite EQIMM;
+    try rewrite HMEM; trivial;
+    try destruct (Int64.ltu _ _);
+    try rewrite <- xor_neg_ltge_cmplu; unfold Val.cmplu;
+    try rewrite <- optbool_mktotal; trivial.
+Qed.
+
+Lemma simplify_ccompf_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp false cond_float c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpf_bool c v v0)).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpf.
+  - apply xor_neg_eqne_cmpf.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite swap_cmpf_bool; trivial.
+  - replace (Cle) with (swap_comparison Cge) by auto;
+    rewrite swap_cmpf_bool; trivial.
+Qed.
+
+Lemma simplify_cnotcompf_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp true cond_float c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (option_map negb (Val.cmpf_bool c v v0))).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpf.
+  1,3,4: apply xor_neg_optb'.
+  all: destruct v, v0; simpl; trivial.
+  rewrite Float.cmp_ne_eq; rewrite negb_involutive; trivial.
+  1: replace (Clt) with (swap_comparison Cgt) by auto; rewrite <- Float.cmp_swap; simpl.
+  2: replace (Cle) with (swap_comparison Cge) by auto; rewrite <- Float.cmp_swap; simpl.
+  all: destruct (Float.cmp _ _ _); trivial.
+Qed.
+
+Lemma simplify_ccompfs_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp false cond_single c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpfs_bool c v v0)).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpfs.
+  - apply xor_neg_eqne_cmpfs.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite swap_cmpfs_bool; trivial.
+  - replace (Cle) with (swap_comparison Cge) by auto;
+    rewrite swap_cmpfs_bool; trivial.
+Qed.
+
+Lemma simplify_cnotcompfs_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp true cond_single c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (option_map negb (Val.cmpfs_bool c v v0))).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpfs.
+  1,3,4: apply xor_neg_optb'.
+  all: destruct v, v0; simpl; trivial.
+  rewrite Float32.cmp_ne_eq; rewrite negb_involutive; trivial.
+  1: replace (Clt) with (swap_comparison Cgt) by auto; rewrite <- Float32.cmp_swap; simpl.
+  2: replace (Cle) with (swap_comparison Cge) by auto; rewrite <- Float32.cmp_swap; simpl.
+  all: destruct (Float32.cmp _ _ _); trivial.
+Qed.
+
+Lemma simplify_floatconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil =>
+        Some
+          (fSop Ofloat_of_bits
+             (make_lhsv_single (loadimm64 (Float.to_bits n))))
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Ofloatconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm64, load_hilo64; simpl;
+  specialize make_immed64_sound with (Float.to_bits n);
+  destruct (make_immed64 (Float.to_bits n)) eqn:EQMKI; intros;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+  simpl.
+  - try rewrite Int64.add_commut, Int64.add_zero_l; inv H;
+    try rewrite Float.of_to_bits; trivial.
+  - apply Int64.same_if_eq in EQLO; subst.
+    try rewrite Int64.add_commut, Int64.add_zero_l in H.
+    rewrite <- H; try rewrite Float.of_to_bits; trivial.
+  - rewrite <- H; try rewrite Float.of_to_bits; trivial.
+  - rewrite <- H; try rewrite Float.of_to_bits; trivial.
+Qed.
+
+Lemma simplify_singleconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil =>
+        Some
+          (fSop Osingle_of_bits
+             (make_lhsv_single (loadimm32 (Float32.to_bits n))))
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Osingleconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm32, load_hilo32; simpl;
+  specialize make_immed32_sound with (Float32.to_bits n);
+  destruct (make_immed32 (Float32.to_bits n)) eqn:EQMKI; intros;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO;
+  simpl.
+  { try rewrite Int.add_commut, Int.add_zero_l; inv H;
+    try rewrite Float32.of_to_bits; trivial. }
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l in H; simpl;
+    rewrite ltu_12_wordsize; simpl; try rewrite <- H;
+    try rewrite Float32.of_to_bits; trivial.
+Qed.
+
+Lemma simplify_addimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (addimm32 (fsi_sreg_get hst a1) n None)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oaddimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold addimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.add (Vint imm) v); rewrite Val.add_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_addlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (addimm64 (fsi_sreg_get hst a1) n None)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oaddlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold addimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.addl (Vlong imm) v); rewrite Val.addl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_andimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (andimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oandimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold andimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.and (Vint imm) v); rewrite Val.and_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_andlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (andimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oandlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold andimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.andl (Vlong imm) v); rewrite Val.andl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_orimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (orimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oorimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold orimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.or (Vint imm) v); rewrite Val.or_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_orlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (orimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oorlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold orimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.orl (Vlong imm) v); rewrite Val.orl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_xorimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (xorimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oxorimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold xorimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.xor (Vint imm) v); rewrite Val.xor_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_xorlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (xorimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oxorlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold xorimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.xorl (Vlong imm) v); rewrite Val.xorl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_intconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil => Some (loadimm32 n)
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Ointconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm32, load_hilo32, make_lhsv_single; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  try apply Int.same_if_eq in EQLO; subst;
+  try rewrite Int.add_commut, Int.add_zero_l;
+  try rewrite ltu_12_wordsize; try rewrite H; trivial.
+Qed.
+
+Lemma simplify_longconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil => Some (loadimm64 n)
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Olongconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm64, load_hilo64, make_lhsv_single; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  try apply Int64.same_if_eq in EQLO; subst;
+  try rewrite Int64.add_commut, Int64.add_zero_l;
+  try rewrite Int64.add_commut;
+  try rewrite ltu_12_wordsize; try rewrite H; trivial.
+Qed.
+
+Lemma simplify_cast8signed_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           Some
+             (fSop (Oshrimm (Int.repr 24))
+                (make_lhsv_single
+                   (fSop (Oshlimm (Int.repr 24))
+                      (make_lhsv_single (fsi_sreg_get hst a1)))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast8signed args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shr, Val.shl, Val.sign_ext;
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
+  rewrite A. rewrite Int.sign_ext_shr_shl; simpl; trivial. cbn; lia.
+Qed.
+
+Lemma simplify_cast16signed_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           Some
+             (fSop (Oshrimm (Int.repr 16))
+                (make_lhsv_single
+                   (fSop (Oshlimm (Int.repr 16))
+                      (make_lhsv_single (fsi_sreg_get hst a1)))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast16signed args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shr, Val.shl, Val.sign_ext;
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
+  rewrite A. rewrite Int.sign_ext_shr_shl; simpl; trivial. cbn; lia.
+Qed.
+
+Lemma simplify_shrximm_correct ge sp rs0 m0 lr hst fsv st args m n: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           if Int.eq n Int.zero
+           then
+            Some
+              (fSop (OEmayundef (MUshrx n))
+                 (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                    (fsi_sreg_get hst a1)))
+           else
+            if Int.eq n Int.one
+            then
+             Some
+               (fSop (OEmayundef (MUshrx n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.repr 31))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))
+                     (fSop (Oshrimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.repr 31))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))))
+            else
+             Some
+               (fSop (OEmayundef (MUshrx n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrimm n)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.sub Int.iwordsize n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrimm (Int.repr 31))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))
+                     (fSop (Oshrimm n)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.sub Int.iwordsize n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrimm (Int.repr 31))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oshrximm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence).
+  assert (A: Int.ltu Int.zero (Int.repr 31) = true) by auto.
+  assert (B: Int.ltu (Int.repr 31) Int.iwordsize = true) by auto.
+  assert (C: Int.ltu Int.one Int.iwordsize = true) by auto.
+  destruct (Int.eq n Int.zero) eqn:EQ0;
+  destruct (Int.eq n Int.one) eqn:EQ1.
+  { apply Int.same_if_eq in EQ0.
+    apply Int.same_if_eq in EQ1; subst. discriminate. }
+  all:
+    simpl in OK1; inv OK1; inv H; simpl;
+    erewrite !fsi_sreg_get_correct; eauto;
+    destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1;
+    destruct (Val.shrx v (Vint n)) eqn:TOTAL; cbn;
+    unfold eval_may_undef.
+  2,4,6:
+    unfold Val.shrx in TOTAL;
+    destruct v; simpl in TOTAL; simpl; try congruence;
+    try rewrite B; simpl; try rewrite C; simpl;
+    try destruct (Val.shr _ _);
+    destruct (Int.ltu n (Int.repr 31)); try congruence.
+  - destruct v; simpl in TOTAL; try congruence;
+    apply Int.same_if_eq in EQ0; subst;
+    rewrite A, Int.shrx_zero in TOTAL;
+    [auto | cbn; lia].
+  - apply Int.same_if_eq in EQ1; subst;
+    unfold Val.shr, Val.shru, Val.shrx, Val.add; simpl;
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B, C.
+    rewrite Int.shrx1_shr in TOTAL; auto.
+  - exploit Val.shrx_shr_2; eauto. rewrite EQ0.
+    intros; subst.
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B in *.
+    destruct Int.ltu eqn:EQN0 in TOTAL; try discriminate.
+    simpl in *.
+    destruct Int.ltu eqn:EQN1 in TOTAL; try discriminate.
+    replace Int.iwordsize with (Int.repr 32) in * by auto.
+    rewrite !EQN1. simpl in *.
+    destruct Int.ltu eqn:EQN2 in TOTAL; try discriminate.
+    rewrite !EQN2. rewrite EQN0.
+    reflexivity.
+Qed.
+
+Lemma simplify_shrxlimm_correct ge sp rs0 m0 lr hst fsv st args m n: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           if Int.eq n Int.zero
+           then
+            Some
+              (fSop (OEmayundef (MUshrxl n))
+                 (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                    (fsi_sreg_get hst a1)))
+           else
+            if Int.eq n Int.one
+            then
+             Some
+               (fSop (OEmayundef (MUshrxl n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrlimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.repr 63))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))
+                     (fSop (Oshrlimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.repr 63))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))))
+            else
+             Some
+               (fSop (OEmayundef (MUshrxl n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrlimm n)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.sub Int64.iwordsize' n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrlimm (Int.repr 63))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))
+                     (fSop (Oshrlimm n)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.sub Int64.iwordsize' n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrlimm (Int.repr 63))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oshrxlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence).
+  assert (A: Int.ltu Int.zero (Int.repr 63) = true) by auto.
+  assert (B: Int.ltu (Int.repr 63) Int64.iwordsize' = true) by auto.
+  assert (C: Int.ltu Int.one Int64.iwordsize' = true) by auto.
+  destruct (Int.eq n Int.zero) eqn:EQ0;
+  destruct (Int.eq n Int.one) eqn:EQ1.
+  { apply Int.same_if_eq in EQ0.
+    apply Int.same_if_eq in EQ1; subst. discriminate. }
+  all:
+    simpl in OK1; inv OK1; inv H; simpl;
+    erewrite !fsi_sreg_get_correct; eauto;
+    destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1;
+    destruct (Val.shrxl v (Vint n)) eqn:TOTAL; cbn;
+    unfold eval_may_undef.
+  2,4,6:
+    unfold Val.shrxl in TOTAL;
+    destruct v; simpl in TOTAL; simpl; try congruence;
+    try rewrite B; simpl; try rewrite C; simpl;
+    try destruct (Val.shrl _ _);
+    destruct (Int.ltu n (Int.repr 63)); try congruence.
+  - destruct v; simpl in TOTAL; try congruence;
+    apply Int.same_if_eq in EQ0; subst;
+    rewrite A, Int64.shrx'_zero in *.
+    assumption.
+  - apply Int.same_if_eq in EQ1; subst;
+    unfold Val.shrl, Val.shrlu, Val.shrxl, Val.addl; simpl;
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B, C.
+    rewrite Int64.shrx'1_shr' in TOTAL; auto.
+  - exploit Val.shrxl_shrl_2; eauto. rewrite EQ0.
+    intros; subst.
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B in *.
+    destruct Int.ltu eqn:EQN0 in TOTAL; try discriminate.
+    simpl in *.
+    destruct Int.ltu eqn:EQN1 in TOTAL; try discriminate.
+    replace Int64.iwordsize' with (Int.repr 64) in * by auto.
+    rewrite !EQN1. simpl in *.
+    destruct Int.ltu eqn:EQN2 in TOTAL; try discriminate.
+    rewrite !EQN2. rewrite EQN0.
+    reflexivity.
+Qed.
+
+Lemma simplify_cast32unsigned_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+      | nil => None
+      | a1 :: nil =>
+          Some
+            (fSop (Oshrluimm (Int.repr 32))
+               (make_lhsv_single
+                  (fSop (Oshllimm (Int.repr 32))
+                     (make_lhsv_single
+                        (fSop Ocast32signed
+                           (make_lhsv_single (fsi_sreg_get hst a1)))))))
+      | a1 :: _ :: _ => None
+      end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast32unsigned args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shrlu, Val.shll, Val.longofint, Val.longofintu.
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 32) Int64.iwordsize' = true) by auto.
+  rewrite A. rewrite Int64.shru'_shl'; auto.
+  replace (Int.ltu (Int.repr 32) (Int.repr 32)) with (false) by auto.
+  rewrite cast32unsigned_from_cast32signed.
+  replace Int64.zwordsize with 64 by auto.
+  rewrite Int.unsigned_repr; cbn; try lia.
+  replace (Int.sub (Int.repr 32) (Int.repr 32)) with (Int.zero) by auto.
+  rewrite Int64.shru'_zero. reflexivity.
+Qed.
+
+(** * Main proof of simplification *)
+
+Lemma target_op_simplify_correct op lr hst fsv ge sp rs0 m0 st args m: forall
+   (H: target_op_simplify op lr hst = Some fsv)
+   (REF: hsilocal_refines ge sp rs0 m0 hst st)
+   (OK0: hsok_local ge sp rs0 m0 hst)
+   (OK1: seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args)
+   (OK2: seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+   seval_sval ge sp (hsval_proj fsv) rs0 m0 = eval_operation ge sp op args m.
+Proof.
+  unfold target_op_simplify; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ? ? ?.
+  destruct op; try congruence.
+  eapply simplify_intconst_correct; eauto.
+  eapply simplify_longconst_correct; eauto.
+  eapply simplify_floatconst_correct; eauto. 
+  eapply simplify_singleconst_correct; eauto. 
+  eapply simplify_cast8signed_correct; eauto.
+  eapply simplify_cast16signed_correct; eauto.
+  eapply simplify_addimm_correct; eauto.
+  eapply simplify_andimm_correct; eauto.
+  eapply simplify_orimm_correct; eauto.
+  eapply simplify_xorimm_correct; eauto.
+  eapply simplify_shrximm_correct; eauto.
+  eapply simplify_cast32unsigned_correct; eauto.
+  eapply simplify_addlimm_correct; eauto.
+  eapply simplify_andlimm_correct; eauto.
+  eapply simplify_orlimm_correct; eauto.
+  eapply simplify_xorlimm_correct; eauto.
+  eapply simplify_shrxlimm_correct; eauto.
+  (* Ocmp expansions *)
+  destruct cond; repeat (destruct lr; simpl; try congruence);
+  simpl in OK1;
+  try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+  try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+  inv H; inv OK1.
+  - eapply simplify_ccomp_correct; eauto.
+  - eapply simplify_ccompu_correct; eauto.
+  - eapply simplify_ccompimm_correct; eauto.
+  - eapply simplify_ccompuimm_correct; eauto.
+  - eapply simplify_ccompl_correct; eauto.
+  - eapply simplify_ccomplu_correct; eauto.
+  - eapply simplify_ccomplimm_correct; eauto.
+  - eapply simplify_ccompluimm_correct; eauto.
+  - eapply simplify_ccompf_correct; eauto.
+  - eapply simplify_cnotcompf_correct; eauto.
+  - eapply simplify_ccompfs_correct; eauto.
+  - eapply simplify_cnotcompfs_correct; eauto.
+Qed.
+
+Lemma target_cbranch_expanse_correct hst c l ge sp rs0 m0 st c' l': forall
+  (TARGET: target_cbranch_expanse hst c l = Some (c', l'))
+  (LREF : hsilocal_refines ge sp rs0 m0 hst st)
+  (OK: hsok_local ge sp rs0 m0 hst),
+  seval_condition ge sp c' (hsval_list_proj l') (si_smem st) rs0 m0 =
+  seval_condition ge sp c (list_sval_inj (map (si_sreg st) l)) (si_smem st) rs0 m0.
+Proof.
+  unfold target_cbranch_expanse, seval_condition; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ?.
+  destruct c; try congruence;
+  repeat (destruct l; simpl in H; try congruence).
+  1,2,5,6:
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    try replace (Cle) with (swap_comparison Cge) by auto;
+    try replace (Clt) with (swap_comparison Cgt) by auto;
+    try rewrite Val.swap_cmp_bool; trivial;
+    try rewrite Val.swap_cmpu_bool; trivial;
+    try rewrite Val.swap_cmpl_bool; trivial;
+    try rewrite Val.swap_cmplu_bool; trivial.
+  1,2,3,4:
+    try destruct (Int.eq n Int.zero) eqn: EQIMM;
+    try apply Int.same_if_eq in EQIMM;
+    try destruct (Int64.eq n Int64.zero) eqn: EQIMM;
+    try apply Int64.same_if_eq in EQIMM;
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    unfold loadimm32, load_hilo32, Val.cmp, Val.cmpu, zero32;
+    unfold loadimm64, load_hilo64, Val.cmpl, Val.cmplu, zero64;
+    intros; try (specialize make_immed32_sound with (n);
+    destruct (make_immed32 n) eqn:EQMKI); intros; simpl;
+    intros; try (specialize make_immed64_sound with (n);
+    destruct (make_immed64 n) eqn:EQMKI); intros; simpl;
+    try rewrite EQLO; simpl;
+    try destruct (Int.eq lo Int.zero) eqn:EQLO;
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+    try apply Int.same_if_eq in EQLO; simpl; trivial;
+    try apply Int64.same_if_eq in EQLO; simpl; trivial;
+    unfold eval_may_undef; 
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1; simpl; trivial;
+    try destruct v; try rewrite H;
+    try rewrite ltu_12_wordsize; try rewrite EQLO;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite Int.add_zero_l; try rewrite Int64.add_zero_l;
+    auto; simpl;
+    try rewrite H in EQIMM;
+    try rewrite EQLO in EQIMM;
+    try rewrite Int.add_commut, Int.add_zero_l in EQIMM;
+    try rewrite Int64.add_commut, Int64.add_zero_l in EQIMM;
+    try rewrite EQIMM; simpl;
+    try destruct (Archi.ptr64); trivial.
+
+    1,2,3,4:
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    unfold zero32, zero64, Val.cmpf, Val.cmpfs;
+    destruct v, v0; simpl; trivial;
+    try rewrite Float.cmp_ne_eq;
+    try rewrite Float32.cmp_ne_eq;
+    try rewrite <- Float.cmp_swap; simpl;
+    try rewrite <- Float32.cmp_swap; simpl;
+    try destruct (Float.cmp _ _); simpl;
+    try destruct (Float32.cmp _ _); simpl;
+    try rewrite Int.eq_true; simpl;
+    try rewrite Int.eq_false; try apply Int.one_not_zero;
+    simpl; trivial.
+Qed.
+Global Opaque target_op_simplify.
+Global Opaque target_cbranch_expanse.
diff --git a/riscV/SelectLongproof.v b/riscV/SelectLongproof.v
index d47b6d64..0fc578bf 100644
--- a/riscV/SelectLongproof.v
+++ b/riscV/SelectLongproof.v
@@ -455,6 +455,10 @@ Proof.
   unfold divls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
@@ -462,6 +466,10 @@ Proof.
   unfold modls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
@@ -469,6 +477,10 @@ Proof.
   unfold divlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
@@ -476,6 +488,10 @@ Proof.
   unfold modlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_shrxlimm:
@@ -490,33 +506,9 @@ Proof.
 - subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
   change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
 - TrivialExists.
-(*
-  intros. unfold shrxlimm. destruct Archi.splitlong eqn:SL.
-+ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32.
-+ destruct x; simpl in H0; try discriminate. 
-  destruct (Int.ltu n (Int.repr 63)) eqn:LTU; inv H0.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-  - subst n. exists (Vlong i); split; auto. rewrite Int64.shrx'_zero. auto.
-  - assert (NZ: Int.unsigned n <> 0).
-    { intro EQ; elim H0. rewrite <- (Int.repr_unsigned n). rewrite EQ; auto. }
-    assert (LT: 0 <= Int.unsigned n < 63) by (apply Int.ltu_inv in LTU; assumption).
-    assert (LTU2: Int.ltu (Int.sub Int64.iwordsize' n) Int64.iwordsize' = true).
-    { unfold Int.ltu; apply zlt_true.
-      unfold Int.sub. change (Int.unsigned Int64.iwordsize') with 64. 
-      rewrite Int.unsigned_repr. omega. 
-      assert (64 < Int.max_unsigned) by reflexivity. omega. }
-    assert (X: eval_expr ge sp e m le
-               (Eop (Oshrlimm (Int.repr (Int64.zwordsize - 1))) (a ::: Enil))
-               (Vlong (Int64.shr' i (Int.repr (Int64.zwordsize - 1))))).
-    { EvalOp. }
-    assert (Y: eval_expr ge sp e m le (shrxlimm_inner a n)
-               (Vlong (Int64.shru' (Int64.shr' i (Int.repr (Int64.zwordsize - 1))) (Int.sub Int64.iwordsize' n)))).
-    { EvalOp. simpl. rewrite LTU2. auto. }
-    TrivialExists. 
-    constructor. EvalOp. simpl; eauto. constructor. 
-    simpl. unfold Int.ltu; rewrite zlt_true. rewrite Int64.shrx'_shr_2 by auto. reflexivity. 
-    change (Int.unsigned Int64.iwordsize') with 64; omega.
-*)
+  cbn.
+  rewrite H0.
+  reflexivity.
 Qed.
 
 Theorem eval_cmplu:
@@ -566,6 +558,7 @@ Proof.
   unfold longoffloat; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longoffloat; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
@@ -573,6 +566,7 @@ Proof.
   unfold longuoffloat; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longuoffloat; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
@@ -580,6 +574,7 @@ Proof.
   unfold floatoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_floatoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
@@ -587,6 +582,7 @@ Proof.
   unfold floatoflongu; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_floatoflongu; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
@@ -594,6 +590,7 @@ Proof.
   unfold longofsingle; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longofsingle; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
@@ -601,6 +598,7 @@ Proof.
   unfold longuofsingle; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longuofsingle; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
@@ -608,6 +606,7 @@ Proof.
   unfold singleoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_singleoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
@@ -615,6 +614,7 @@ Proof.
   unfold singleoflongu; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_singleoflongu; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/riscV/SelectOp.vp b/riscV/SelectOp.vp
index e9920e46..9932aaf8 100644
--- a/riscV/SelectOp.vp
+++ b/riscV/SelectOp.vp
@@ -419,9 +419,39 @@ Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
 
 (** ** Selection *)
 
+Definition same_expr_pure (e1 e2: expr) :=
+  match e1, e2 with
+  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
+  | _, _ => false
+  end.
+
 Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr)
-   : option expr
-   := None.
+  : option expr :=
+  if same_expr_pure e1 e2
+  then Some e1
+  else
+     if Archi.ptr64 then
+      match ty with
+      | Tlong => Some (Eop Oselectl
+                       ((Eop (Ocmp cond) args) ::: e1 ::: e2 ::: Enil))
+      | Tint => Some (Eop Olowlong ((Eop Oselectl
+                       ((Eop (Ocmp cond) args) :::
+		         (Eop Ocast32signed (e1 ::: Enil)) :::
+			 (Eop Ocast32signed (e2 ::: Enil)) ::: Enil)) ::: Enil))
+      | Tfloat => Some (Eop Ofloat_of_bits ((Eop Oselectl
+                       ((Eop (Ocmp cond) args) :::
+		         (Eop Obits_of_float (e1 ::: Enil)) :::
+			 (Eop Obits_of_float (e2 ::: Enil)) ::: Enil)) ::: Enil))
+      | Tsingle => Some
+         (Eop Osingle_of_bits
+           ((Eop Olowlong ((Eop Oselectl
+             ((Eop (Ocmp cond) args) :::
+         (Eop Ocast32signed ((Eop Obits_of_single (e1 ::: Enil)) ::: Enil)) :::
+	 (Eop Ocast32signed ((Eop Obits_of_single (e2 ::: Enil)) ::: Enil))
+	 ::: Enil)) ::: Enil)) ::: Enil))
+      | _ => None
+      end
+      else None.
  
 (** ** Recognition of addressing modes for load and store operations *)
 
@@ -462,4 +492,9 @@ Definition divfs_base (e1: expr) (e2: expr) :=
 (** Platform-specific known builtins *)
 
 Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
-  None.
+  match b with
+  | BI_bits_of_float => Some (Eop Obits_of_single args)
+  | BI_bits_of_double => Some (Eop Obits_of_float args)
+  | BI_float_of_bits => Some (Eop Osingle_of_bits args)
+  | BI_double_of_bits => Some (Eop Ofloat_of_bits args)
+  end.
diff --git a/riscV/SelectOpproof.v b/riscV/SelectOpproof.v
index 7f2014dc..ce80fc57 100644
--- a/riscV/SelectOpproof.v
+++ b/riscV/SelectOpproof.v
@@ -24,6 +24,7 @@ Require Import Cminor Op CminorSel.
 Require Import SelectOp.
 Require Import OpHelpers.
 Require Import OpHelpersproof.
+Require Import Lia.
 
 Local Open Scope cminorsel_scope.
 
@@ -506,7 +507,12 @@ Theorem eval_divs_base:
     Val.divs x y = Some z ->
     exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divs_base. exists z; split. EvalOp. auto.
+  intros. unfold divs_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_mods_base:
@@ -516,7 +522,12 @@ Theorem eval_mods_base:
     Val.mods x y = Some z ->
     exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold mods_base. exists z; split. EvalOp. auto.
+  intros. unfold mods_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_divu_base:
@@ -526,7 +537,12 @@ Theorem eval_divu_base:
     Val.divu x y = Some z ->
     exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divu_base. exists z; split. EvalOp. auto.
+  intros. unfold divu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modu_base:
@@ -536,7 +552,12 @@ Theorem eval_modu_base:
     Val.modu x y = Some z ->
     exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold modu_base. exists z; split. EvalOp. auto.
+  intros. unfold modu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_shrximm:
@@ -553,34 +574,12 @@ Proof.
   replace (Int.shrx i Int.zero) with i. auto.
   unfold Int.shrx, Int.divs. rewrite Int.shl_zero.
   change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
-  econstructor; split. EvalOp. auto.
-(*
-  intros. destruct x; simpl in H0; try discriminate. 
-  destruct (Int.ltu n (Int.repr 31)) eqn:LTU; inv H0.
-  unfold shrximm.
-  predSpec Int.eq Int.eq_spec n Int.zero.
-  - subst n. exists (Vint i); split; auto.
-    unfold Int.shrx, Int.divs. rewrite Z.quot_1_r. rewrite Int.repr_signed. auto.
-  - assert (NZ: Int.unsigned n <> 0).
-    { intro EQ; elim H0. rewrite <- (Int.repr_unsigned n). rewrite EQ; auto. }
-    assert (LT: 0 <= Int.unsigned n < 31) by (apply Int.ltu_inv in LTU; assumption).
-    assert (LTU2: Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true).
-    { unfold Int.ltu; apply zlt_true.
-      unfold Int.sub. change (Int.unsigned Int.iwordsize) with 32. 
-      rewrite Int.unsigned_repr. omega. 
-      assert (32 < Int.max_unsigned) by reflexivity. omega. }
-    assert (X: eval_expr ge sp e m le
-               (Eop (Oshrimm (Int.repr (Int.zwordsize - 1))) (a ::: Enil))
-               (Vint (Int.shr i (Int.repr (Int.zwordsize - 1))))).
-    { EvalOp. }
-    assert (Y: eval_expr ge sp e m le (shrximm_inner a n)
-               (Vint (Int.shru (Int.shr i (Int.repr (Int.zwordsize - 1))) (Int.sub Int.iwordsize n)))).
-    { EvalOp. simpl. rewrite LTU2. auto. }
-    TrivialExists. 
-    constructor. EvalOp. simpl; eauto. constructor. 
-    simpl. unfold Int.ltu; rewrite zlt_true. rewrite Int.shrx_shr_2 by auto. reflexivity. 
-    change (Int.unsigned Int.iwordsize) with 32; omega.
-*)
+  econstructor; split. EvalOp.
+  cbn.
+  rewrite H0.
+  cbn.
+  reflexivity.
+  apply Val.lessdef_refl.
 Qed.
 
 Theorem eval_shl: binary_constructor_sound shl Val.shl.
@@ -790,6 +789,7 @@ Theorem eval_intoffloat:
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intoffloat. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -799,6 +799,7 @@ Theorem eval_intuoffloat:
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuoffloat. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -810,6 +811,7 @@ Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
   InvEval. simpl in H0. TrivialExists.
   TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofint:
@@ -821,6 +823,7 @@ Proof.
   intros until y; unfold floatofint. case (floatofint_match a); intros.
   InvEval. simpl in H0. TrivialExists.
   TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -830,6 +833,7 @@ Theorem eval_intofsingle:
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -839,6 +843,7 @@ Theorem eval_singleofint:
   exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold singleofint; TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -848,6 +853,7 @@ Theorem eval_intuofsingle:
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -857,6 +863,7 @@ Theorem eval_singleofintu:
   exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
@@ -869,6 +876,71 @@ Proof.
   red; intros. unfold floatofsingle. TrivialExists.
 Qed.
 
+Lemma mod_small_negative:
+  forall a modulus,
+    modulus > 0 -> -modulus < a < 0 -> a mod modulus = a + modulus.
+Proof.
+  intros.
+  replace (a mod modulus) with ((a + modulus) mod modulus).
+  apply Z.mod_small.
+  lia.
+  rewrite <- Zplus_mod_idemp_r.
+  rewrite Z.mod_same by lia.
+  rewrite Z.add_0_r.
+  reflexivity.
+Qed.
+
+Remark normalize_low_long: forall
+  (PTR64 : Archi.ptr64 = true) v1,
+    Val.loword (Val.normalize (Val.longofint v1) Tlong) = Val.normalize v1 Tint.
+Proof.
+  intros.
+  destruct v1; cbn; try rewrite PTR64; trivial.
+  f_equal.
+  unfold Int64.loword.
+  unfold Int.signed.
+  destruct zlt.
+  { rewrite Int64.int_unsigned_repr.
+    apply Int.repr_unsigned.
+  }
+  pose proof (Int.unsigned_range i).
+  rewrite Int64.unsigned_repr_eq.
+  replace ((Int.unsigned i - Int.modulus) mod Int64.modulus)
+    with (Int64.modulus + Int.unsigned i - Int.modulus).
+  {
+    rewrite <- (Int.repr_unsigned i) at 2.
+    apply Int.eqm_samerepr.
+    unfold Int.eqm, eqmod.
+    change Int.modulus with 4294967296 in *.
+    change Int64.modulus with 18446744073709551616 in *.
+    exists 4294967295.
+    lia.
+  }
+  { rewrite mod_small_negative.
+    lia.
+    constructor.
+    constructor.
+    change Int.modulus with 4294967296 in *.
+    change Int.half_modulus with 2147483648 in *.
+    change Int64.modulus with 18446744073709551616 in *.
+    lia.
+    lia.
+  }
+Qed.
+
+Lemma same_expr_pure_correct:
+  forall le a1 a2 v1 v2
+    (PURE : same_expr_pure a1 a2 = true)
+    (EVAL1 : eval_expr ge sp e m le a1 v1)
+    (EVAL2 : eval_expr ge sp e m le a2 v2),
+    v1 = v2.
+Proof.
+  intros.
+  destruct a1; destruct a2; cbn in *; try discriminate.
+  inv EVAL1. inv EVAL2.
+  destruct (ident_eq i i0); congruence.
+Qed.
+  
 Theorem eval_select:
   forall le ty cond al vl a1 v1 a2 v2 a b,
   select ty cond al a1 a2 = Some a ->
@@ -880,7 +952,56 @@ Theorem eval_select:
      eval_expr ge sp e m le a v
   /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
 Proof.
-  unfold select; intros; discriminate.
+  unfold select; intros.
+  pose proof (same_expr_pure_correct le a1 a2 v1 v2) as PURE.
+  destruct (same_expr_pure a1 a2).
+  { rewrite <- PURE by auto.
+    inv H.
+    exists v1. split. assumption.
+    unfold Val.select.
+    destruct b; apply Val.lessdef_normalize.
+  }
+  clear PURE.
+  destruct Archi.ptr64 eqn:PTR64.
+  2: discriminate.
+  destruct ty; cbn in *; try discriminate.
+  - (* Tint *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true. apply normalize_low_long; assumption.
+      * rewrite ExtValues.select01_long_false. apply normalize_low_long; assumption.
+        
+  - (* Tfloat *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true.
+        apply ExtValues.float_bits_normalize.
+      * rewrite ExtValues.select01_long_false.
+        apply ExtValues.float_bits_normalize.
+
+  - (* Tlong *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true. reflexivity.
+      * rewrite ExtValues.select01_long_false. reflexivity.
+
+  - (* Tsingle *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true.
+        rewrite normalize_low_long by assumption.
+        apply ExtValues.single_bits_normalize.
+      * rewrite ExtValues.select01_long_false.
+        rewrite normalize_low_long by assumption.
+        apply ExtValues.single_bits_normalize.
 Qed.
 
 Theorem eval_addressing:
@@ -963,7 +1084,10 @@ Theorem eval_platform_builtin:
   platform_builtin_sem bf vl = Some v ->
   exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
 Proof.
-  intros. discriminate.
+  destruct bf; intros until le; intro Heval.
+  all: try (inversion Heval; subst a; clear Heval;
+       exists v; split; trivial;
+       repeat (try econstructor; try eassumption)).
 Qed.
 
 End CMCONSTR.
diff --git a/riscV/TargetPrinter.ml b/riscV/TargetPrinter.ml
index 1f02ca71..1f00c440 100644
--- a/riscV/TargetPrinter.ml
+++ b/riscV/TargetPrinter.ml
@@ -396,6 +396,10 @@ module Target : TARGET =
          fprintf oc "	fmv.x.s	%a, %a\n"     ireg rd freg fs
       | Pfmvxd (rd,fs) ->
          fprintf oc "	fmv.x.d	%a, %a\n"     ireg rd freg fs
+      | Pfmvsx (fd,rs) ->
+         fprintf oc "	fmv.s.x	%a, %a\n"     freg fd ireg rs
+      | Pfmvdx (fd,rs) ->
+         fprintf oc "	fmv.d.x	%a, %a\n"     freg fd ireg rs
 
       (* 32-bit (single-precision) floating point *)
       | Pfls (fd, ra, ofs) ->
@@ -525,6 +529,8 @@ module Target : TARGET =
          fprintf oc "	fcvt.s.d	%a, %a\n" freg fd freg fs
 
       (* Pseudo-instructions expanded in Asmexpand *)
+      | Pselectl(_, _, _, _) ->
+         assert false
       | Pallocframe(sz, ofs) ->
          assert false
       | Pfreeframe(sz, ofs) ->
diff --git a/riscV/ValueAOp.v b/riscV/ValueAOp.v
index 5670b5fe..d29180e4 100644
--- a/riscV/ValueAOp.v
+++ b/riscV/ValueAOp.v
@@ -13,9 +13,46 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op RTL ValueDomain.
+Require Import Zbits.
 
 (** Value analysis for RISC V operators *)
 
+Definition zero32 := (I Int.zero).
+Definition zero64 := (L Int64.zero).
+
+(** Functions to select a special register (see Op.v) *)
+
+Definition apply_bin_oreg {B} (optR: option oreg) (sem: aval -> aval -> B) (v1 v2 vz: aval): B :=
+  match optR with
+  | None => sem v1 v2
+  | Some X0_L => sem vz v1
+  | Some X0_R => sem v1 vz
+  end.
+
+Definition eval_may_undef (mu: mayundef) (v1 v2: aval): aval :=
+  match mu with
+  | MUint => match v1, v2 with
+             | I _, I _ => v2
+             | _, _ => Ifptr Ptop
+             end
+  | MUlong => match v1, v2 with
+              | L _, I _ => v2
+              | _, _ => Ifptr Ptop
+              end
+  | MUshrx i =>
+      match v1, v2 with
+      | I _, I _ =>
+          if Int.ltu i (Int.repr 31) then v2 else Ifptr Ptop
+      | _, _ => Ifptr Ptop
+      end
+  | MUshrxl i =>
+      match v1, v2 with
+      | L _, L _ =>
+          if Int.ltu i (Int.repr 63) then v2 else Ifptr Ptop
+      | _, _ => Ifptr Ptop
+      end
+  end.
+
 Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   match cond, vl with
   | Ccomp c, v1 :: v2 :: nil => cmp_bool c v1 v2
@@ -30,6 +67,22 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
   | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
   | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
+  | CEbeqw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Ceq) v1 v2 zero32
+  | CEbnew optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Cne) v1 v2 zero32
+  | CEbequw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Ceq) v1 v2 zero32
+  | CEbneuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Cne) v1 v2 zero32
+  | CEbltw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Clt) v1 v2 zero32
+  | CEbltuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Clt) v1 v2 zero32
+  | CEbgew optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Cge) v1 v2 zero32
+  | CEbgeuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Cge) v1 v2 zero32
+  | CEbeql optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Ceq) v1 v2 zero64
+  | CEbnel optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Cne) v1 v2 zero64
+  | CEbequl optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Ceq) v1 v2 zero64
+  | CEbneul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Cne) v1 v2 zero64
+  | CEbltl optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Clt) v1 v2 zero64
+  | CEbltul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Clt) v1 v2 zero64
+  | CEbgel optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Cge) v1 v2 zero64
+  | CEbgeul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Cge) v1 v2 zero64
   | _, _ => Bnone
   end.
 
@@ -41,6 +94,39 @@ Definition eval_static_addressing (addr: addressing) (vl: list aval): aval :=
   | _, _ => Vbot
   end.
 
+Definition bits_of_single (v : aval) : aval :=
+  match v with
+  | FS f => I (Float32.to_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition bits_of_float (v : aval) : aval :=
+  match v with
+  | F f => L (Float.to_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition single_of_bits (v : aval) : aval :=
+  match v with
+  | I f => FS (Float32.of_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition float_of_bits (v : aval) : aval :=
+  match v with
+  | L f => F (Float.of_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition select01_long (vb : aval) (vt : aval) (vf : aval) :=
+  match vb with
+  | I b =>
+    if Int.eq b Int.one then add_undef vt
+    else if Int.eq b Int.zero then add_undef vf
+         else add_undef (vlub vt vf)
+  | _ => add_undef (vlub vt vf)
+  end.
+
 Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   match op, vl with
   | Omove, v1::nil => v1
@@ -59,10 +145,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omul, v1::v2::nil => mul v1 v2
   | Omulhs, v1::v2::nil => mulhs v1 v2
   | Omulhu, v1::v2::nil => mulhu v1 v2
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
-  | Omod, v1::v2::nil => mods v1 v2
-  | Omodu, v1::v2::nil => modu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
+  | Omod, v1::v2::nil => mods_total v1 v2
+  | Omodu, v1::v2::nil => modu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandimm n, v1::nil => and v1 (I n)
   | Oor, v1::v2::nil => or v1 v2
@@ -88,10 +174,10 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omull, v1::v2::nil => mull v1 v2
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
-  | Omodl, v1::v2::nil => modls v1 v2
-  | Omodlu, v1::v2::nil => modlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
+  | Omodl, v1::v2::nil => modls_total v1 v2
+  | Omodlu, v1::v2::nil => modlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlimm n, v1::nil => andl v1 (L n)
   | Oorl, v1::v2::nil => orl v1 v2
@@ -119,23 +205,64 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
-  | Ointuoffloat, v1::nil => intuoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
-  | Ointofsingle, v1::nil => intofsingle v1
-  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
   | Osingleofintu, v1::nil => singleofintu v1
-  | Olongoffloat, v1::nil => longoffloat v1
-  | Olonguoffloat, v1::nil => longuoffloat v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
   | Ofloatoflongu, v1::nil => floatoflongu v1
-  | Olongofsingle, v1::nil => longofsingle v1
-  | Olonguofsingle, v1::nil => longuofsingle v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
   | Osingleoflongu, v1::nil => singleoflongu v1
   | Ocmp c, _ => of_optbool (eval_static_condition c vl)
+  | OEseqw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Ceq) v1 v2 zero32)
+  | OEsnew optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Cne) v1 v2 zero32)
+  | OEsequw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Ceq) v1 v2 zero32)
+  | OEsneuw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Cne) v1 v2 zero32)
+  | OEsltw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Clt) v1 v2 zero32)
+  | OEsltuw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Clt) v1 v2 zero32)
+  | OEsltiw n, v1::nil => of_optbool (cmp_bool Clt v1 (I n))
+  | OEsltiuw n, v1::nil => of_optbool (cmpu_bool Clt v1 (I n))
+  | OExoriw n, v1::nil => xor v1 (I n)
+  | OEluiw n, nil => shl (I n) (I (Int.repr 12))
+  | OEaddiw optR n, nil => apply_bin_oreg optR add (I n) (Ifptr Ptop) zero32
+  | OEaddiw optR n, v1::nil => apply_bin_oreg optR add v1 (I n) (Ifptr Ptop)
+  | OEandiw n, v1::nil => and (I n) v1
+  | OEoriw n, v1::nil => or (I n) v1
+  | OEseql optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Ceq) v1 v2 zero64)
+  | OEsnel optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Cne) v1 v2 zero64)
+  | OEsequl optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Ceq) v1 v2 zero64)
+  | OEsneul optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Cne) v1 v2 zero64)
+  | OEsltl optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Clt) v1 v2 zero64)
+  | OEsltul optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Clt) v1 v2 zero64)
+  | OEsltil n, v1::nil => of_optbool (cmpl_bool Clt v1 (L n))
+  | OEsltiul n, v1::nil => of_optbool (cmplu_bool Clt v1 (L n))
+  | OEandil n, v1::nil => andl (L n) v1
+  | OEoril n, v1::nil => orl (L n) v1
+  | OExoril n, v1::nil => xorl v1 (L n)
+  | OEluil n, nil => sign_ext 32 (shll (L n) (L (Int64.repr 12)))
+  | OEaddil optR n, nil => apply_bin_oreg optR addl (L n) (Ifptr Ptop) zero64
+  | OEaddil optR n, v1::nil => apply_bin_oreg optR addl v1 (L n) (Ifptr Ptop)
+  | OEloadli n, nil => L (n)
+  | OEmayundef mu, v1 :: v2 :: nil => eval_may_undef mu v1 v2
+  | OEfeqd, v1::v2::nil => of_optbool (cmpf_bool Ceq v1 v2)
+  | OEfltd, v1::v2::nil => of_optbool (cmpf_bool Clt v1 v2)
+  | OEfled, v1::v2::nil => of_optbool (cmpf_bool Cle v1 v2)
+  | OEfeqs, v1::v2::nil => of_optbool (cmpfs_bool Ceq v1 v2)
+  | OEflts, v1::v2::nil => of_optbool (cmpfs_bool Clt v1 v2)
+  | OEfles, v1::v2::nil => of_optbool (cmpfs_bool Cle v1 v2)
+  | Obits_of_single, v1::nil => bits_of_single v1
+  | Obits_of_float, v1::nil => bits_of_float v1
+  | Osingle_of_bits, v1::nil => single_of_bits v1
+  | Ofloat_of_bits, v1::nil => float_of_bits v1
+  | Oselectl, vb::vt::vf::nil => select01_long vb vt vf
   | _, _ => Vbot
   end.
 
@@ -147,6 +274,75 @@ Hypothesis GENV: genv_match bc ge.
 Variable sp: block.
 Hypothesis STACK: bc sp = BCstack.
 
+Lemma bits_of_single_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.bits_of_single v) (bits_of_single x).
+Proof.
+  unfold ExtValues.bits_of_single; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma bits_of_float_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.bits_of_float v) (bits_of_float x).
+Proof.
+  unfold ExtValues.bits_of_float; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma single_of_bits_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.single_of_bits v) (single_of_bits x).
+Proof.
+  unfold ExtValues.bits_of_single; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma float_of_bits_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.float_of_bits v) (float_of_bits x).
+Proof.
+  unfold ExtValues.bits_of_float; intros. inv H; cbn; constructor.
+Qed.
+
+  
+Lemma select01_long_sound:
+  forall vb xb vt xt vf xf
+         (MATCH_b : vmatch bc vb xb)
+         (MATCH_t : vmatch bc vt xt)
+         (MATCH_f : vmatch bc vf xf),
+    vmatch bc (Val.normalize (ExtValues.select01_long vb vt vf) Tlong)
+              (select01_long xb xt xf).
+Proof.
+  intros.
+  inv MATCH_b; cbn; try apply add_undef_undef.
+  - destruct (Int.eq i Int.one). { apply add_undef_normalize; trivial. }
+    destruct (Int.eq i Int.zero). { apply add_undef_normalize; trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+Qed.
+
+Hint Resolve bits_of_single_sound bits_of_float_sound single_of_bits_sound float_of_bits_sound select01_long_sound : va.
+
 Theorem eval_static_condition_sound:
   forall cond vargs m aargs,
   list_forall2 (vmatch bc) vargs aargs ->
@@ -158,7 +354,9 @@ Proof.
   destruct cond; simpl; eauto with va.
   inv H2.
   destruct cond; simpl; eauto with va.
-  destruct cond; auto with va.
+  17: destruct cond; simpl; eauto with va.
+  all: destruct optR as [[]|]; unfold apply_bin_oreg, Op.apply_bin_oreg;
+       unfold zero32, Op.zero32, zero64, Op.zero64; eauto with va.
 Qed.
 
 Lemma symbol_address_sound:
@@ -200,6 +398,70 @@ Proof.
   rewrite Ptrofs.add_zero_l; eauto with va.
 Qed.
 
+Lemma of_optbool_maketotal_sound:
+  forall ob ab, cmatch ob ab -> vmatch bc (Val.maketotal (option_map Val.of_bool ob)) (of_optbool ab).
+Proof.
+  intros.
+  assert (DEFAULT: vmatch bc (Val.maketotal (option_map Val.of_bool ob)) (Uns Pbot 1)).
+  {
+    destruct ob; simpl; auto with va.
+    destruct b; constructor; try omega.
+    change 1 with (usize Int.one). apply is_uns_usize.
+    red; intros. apply Int.bits_zero.
+  }
+  inv H; auto. simpl. destruct b; constructor.
+Qed.
+
+Lemma eval_cmpu_sound c: forall a1 b1 a0 b0 optR m,
+  c = Ceq \/ c = Cne \/ c = Clt->
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc (Op.apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) c) a1 a0 Op.zero32)
+  (of_optbool (apply_bin_oreg optR (cmpu_bool c) b1 b0 zero32)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_sound; unfold Op.zero32, zero32; eauto with va.
+Qed.
+
+Lemma eval_cmplu_sound c: forall a1 b1 a0 b0 optR m,
+  c = Ceq \/ c = Cne \/ c = Clt->
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc
+    (Val.maketotal
+       (Op.apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) c) a1 a0
+          Op.zero64))
+    (of_optbool (apply_bin_oreg optR (cmplu_bool c) b1 b0 zero64)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_maketotal_sound; unfold Op.zero64, zero64; eauto with va.
+Qed.
+
+Lemma eval_cmp_sound: forall a1 b1 a0 b0 optR cmp,
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc (Op.apply_bin_oreg optR (Val.cmp cmp) a1 a0 Op.zero32)
+  (of_optbool (apply_bin_oreg optR (cmp_bool cmp) b1 b0 zero32)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_sound; unfold Op.zero32, zero32; eauto with va.
+Qed.
+
+Lemma eval_cmpl_sound: forall a1 b1 a0 b0 optR cmp,
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc
+    (Val.maketotal (Op.apply_bin_oreg optR (Val.cmpl cmp) a1 a0 Op.zero64))
+    (of_optbool (apply_bin_oreg optR (cmpl_bool cmp) b1 b0 zero64)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_maketotal_sound; unfold Op.zero64, zero64; eauto with va.
+Qed.
+
 Theorem eval_static_operation_sound:
   forall op vargs m vres aargs,
   eval_operation ge (Vptr sp Ptrofs.zero) op vargs m = Some vres ->
@@ -212,6 +474,39 @@ Proof.
   destruct (propagate_float_constants tt); constructor.
   rewrite Ptrofs.add_zero_l; eauto with va.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
+
+  3,4,6: apply eval_cmpu_sound; auto.
+  1,2,3: apply eval_cmp_sound; auto.
+  unfold Val.cmp; apply of_optbool_sound; eauto with va.
+  unfold Val.cmpu; apply of_optbool_sound; eauto with va.
+  
+  { destruct optR as [[]|]; simpl; eauto with va. }
+  { destruct optR as [[]|];
+    unfold apply_bin_oreg, Op.apply_bin_oreg; eauto with va. }
+  { fold (Val.and (Vint n) a1); eauto with va. }
+  { fold (Val.or (Vint n) a1); eauto with va. }
+  { simpl; try destruct (Int.ltu _ _); eauto with va; unfold ntop1;
+  try apply vmatch_ifptr_undef. }
+  9: { destruct optR as [[]|]; simpl; eauto with va. }
+  9: { destruct optR as [[]|];
+    unfold apply_bin_oreg, Op.apply_bin_oreg; eauto with va. }
+  9: { fold (Val.andl (Vlong n) a1); eauto with va. }
+  9: { fold (Val.orl (Vlong n) a1); eauto with va. }
+  9: { simpl; unfold ntop1, sign_ext, Int64.sign_ext, sgn; simpl;
+  apply vmatch_ifptr_l. }
+  
+  1,10: simpl; eauto with va.
+  10:
+    unfold Op.eval_may_undef, eval_may_undef; destruct mu;
+    inv H1; inv H0; eauto with va;
+    try destruct (Int.ltu _ _); simpl;
+    try eapply vmatch_ifptr_p, pmatch_top'; eauto with va.
+  
+  4,5,7: apply eval_cmplu_sound; auto.
+  1,3,4: apply eval_cmpl_sound; auto.
+  2: { unfold Val.cmpl; apply of_optbool_maketotal_sound; eauto with va. }
+  2: { unfold Val.cmplu; apply of_optbool_maketotal_sound; eauto with va. }
+  all: unfold Val.cmpf; apply of_optbool_sound; eauto with va.
 Qed.
 
 End SOUNDNESS.
diff --git a/runtime/Makefile b/runtime/Makefile
index ea3c914f..6f70fa87 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -38,6 +38,8 @@ OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   vararg.o
 endif
 
+AR=ar
+
 OBJS+=write_profiling_table.o
 
 LIB=libcompcert.a
@@ -59,7 +61,7 @@ endif
 
 $(LIB): $(OBJS)
 	rm -f $(LIB)
-	ar rcs $(LIB) $(OBJS)
+	$(AR) rcs $(LIB) $(OBJS)
 
 %.o: %.s
 	$(CASMRUNTIME) -o $@ $^
diff --git a/runtime/include/ccomp_kvx_fixes.h b/runtime/include/ccomp_kvx_fixes.h
index 65d65e7b..a518a069 100644
--- a/runtime/include/ccomp_kvx_fixes.h
+++ b/runtime/include/ccomp_kvx_fixes.h
@@ -33,13 +33,26 @@ extern __int128 __compcert_acswapd(void *address, unsigned long long new_value,
 #define __builtin_kvx_acswapw __compcert_acswapw
 extern __int128 __compcert_acswapw(void *address, unsigned long long new_value, unsigned long long old_value);
 
+#define __builtin_kvx_aladdd __compcert_aladdd
+extern long long __compcert_aladdd(void *address, unsigned long long incr);
+
+#define __builtin_kvx_aladdw __compcert_aladdw
+extern int __compcert_aladdw(void *address, unsigned int incr);
+
 #define __builtin_kvx_afaddd __compcert_afaddd
 extern long long __compcert_afaddd(void *address, unsigned long long incr);
 
 #define __builtin_kvx_afaddw __compcert_afaddw
 extern int __compcert_afaddw(void *address, unsigned int incr);
-#endif
+
+#define __builtin_kvx_ld __compcert_ld
+extern int __compcert_ld(void *address, const char *str, const int b);
+
+#define __builtin_kvx_lwz __compcert_lwz
+extern int __compcert_lwz(void *address, const char *str, const int b);
 
 /* #define __builtin_expect(x, y) (x) */
 #define __builtin_ctz(x) __builtin_kvx_ctzw(x)
 #define __builtin_clz(x) __builtin_kvx_clzw(x)
+
+#endif
diff --git a/runtime/kvx/ccomp_k1c_fixes.h b/runtime/kvx/ccomp_k1c_fixes.h
deleted file mode 120000
index b640c96e..00000000
--- a/runtime/kvx/ccomp_k1c_fixes.h
+++ /dev/null
@@ -1 +0,0 @@
-../include/ccomp_k1c_fixes.h
-\ No newline at end of file
diff --git a/runtime/x86_64/i64_dtou.S b/runtime/x86_64/i64_dtou.S
index cc822d67..7f12ae88 100644
--- a/runtime/x86_64/i64_dtou.S
+++ b/runtime/x86_64/i64_dtou.S
@@ -39,13 +39,13 @@
 // Conversion float -> unsigned long
 
 FUNCTION(__compcert_i64_dtou)
-        ucomisd .LC1(%rip), %xmm0
+        ucomisd .LC1(%rip), FP_ARG_1
         jnb 1f
-        cvttsd2siq %xmm0, %rax
+        cvttsd2siq FP_ARG_1, INT_RES
         ret
-1:      subsd .LC1(%rip), %xmm0
-        cvttsd2siq %xmm0, %rax
-        addq .LC2(%rip), %rax
+1:      subsd .LC1(%rip), FP_ARG_1
+        cvttsd2siq FP_ARG_1, INT_RES
+        addq .LC2(%rip), INT_RES
         ret
 
         .p2align 3
diff --git a/runtime/x86_64/i64_utod.S b/runtime/x86_64/i64_utod.S
index 62e6e484..4d4870fc 100644
--- a/runtime/x86_64/i64_utod.S
+++ b/runtime/x86_64/i64_utod.S
@@ -39,18 +39,18 @@
 // Conversion unsigned long -> double-precision float
 
 FUNCTION(__compcert_i64_utod)
-        testq   %rdi, %rdi
+        testq   INT_ARG_1, INT_ARG_1
         js      1f
-        pxor    %xmm0, %xmm0            // if < 2^63,
-        cvtsi2sdq %rdi, %xmm0           // convert as if signed
+        pxor    FP_RES, FP_RES          // if < 2^63,
+        cvtsi2sdq INT_ARG_1, FP_RES     // convert as if signed
         ret
 1:                                      // if >= 2^63, use round-to-odd trick
-        movq    %rdi, %rax
+        movq    INT_ARG_1, %rax
         shrq    %rax
-        andq    $1, %rdi
-        orq     %rdi, %rax              // (arg >> 1) | (arg & 1)
-        pxor    %xmm0, %xmm0
-        cvtsi2sdq %rax, %xmm0           // convert as if signed
-        addsd   %xmm0, %xmm0            // multiply result by 2.0
+        andq    $1, INT_ARG_1
+        orq     INT_ARG_1, %rax         // (arg >> 1) | (arg & 1)
+        pxor    FP_RES, FP_RES
+        cvtsi2sdq %rax, FP_RES          // convert as if signed
+        addsd   FP_RES, FP_RES          // multiply result by 2.0
         ret
 ENDFUNCTION(__compcert_i64_utod)       
diff --git a/runtime/x86_64/i64_utof.S b/runtime/x86_64/i64_utof.S
index 63a33920..0e878121 100644
--- a/runtime/x86_64/i64_utof.S
+++ b/runtime/x86_64/i64_utof.S
@@ -39,18 +39,18 @@
 // Conversion unsigned long -> single-precision float
 
 FUNCTION(__compcert_i64_utof)
-        testq   %rdi, %rdi
+        testq   INT_ARG_1, INT_ARG_1
         js      1f
-        pxor    %xmm0, %xmm0            // if < 2^63,
-        cvtsi2ssq %rdi, %xmm0           // convert as if signed
+        pxor    FP_RES, FP_RES          // if < 2^63,
+        cvtsi2ssq INT_ARG_1, FP_RES     // convert as if signed
         ret
 1:                                      // if >= 2^63, use round-to-odd trick
-        movq    %rdi, %rax
+        movq    INT_ARG_1, %rax
         shrq    %rax
-        andq    $1, %rdi
-        orq     %rdi, %rax              // (arg >> 1) | (arg & 1)
-        pxor    %xmm0, %xmm0
-        cvtsi2ssq %rax, %xmm0           // convert as if signed
-        addss   %xmm0, %xmm0            // multiply result by 2.0
+        andq    $1, INT_ARG_1
+        orq     INT_ARG_1, %rax         // (arg >> 1) | (arg & 1)
+        pxor    FP_RES, FP_RES
+        cvtsi2ssq %rax, FP_RES          // convert as if signed
+        addss   FP_RES, FP_RES          // multiply result by 2.0
         ret
 ENDFUNCTION(__compcert_i64_utof)       
diff --git a/runtime/x86_64/sysdeps.h b/runtime/x86_64/sysdeps.h
index e9d456af..aacef8f0 100644
--- a/runtime/x86_64/sysdeps.h
+++ b/runtime/x86_64/sysdeps.h
@@ -63,13 +63,25 @@ _##f:
 
 #if defined(SYS_cygwin)
 
-#define GLOB(x) _##x
+#define GLOB(x) x
 #define FUNCTION(f) \
 	.text; \
-	.globl _##f; \
+	.globl f; \
 	.align 16; \
-_##f:
+f:
 
 #define ENDFUNCTION(f)
 
 #endif
+
+// Names for argument and result registers
+
+#if defined(SYS_cygwin)
+#define INT_ARG_1 %rcx
+#else
+#define INT_ARG_1 %rdi
+#endif
+#define FP_ARG_1 %xmm0
+#define INT_RES %rax
+#define FP_RES %xmm0
+
diff --git a/runtime/x86_64/vararg.S b/runtime/x86_64/vararg.S
index 9c0d787b..c5225b34 100644
--- a/runtime/x86_64/vararg.S
+++ b/runtime/x86_64/vararg.S
@@ -34,6 +34,12 @@
 
 // Helper functions for variadic functions <stdarg.h>.  x86_64 version.
 	
+#include "sysdeps.h"
+
+// ELF ABI
+
+#if defined(SYS_linux) || defined(SYS_bsd) || defined(SYS_macosx)
+
 // typedef struct {
 //    unsigned int gp_offset;
 //    unsigned int fp_offset;
@@ -60,8 +66,6 @@
 // unsigned long long __compcert_va_int64(va_list ap);
 // double __compcert_va_float64(va_list ap);
 
-#include "sysdeps.h"
-
 FUNCTION(__compcert_va_int32)
         movl    0(%rdi), %edx           // edx = gp_offset
         cmpl    $48, %edx
@@ -146,3 +150,58 @@ FUNCTION(__compcert_va_saveregs)
         movaps %xmm7, 160(%r10)
 1:      ret
 ENDFUNCTION(__compcert_va_saveregs)
+
+#endif
+
+// Windows ABI
+
+#if defined(SYS_cygwin)
+
+// typedef void * va_list;
+// unsigned int __compcert_va_int32(va_list * ap);
+// unsigned long long __compcert_va_int64(va_list * ap);
+// double __compcert_va_float64(va_list * ap);
+
+FUNCTION(__compcert_va_int32)      // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movl 0(%rdx), %eax         // load the int32 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)
+        ret
+ENDFUNCTION(__compcert_va_int32)
+        
+FUNCTION(__compcert_va_int64)      // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movq 0(%rdx), %rax         // load the int64 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)
+        ret
+ENDFUNCTION(__compcert_va_int64)
+	
+FUNCTION(__compcert_va_float64)    // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movsd 0(%rdx), %xmm0       // load the float64 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)
+        ret
+ENDFUNCTION(__compcert_va_float64)
+
+FUNCTION(__compcert_va_composite)
+        jmp     GLOB(__compcert_va_int64)     // by-ref convention, FIXME
+ENDFUNCTION(__compcert_va_composite)
+
+// Save arguments passed in register in the stack at beginning of vararg
+// function.  The caller of the vararg function reserved 32 bytes of stack
+// just for this purpose.
+// FP arguments are passed both in FP registers and integer registers,
+// so it's enough to save the integer registers used for parameter passing.
+
+FUNCTION(__compcert_va_saveregs)
+        movq %rcx, 16(%rsp)
+        movq %rdx, 24(%rsp)
+        movq %r8, 32(%rsp)
+        movq %r9, 40(%rsp)
+        ret
+ENDFUNCTION(__compcert_va_saveregs)
+
+#endif
diff --git a/kvx/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..eab0b21a 100644
--- a/kvx/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
diff --git a/kvx/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index 85e2a5c6..fb7af3f6 100644
--- a/kvx/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -33,6 +33,12 @@ type problem = {
 (** Print problem for human readability. *)
 val print_problem : out_channel -> problem -> unit;;
 
+(** Get the number of instructions in a problem *)
+val get_nr_instructions : problem -> int;;
+
+(** Get the number of resources in a problem *)
+val get_nr_resources : problem -> int;;
+
 (** Scheduling solution. For {i n} instructions to schedule, and 0≤{i i}<{i n}, position {i i} contains the time to which instruction {i i} should be scheduled. Position {i n} contains the final output latency. *)
 type solution = int array
               
diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v
new file mode 100644
index 00000000..5b34dc16
--- /dev/null
+++ b/scheduling/RTLpath.v
@@ -0,0 +1,1067 @@
+(** We introduce a data-structure extending the RTL CFG into a control-flow graph over "traces" (in the sense of "trace-scheduling")
+    Here, we use the word "path" instead of "trace" because "trace" has already a meaning in CompCert: 
+      a "path" is simply a list of successive nodes in the CFG (modulo some additional wellformness conditions).
+
+    Actually, we extend syntactically the notion of RTL programs with a structure of "path_map": 
+    this gives an alternative view of the CFG -- where "nodes" are paths instead of simple instructions.
+    Our wellformness condition on paths express that:
+      - the CFG on paths is wellformed: any successor of a given path points to another path (possibly the same).
+      - execution of a paths only emit single events.
+
+    We represent each path only by a natural: the number of nodes in the path. These nodes are recovered from a static notion of "default successor".
+    This notion of path is thus incomplete. For example, if a path contains a whole loop (and for example, unrools it several times), 
+    then this loop must be a suffix of the path.
+
+    However: it is sufficient in order to represent superblocks (each superblock being represented as a path).
+    A superblock decomposition of the CFG exactly corresponds to the case where each node is in at most one path.
+
+    Our goal is to provide two bisimulable semantics:
+      - one is simply the RTL semantics
+      - the other is based on a notion of "path-step": each path is executed in a single step.
+
+    Remark that all analyses on RTL programs should thus be appliable for "free" also for RTLpath programs !
+*)
+
+Require Import Coqlib Maps.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL Linking.
+
+Declare Scope option_monad_scope.
+
+Notation "'SOME' X <- A 'IN' B" := (match A with Some X => B | None => None end)
+         (at level 200, X ident, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Notation "'ASSERT' A 'IN' B" := (if A then B else None)
+         (at level 200, A at level 100, B at level 200)
+         : option_monad_scope.
+
+Local Open Scope option_monad_scope.
+
+(** * Syntax of RTLpath programs *)
+
+(** Internal instruction = instruction with a default successor in a path. *)
+
+Definition default_succ (i: instruction): option node :=
+  match i with
+  | Inop s => Some s
+  | Iop op args res s => Some s
+  | Iload _ chunk addr args dst s => Some s
+  | Istore chunk addr args src s => Some s
+  | Icond cond args ifso ifnot _ => Some ifnot
+  | _ => None (* TODO: we could choose a successor for jumptable ? *)
+  end.
+
+Definition early_exit (i: instruction): option node := (* FIXME: for jumptable, replace [node] by [list node] *)
+  match i with
+  | Icond cond args ifso ifnot _ => Some ifso
+  | _ => None
+  end.
+
+(** Our notion of path. 
+
+    We do not formally require that the set of path is a partition of the CFG.
+    path may have intersections !
+
+    Moreover, we do not formally require that path have a single entry-point (a superblock structure) 
+
+    But, in practice, these properties are probably necessary in order to ensure the success of dynamic verification of scheduling.
+
+    Here: we only require that each exit-point of a path is the entry-point of a path
+     (and that internal node of a path are internal instructions)
+*)
+
+
+(* By convention, we say that node [n] is the entry-point of a path if it is a key of the path_map.
+
+   Such a path of entry [n] is defined from a natural [path] representing the [path] default-successors of [n].
+
+   Remark: a path can loop several times in the CFG.
+
+*)
+
+Record path_info := { 
+    psize: nat; (* number minus 1 of instructions in the path *)
+    input_regs: Regset.t;
+    (** Registers that are used (as input_regs) by the "fallthrough successors" of the path *)
+    pre_output_regs: Regset.t;
+    (** This field is not used by the verificator, but is helpful for the superblock scheduler *)
+    output_regs: Regset.t 
+}.
+
+Definition path_map: Type := PTree.t path_info.
+
+Definition path_entry (pm: path_map) (n: node): Prop := pm!n <> None.
+
+Inductive wellformed_path (c:code) (pm: path_map): nat -> node -> Prop :=
+  | wf_last_node i pc:
+      c!pc = Some i ->
+      (forall n, List.In n (successors_instr i) -> path_entry (*c*) pm n) ->
+      wellformed_path c pm 0 pc
+  | wf_internal_node path i pc pc':
+      c!pc = Some i ->
+      default_succ i = Some pc' ->
+      (forall n, early_exit i = Some n -> path_entry (*c*) pm n) ->
+      wellformed_path c pm path pc' ->
+      wellformed_path c pm (S path) pc.
+
+(* all paths defined from the path_map are wellformed *)
+Definition wellformed_path_map (c:code) (pm: path_map): Prop :=
+  forall n path, pm!n = Some path -> wellformed_path c pm path.(psize) n.
+
+(** We "extend" the notion of RTL program with the additional structure for path.
+
+    There is thus a trivial "forgetful functor" from RTLpath programs to RTL ones.
+*)
+
+Record function : Type := 
+ { fn_RTL:> RTL.function;
+   fn_path: path_map;
+   (* condition 1 below: the entry-point of the code is an entry-point of a path *) 
+   fn_entry_point_wf: path_entry fn_path fn_RTL.(fn_entrypoint); 
+   (* condition 2 below: the path_map is well-formed *) 
+   fn_path_wf: wellformed_path_map fn_RTL.(fn_code) fn_path
+ }.
+
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+Definition fundef_RTL (fu: fundef) : RTL.fundef := 
+  match fu with
+  | Internal f => Internal f.(fn_RTL)
+  | External ef => External ef
+  end.
+Coercion fundef_RTL: fundef >-> RTL.fundef.
+
+Definition transf_program (p: program) : RTL.program := transform_program fundef_RTL p.
+Coercion transf_program: program >-> RTL.program.
+
+(** * Path-step semantics of RTLpath programs *)
+
+(* Semantics of internal instructions (mimicking RTL semantics) *)
+
+Record istate := mk_istate { icontinue: bool; ipc: node; irs: regset; imem: mem }.
+
+(* FIXME - prediction *)
+(* Internal step through the path *)
+Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem): option istate :=
+  match i with
+  | Inop pc' => Some (mk_istate true pc' rs m)
+  | Iop op args res pc' =>
+      SOME v <- eval_operation ge sp op rs##args m IN
+      Some (mk_istate true pc' (rs#res <- v) m)
+  | Iload TRAP chunk addr args dst pc' =>
+      SOME a <- eval_addressing ge sp addr rs##args IN
+      SOME v <- Mem.loadv chunk m a IN
+      Some (mk_istate true pc' (rs#dst <- v) m)
+  | Iload NOTRAP chunk addr args dst pc' =>
+      let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in
+      match (eval_addressing ge sp addr rs##args) with
+      | None => Some default_state
+      | Some a => match (Mem.loadv chunk m a) with
+          | None => Some default_state
+          | Some v => Some (mk_istate true pc' (rs#dst <- v) m)
+          end
+      end
+  | Istore chunk addr args src pc' =>
+      SOME a <- eval_addressing ge sp addr rs##args IN
+      SOME m' <- Mem.storev chunk m a rs#src IN
+      Some (mk_istate true pc' rs m')
+  | Icond cond args ifso ifnot _ =>
+      SOME b <- eval_condition cond rs##args m IN
+      Some (mk_istate (negb b) (if b then ifso else ifnot) rs m)
+  | _ => None (* TODO jumptable ? *)
+  end.
+
+(** Execution of a path in a single step *)
+
+(* Executes until a state [st] is reached where st.(continue) is false *)
+Fixpoint isteps ge (path:nat) (f: function) sp rs m pc: option istate :=
+  match path with
+  | O => Some (mk_istate true pc rs m)
+  | S p =>
+    SOME i <- (fn_code f)!pc IN
+    SOME st <- istep ge i sp rs m IN
+    if (icontinue st) then
+      isteps ge p f sp (irs st) (imem st) (ipc st)
+    else
+      Some st
+  end.
+
+Definition find_function (pge: genv) (ros: reg + ident) (rs: regset) : option fundef :=
+  match ros with
+  | inl r => Genv.find_funct pge rs#r
+  | inr symb =>
+      match Genv.find_symbol pge symb with
+      | None => None
+      | Some b => Genv.find_funct_ptr pge b
+      end
+  end.
+
+Inductive stackframe : Type :=
+  | Stackframe
+             (res: reg)            (**r where to store the result *)
+             (f: function)         (**r calling function *)
+             (sp: val)             (**r stack pointer in calling function *)
+             (pc: node)            (**r program point in calling function *)
+             (rs: regset)          (**r register state in calling function *)
+  .
+
+Definition stf_RTL (st: stackframe): RTL.stackframe :=
+  match st with
+  | Stackframe res f sp pc rs => RTL.Stackframe res f sp pc rs
+  end.
+
+Fixpoint stack_RTL (stack: list stackframe): list RTL.stackframe :=
+  match stack with
+  | nil => nil
+  | cons stf stack' => cons (stf_RTL stf) (stack_RTL stack')
+  end.
+
+Inductive state : Type :=
+  | State
+             (stack: list stackframe) (**r call stack *)
+             (f: function)            (**r current function *)
+             (sp: val)                (**r stack pointer *)
+             (pc: node)               (**r current program point in [c] *)
+             (rs: regset)             (**r register state *)
+             (m: mem)                 (**r memory state *)
+  | Callstate
+             (stack: list stackframe) (**r call stack *)
+             (f: fundef)              (**r function to call *)
+             (args: list val)         (**r arguments to the call *)
+             (m: mem)                 (**r memory state *)
+  | Returnstate
+             (stack: list stackframe) (**r call stack *)
+             (v: val)                 (**r return value for the call *)
+             (m: mem)                 (**r memory state *)
+  .
+
+Definition state_RTL (s: state): RTL.state :=
+  match s with
+  | State stack f sp pc rs m => RTL.State (stack_RTL stack) f sp pc rs m
+  | Callstate stack f args m => RTL.Callstate (stack_RTL stack) f args m
+  | Returnstate stack v m => RTL.Returnstate (stack_RTL stack) v m
+  end.
+Coercion state_RTL: state >-> RTL.state.
+
+(* Used to execute the last instruction of a path (isteps is only in charge of executing the instructions before the last) *)
+Inductive path_last_step ge pge stack (f: function): val -> node -> regset -> mem -> trace -> state -> Prop :=
+  | exec_istate i sp pc rs m st:
+     (fn_code f)!pc = Some i ->
+     istep ge i sp rs m = Some st ->
+     path_last_step ge pge stack f sp pc rs m
+                    E0 (State stack f sp (ipc st) (irs st) (imem st))
+  | exec_Icall sp pc rs m sig ros args res pc' fd:
+      (fn_code f)!pc = Some(Icall sig ros args res pc') ->
+      find_function pge ros rs = Some fd ->
+      funsig fd = sig ->
+      path_last_step ge pge stack f sp pc rs m
+        E0 (Callstate (Stackframe res f sp pc' rs :: stack) fd rs##args m)
+  | exec_Itailcall stk pc rs m sig ros args fd m':
+      (fn_code f)!pc = Some(Itailcall sig ros args) ->
+      find_function pge ros rs = Some fd ->
+      funsig fd = sig ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      path_last_step ge pge stack f (Vptr stk Ptrofs.zero) pc rs m
+        E0 (Callstate stack fd rs##args m')
+  | exec_Ibuiltin sp pc rs m ef args res pc' vargs t vres m':
+      (fn_code f)!pc = Some(Ibuiltin ef args res pc') ->
+      eval_builtin_args ge (fun r => rs#r) sp m args vargs ->
+      external_call ef ge vargs m t vres m' ->
+      path_last_step ge pge stack f sp pc rs m
+         t (State stack f sp pc' (regmap_setres res vres rs) m')
+  | exec_Ijumptable sp pc rs m arg tbl n pc': (* TODO remove jumptable from here ? *)
+      (fn_code f)!pc = Some(Ijumptable arg tbl) ->
+      rs#arg = Vint n ->
+      list_nth_z tbl (Int.unsigned n) = Some pc' ->
+      path_last_step ge pge stack f sp pc rs m
+        E0 (State stack f sp pc' rs m)
+  | exec_Ireturn stk pc rs m or m':
+      (fn_code f)!pc = Some(Ireturn or) ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      path_last_step ge pge stack f (Vptr stk Ptrofs.zero) pc rs m
+        E0 (Returnstate stack (regmap_optget or Vundef rs) m').
+
+(* Executes an entire path *)
+Inductive path_step ge pge (path:nat) stack f sp rs m pc: trace -> state -> Prop :=
+  | exec_early_exit st:
+     isteps ge path f sp rs m pc = Some st ->
+     (icontinue st) = false ->
+     path_step ge pge path stack f sp rs m pc E0 (State stack f sp (ipc st) (irs st) (imem st))
+  | exec_normal_exit st t s:
+     isteps ge path f sp rs m pc = Some st ->
+     (icontinue st) = true ->
+     path_last_step ge pge stack f sp (ipc st) (irs st) (imem st) t s ->
+     path_step ge pge path stack f sp rs m pc t s.
+
+(* Either internal path execution, or the usual exec_function / exec_return borrowed from RTL *)
+Inductive step ge pge: state -> trace -> state -> Prop :=
+  | exec_path path stack f sp rs m pc t s: 
+      (fn_path f)!pc = Some path ->
+      path_step ge pge path.(psize) stack f sp rs m pc t s ->
+      step ge pge (State stack f sp pc rs m) t s
+  | exec_function_internal s f args m m' stk:
+      Mem.alloc m 0 (fn_RTL f).(fn_stacksize) = (m', stk) ->
+      step ge pge (Callstate s (Internal f) args m)
+        E0 (State s
+                  f
+                  (Vptr stk Ptrofs.zero)
+                  f.(fn_entrypoint)
+                  (init_regs args f.(fn_params))
+                  m')
+  | exec_function_external s ef args res t m m':
+      external_call ef ge args m t res m' ->
+      step ge pge (Callstate s (External ef) args m) 
+            t (Returnstate s res m')
+  | exec_return res f sp pc rs s vres m:
+      step ge pge (Returnstate (Stackframe res f sp pc rs :: s) vres m)
+        E0 (State s f sp pc (rs#res <- vres) m).
+
+Inductive initial_state (p:program) : state -> Prop :=
+    initial_state_intro  (b : block) (f : fundef) (m0 : mem):
+                          Genv.init_mem p = Some m0 ->
+                          Genv.find_symbol (Genv.globalenv p) (prog_main p) = Some b ->
+                          Genv.find_funct_ptr (Genv.globalenv p) b = Some f ->
+                          funsig f = signature_main -> initial_state p (Callstate nil f nil m0).
+
+Definition final_state (st: state) (i:int): Prop 
+ := RTL.final_state st i.
+
+Definition semantics (p: program) :=
+  Semantics (step (Genv.globalenv (transf_program p))) (initial_state p) final_state (Genv.globalenv p).
+
+(** * Proving the bisimulation between (semantics p) and (RTL.semantics p). *)
+
+(** ** Preliminaries: simple tactics for option-monad *)
+
+Lemma destruct_SOME A B (P: option B -> Prop) (e: option A) (f: A -> option B):
+  (forall x, e = Some x -> P (f x)) -> (e = None -> P None) -> (P (SOME x <- e IN f x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Lemma destruct_ASSERT B (P: option B -> Prop) (e: bool) (x: option B):
+  (e = true -> P x) -> (e = false -> P None) -> (P (ASSERT e IN x)).
+Proof.
+  intros; destruct e; simpl; auto.
+Qed.
+
+Ltac inversion_SOME x :=
+  try (eapply destruct_SOME; [ let x := fresh x in intro x | simpl; try congruence ]).
+
+Ltac inversion_ASSERT :=
+  try (eapply destruct_ASSERT; [ idtac | simpl; try congruence ]).
+
+Ltac simplify_someHyp :=
+  match goal with
+  | H: None = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ = None |- _  => inversion H; clear H; subst
+  | H: ?t = ?t |- _ => clear H
+  | H: Some _ = Some _ |- _  => inversion H; clear H; subst
+  | H: Some _ <> None |- _ => clear H
+  | H: None <> Some _ |- _ => clear H
+  | H: _ = Some _ |- _ => (try rewrite !H in * |- *); generalize H; clear H
+  | H: _ = None |- _ => (try rewrite !H in * |- *); generalize H; clear H
+  end.
+
+Ltac explore_destruct :=
+  repeat (match goal with
+  | [H: ?expr = ?val |- context[match ?expr with | _ => _ end]] => rewrite H
+  | [H: match ?var with | _ => _ end |- _] => destruct var
+  | [ |- context[match ?m with | _ => _ end] ] => destruct m
+  | _ => discriminate
+  end).
+
+Ltac simplify_someHyps := 
+  repeat (simplify_someHyp; simpl in * |- *).
+
+Ltac try_simplify_someHyps := 
+  try (intros; simplify_someHyps; eauto).
+
+(* TODO: try to improve this tactic with a better control over names and inversion *)
+Ltac simplify_SOME x :=
+  (repeat inversion_SOME x); try_simplify_someHyps.
+
+(** ** The easy way: Forward simulation of RTLpath by RTL 
+
+This way can be viewed as a correctness property: all transitions in RTLpath are valid RTL transitions !
+
+*)
+
+Local Hint Resolve RTL.exec_Inop RTL.exec_Iop RTL.exec_Iload RTL.exec_Istore RTL.exec_Icond RTL.exec_Iload_notrap1 RTL.exec_Iload_notrap2: core.
+
+(* istep reflects RTL.step *)
+Lemma istep_correct ge i stack (f:function) sp rs m st :
+  istep ge i sp rs m = Some st ->
+  forall pc, (fn_code f)!pc = Some i ->
+  RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  1-3: explore_destruct; simplify_SOME x.
+Qed.
+
+Local Hint Resolve star_refl: core.
+
+(* isteps reflects a star relation on RTL.step *)
+Lemma isteps_correct ge path stack f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  star RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  induction path; simpl; try_simplify_someHyps.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:cont.
+  + intros; eapply star_step.
+    - eapply istep_correct; eauto.
+    - simpl; eauto.
+    - auto.
+  + intros; simplify_someHyp; eapply star_step.
+    - eapply istep_correct; eauto.
+    - simpl; eauto.
+    - auto.
+Qed.
+
+Lemma isteps_correct_early_exit ge path stack f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  st.(icontinue) = false ->
+  plus RTL.step ge (State stack f sp pc rs m) E0 (State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  destruct path; simpl; try_simplify_someHyps; try congruence.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:cont.
+  + intros; eapply plus_left.
+    - eapply istep_correct; eauto.
+    - eapply isteps_correct; eauto.
+    - auto.
+  + intros X; inversion X; subst.
+    eapply plus_one.
+    eapply istep_correct; eauto.
+Qed.
+
+Local Hint Resolve list_forall2_nil match_globdef_fun linkorder_refl match_globvar_intro: core.
+
+Section CORRECTNESS.
+
+Variable p: program.
+
+Lemma match_prog_RTL: match_program (fun _ f tf => tf = fundef_RTL f) eq p (transf_program p).
+Proof.
+  eapply match_transform_program; eauto.
+Qed.
+
+Let pge := Genv.globalenv p.
+Let ge := Genv.globalenv (transf_program p).
+
+Lemma senv_preserved: Senv.equiv pge ge.
+Proof (Genv.senv_match match_prog_RTL).
+
+Lemma symbols_preserved s: Genv.find_symbol ge s = Genv.find_symbol pge s.
+Proof (Genv.find_symbol_match match_prog_RTL s).
+
+Lemma find_function_RTL_match ros rs fd:
+  find_function pge ros rs = Some fd -> RTL.find_function ge ros rs = Some (fundef_RTL fd).
+Proof.
+  destruct ros; simpl.
+  + intro; exploit (Genv.find_funct_match match_prog_RTL); eauto.
+    intros (cuint & tf & H1 & H2 & H3); subst; auto.
+  + rewrite symbols_preserved.
+    destruct (Genv.find_symbol pge i); simpl; try congruence.
+    intro; exploit (Genv.find_funct_ptr_match match_prog_RTL); eauto. 
+    intros (cuint & tf & H1 & H2 & H3); subst; auto.
+Qed.
+
+Local Hint Resolve istep_correct RTL.exec_Ibuiltin RTL.exec_Ijumptable RTL.exec_Ireturn RTL.exec_Icall RTL.exec_Itailcall find_function_RTL_match: core.
+
+Lemma path_last_step_correct stack f sp pc rs m t s:
+  path_last_step ge pge stack f sp pc rs m t s ->
+  RTL.step ge (State stack f sp pc rs m) t s.
+Proof.
+  destruct 1; try (eapply istep_correct); simpl; eauto.
+Qed.
+
+Lemma path_step_correct path stack f sp pc rs m t s:
+  path_step ge pge path stack f sp rs m pc t s ->
+  plus RTL.step ge (State stack f sp pc rs m) t s.
+Proof.
+  destruct 1.
+  + eapply isteps_correct_early_exit; eauto.
+  + eapply plus_right.
+    eapply isteps_correct; eauto.
+    eapply path_last_step_correct; eauto.
+    auto.
+Qed.
+
+Local Hint Resolve plus_one RTL.exec_function_internal RTL.exec_function_external RTL.exec_return: core.
+
+Lemma step_correct s t s': step ge pge s t s' -> plus RTL.step ge s t s'.
+Proof.
+  destruct 1; try (eapply path_step_correct); simpl; eauto.
+Qed.
+
+Theorem RTLpath_correct: forward_simulation (semantics p) (RTL.semantics p).
+Proof.
+  eapply forward_simulation_plus with (match_states := fun s1 s2 => s2 = state_RTL s1); simpl; auto.
+  - apply senv_preserved.
+  - destruct 1; intros; eexists; intuition eauto. econstructor; eauto.
+    + apply (Genv.init_mem_match match_prog_RTL); auto.
+    + rewrite (Genv.find_symbol_match match_prog_RTL).
+      rewrite (match_program_main match_prog_RTL); eauto.
+    + exploit (Genv.find_funct_ptr_match match_prog_RTL); eauto.
+      intros (cunit & tf0 & XX); intuition subst; eauto.
+  - unfold final_state; intros; subst; eauto.
+  - intros; subst. eexists; intuition.
+    eapply step_correct; eauto.
+Qed.
+
+End CORRECTNESS.
+
+Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
+  prog_defs p1 = prog_defs p2 ->
+  prog_public p1 = prog_public p2 ->
+  prog_main p1 = prog_main p2 ->
+  p1 = p2.
+Proof.
+  intros. destruct p1. destruct p2. simpl in *. subst. auto.
+Qed.
+
+Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l.
+Proof.
+  intros. congruence.
+Qed.
+
+(* Definition transf_program : RTLpath.program -> RTL.program := transform_program fundef_RTL.
+
+Lemma transf_program_proj: forall p, transf_program (transf_program p) = p.
+Proof.
+  intros p. destruct p as [defs pub main]. unfold program_proj. simpl.
+  apply program_equals; simpl; auto.
+  induction defs.
+  - simpl; auto.
+  - simpl. rewrite IHdefs. 
+    destruct a as [id gd]; simpl.
+    destruct gd as [f|v]; simpl; auto.
+    rewrite transf_fundef_proj. auto.
+Qed. *)
+
+
+(** The hard way: Forward simulation of RTL by RTLpath 
+
+This way can be viewed as a completeness property: all transitions in RTL can be represented as RTLpath transitions !
+
+*)
+
+(* This lemma is probably needed to compose a pass from RTL -> RTLpath with other passes.*)
+Lemma match_RTL_prog {LA: Linker fundef} {LV: Linker unit} p: match_program (fun _ f tf => f = fundef_RTL tf) eq (transf_program p) p.
+Proof. 
+  unfold match_program, match_program_gen; intuition.
+  unfold transf_program at 2; simpl.
+  generalize (prog_defs p).
+  induction l as [|a l]; simpl; eauto.
+  destruct a; simpl.
+  intros; eapply list_forall2_cons; eauto.
+  unfold match_ident_globdef; simpl; intuition; destruct g as [f|v]; simpl; eauto.
+  eapply match_globdef_var. destruct v; eauto.
+Qed.
+
+(* Theory of wellformed paths *)
+
+Fixpoint nth_default_succ (c: code) (path:nat) (pc: node): option node :=
+  match path with
+  | O => Some pc
+  | S path' => 
+     SOME i <- c!pc IN
+     SOME pc' <- default_succ i IN
+     nth_default_succ c path' pc' 
+  end.
+
+Lemma wellformed_suffix_path c pm path path':
+   (path' <= path)%nat -> 
+  forall pc, wellformed_path c pm path pc -> 
+   exists pc', nth_default_succ c (path-path') pc = Some pc' /\ wellformed_path c pm path' pc'.
+Proof.
+  induction 1 as [|m].
+  + intros. enough (path'-path'=0)%nat as ->; [simpl;eauto|omega].
+  + intros pc WF; enough (S m-path'=S (m-path'))%nat as ->; [simpl;eauto|omega].
+    inversion WF; subst; clear WF; intros; simplify_someHyps.
+    intros; simplify_someHyps; eauto.
+Qed.
+
+Definition nth_default_succ_inst (c: code) (path:nat) pc: option instruction :=
+   SOME pc <- nth_default_succ c path pc IN
+   c!pc.
+
+Lemma final_node_path f path pc:
+   (fn_path f)!pc = Some path -> 
+   exists i, nth_default_succ_inst (fn_code f) path.(psize) pc = Some i 
+             /\ (forall n, List.In n (successors_instr i) -> path_entry (*fn_code f*) (fn_path f) n).
+Proof.
+  intros; exploit fn_path_wf; eauto.
+  intro WF.
+  set (ps:=path.(psize)).
+  exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps O); omega || eauto.
+  destruct 1 as (pc' & NTH_SUCC & WF'); auto.
+  assert (ps - 0 = ps)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH.
+  unfold nth_default_succ_inst.
+  inversion WF'; clear WF'; subst. simplify_someHyps; eauto.
+Qed.
+
+Lemma internal_node_path path f path0 pc:
+   (fn_path f)!pc = (Some path0) -> 
+   (path < path0.(psize))%nat ->
+   exists i pc',
+      nth_default_succ_inst (fn_code f) path pc = Some i /\
+      default_succ i = Some pc' /\
+      (forall n, early_exit i = Some n -> path_entry (*fn_code f*) (fn_path f) n).
+Proof.
+  intros; exploit fn_path_wf; eauto.
+  set (ps:=path0.(psize)).
+  intro WF; exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps (ps-path)); eauto. { omega. }
+  destruct 1 as (pc' & NTH_SUCC & WF').
+  assert (ps - (ps - path) = path)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH.
+  unfold nth_default_succ_inst. 
+  inversion WF'; clear WF'; subst. { omega. }
+  simplify_someHyps; eauto.
+Qed.
+
+Lemma initialize_path (*c*) pm n: path_entry (*c*) pm n -> exists path, pm!n = Some path.
+Proof.
+  unfold path_entry; destruct pm!n; eauto. intuition congruence.
+Qed.
+Local Hint Resolve fn_entry_point_wf: core.
+Local Opaque path_entry.
+
+Lemma istep_successors ge i sp rs m st:
+  istep ge i sp rs m = Some st -> 
+  In (ipc st) (successors_instr i).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  all: explore_destruct; simplify_SOME x.
+Qed.
+
+Lemma istep_normal_exit ge i sp rs m st:
+  istep ge i sp rs m = Some st ->
+  st.(icontinue) = true ->
+  default_succ i = Some st.(ipc).
+Proof.
+  destruct i; simpl; try congruence; simplify_SOME x.
+  all: explore_destruct; simplify_SOME x.
+Qed.
+
+Lemma isteps_normal_exit ge path f sp: forall rs m pc st,
+  st.(icontinue) = true ->
+  isteps ge path f sp rs m pc = Some st ->
+  nth_default_succ (fn_code f) path pc = Some st.(ipc).
+Proof.
+  induction path; simpl. { try_simplify_someHyps. }
+  intros rs m pc st CONT; try_simplify_someHyps.
+  inversion_SOME i; intros Hi.
+  inversion_SOME st0; intros Hst0.
+  destruct (icontinue st0) eqn:X; try congruence.
+  try_simplify_someHyps.
+  intros; erewrite istep_normal_exit; eauto.
+Qed.
+
+
+(* TODO: the three following lemmas could maybe simplified by introducing an auxiliary
+    left-recursive definition equivalent to isteps ?
+*)
+Lemma isteps_step_right ge path f sp: forall rs m pc st i,
+  isteps ge path f sp rs m pc = Some st ->
+  st.(icontinue) = true ->
+  (fn_code f)!(st.(ipc)) = Some i ->
+  istep ge i sp st.(irs) st.(imem) = isteps ge (S path) f sp rs m pc.
+Proof.
+  induction path.
+  + simpl; intros; try_simplify_someHyps. simplify_SOME st.
+    destruct st as [b]; destruct b; simpl; auto.
+  + intros rs m pc st i H.
+    simpl in H.
+    generalize H; clear H; simplify_SOME xx.
+    destruct (icontinue xx0) eqn: CONTxx0.
+    * intros; erewrite IHpath; eauto.
+    * intros; congruence.
+Qed.
+
+Lemma isteps_inversion_early ge path f sp: forall rs m pc st,
+  isteps ge path f sp rs m pc = Some st ->
+  (icontinue st)=false ->
+  exists st0 i path0,
+    (path > path0)%nat /\
+    isteps ge path0 f sp rs m pc = Some st0 /\ 
+    st0.(icontinue) = true /\ 
+    (fn_code f)!(st0.(ipc)) = Some i /\
+    istep ge i sp st0.(irs) st0.(imem) = Some st.
+Proof.
+  induction path as [|path]; simpl.
+  - intros; try_simplify_someHyps; try congruence.
+  - intros rs m pc st; inversion_SOME i; inversion_SOME st0.
+    destruct (icontinue st0) eqn: CONT.
+    + intros STEP PC STEPS CONT0. exploit IHpath; eauto.
+      clear STEPS.
+      intros (st1 & i0 & path0 & BOUND & STEP1 & CONT1 & X1 & X2); auto.
+      exists st1. exists i0. exists (S path0). intuition.
+      simpl; try_simplify_someHyps. 
+      rewrite CONT. auto.
+    + intros; try_simplify_someHyps; try congruence.
+      eexists. exists i. exists O; simpl. intuition eauto.
+      omega.
+Qed.
+
+Lemma isteps_resize ge path0 path1 f sp rs m pc st: 
+ (path0 <= path1)%nat ->
+  isteps ge path0 f sp rs m pc = Some st ->
+  (icontinue st)=false ->
+  isteps ge path1 f sp rs m pc = Some st.
+Proof.
+  induction 1 as [|path1]; simpl; auto.
+  intros PSTEP CONT. exploit IHle; auto. clear PSTEP IHle H path0.
+  generalize rs m pc st CONT; clear rs m pc st CONT. 
+  induction path1 as [|path]; simpl; auto.
+  - intros; try_simplify_someHyps; try congruence.
+  - intros rs m pc st; inversion_SOME i; inversion_SOME st0; intros; try_simplify_someHyps.
+    destruct (icontinue st0) eqn: CONT0; eauto.
+Qed.
+
+(* FIXME - add prediction *)
+Inductive is_early_exit pc: instruction -> Prop :=
+  | Icond_early_exit cond args ifnot predict:
+     is_early_exit pc (Icond cond args pc ifnot predict)
+ . (* TODO add jumptable here ? *)
+
+Lemma istep_early_exit ge i sp rs m st :
+  istep ge i sp rs m = Some st -> 
+  st.(icontinue) = false -> 
+  st.(irs) = rs /\ st.(imem) = m /\ is_early_exit st.(ipc) i.
+Proof.
+  Local Hint Resolve Icond_early_exit: core.
+  destruct i; simpl; try congruence; simplify_SOME b; simpl; try congruence.
+  all: explore_destruct; simplify_SOME b; try discriminate.
+Qed.
+
+Section COMPLETENESS.
+
+Variable p: program.
+
+Let pge := Genv.globalenv p.
+Let ge := Genv.globalenv (transf_program p).
+
+Lemma find_funct_ptr_RTL_preserv b f:
+   Genv.find_funct_ptr ge b = Some f -> (exists f0, Genv.find_funct_ptr pge b = Some f0 /\ f = f0).
+Proof.
+  intros; exploit (Genv.find_funct_ptr_match (match_RTL_prog p)); eauto.
+  destruct 1 as (cunit & tf & X & Y & Z); subst.
+  eauto.
+Qed.
+
+Lemma find_RTL_function_match ros rs fd:
+  RTL.find_function ge ros rs = Some fd -> exists fd', fd = fundef_RTL fd' /\ find_function pge ros rs = Some fd'.
+Proof.
+  destruct ros; simpl.
+  + intro; exploit (Genv.find_funct_match (match_RTL_prog p)); eauto.
+    intros (cuint & tf & H1 & H2 & H3); subst; eauto.
+  + rewrite (symbols_preserved p); unfold pge.
+    destruct (Genv.find_symbol (Genv.globalenv p) i); simpl; try congruence.
+    intro; exploit find_funct_ptr_RTL_preserv; eauto.
+    intros (tf & H1 & H2); subst; eauto.
+Qed.
+
+
+(** *** Definition of well-formed stacks and of match_states *)
+Definition wf_stf (st: stackframe): Prop :=
+  match st with
+  | Stackframe res f sp pc rs => path_entry (*f.(fn_code)*) f.(fn_path) pc
+  end.
+
+Definition wf_stackframe (stack: list stackframe): Prop :=
+  forall st, List.In st stack -> wf_stf st.
+
+Lemma wf_stackframe_nil: wf_stackframe nil.
+Proof.
+  unfold wf_stackframe; simpl. tauto.
+Qed.
+Local Hint Resolve wf_stackframe_nil: core.
+
+Lemma wf_stackframe_cons st stack:
+  wf_stackframe (st::stack) <-> (wf_stf st) /\ wf_stackframe stack.
+Proof.
+  unfold wf_stackframe; simpl; intuition (subst; auto).
+Qed.
+
+Definition stack_of (s: state):  list stackframe :=
+  match s with
+  | State stack f sp pc rs m => stack
+  | Callstate stack f args m => stack
+  | Returnstate stack v m => stack
+  end.
+
+Definition is_inst (s: RTL.state): bool :=
+  match s with
+  | RTL.State stack f sp pc rs m => true
+  | _ => false
+  end.
+
+Inductive match_inst_states_goal (idx: nat) (s1:RTL.state): state -> Prop :=
+  | State_match path stack f sp pc rs m s2:
+    (fn_path f)!pc = Some path ->
+    (idx <= path.(psize))%nat ->
+      isteps ge (path.(psize)-idx) f sp rs m pc = Some s2 ->
+      s1 = State stack f sp s2.(ipc) s2.(irs) s2.(imem) ->
+      match_inst_states_goal idx s1 (State stack f sp pc rs m).
+
+Definition match_inst_states (idx: nat) (s1:RTL.state) (s2:state): Prop :=
+  if is_inst s1 then match_inst_states_goal idx s1 s2 else s1 = state_RTL s2.
+
+Definition match_states (idx: nat) (s1:RTL.state) (s2:state): Prop :=
+    match_inst_states idx s1 s2
+ /\ wf_stackframe (stack_of s2).
+
+(** *** Auxiliary lemmas of completeness *)
+Lemma istep_complete t i stack f sp rs m pc s':
+  RTL.step ge (State stack f sp pc rs m) t s' ->
+  (fn_code f)!pc = Some i -> 
+  default_succ i <> None -> 
+  t = E0 /\ exists st, istep ge i sp rs m = Some st /\ s'=(State stack f sp st.(ipc) st.(irs) st.(imem)).
+Proof.
+  intros H X; inversion H; simpl; subst; try rewrite X in * |-; clear X; simplify_someHyps; try congruence;
+  (split; auto); simplify_someHyps; eexists; split; simplify_someHyps; eauto.
+  all: explore_destruct; simplify_SOME a.
+Qed.
+
+Lemma stuttering path idx stack f sp rs m pc st t s1':
+   isteps ge (path.(psize)-(S idx)) f sp rs m pc = Some st ->
+   (fn_path f)!pc = Some path ->
+   (S idx <= path.(psize))%nat ->
+   st.(icontinue) = true ->
+   RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+   t = E0 /\ match_inst_states idx s1' (State stack f sp pc rs m).
+Proof.
+  intros PSTEP PATH BOUND CONT RSTEP; exploit (internal_node_path (path.(psize)-(S idx))); omega || eauto.
+  intros (i & pc' & Hi & Hpc & DUM).
+  unfold nth_default_succ_inst in Hi.
+  erewrite isteps_normal_exit in Hi; eauto.
+  exploit istep_complete; congruence || eauto.
+  intros (SILENT & st0 & STEP0 & EQ).
+  intuition; subst; unfold match_inst_states; simpl.
+  intros; refine (State_match _ _ path stack f sp pc rs m _ PATH _ _ _); simpl; omega || eauto.
+  set (ps:=path.(psize)). enough (ps - idx = S (ps - (S idx)))%nat as ->; try omega.
+  erewrite <- isteps_step_right; eauto.
+Qed.
+
+Lemma normal_exit path stack f sp rs m pc st t s1':
+   isteps ge path.(psize) f sp rs m pc = Some st ->
+   (fn_path f)!pc = Some path ->
+   st.(icontinue) = true ->
+   RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+   wf_stackframe stack ->
+   exists s2', 
+      (path_last_step ge pge stack f sp st.(ipc) st.(irs) st.(imem)) t s2' 
+       /\ (exists idx', match_states idx' s1' s2').
+Proof.
+  Local Hint Resolve istep_successors list_nth_z_in: core. (* Hint for path_entry proofs *)
+  intros PSTEP PATH CONT RSTEP WF; exploit (final_node_path f path); eauto.
+  intros (i & Hi & SUCCS).
+  unfold nth_default_succ_inst in Hi.
+  erewrite isteps_normal_exit in Hi; eauto.
+  destruct (default_succ i) eqn:Hn0.
+  + (* exec_istate *)
+    exploit istep_complete; congruence || eauto.
+    intros (SILENT & st0 & STEP0 & EQ); subst.
+    exploit (exec_istate ge pge); eauto.
+    eexists; intuition eauto.
+    unfold match_states, match_inst_states; simpl.
+    destruct (initialize_path (*fn_code f*) (fn_path f) (ipc st0)) as (path0 & Hpath0); eauto.
+    exists (path0.(psize)); intuition eauto.
+    econstructor; eauto.
+    * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+    * simpl; eauto.
+  + generalize Hi; inversion RSTEP; clear RSTEP; subst; (repeat (simplify_someHyp; simpl in * |- * )); try congruence; eauto.
+    - (* Icall *)
+      intros; exploit find_RTL_function_match; eauto.
+      intros (fd' & MATCHfd & Hfd'); subst.
+      exploit (exec_Icall ge pge); eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+      rewrite wf_stackframe_cons; intuition simpl; eauto.
+    - (* Itailcall *)
+      intros; exploit find_RTL_function_match; eauto.
+      intros (fd' & MATCHfd & Hfd'); subst.
+      exploit (exec_Itailcall ge pge); eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+    - (* Ibuiltin *)
+      intros; exploit exec_Ibuiltin; eauto.
+      eexists; intuition eauto.
+      unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto.
+      exists path0.(psize); intuition eauto.
+      econstructor; eauto.
+      * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+      * simpl; eauto.
+   - (* Ijumptable *)
+      intros; exploit exec_Ijumptable; eauto.
+      eexists; intuition eauto.
+      unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto.
+      exists path0.(psize); intuition eauto.
+      econstructor; eauto.
+      * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega.
+      * simpl; eauto.
+  - (* Ireturn *)
+      intros; exploit exec_Ireturn; eauto.
+      eexists; intuition eauto.
+      eexists O; unfold match_states, match_inst_states; simpl; intuition eauto.
+Qed.
+
+Lemma path_step_complete stack f sp rs m pc t s1' idx path st:
+  isteps ge (path.(psize)-idx) f sp rs m pc = Some st ->
+  (fn_path f)!pc = Some path ->
+  (idx <= path.(psize))%nat ->
+  RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' ->
+  wf_stackframe stack ->
+  exists idx' s2', 
+      (path_step ge pge path.(psize) stack f sp rs m pc t s2' 
+       \/  (t = E0 /\ s2'=(State stack f sp pc rs m) /\ (idx' < idx)%nat)
+       \/ (exists path', path_step ge pge path.(psize) stack f sp rs m pc E0 (State stack f sp st.(ipc) st.(irs) st.(imem))
+                         /\ (fn_path f)!(ipc st) = Some path' /\ path'.(psize) = O
+                         /\ path_step ge pge path'.(psize) stack f sp st.(irs) st.(imem) st.(ipc) t s2')
+       )
+      /\ match_states idx' s1' s2'.
+Proof.
+  Local Hint Resolve exec_early_exit exec_normal_exit: core.
+  intros PSTEP PATH BOUND RSTEP WF; destruct (st.(icontinue)) eqn: CONT.
+  destruct idx as [ | idx].
+  + (* path_step on normal_exit *)
+     assert (path.(psize)-0=path.(psize))%nat as HH by omega. rewrite HH in PSTEP. clear HH.
+     exploit normal_exit; eauto.
+     intros (s2' & LSTEP & (idx' & MATCH)).
+     exists idx'; exists s2'; intuition eauto.
+  + (* stuttering step *)
+    exploit stuttering; eauto.
+    unfold match_states; exists idx; exists (State stack f sp pc rs m); 
+    intuition.
+  + (* one or two path_step on early_exit *)
+    exploit (isteps_resize ge (path.(psize) - idx)%nat path.(psize)); eauto; try omega.
+    clear PSTEP; intros PSTEP.
+    (* TODO for clarification: move the assert below into a separate lemma *)
+    assert (HPATH0: exists path0, (fn_path f)!(ipc st) = Some path0).
+    { clear RSTEP.
+      exploit isteps_inversion_early; eauto.
+      intros (st0 & i & path0 & BOUND0 & PSTEP0 & CONT0 & PC0 & STEP0).
+      exploit istep_early_exit; eauto.
+      intros (X1 & X2 & EARLY_EXIT).
+      destruct st as [cont pc0 rs0 m0]; simpl in * |- *; intuition subst.
+      exploit (internal_node_path path0); omega || eauto.
+      intros (i' & pc' & Hi' & Hpc' & ENTRY).
+      unfold nth_default_succ_inst in Hi'.
+      erewrite isteps_normal_exit in Hi'; eauto.
+      clear pc' Hpc' STEP0 PSTEP0 BOUND0; try_simplify_someHyps; intros.
+      destruct EARLY_EXIT as [cond args ifnot]; simpl in ENTRY;
+      destruct (initialize_path (*fn_code f*) (fn_path f) pc0); eauto.
+    }
+    destruct HPATH0 as (path1  & Hpath1).
+    destruct (path1.(psize)) as [|ps] eqn:Hpath1size.
+    * (* two step case *)
+      exploit (normal_exit path1); try rewrite Hpath1size; simpl; eauto.
+      simpl; intros (s2' & LSTEP & (idx' & MATCH)).
+      exists idx'. exists s2'. constructor; auto.
+      right. right. eexists; intuition eauto.
+      (* now, prove the last step *)
+      rewrite Hpath1size; exploit exec_normal_exit. 4:{ eauto. } 
+      - simpl; eauto.
+      - simpl; eauto.
+      - simpl; eauto.
+    * (* single step case *)
+      exploit (stuttering path1 ps stack f sp (irs st) (imem st) (ipc st)); simpl; auto.
+      - { rewrite Hpath1size; enough (S ps-S ps=0)%nat as ->; try omega.  simpl; eauto. }
+      - omega.
+      - simpl; eauto.
+      - simpl; eauto.
+      - intuition subst.
+        repeat eexists; intuition eauto.
+Qed.
+
+Lemma step_noninst_complete s1 t s1' s2:
+  is_inst s1 = false ->
+  s1 = state_RTL s2 ->
+  RTL.step ge s1 t s1' ->
+  wf_stackframe (stack_of s2) ->
+  exists s2', step ge pge s2 t s2' /\ exists idx, match_states idx s1' s2'.
+Proof.
+  intros H0 H1 H2 WFSTACK; destruct s2; subst; simpl in * |- *; try congruence;
+  inversion H2; clear H2; subst; try_simplify_someHyps; try congruence.
+  + (* exec_function_internal *)
+    destruct f; simpl in H3; inversion H3; subst; clear H3.
+    eexists; constructor 1.
+    * eapply exec_function_internal; eauto.
+    * unfold match_states, match_inst_states; simpl.
+      destruct (initialize_path (*fn_code f*) (fn_path f) (fn_entrypoint (fn_RTL f))) as (path & Hpath); eauto.
+      exists path.(psize). constructor; auto.
+      econstructor; eauto.
+      - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto.
+        omega.
+      - simpl; auto.
+  + (* exec_function_external *)
+    destruct f; simpl in H3 |-; inversion H3; subst; clear H3.
+    eexists; constructor 1.
+    * apply exec_function_external; eauto.
+    * unfold match_states, match_inst_states; simpl. exists O; auto.
+  + (* exec_return *)
+    destruct stack eqn: Hstack; simpl in H1; inversion H1; clear H1; subst.
+    destruct s0 eqn: Hs0; simpl in H0; inversion H0; clear H0; subst.
+    eexists; constructor 1.
+    * apply exec_return.
+    * unfold match_states, match_inst_states; simpl.
+      rewrite wf_stackframe_cons in WFSTACK.
+      destruct WFSTACK as (H0 & H1); simpl in H0.
+      destruct (initialize_path (*fn_code f0*) (fn_path f0) pc0) as (path & Hpath); eauto.
+      exists path.(psize). constructor; auto.
+      econstructor; eauto.
+      - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto.
+        omega.
+      - simpl; auto.
+Qed.
+
+(** *** The main completeness lemma and the simulation theorem...*)
+Lemma step_complete s1 t s1' idx s2:
+ match_states idx s1 s2 ->
+ RTL.step ge s1 t s1' ->
+ exists idx' s2', (plus (step ge) pge s2 t s2' \/ (t = E0 /\ s2=s2' /\ (idx' < idx)%nat)) /\ match_states idx' s1' s2'.
+Proof.
+  Local Hint Resolve plus_one plus_two exec_path: core.
+  unfold match_states at 1, match_inst_states. intros (IS_INST & WFSTACK). destruct (is_inst s1) eqn: His1.
+  - clear His1; destruct IS_INST as [path stack f sp pc rs m s2 X X0 X1 X2]; auto; subst; simpl in * |- *. 
+    intros STEP; exploit path_step_complete; eauto.
+    intros (idx' & s2' & H0 & H1).
+    eexists; eexists; eauto.
+    destruct H0 as [H0|[H0|(path'&H0)]]; intuition subst; eauto.
+  - intros; exploit step_noninst_complete; eauto.
+    intros (s2' & STEP & (idx0 & MATCH)).
+    exists idx0; exists s2'; intuition auto.
+Qed.
+
+Theorem RTLpath_complete: forward_simulation (RTL.semantics p) (semantics p).
+Proof.
+  eapply (Forward_simulation (L1:=RTL.semantics p) (L2:=semantics p) lt match_states).
+  constructor 1; simpl.
+  - apply lt_wf.
+  - unfold match_states, match_inst_states. destruct 1; simpl; exists O.
+    destruct (find_funct_ptr_RTL_preserv b f) as (f0 & X1 & X2); subst; eauto.
+    exists (Callstate nil f0 nil m0). simpl; split; try econstructor; eauto.
+    + apply (Genv.init_mem_match (match_RTL_prog p)); auto.
+    + rewrite (Genv.find_symbol_match (match_RTL_prog p)).
+      rewrite (match_program_main (match_RTL_prog p)); eauto.
+  - unfold final_state, match_states, match_inst_states. intros i s1 s2 r (H0 & H1) H2; destruct H2.
+    destruct s2; simpl in * |- *; inversion H0; subst.
+    constructor.
+  - Local Hint Resolve star_refl: core.
+    intros; exploit step_complete; eauto.
+    destruct 1 as (idx' & s2' & X).
+    exists idx'. exists s2'. intuition (subst; eauto). 
+  - intros id; destruct (senv_preserved p); simpl in * |-. intuition.
+Qed.
+
+End COMPLETENESS.
diff --git a/scheduling/RTLpathCommon.ml b/scheduling/RTLpathCommon.ml
new file mode 100644
index 00000000..3d123ba8
--- /dev/null
+++ b/scheduling/RTLpathCommon.ml
@@ -0,0 +1,14 @@
+open Maps
+open Registers
+open Camlcoq
+
+type superblock = {
+  mutable instructions: P.t array; (* pointers to code instructions *)
+  (* each predicted Pcb has its attached liveins *)
+  (* This is indexed by the pc value *)
+  mutable liveins: Regset.t PTree.t;
+  (* Union of the input_regs of the last successors *)
+  s_output_regs: Regset.t;
+  typing: RTLtyping.regenv
+}
+
diff --git a/scheduling/RTLpathLivegen.v b/scheduling/RTLpathLivegen.v
new file mode 100644
index 00000000..9f646ad0
--- /dev/null
+++ b/scheduling/RTLpathLivegen.v
@@ -0,0 +1,325 @@
+(** Building a RTLpath program with liveness annotation.
+*)
+
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import Lattice.
+Require Import AST.
+Require Import Op.
+Require Import Registers.
+Require Import Globalenvs Smallstep RTL RTLpath.
+Require Import Bool Errors.
+Require Import Program.
+
+Local Open Scope lazy_bool_scope.
+
+Local Open Scope option_monad_scope.
+
+Axiom build_path_map: RTL.function -> path_map.
+
+Extract Constant build_path_map => "RTLpathLivegenaux.build_path_map".
+
+Fixpoint list_mem (rl: list reg) (alive: Regset.t) {struct rl}: bool :=
+  match rl with
+  | nil => true
+  | r1 :: rs => Regset.mem r1 alive &&& list_mem rs alive
+  end.
+
+Definition exit_checker {A} (pm: path_map) (alive: Regset.t) (pc: node) (v:A): option A :=
+   SOME path <- pm!pc IN
+   ASSERT Regset.subset path.(input_regs) alive IN
+   Some v.
+
+Lemma exit_checker_path_entry A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res:
+  exit_checker pm alive pc v = Some res -> path_entry pm pc.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; simpl; congruence.
+Qed.
+
+Lemma exit_checker_res A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res:
+  exit_checker pm alive pc v = Some res -> v=res.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; try_simplify_someHyps.
+  inversion_ASSERT; try_simplify_someHyps.
+Qed.
+
+Definition iinst_checker (pm: path_map) (alive: Regset.t) (i: instruction): option (Regset.t * node)  :=
+  match i with
+  | Inop pc' => Some (alive, pc')
+  | Iop op args dst pc' => 
+      ASSERT list_mem args alive IN
+      Some (Regset.add dst alive, pc')
+  | Iload _ chunk addr args dst pc' =>
+      ASSERT list_mem args alive IN
+      Some (Regset.add dst alive, pc')
+  | Istore chunk addr args src pc' =>
+      ASSERT Regset.mem src alive IN
+      ASSERT list_mem args alive IN
+      Some (alive, pc')
+  | Icond cond args ifso ifnot _ =>
+      ASSERT list_mem args alive IN
+      exit_checker pm alive ifso (alive, ifnot)
+  | _ => None
+  end.
+
+
+Local Hint Resolve exit_checker_path_entry: core.
+
+Lemma iinst_checker_path_entry (pm: path_map) (alive: Regset.t) (i: instruction) res pc:
+  iinst_checker pm alive i = Some res -> 
+  early_exit i = Some pc -> path_entry pm pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst.
+  inversion_ASSERT; try_simplify_someHyps.
+Qed.
+
+Lemma iinst_checker_default_succ (pm: path_map) (alive: Regset.t) (i: instruction) res pc:
+  iinst_checker pm alive i = Some res -> 
+  pc = snd res ->
+  default_succ i = Some pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst;
+  repeat (inversion_ASSERT); try_simplify_someHyps.
+  intros; exploit exit_checker_res; eauto.
+  intros; subst. simpl; auto.
+Qed.
+
+Fixpoint ipath_checker (ps:nat) (f: RTL.function) (pm: path_map) (alive: Regset.t) (pc:node): option (Regset.t * node) :=
+  match ps with
+  | O => Some (alive, pc)
+  | S p =>
+    SOME i <- f.(fn_code)!pc IN
+    SOME res <- iinst_checker pm alive i IN
+    ipath_checker p f pm (fst res) (snd res)
+  end.
+
+Lemma ipath_checker_wellformed f pm ps: forall alive pc res,
+   ipath_checker ps f pm alive pc = Some res -> 
+   wellformed_path f.(fn_code) pm 0 (snd res) ->
+   wellformed_path f.(fn_code) pm ps pc.
+Proof.
+  induction ps; simpl; try_simplify_someHyps.
+  inversion_SOME i; inversion_SOME res'.
+  intros. eapply wf_internal_node; eauto.
+  * eapply iinst_checker_default_succ; eauto.
+  * intros; eapply iinst_checker_path_entry; eauto.
+Qed.
+
+
+Lemma ipath_checker_default_succ (f: RTLpath.function) path: forall alive pc res,
+  ipath_checker path f (fn_path f) alive pc = Some res
+  -> nth_default_succ (fn_code f) path pc = Some (snd res).
+Proof.
+  induction path; simpl.
+  + try_simplify_someHyps.
+  + intros alive pc res.
+    inversion_SOME i; intros INST.
+    inversion_SOME res0; intros ICHK IPCHK.
+    rewrite INST.
+    erewrite iinst_checker_default_succ; eauto.
+Qed.
+
+Definition reg_option_mem (or: option reg) (alive: Regset.t) :=
+  match or with None => true | Some r => Regset.mem r alive end.
+
+Definition reg_sum_mem (ros: reg + ident) (alive: Regset.t) :=
+  match ros with inl r => Regset.mem r alive | inr s => true end.
+
+(* NB: definition following [regmap_setres] in [RTL.step] semantics *)
+Definition reg_builtin_res (res: builtin_res reg) (alive: Regset.t): Regset.t :=
+  match res with
+  | BR r => Regset.add r alive
+  | _ => alive
+  end.
+
+Fixpoint exit_list_checker (pm: path_map) (alive: Regset.t) (l: list node): bool :=
+   match l with
+   | nil => true
+   | pc::l' => exit_checker pm alive pc tt &&& exit_list_checker pm alive l'
+   end.
+
+Lemma lazy_and_Some_true A (o: option A) (b: bool): o &&& b = true <-> (exists v, o = Some v) /\ b = true.
+Proof.
+  destruct o; simpl; intuition. 
+  - eauto.
+  - firstorder. try_simplify_someHyps.
+Qed.
+
+Lemma lazy_and_Some_tt_true (o: option unit) (b: bool): o &&& b = true <-> o = Some tt /\ b = true.
+Proof.
+   intros; rewrite lazy_and_Some_true; firstorder.
+   destruct x; auto.
+Qed.
+
+
+Lemma exit_list_checker_correct pm alive l pc:
+  exit_list_checker pm alive l = true -> List.In pc l -> exit_checker pm alive pc tt = Some tt.
+Proof.
+  intros EXIT PC; induction l; intuition.
+  simpl in * |-. rewrite lazy_and_Some_tt_true in EXIT.
+  firstorder (subst; eauto).
+Qed.
+
+Local Hint Resolve exit_list_checker_correct: core.
+
+Definition final_inst_checker (pm: path_map) (alive por: Regset.t) (i: instruction): option unit :=
+   match i with
+   | Icall sig ros args res pc' =>
+      ASSERT list_mem args alive IN
+      ASSERT reg_sum_mem ros alive IN
+      exit_checker pm (Regset.add res por) pc' tt
+   | Itailcall sig ros args =>
+      ASSERT list_mem args alive IN
+      ASSERT reg_sum_mem ros alive IN
+      Some tt
+   | Ibuiltin ef args res pc' =>
+      ASSERT list_mem (params_of_builtin_args args) alive IN
+      exit_checker pm (reg_builtin_res res por) pc' tt
+   | Ijumptable arg tbl =>
+      ASSERT Regset.mem arg alive IN
+      ASSERT exit_list_checker pm por tbl IN
+      Some tt
+   | Ireturn optarg =>
+      ASSERT (reg_option_mem optarg) alive IN
+      Some tt
+   | _ => None
+   end.
+
+Lemma final_inst_checker_wellformed (c:code) pc (pm: path_map) (alive por: Regset.t) (i: instruction):
+  final_inst_checker pm alive por i = Some tt ->
+  c!pc = Some i -> wellformed_path c pm 0 pc.
+Proof.
+  intros CHECK PC. eapply wf_last_node; eauto.
+  clear c pc PC. intros pc PC.
+  destruct i; simpl in * |- *; intuition (subst; eauto);
+  try (generalize CHECK; clear CHECK; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+Qed.
+
+Definition inst_checker (pm: path_map) (alive por: Regset.t) (i: instruction): option unit :=
+   match iinst_checker pm alive i with
+   | Some res =>
+      ASSERT Regset.subset por (fst res) IN
+      exit_checker pm por (snd res) tt
+   | _ => 
+      ASSERT Regset.subset por alive IN
+      final_inst_checker pm alive por i
+   end.
+
+Lemma inst_checker_wellformed (c:code) pc (pm: path_map) (alive por: Regset.t) (i: instruction):
+  inst_checker pm alive por i = Some tt ->
+  c!pc = Some i -> wellformed_path c pm 0 pc.
+Proof.
+  unfold inst_checker.
+  destruct (iinst_checker pm alive i) as [[alive0 pc0]|] eqn: CHECK1; simpl.
+  - simpl; intros CHECK2 PC. eapply wf_last_node; eauto.
+    destruct i; simpl in * |- *; intuition (subst; eauto);
+    try (generalize CHECK2 CHECK1; clear CHECK1 CHECK2; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+    intros PC CHECK1 CHECK2. 
+    intros; exploit exit_checker_res; eauto.
+    intros X; inversion X. intros; subst; eauto.
+  - simpl; intros CHECK2 PC. eapply final_inst_checker_wellformed; eauto.
+    generalize CHECK2. clear CHECK2. inversion_ASSERT. try_simplify_someHyps.
+Qed.
+
+Definition path_checker (f: RTL.function) pm (pc: node) (path:path_info): option unit :=
+   SOME res <- ipath_checker (path.(psize)) f pm (path.(input_regs)) pc  IN
+   SOME i <- f.(fn_code)!(snd res) IN
+   inst_checker pm (fst res) (path.(pre_output_regs)) i.
+
+Lemma path_checker_wellformed f pm pc path:
+   path_checker f pm pc path = Some tt -> wellformed_path (f.(fn_code)) pm (path.(psize)) pc.
+Proof.
+  unfold path_checker.
+  inversion_SOME res.
+  inversion_SOME i.
+  intros; eapply ipath_checker_wellformed; eauto.
+  eapply inst_checker_wellformed; eauto.
+Qed.
+
+Fixpoint list_path_checker f pm (l:list (node*path_info)): bool :=
+  match l with
+  | nil => true
+  | (pc, path)::l' =>
+      path_checker f pm pc path &&& list_path_checker f pm l'
+  end.
+
+Lemma list_path_checker_correct f pm l: 
+  list_path_checker f pm l = true -> forall e, List.In e l -> path_checker f pm (fst e) (snd e) = Some tt.
+Proof.
+  intros CHECKER e H; induction l as [|(pc & path) l]; intuition.
+  simpl in * |- *. rewrite lazy_and_Some_tt_true in CHECKER. intuition (subst; auto).
+Qed.
+
+Definition function_checker (f: RTL.function) pm: bool := 
+  pm!(f.(fn_entrypoint)) &&& list_path_checker f pm (PTree.elements pm).
+
+Lemma function_checker_correct f pm pc path: 
+  function_checker f pm = true -> 
+  pm!pc = Some path -> 
+  path_checker f pm pc path = Some tt.
+Proof.
+  unfold function_checker; rewrite lazy_and_Some_true.
+  intros (ENTRY & PATH) PC.
+  exploit list_path_checker_correct; eauto.
+  - eapply PTree.elements_correct; eauto.
+  - simpl; auto.
+Qed.
+
+Lemma function_checker_wellformed_path_map f pm:
+  function_checker f pm = true -> wellformed_path_map f.(fn_code) pm.
+Proof.
+  unfold wellformed_path_map.
+  intros; eapply path_checker_wellformed; eauto.
+  intros; eapply function_checker_correct; eauto.
+Qed.
+
+Lemma function_checker_path_entry f pm:
+  function_checker f pm = true -> path_entry pm (f.(fn_entrypoint)).
+Proof.
+  unfold function_checker; rewrite lazy_and_Some_true;
+  unfold path_entry. firstorder congruence.
+Qed.
+
+Definition liveness_ok_function (f: function): Prop :=
+  forall pc path, f.(fn_path)!pc = Some path -> path_checker f f.(fn_path) pc path = Some tt.
+
+Program Definition transf_function (f: RTL.function): { r: res function | forall f', r = OK f' -> liveness_ok_function f' /\ f'.(fn_RTL) = f } :=
+  let pm := build_path_map f in
+  match function_checker f pm with
+  | true => OK {| fn_RTL := f; fn_path := pm |}
+  | false => Error(msg "RTLpathGen: function_checker failed")
+  end.
+Obligation 1.
+  apply function_checker_path_entry; auto.
+Qed.
+Obligation 2.
+  apply function_checker_wellformed_path_map; auto.
+Qed.
+Obligation 3.
+  unfold liveness_ok_function; simpl; intros; intuition.
+  apply function_checker_correct; auto.
+Qed.
+
+Definition transf_fundef (f: RTL.fundef) : res fundef :=
+  transf_partial_fundef (fun f => ` (transf_function f)) f.
+
+Inductive liveness_ok_fundef: fundef -> Prop :=
+  | liveness_ok_Internal f: liveness_ok_function f -> liveness_ok_fundef (Internal f)
+  | liveness_ok_External ef: liveness_ok_fundef (External ef).
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> (liveness_ok_fundef f') /\ fundef_RTL f' = f.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  - destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto. apply liveness_ok_Internal; auto.
+  - intuition. apply liveness_ok_External; auto.
+Qed.
+
+Definition transf_program (p: RTL.program) : res program :=
+  transform_partial_program transf_fundef p.
+
diff --git a/scheduling/RTLpathLivegenaux.ml b/scheduling/RTLpathLivegenaux.ml
new file mode 100644
index 00000000..2a20a15d
--- /dev/null
+++ b/scheduling/RTLpathLivegenaux.ml
@@ -0,0 +1,290 @@
+open RTL
+open RTLpath
+open Registers
+open Maps
+open Camlcoq
+open Datatypes
+open Kildall
+open Lattice
+open DebugPrint
+
+let get_some = function
+| None -> failwith "Got None instead of Some _"
+| Some thing -> thing
+
+let successors_inst = function
+| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,_) -> [n1; n2]
+| Ijumptable (_,l) -> l
+| Itailcall _ | Ireturn _ -> []
+
+let predicted_successor = function
+| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> Some n
+| Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> None
+| Icond (_,_,n1,n2,p) -> (
+    match p with
+    | Some true -> Some n1
+    | Some false -> Some n2
+    | None -> None )
+| Ijumptable _ | Itailcall _ | Ireturn _ -> None
+
+let non_predicted_successors i =
+  match predicted_successor i with
+  | None -> successors_inst i
+  | Some n -> List.filter (fun n' -> n != n') (successors_inst i)
+
+let rec list_to_regset = function
+  | [] -> Regset.empty
+  | r::l -> Regset.add r (list_to_regset l)
+
+let get_input_regs i = 
+  let empty = Regset.empty in
+  match i with
+  | Inop _ -> empty
+  | Iop (_,lr,_,_) | Iload (_,_,_,lr,_,_) | Icond (_,lr,_,_,_) -> list_to_regset lr
+  | Istore (_,_,lr,r,_) -> Regset.add r (list_to_regset lr)
+  | Icall (_, ri, lr, _, _) | Itailcall (_, ri, lr) -> begin
+      let rs = list_to_regset lr in
+      match ri with
+      | Coq_inr _ -> rs
+      | Coq_inl r -> Regset.add r rs
+    end
+  | Ibuiltin (_, lbr, _, _) -> list_to_regset @@ AST.params_of_builtin_args lbr
+  | Ijumptable (r, _) -> Regset.add r empty
+  | Ireturn opr -> (match opr with Some r -> Regset.add r empty | None -> empty)
+
+let get_output_reg i =
+  match i with
+  | Inop _ | Istore _ | Icond _ | Itailcall _ | Ijumptable _ | Ireturn _ -> None
+  | Iop (_, _, r, _) | Iload (_, _, _, _, r, _) | Icall (_, _, _, r, _) -> Some r
+  | Ibuiltin (_, _, brr, _) -> (match brr with AST.BR r -> Some r | _ -> None)
+
+(* adapted from Linearizeaux.get_join_points *)
+let get_join_points code entry =
+  let reached = ref (PTree.map (fun n i -> false) code) in
+  let reached_twice = ref (PTree.map (fun n i -> false) code) in
+  let rec traverse pc =
+    if get_some @@ PTree.get pc !reached then begin
+      if not (get_some @@ PTree.get pc !reached_twice) then
+        reached_twice := PTree.set pc true !reached_twice
+    end else begin
+      reached := PTree.set pc true !reached;
+      traverse_succs (successors_inst @@ get_some @@ PTree.get pc code)
+    end
+  and traverse_succs = function
+    | [] -> ()
+    | [pc] -> traverse pc
+    | pc :: l -> traverse pc; traverse_succs l
+  in traverse entry; !reached_twice
+
+(* Does not set the input_regs and liveouts field *)
+let get_path_map code entry join_points =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let path_map = ref PTree.empty in
+  let rec dig_path e =
+    if (get_some @@ PTree.get e !visited) then
+      ()
+    else begin
+      visited := PTree.set e true !visited;
+      let psize = ref (-1) in
+      let path_successors = ref [] in
+      let rec dig_path_rec n : (path_info * node list) option =
+        let inst = get_some @@ PTree.get n code in
+        begin
+          psize := !psize + 1;
+          let successor = match predicted_successor inst with
+          | None -> None
+          | Some n' -> if get_some @@ PTree.get n' join_points then None else Some n'
+          in match successor with
+          | Some n' -> begin
+                path_successors := !path_successors @ non_predicted_successors inst;
+                dig_path_rec n'
+              end
+          | None -> Some ({ psize = (Camlcoq.Nat.of_int !psize);
+                            input_regs = Regset.empty; pre_output_regs = Regset.empty; output_regs = Regset.empty },
+                        !path_successors @ successors_inst inst)
+        end
+      in match dig_path_rec e with
+      | None -> ()
+      | Some ret ->
+          let (path_info, succs) = ret in
+          begin
+            path_map := PTree.set e path_info !path_map;
+            List.iter dig_path succs
+          end
+    end
+  in begin
+    dig_path entry;
+    !path_map
+  end
+
+let transfer f pc after = let open Liveness in
+  match PTree.get pc f.fn_code with
+  | Some i ->
+    (match i with
+    | Inop _ -> after
+    | Iop (_, args, res, _) ->
+        reg_list_live args (Regset.remove res after)
+    | Iload (_, _, _, args, dst, _) ->
+        reg_list_live args (Regset.remove dst after)
+    | Istore (_, _, args, src, _) ->
+        reg_list_live args (Regset.add src after)
+    | Icall (_, ros, args, res, _) ->
+        reg_list_live args (reg_sum_live ros (Regset.remove res after))
+    | Itailcall (_, ros, args) ->
+        reg_list_live args (reg_sum_live ros Regset.empty)
+    | Ibuiltin (_, args, res, _) ->
+        reg_list_live (AST.params_of_builtin_args args)
+          (reg_list_dead (AST.params_of_builtin_res res) after)
+    | Icond (_, args, _, _, _) ->
+        reg_list_live args after
+    | Ijumptable (arg, _) ->
+        Regset.add arg after
+    | Ireturn optarg ->
+        reg_option_live optarg Regset.empty)
+  | None -> Regset.empty
+
+module RegsetLat = LFSet(Regset)
+
+module DS = Backward_Dataflow_Solver(RegsetLat)(NodeSetBackward)
+
+let analyze f =
+  let liveouts = get_some @@ DS.fixpoint f.fn_code successors_instr (transfer f) in
+  PTree.map (fun n _ -> let lo = PMap.get n liveouts in transfer f n lo) f.fn_code
+
+(** OLD CODE - If needed to have our own kildall
+
+let transfer after = let open Liveness in function
+  | Inop _ -> after
+  | Iop (_, args, res, _) ->
+      reg_list_live args (Regset.remove res after)
+  | Iload (_, _, _, args, dst, _) ->
+      reg_list_live args (Regset.remove dst after)
+  | Istore (_, _, args, src, _) ->
+      reg_list_live args (Regset.add src after)
+  | Icall (_, ros, args, res, _) ->
+      reg_list_live args (reg_sum_live ros (Regset.remove res after))
+  | Itailcall (_, ros, args) ->
+      reg_list_live args (reg_sum_live ros Regset.empty)
+  | Ibuiltin (_, args, res, _) ->
+      reg_list_live (AST.params_of_builtin_args args)
+        (reg_list_dead (AST.params_of_builtin_res res) after)
+  | Icond (_, args, _, _, _) ->
+      reg_list_live args after
+  | Ijumptable (arg, _) ->
+      Regset.add arg after
+  | Ireturn optarg ->
+      reg_option_live optarg Regset.empty
+
+let get_last_nodes f =
+  let visited = ref (PTree.map (fun n i -> false) f.fn_code) in
+  let rec step n =
+    let inst = get_some @@ PTree.get n f.fn_code in
+    let successors = successors_inst inst in
+    if get_some @@ PTree.get n !visited then []
+    else begin
+
+let analyze f =
+  let liveness = ref (PTree.map (fun n i -> None) f.fn_code) in
+  let predecessors = Duplicateaux.get_predecessors_rtl f.fn_code in
+  let last_nodes = get_last_nodes f in
+  let rec step liveout n = (* liveout is the input_regs from the successor *)
+    let inst = get_some @@ PTree.get n f.fn_code in
+    let continue = ref true in
+    let alive = match get_some @@ PTree.get n !liveness with
+    | None -> transfer liveout inst
+    | Some pre_alive -> begin
+        let union = Regset.union pre_alive liveout in
+        let new_alive = transfer union inst in
+        (if Regset.equal pre_alive new_alive then continue := false);
+        new_alive
+      end
+    in begin
+      liveness := PTree.set n (Some alive) !liveness;
+      if !continue then
+        let preds = get_some @@ PTree.get n predecessors in
+        List.iter (step alive) preds
+    end
+  in begin
+    List.iter (step Regset.empty) last_nodes;
+    let liveness_noopt = PTree.map (fun n i -> get_some i) !liveness in
+    begin
+      debug_flag := true;
+      dprintf "Liveness: "; print_ptree_regset liveness_noopt; dprintf "\n";
+      debug_flag := false;
+      liveness_noopt
+    end
+  end
+*)
+
+let rec traverse code n size =
+  let inst = get_some @@ PTree.get n code in
+  if (size == 0) then (inst, n)
+  else 
+    let n' = get_some @@ predicted_successor inst in
+    traverse code n' (size-1)
+
+let get_outputs liveness f n pi =
+  let (last_instruction, pc_last) = traverse f.fn_code n (Camlcoq.Nat.to_int pi.psize) in
+  let path_last_successors = successors_inst last_instruction in
+  let list_input_regs = List.map (
+      fun n -> get_some @@ PTree.get n liveness
+    ) path_last_successors in
+  let outputs = List.fold_left Regset.union Regset.empty list_input_regs in
+  let por = match last_instruction with (* see RTLpathLivegen.final_inst_checker *)
+    | Icall (_, _, _, res, _) -> Regset.remove res outputs
+    | Ibuiltin (_, _, res, _) ->  Liveness.reg_list_dead (AST.params_of_builtin_res res) outputs
+    | Itailcall (_, _, _) | Ireturn _ ->
+       assert (outputs = Regset.empty); (* defensive check for performance *)
+       outputs
+    | _ -> outputs
+  in (por, outputs)
+
+let set_pathmap_liveness f pm =
+  let liveness = analyze f in
+  let new_pm = ref PTree.empty in
+  begin
+    debug "Liveness: "; print_ptree_regset liveness; debug "\n";
+    List.iter (fun (n, pi) ->
+      let inputs = get_some @@ PTree.get n liveness in
+      let (por, outputs) = get_outputs liveness f n pi in
+      new_pm := PTree.set n
+                  {psize=pi.psize; input_regs=inputs; pre_output_regs=por; output_regs=outputs} !new_pm
+    ) (PTree.elements pm);
+    !new_pm
+  end
+
+let print_path_info pi = begin
+  debug "(psize=%d; " (Camlcoq.Nat.to_int pi.psize);
+  debug "\ninput_regs=";
+  print_regset pi.input_regs;
+  debug "\n; pre_output_regs=";
+  print_regset pi.pre_output_regs;
+  debug "\n; output_regs=";
+  print_regset pi.output_regs;
+  debug ")\n"
+end
+
+let print_path_map path_map = begin
+  debug "[";
+  List.iter (fun (n,pi) ->
+    debug "\n\t";
+    debug "%d: " (P.to_int n);
+    print_path_info pi
+  ) (PTree.elements path_map);
+  debug "]"
+end
+
+let build_path_map f = 
+  let code = f.fn_code in
+  let entry = f.fn_entrypoint in
+  let join_points = get_join_points code entry in
+  let path_map = set_pathmap_liveness f @@ get_path_map code entry join_points in
+  begin
+    debug "Join points: ";
+    print_true_nodes join_points;
+    debug "\nPath map: ";
+    print_path_map path_map;
+    debug "\n";
+    path_map
+  end
diff --git a/scheduling/RTLpathLivegenproof.v b/scheduling/RTLpathLivegenproof.v
new file mode 100644
index 00000000..b02400bf
--- /dev/null
+++ b/scheduling/RTLpathLivegenproof.v
@@ -0,0 +1,760 @@
+(** Proofs of the liveness properties from the liveness checker of RTLpathLivengen.
+*)
+
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import Lattice.
+Require Import AST.
+Require Import Op.
+Require Import Registers.
+Require Import Globalenvs Smallstep RTL RTLpath RTLpathLivegen.
+Require Import Bool Errors Linking Values Events.
+Require Import Program.
+
+Definition match_prog (p: RTL.program) (tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variables prog: RTL.program.
+Variables tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tpge := Genv.globalenv tprog.
+Let tge := Genv.globalenv (RTLpath.transf_program tprog).
+
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL).
+  apply (Genv.find_symbol_match (match_prog_RTL tprog)).
+Qed.
+
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_preserved: Senv.equiv ge tge.
+Proof.
+  eapply senv_transitivity. { eapply (Genv.senv_match TRANSL). }
+  eapply RTLpath.senv_preserved.
+Qed.
+
+Lemma function_ptr_preserved v f: Genv.find_funct_ptr ge v = Some f -> 
+  exists tf, Genv.find_funct_ptr tpge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros; apply (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+
+Lemma function_ptr_RTL_preserved v f: Genv.find_funct_ptr ge v = Some f -> Genv.find_funct_ptr tge v = Some f.
+Proof.
+  intros; exploit function_ptr_preserved; eauto.
+  intros (tf & Htf & TRANS).
+  exploit (Genv.find_funct_ptr_match (match_prog_RTL tprog)); eauto.
+  intros (cunit & tf0 & X & Y & DUM); subst.
+  unfold tge. rewrite X. 
+  exploit transf_fundef_correct; eauto. 
+  intuition subst; auto.
+Qed.
+
+Lemma find_function_preserved ros rs fd:
+  RTL.find_function ge ros rs = Some fd -> RTL.find_function tge ros rs = Some fd.
+Proof.
+  intros H; assert (X: exists tfd, find_function tpge ros rs = Some tfd /\ fd = fundef_RTL tfd). 
+  * destruct ros; simpl in * |- *.
+    + intros; exploit (Genv.find_funct_match TRANSL); eauto.
+      intros (cuint & tf & H1 & H2 & H3); subst; repeat econstructor; eauto.
+      exploit transf_fundef_correct; eauto. 
+      intuition auto.
+    + rewrite <- (Genv.find_symbol_match TRANSL) in H.
+    unfold tpge. destruct (Genv.find_symbol _ i); simpl; try congruence.
+    exploit function_ptr_preserved; eauto.
+    intros (tf & H1 & H2); subst; repeat econstructor; eauto.
+    exploit transf_fundef_correct; eauto. 
+    intuition auto.
+ * destruct X as (tf & X1 & X2); subst.
+   eapply find_function_RTL_match; eauto.
+Qed.
+
+
+Local Hint Resolve symbols_preserved senv_preserved: core.
+
+Lemma transf_program_RTL_correct: 
+  forward_simulation (RTL.semantics prog) (RTL.semantics (RTLpath.transf_program tprog)).
+Proof.
+  eapply forward_simulation_step with (match_states:=fun (s1 s2:RTL.state) => s1=s2); simpl; eauto.
+  - eapply senv_preserved.
+  - (* initial states *)
+    intros s1 INIT. destruct INIT as [b f m0 ge0 INIT SYMB PTR SIG]. eexists; intuition eauto.
+    econstructor; eauto.
+    + intros; eapply (Genv.init_mem_match (match_prog_RTL tprog)). apply (Genv.init_mem_match TRANSL); auto.
+    + rewrite symbols_preserved. 
+      replace (prog_main (RTLpath.transf_program tprog)) with (prog_main prog).
+      * eapply SYMB.
+      * erewrite (match_program_main (match_prog_RTL tprog)). erewrite (match_program_main TRANSL); auto.
+    + exploit function_ptr_RTL_preserved; eauto.
+  - intros; subst; auto.
+  - intros s t s2 STEP s1 H; subst.
+    eexists; intuition.
+    destruct STEP. 
+    + (* Inop *) eapply exec_Inop; eauto.
+    + (* Iop *) eapply exec_Iop; eauto.
+      erewrite eval_operation_preserved; eauto.
+    + (* Iload *) eapply exec_Iload; eauto. 
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Iload notrap1 *) eapply exec_Iload_notrap1; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Iload notrap2 *) eapply exec_Iload_notrap2; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Istore *) eapply exec_Istore; eauto.
+      all: erewrite eval_addressing_preserved; eauto.
+    + (* Icall *)
+        eapply RTL.exec_Icall; eauto.
+        eapply find_function_preserved; eauto.
+    + (* Itailcall *)
+        eapply RTL.exec_Itailcall; eauto.
+        eapply find_function_preserved; eauto.
+    + (* Ibuiltin *)
+      eapply RTL.exec_Ibuiltin; eauto.
+      * eapply eval_builtin_args_preserved; eauto.
+      * eapply external_call_symbols_preserved; eauto.
+    + (* Icond *)
+      eapply exec_Icond; eauto.
+    + (* Ijumptable *)
+      eapply RTL.exec_Ijumptable; eauto.
+    + (* Ireturn *)
+      eapply RTL.exec_Ireturn; eauto.
+    + (* exec_function_internal *)
+      eapply RTL.exec_function_internal; eauto.
+    + (* exec_function_external *)
+      eapply RTL.exec_function_external; eauto.
+      eapply external_call_symbols_preserved; eauto.
+    + (* exec_return *)
+      eapply RTL.exec_return; eauto.
+Qed.
+
+Theorem transf_program_correct: 
+  forward_simulation (RTL.semantics prog) (RTLpath.semantics tprog).
+Proof.
+  eapply compose_forward_simulations.
+  + eapply transf_program_RTL_correct.
+  + eapply RTLpath_complete.
+Qed.
+
+
+(* Properties used in hypothesis of [RTLpathLiveproofs.step_eqlive] theorem *)
+Theorem all_fundef_liveness_ok b f:
+  Genv.find_funct_ptr tpge b = Some f -> liveness_ok_fundef f.
+Proof.
+  unfold match_prog, match_program in TRANSL.
+  unfold Genv.find_funct_ptr, tpge; simpl; intro X.
+  destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+  destruct y as [tf0|]; try congruence.
+  inversion X as [H1]. subst. clear X.
+  remember (@Gfun fundef unit f) as f2.
+  destruct H as [ctx' f1 f2 H0|]; try congruence.
+  inversion Heqf2 as [H2]. subst; clear Heqf2.
+  exploit transf_fundef_correct; eauto.
+  intuition.
+Qed.
+
+End PRESERVATION.
+
+Local Open Scope lazy_bool_scope.
+Local Open Scope option_monad_scope.
+
+Local Notation ext alive := (fun r => Regset.In r alive).
+
+Lemma regset_add_spec live r1 r2: Regset.In r1 (Regset.add r2 live) <-> (r1 = r2 \/ Regset.In r1 live).
+Proof.
+  destruct (Pos.eq_dec r1 r2).
+  - subst. intuition; eapply Regset.add_1; auto.
+  - intuition. 
+    * right. eapply Regset.add_3; eauto.
+    * eapply Regset.add_2; auto.
+Qed.
+
+Definition eqlive_reg (alive: Regset.elt -> Prop) (rs1 rs2: regset): Prop :=
+ forall r, (alive r) -> rs1#r = rs2#r. 
+
+Lemma eqlive_reg_refl alive rs: eqlive_reg alive rs rs.
+Proof.
+  unfold eqlive_reg; auto.
+Qed.
+
+Lemma eqlive_reg_symmetry alive rs1 rs2: eqlive_reg alive rs1 rs2 -> eqlive_reg alive rs2 rs1.
+Proof.
+  unfold eqlive_reg; intros; symmetry; auto.
+Qed.
+
+Lemma eqlive_reg_trans alive rs1 rs2 rs3: eqlive_reg alive rs1 rs2 -> eqlive_reg alive rs2 rs3 -> eqlive_reg alive rs1 rs3.
+Proof.
+  unfold eqlive_reg; intros H0 H1 r H. rewrite H0; eauto.
+Qed.
+
+Lemma eqlive_reg_update (alive: Regset.elt -> Prop) rs1 rs2 r v: eqlive_reg (fun r1 => r1 <> r /\ alive r1) rs1 rs2 -> eqlive_reg alive (rs1 # r <- v) (rs2 # r <- v).
+Proof.
+  unfold eqlive_reg; intros EQLIVE r0 ALIVE.
+  destruct (Pos.eq_dec r r0) as [H|H].
+  - subst. rewrite! Regmap.gss. auto.
+  - rewrite! Regmap.gso; auto.
+Qed.
+
+Lemma eqlive_reg_monotonic (alive1 alive2: Regset.elt -> Prop) rs1 rs2: eqlive_reg alive2 rs1 rs2 -> (forall r, alive1 r -> alive2 r) ->  eqlive_reg alive1 rs1 rs2.
+Proof.
+  unfold eqlive_reg; intuition.
+Qed.
+
+Lemma eqlive_reg_triv rs1 rs2: (forall r, rs1#r = rs2#r) <-> eqlive_reg (fun _ => True) rs1 rs2.
+Proof.
+  unfold eqlive_reg; intuition.
+Qed.
+
+Lemma eqlive_reg_triv_trans alive rs1 rs2 rs3: eqlive_reg alive rs1 rs2 -> (forall r, rs2#r = rs3#r) -> eqlive_reg alive rs1 rs3.
+Proof.
+  rewrite eqlive_reg_triv; intros; eapply eqlive_reg_trans; eauto.
+  eapply eqlive_reg_monotonic; eauto.
+  simpl; eauto.
+Qed.
+
+Local Hint Resolve Regset.mem_2 Regset.subset_2: core.
+
+Lemma lazy_and_true (b1 b2: bool): b1 &&& b2 = true <-> b1 = true /\ b2 = true.
+Proof.
+  destruct b1; simpl; intuition.
+Qed.
+
+Lemma list_mem_correct (rl: list reg) (alive: Regset.t):
+  list_mem rl alive = true -> forall r, List.In r rl -> ext alive r.
+Proof.
+  induction rl; simpl; try rewrite lazy_and_true; intuition subst; auto.
+Qed.
+
+Lemma eqlive_reg_list (alive: Regset.elt -> Prop) args rs1 rs2: eqlive_reg alive rs1 rs2 -> (forall r, List.In r args -> (alive r)) -> rs1##args = rs2##args.
+Proof.
+  induction args; simpl; auto.
+  intros EQLIVE ALIVE; rewrite IHargs; auto.
+  unfold eqlive_reg in EQLIVE.
+  rewrite EQLIVE; auto.
+Qed.
+
+Lemma eqlive_reg_listmem (alive: Regset.t) args rs1 rs2: eqlive_reg (ext alive) rs1 rs2 -> list_mem args alive = true -> rs1##args = rs2##args.
+Proof.
+  intros; eapply eqlive_reg_list; eauto.
+  intros; eapply list_mem_correct; eauto.
+Qed.
+
+Record eqlive_istate alive (st1 st2: istate): Prop :=
+   { eqlive_continue: icontinue st1 = icontinue st2;
+     eqlive_ipc: ipc st1 = ipc st2;
+     eqlive_irs: eqlive_reg alive (irs st1) (irs st2);
+     eqlive_imem: (imem st1) = (imem st2) }.
+
+Lemma iinst_checker_eqlive ge sp pm alive i res rs1 rs2 m st1: 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  iinst_checker pm alive i = Some res -> 
+  istep ge i sp rs1 m = Some st1 -> 
+  exists st2, istep ge i sp rs2 m = Some st2 /\ eqlive_istate (ext (fst res)) st1 st2.
+Proof.
+  intros EQLIVE.
+  destruct i; simpl; try_simplify_someHyps.
+  - (* Inop *)
+    repeat (econstructor; eauto).
+  - (* Iop *)
+    inversion_ASSERT; try_simplify_someHyps.
+    inversion_SOME v. intros EVAL.
+    erewrite <- eqlive_reg_listmem; eauto.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+    eapply eqlive_reg_update.
+    eapply eqlive_reg_monotonic; eauto.
+    intros r0; rewrite regset_add_spec. 
+    intuition.
+  - (* Iload *)
+    inversion_ASSERT; try_simplify_someHyps.
+    destruct t.
+    inversion_SOME a0. intros EVAL.
+    erewrite <- eqlive_reg_listmem; eauto.
+    try_simplify_someHyps.
+    inversion_SOME v; try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+    2:
+      erewrite <- (eqlive_reg_listmem _ _ rs1 rs2); eauto;
+      destruct (eval_addressing _ _ _ _);
+      try destruct (Memory.Mem.loadv _ _ _);
+      try (intros; inv H1; repeat (econstructor; simpl; eauto)).
+    all:
+      eapply eqlive_reg_update;
+      eapply eqlive_reg_monotonic; eauto;
+      intros r0; rewrite regset_add_spec;
+      intuition.
+  - (* Istore *)
+    (repeat inversion_ASSERT); try_simplify_someHyps.
+    inversion_SOME a0. intros EVAL.
+    erewrite <- eqlive_reg_listmem; eauto.
+    rewrite <- (EQLIVE r); auto.
+    inversion_SOME v; try_simplify_someHyps.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+  - (* Icond *)
+    inversion_ASSERT.
+    inversion_SOME b. intros EVAL.
+    intros ARGS; erewrite <- eqlive_reg_listmem; eauto.
+    try_simplify_someHyps.
+    repeat (econstructor; simpl; eauto).
+    exploit exit_checker_res; eauto.
+    intro; subst; simpl. auto.
+Qed.
+
+Lemma iinst_checker_istep_continue ge sp pm alive i res rs m st: 
+  iinst_checker pm alive i = Some res ->
+  istep ge i sp rs m = Some st ->
+  icontinue st = true ->
+  (snd res)=(ipc st).
+Proof.
+  intros; exploit iinst_checker_default_succ; eauto.
+  erewrite istep_normal_exit; eauto.
+  congruence.
+Qed.
+
+Lemma exit_checker_eqlive A (pm: path_map) (alive: Regset.t) (pc: node) (v:A) res rs1 rs2:
+  exit_checker pm alive pc v = Some res ->  
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  exists path, pm!pc = Some path /\ eqlive_reg (ext path.(input_regs)) rs1 rs2.
+Proof.
+  unfold exit_checker.
+  inversion_SOME path.
+  inversion_ASSERT. try_simplify_someHyps.
+  repeat (econstructor; eauto).
+  intros; eapply eqlive_reg_monotonic; eauto.
+  intros; exploit Regset.subset_2; eauto.
+Qed.
+
+Lemma iinst_checker_eqlive_stopped ge sp pm alive i res rs1 rs2 m st1: 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  istep ge i sp rs1 m = Some st1 ->
+  iinst_checker pm alive i = Some res -> 
+  icontinue st1 = false ->
+  exists path st2, pm!(ipc st1) = Some path /\ istep ge i sp rs2 m = Some st2 /\ eqlive_istate (ext path.(input_regs)) st1 st2.
+Proof.
+  intros EQLIVE.
+  set (tmp := istep ge i sp rs2).
+  destruct i; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b);  try_simplify_someHyps; try congruence.
+  1-3: explore_destruct; simpl; try_simplify_someHyps; repeat (inversion_ASSERT || inversion_SOME b);  try_simplify_someHyps; try congruence.
+  (* Icond *)
+  unfold tmp; clear tmp; simpl.
+  intros EVAL; erewrite <- eqlive_reg_listmem; eauto.
+  try_simplify_someHyps.
+  destruct b eqn:EQb; simpl in * |-; try congruence.
+  intros; exploit exit_checker_eqlive; eauto.
+  intros (path & PATH & EQLIVE2).
+  repeat (econstructor; simpl; eauto).
+Qed.
+
+Lemma ipath_checker_eqlive_normal ge ps (f:function) sp pm: forall alive pc res rs1 rs2 m st1, 
+  eqlive_reg (ext alive) rs1 rs2 ->
+  ipath_checker ps f pm alive pc = Some res ->
+  isteps ge ps f sp rs1 m pc = Some st1 ->
+  icontinue st1 = true ->
+  exists st2, isteps ge ps f sp rs2 m pc = Some st2 /\ eqlive_istate (ext (fst res)) st1 st2.
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps.
+  - repeat (econstructor; simpl; eauto).
+  - inversion_SOME i; try_simplify_someHyps.
+    inversion_SOME res0.
+    inversion_SOME st0.
+    intros.
+    exploit iinst_checker_eqlive; eauto.
+    destruct 1 as (st2 & ISTEP & [CONT PC RS MEM]).
+    try_simplify_someHyps.
+    rewrite <- CONT, <- MEM, <- PC.
+    destruct (icontinue st0) eqn:CONT'.
+    * intros; exploit iinst_checker_istep_continue; eauto.
+      rewrite <- PC; intros X; rewrite X in * |-. eauto.
+    * try_simplify_someHyps.
+      congruence.
+Qed.
+
+Lemma ipath_checker_isteps_continue ge ps (f:function) sp pm: forall alive pc res rs m st, 
+  ipath_checker ps f pm alive pc = Some res ->
+  isteps ge ps f sp rs m pc = Some st ->
+  icontinue st = true ->
+  (snd res)=(ipc st).
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps.
+  inversion_SOME i; try_simplify_someHyps.
+  inversion_SOME res0.
+  inversion_SOME st0.
+  destruct (icontinue st0) eqn:CONT'.
+  - intros; exploit iinst_checker_istep_continue; eauto.
+    intros EQ; rewrite EQ in * |-; clear EQ; eauto.
+  - try_simplify_someHyps; congruence.
+Qed.
+
+Lemma ipath_checker_eqlive_stopped ge ps (f:function) sp pm: forall alive pc res rs1 rs2 m st1, 
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  ipath_checker ps f pm alive pc = Some res -> 
+  isteps ge ps f sp rs1 m pc = Some st1 -> 
+  icontinue st1 = false ->
+  exists path st2, pm!(ipc st1) = Some path /\ isteps ge ps f sp rs2 m pc = Some st2 /\ eqlive_istate (ext path.(input_regs)) st1 st2.
+Proof.
+  induction ps as [|ps]; simpl; try_simplify_someHyps; try congruence.
+  inversion_SOME i; try_simplify_someHyps.
+  inversion_SOME res0.
+  inversion_SOME st0.
+  intros.
+  destruct (icontinue st0) eqn:CONT'; try_simplify_someHyps; intros.
+  * intros; exploit iinst_checker_eqlive; eauto.
+    destruct 1 as (st2 & ISTEP & [CONT PC RS MEM]).
+    exploit iinst_checker_istep_continue; eauto.
+    intros PC'.
+    try_simplify_someHyps.
+    rewrite PC', <- CONT, <- MEM, <- PC, CONT'.
+    eauto.
+  * intros; exploit iinst_checker_eqlive_stopped; eauto.
+    intros EQLIVE; generalize EQLIVE; destruct 1 as (path & st2 & PATH & ISTEP & [CONT PC RS MEM]).
+    try_simplify_someHyps.
+    rewrite <- CONT, <- MEM, <- PC, CONT'.
+    try_simplify_someHyps.
+Qed.
+
+Inductive eqlive_stackframes: stackframe -> stackframe -> Prop :=
+  | eqlive_stackframes_intro path res f sp pc rs1 rs2
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: forall v, eqlive_reg (ext path.(input_regs)) (rs1 # res <- v) (rs2 # res <- v)):
+       eqlive_stackframes (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2). 
+
+Inductive eqlive_states: state -> state -> Prop :=
+  | eqlive_states_intro 
+      path st1 st2 f sp pc rs1 rs2 m
+      (STACKS: list_forall2 eqlive_stackframes st1 st2)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: eqlive_reg (ext path.(input_regs)) rs1 rs2):
+      eqlive_states (State st1 f sp pc rs1 m) (State st2 f sp pc rs2 m)
+  | eqlive_states_call st1 st2 f args m
+      (LIVE: liveness_ok_fundef f)
+      (STACKS: list_forall2 eqlive_stackframes st1 st2):
+      eqlive_states (Callstate st1 f args m) (Callstate st2 f args m)
+  | eqlive_states_return st1 st2 v m
+      (STACKS: list_forall2 eqlive_stackframes st1 st2):
+      eqlive_states (Returnstate st1 v m) (Returnstate st2 v m).
+
+
+Section LivenessProperties.
+
+Variable prog: program.
+
+Let pge := Genv.globalenv prog.
+Let ge := Genv.globalenv (RTLpath.transf_program prog).
+
+Hypothesis all_fundef_liveness_ok: forall b f,
+  Genv.find_funct_ptr pge b = Some f -> 
+  liveness_ok_fundef f.
+
+Lemma find_funct_liveness_ok v fd:
+  Genv.find_funct pge v = Some fd -> liveness_ok_fundef fd.
+Proof.
+  unfold Genv.find_funct.
+  destruct v; try congruence.
+  destruct (Integers.Ptrofs.eq_dec _ _); try congruence.
+  eapply all_fundef_liveness_ok; eauto.
+Qed.
+
+Lemma find_function_liveness_ok ros rs f:
+  find_function pge ros rs = Some f -> liveness_ok_fundef f.
+Proof.
+  destruct ros as [r|i]; simpl.
+  - intros; eapply find_funct_liveness_ok; eauto.
+  - destruct (Genv.find_symbol pge i); try congruence.
+    eapply all_fundef_liveness_ok; eauto.
+Qed.
+
+Lemma find_function_eqlive alive ros rs1 rs2:
+  eqlive_reg (ext alive) rs1 rs2 ->
+  reg_sum_mem ros alive = true ->
+  find_function pge ros rs1 = find_function pge ros rs2.
+Proof.
+  intros EQLIVE.
+  destruct ros; simpl; auto.
+  intros H; erewrite (EQLIVE r); eauto.
+Qed.
+
+Lemma final_inst_checker_from_iinst_checker i sp rs m st pm alive por:
+  istep ge i sp rs m = Some st -> 
+  final_inst_checker pm alive por i = None.
+Proof.
+  destruct i; simpl; try congruence.
+Qed.
+
+(* is it useful ?
+Lemma inst_checker_from_iinst_checker i sp rs m st pm alive:
+  istep ge i sp rs m = Some st -> 
+  inst_checker pm alive i = (SOME res <- iinst_checker pm alive i IN exit_checker pm (fst res) (snd res) tt).
+Proof.
+  unfold inst_checker.
+  destruct (iinst_checker pm alive i); simpl; auto.
+  destruct i; simpl; try congruence.
+Qed.
+*)
+
+Lemma exit_checker_eqlive_ext1 (pm: path_map) (alive: Regset.t) (pc: node) r rs1 rs2:
+  exit_checker pm (Regset.add r alive) pc tt = Some tt ->  
+  eqlive_reg (ext alive) rs1 rs2 ->
+  exists path, pm!pc = Some path /\ (forall v, eqlive_reg (ext path.(input_regs)) (rs1 # r <- v) (rs2 # r <- v)).
+Proof.
+  unfold exit_checker.
+  inversion_SOME path.
+  inversion_ASSERT. try_simplify_someHyps.
+  repeat (econstructor; eauto).
+  intros; eapply eqlive_reg_update; eauto.
+  eapply eqlive_reg_monotonic; eauto.
+  intros r0 [X1 X2]; exploit Regset.subset_2; eauto.
+  rewrite regset_add_spec. intuition subst.
+Qed.
+
+Local Hint Resolve in_or_app: local.
+Lemma eqlive_eval_builtin_args alive rs1 rs2 sp m args vargs:
+  eqlive_reg alive rs1 rs2 ->
+  Events.eval_builtin_args ge (fun r => rs1 # r) sp m args vargs ->
+  (forall r, List.In r (params_of_builtin_args args) -> alive r) ->
+  Events.eval_builtin_args ge (fun r => rs2 # r) sp m args vargs.
+Proof.
+  unfold Events.eval_builtin_args.
+  intros EQLIVE; induction 1 as [|a1 al b1 bl EVAL1 EVALL]; simpl.
+  { econstructor; eauto. }
+  intro X. 
+  assert (X1: eqlive_reg (fun r => In r (params_of_builtin_arg a1)) rs1 rs2).
+  { eapply eqlive_reg_monotonic; eauto with local. }
+  lapply IHEVALL; eauto with local.
+  clear X IHEVALL; intro X. econstructor; eauto.
+  generalize X1; clear EVALL X1 X.
+  induction EVAL1; simpl; try (econstructor; eauto; fail).
+  - intros X1; erewrite X1; [ econstructor; eauto | eauto ].
+  - intros; econstructor.
+    + eapply IHEVAL1_1; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+    + eapply IHEVAL1_2; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+  - intros; econstructor.
+    + eapply IHEVAL1_1; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+    + eapply IHEVAL1_2; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      simpl; intros; eauto with local.
+Qed.
+
+Lemma exit_checker_eqlive_builtin_res (pm: path_map) (alive: Regset.t) (pc: node) rs1 rs2 (res:builtin_res reg):
+  exit_checker pm (reg_builtin_res res alive) pc tt = Some tt ->
+  eqlive_reg (ext alive) rs1 rs2 ->
+  exists path, pm!pc = Some path /\ (forall vres, eqlive_reg (ext path.(input_regs)) (regmap_setres res vres rs1) (regmap_setres res vres rs2)).
+Proof.
+  destruct res; simpl.
+  - intros; exploit exit_checker_eqlive_ext1; eauto.
+  - intros; exploit exit_checker_eqlive; eauto.
+    intros (path & PATH & EQLIVE).
+    eexists; intuition eauto.
+  - intros; exploit exit_checker_eqlive; eauto.
+    intros (path & PATH & EQLIVE).
+    eexists; intuition eauto.
+Qed.
+
+Lemma exit_list_checker_eqlive (pm: path_map) (alive: Regset.t) (tbl: list node) rs1 rs2 pc: forall n,
+  exit_list_checker pm alive tbl = true ->  
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  list_nth_z tbl n = Some pc ->
+  exists path, pm!pc = Some path /\ eqlive_reg (ext path.(input_regs)) rs1 rs2.
+Proof.
+  induction tbl; simpl.
+  - intros; try congruence.
+  - intros n; rewrite lazy_and_Some_tt_true; destruct (zeq n 0) eqn: Hn.
+    * try_simplify_someHyps; intuition.
+      exploit exit_checker_eqlive; eauto.
+    * intuition. eapply IHtbl; eauto.
+Qed.
+
+Lemma final_inst_checker_eqlive (f: function) sp alive por pc i rs1 rs2 m stk1 stk2 t s1:
+  list_forall2 eqlive_stackframes stk1 stk2 ->
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  Regset.Subset por alive ->
+  liveness_ok_function f ->
+  (fn_code f) ! pc = Some i ->
+  path_last_step ge pge stk1 f sp pc rs1 m t s1 ->
+  final_inst_checker (fn_path f) alive por i = Some tt -> 
+  exists s2, path_last_step ge pge stk2 f sp pc rs2 m t s2 /\ eqlive_states s1 s2.
+Proof.
+  intros STACKS EQLIVE SUB LIVENESS PC; 
+  destruct 1 as [i' sp pc rs1 m st1|
+                 sp pc rs1 m sig ros args res pc' fd|
+                 st1 pc rs1 m sig ros args fd m'|
+                 sp pc rs1 m ef args res pc' vargs t vres m'|
+                 sp pc rs1 m arg tbl n pc' |
+                 st1 pc rs1 m optr m']; 
+  try_simplify_someHyps.
+  + (* istate *)
+    intros PC ISTEP. erewrite final_inst_checker_from_iinst_checker; eauto.
+    congruence.
+  + (* Icall *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_checker_eqlive_ext1; eauto.
+    eapply eqlive_reg_monotonic; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Icall; eauto.
+      erewrite <- find_function_eqlive; eauto.
+    - erewrite eqlive_reg_listmem; eauto.
+      eapply eqlive_states_call; eauto.
+      eapply find_function_liveness_ok; eauto.
+      repeat (econstructor; eauto).
+  + (* Itailcall *)
+    repeat inversion_ASSERT. intros.
+    eexists; split.
+    - eapply exec_Itailcall; eauto.
+      erewrite <- find_function_eqlive; eauto.
+    - erewrite eqlive_reg_listmem; eauto.
+      eapply eqlive_states_call; eauto.
+      eapply find_function_liveness_ok; eauto.
+  + (* Ibuiltin *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_checker_eqlive_builtin_res; eauto.
+    eapply eqlive_reg_monotonic; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Ibuiltin; eauto.
+      eapply eqlive_eval_builtin_args; eauto.
+      intros; eapply list_mem_correct; eauto.
+    - repeat (econstructor; simpl; eauto).
+  + (* Ijumptable *)
+    repeat inversion_ASSERT. intros.
+    exploit exit_list_checker_eqlive; eauto.
+    eapply eqlive_reg_monotonic; eauto.
+    intros (path & PATH & EQLIVE2).
+    eexists; split.
+    - eapply exec_Ijumptable; eauto.
+      erewrite <- EQLIVE; eauto.
+    - repeat (econstructor; simpl; eauto).
+  + (* Ireturn *)
+    repeat inversion_ASSERT. intros.
+    eexists; split.
+    - eapply exec_Ireturn; eauto.
+    - destruct optr; simpl in * |- *.
+      * erewrite (EQLIVE r); eauto.
+        eapply eqlive_states_return; eauto.
+      * eapply eqlive_states_return; eauto.
+Qed.
+
+Lemma inst_checker_eqlive (f: function) sp alive por pc i rs1 rs2 m stk1 stk2 t s1:
+  list_forall2 eqlive_stackframes stk1 stk2 ->
+  eqlive_reg (ext alive) rs1 rs2 -> 
+  liveness_ok_function f ->
+  (fn_code f) ! pc = Some i ->
+  path_last_step ge pge stk1 f sp pc rs1 m t s1 ->
+  inst_checker (fn_path f) alive por i = Some tt -> 
+  exists s2, path_last_step ge pge stk2 f sp pc rs2 m t s2 /\ eqlive_states s1 s2.
+Proof.
+  unfold inst_checker;
+  intros STACKS EQLIVE LIVENESS PC.
+  destruct (iinst_checker (fn_path f) alive i) as [res|] eqn: IICHECKER.
+  + destruct 1 as [i' sp pc rs1 m st1| | | | | ];
+    try_simplify_someHyps.
+    intros IICHECKER PC ISTEP. inversion_ASSERT.
+    intros.
+    destruct (icontinue st1) eqn: CONT.
+    - (* CONT => true *)
+      exploit iinst_checker_eqlive; eauto.
+      destruct 1 as (st2 & ISTEP2 & [CONT' PC2 RS MEM]).
+      repeat (econstructor; simpl; eauto).
+      rewrite <- MEM, <- PC2.
+      apply Regset.subset_2 in H.
+      exploit exit_checker_eqlive; eauto.
+      eapply eqlive_reg_monotonic; eauto.
+      intros (path & PATH & EQLIVE2).
+      eapply eqlive_states_intro; eauto.
+      erewrite <- iinst_checker_istep_continue; eauto.
+    - (* CONT => false *)
+      intros; exploit iinst_checker_eqlive_stopped; eauto.
+      destruct 1 as (path & st2 & PATH & ISTEP2 & [CONT2 PC2 RS MEM]).
+      repeat (econstructor; simpl; eauto).
+      rewrite <- MEM, <- PC2.
+      eapply eqlive_states_intro; eauto.
+  + inversion_ASSERT.
+    intros; exploit final_inst_checker_eqlive; eauto.
+Qed.
+
+Lemma path_step_eqlive path stk1 f sp rs1 m pc t s1 stk2 rs2:
+  path_step ge pge (psize path) stk1 f sp rs1 m pc t s1 ->
+  list_forall2 eqlive_stackframes stk1 stk2 ->
+  eqlive_reg (ext (input_regs path)) rs1 rs2 ->
+  liveness_ok_function f ->
+  (fn_path f) ! pc = Some path ->
+   exists s2, path_step ge pge (psize path) stk2 f sp rs2 m pc t s2 /\ eqlive_states s1 s2.
+Proof.
+  intros STEP STACKS EQLIVE LIVE PC.
+  unfold liveness_ok_function in LIVE.
+  exploit LIVE; eauto.
+  unfold path_checker.
+  inversion_SOME res; (* destruct res as [alive pc']. *) intros ICHECK. (* simpl. *)
+  inversion_SOME i; intros PC'.
+  destruct STEP as [st ISTEPS CONT|].
+  - (* early_exit *)
+    intros; exploit ipath_checker_eqlive_stopped; eauto.
+    destruct 1 as (path2 & st2 & PATH & ISTEP2 & [CONT2 PC2 RS MEM]).
+    repeat (econstructor; simpl; eauto).
+    rewrite <- MEM, <- PC2.
+    eapply eqlive_states_intro; eauto.
+  - (* normal_exit *)
+    intros; exploit ipath_checker_eqlive_normal; eauto.
+    destruct 1 as (st2 & ISTEP2 & [CONT' PC2 RS MEM]).
+    exploit ipath_checker_isteps_continue; eauto.
+    intros PC3; rewrite <- PC3, <- PC2 in * |-.
+    exploit inst_checker_eqlive; eauto.
+    intros (s2 & LAST_STEP & EQLIVE2).
+     eexists; split; eauto.
+     eapply exec_normal_exit; eauto.
+     rewrite <- PC3, <- MEM; auto.
+Qed.
+
+Theorem step_eqlive t s1 s1' s2: 
+  step ge pge s1 t s1' ->
+  eqlive_states s1 s2 ->
+  exists s2', step ge pge s2 t s2' /\ eqlive_states s1' s2'.
+Proof.
+  destruct 1 as [path stack f sp rs m pc t s PATH STEP | | | ].
+  - intros EQLIVE; inv EQLIVE; simplify_someHyps. 
+    intro PATH.
+    exploit path_step_eqlive; eauto.
+    intros (s2 & STEP2 & EQUIV2). 
+    eexists; split; eauto.
+    eapply exec_path; eauto.
+  - intros EQLIVE; inv EQLIVE; inv LIVE.
+    exploit initialize_path. { eapply fn_entry_point_wf. }
+    intros (path & Hpath).
+    eexists; split.
+    * eapply exec_function_internal; eauto.
+    * eapply eqlive_states_intro; eauto.
+      eapply eqlive_reg_refl.
+  - intros EQLIVE; inv EQLIVE.
+    eexists; split.
+    * eapply exec_function_external; eauto.
+    * eapply eqlive_states_return; eauto.
+  - intros EQLIVE; inv EQLIVE.
+    inversion STACKS as [|s1 st1 s' s2 STACK STACKS']; subst; clear STACKS.
+    inv STACK.
+    exists (State s2 f sp pc (rs2 # res <- vres) m); split.
+    * apply exec_return.
+    * eapply eqlive_states_intro; eauto.
+Qed.
+
+End LivenessProperties.
diff --git a/scheduling/RTLpathSE_impl.v b/scheduling/RTLpathSE_impl.v
new file mode 100644
index 00000000..e21d7cd1
--- /dev/null
+++ b/scheduling/RTLpathSE_impl.v
@@ -0,0 +1,1664 @@
+(** Implementation and refinement of the symbolic execution *)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors.
+Require Import RTLpathSE_theory RTLpathLivegenproof.
+Require Import Axioms RTLpathSE_simu_specs.
+Require Import RTLpathSE_simplify.
+
+Local Open Scope error_monad_scope.
+Local Open Scope option_monad_scope.
+
+Require Import Impure.ImpHCons.
+Import Notations.
+Import HConsing.
+
+Local Open Scope impure.
+Local Open Scope hse.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+Definition XDEBUG {A} (x:A) (k: A -> ?? pstring): ?? unit := RET tt. (* TO REMOVE DEBUG INFO *)
+(*Definition XDEBUG {A} (x:A) (k: A -> ?? pstring): ?? unit := DO s <~ k x;; println ("DEBUG simu_check:" +; s). (* TO INSERT DEBUG INFO *)*)
+
+Definition DEBUG (s: pstring): ?? unit := XDEBUG tt (fun _ => RET s).
+
+(** * Implementation of Data-structure use in Hash-consing *)
+
+Definition hsval_get_hid (hsv: hsval): hashcode :=
+  match hsv with
+  | HSinput _ hid => hid
+  | HSop _ _ hid => hid
+  | HSload _ _ _ _ _ hid => hid
+  end.
+
+Definition list_hsval_get_hid (lhsv: list_hsval): hashcode :=
+  match lhsv with
+  | HSnil hid => hid
+  | HScons _ _ hid => hid
+  end.
+
+Definition hsmem_get_hid (hsm: hsmem): hashcode :=
+  match hsm with
+  | HSinit hid => hid
+  | HSstore _ _ _ _ _ hid => hid
+  end.
+
+Definition hsval_set_hid (hsv: hsval) (hid: hashcode): hsval :=
+  match hsv with
+  | HSinput r _ => HSinput r hid
+  | HSop o lhsv _ => HSop o lhsv hid
+  | HSload hsm trap chunk addr lhsv _ => HSload hsm trap chunk addr lhsv hid
+  end.
+
+Definition list_hsval_set_hid (lhsv: list_hsval) (hid: hashcode): list_hsval :=
+  match lhsv with
+  | HSnil _ => HSnil hid
+  | HScons hsv lhsv _ => HScons hsv lhsv hid
+  end.
+
+Definition hsmem_set_hid (hsm: hsmem) (hid: hashcode): hsmem :=
+  match hsm with
+  | HSinit _ => HSinit hid
+  | HSstore hsm chunk addr lhsv srce _ => HSstore hsm chunk addr lhsv srce hid
+  end.
+
+
+Lemma hsval_set_hid_correct x y ge sp rs0 m0:
+  hsval_set_hid x unknown_hid = hsval_set_hid y unknown_hid ->
+  seval_hsval ge sp x rs0 m0 = seval_hsval ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve hsval_set_hid_correct: core.
+
+Lemma list_hsval_set_hid_correct x y ge sp rs0 m0:
+  list_hsval_set_hid x unknown_hid = list_hsval_set_hid y unknown_hid ->
+  seval_list_hsval ge sp x rs0 m0 = seval_list_hsval ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve list_hsval_set_hid_correct: core.
+
+Lemma hsmem_set_hid_correct x y ge sp rs0 m0:
+  hsmem_set_hid x unknown_hid = hsmem_set_hid y unknown_hid ->
+  seval_hsmem ge sp x rs0 m0 = seval_hsmem ge sp y rs0 m0.
+Proof.
+  destruct x, y; intro H; inversion H; subst; simpl; auto.
+Qed.
+Local Hint Resolve hsmem_set_hid_correct: core.
+
+(** Now, we build the hash-Cons value from a "hash_eq".
+
+  Informal specification: 
+    [hash_eq] must be consistent with the "hashed" constructors defined above.
+
+  We expect that hashinfo values in the code of these "hashed" constructors verify:
+    (hash_eq (hdata x) (hdata y) ~> true) <-> (hcodes x)=(hcodes y)
+*)
+
+
+Definition hsval_hash_eq (sv1 sv2: hsval): ?? bool :=
+  match sv1, sv2 with
+  | HSinput r1 _, HSinput r2 _ => struct_eq r1 r2 (* NB: really need a struct_eq here ? *)
+  | HSop op1 lsv1 _, HSop op2 lsv2 _  =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     if b1
+     then struct_eq op1 op2 (* NB: really need a struct_eq here ? *)
+     else RET false
+  | HSload sm1 trap1 chk1 addr1 lsv1 _, HSload sm2 trap2 chk2 addr2 lsv2 _ =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     DO b2 <~ phys_eq sm1 sm2;;
+     DO b3 <~ struct_eq trap1 trap2;;
+     DO b4 <~ struct_eq chk1 chk2;;
+     if b1 && b2 && b3 && b4
+     then struct_eq addr1 addr2
+     else RET false
+  | _,_ => RET false
+  end.
+
+
+Lemma and_true_split a b: a && b = true <-> a = true /\ b = true.
+Proof.
+  destruct a; simpl; intuition.
+Qed.
+
+Lemma hsval_hash_eq_correct x y:
+  WHEN hsval_hash_eq x y ~> b THEN 
+   b = true -> hsval_set_hid x unknown_hid = hsval_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque hsval_hash_eq.
+Local Hint Resolve hsval_hash_eq_correct: wlp.
+
+Definition list_hsval_hash_eq (lsv1 lsv2: list_hsval): ?? bool :=
+  match lsv1, lsv2 with
+  | HSnil _, HSnil _ => RET true
+  | HScons sv1 lsv1' _, HScons sv2 lsv2' _  =>
+     DO b <~ phys_eq lsv1' lsv2';;
+     if b 
+     then phys_eq sv1 sv2
+     else RET false
+  | _,_ => RET false
+  end.
+
+Lemma list_hsval_hash_eq_correct x y:
+  WHEN list_hsval_hash_eq x y ~> b THEN 
+   b = true -> list_hsval_set_hid x unknown_hid = list_hsval_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque list_hsval_hash_eq.
+Local Hint Resolve list_hsval_hash_eq_correct: wlp.
+
+Definition hsmem_hash_eq (sm1 sm2: hsmem): ?? bool :=
+  match sm1, sm2 with
+  | HSinit _, HSinit _ => RET true
+  | HSstore sm1 chk1 addr1 lsv1 sv1 _, HSstore sm2 chk2 addr2 lsv2 sv2 _ =>
+     DO b1 <~ phys_eq lsv1 lsv2;;
+     DO b2 <~ phys_eq sm1 sm2;;
+     DO b3 <~ phys_eq sv1 sv2;;
+     DO b4 <~ struct_eq chk1 chk2;;
+     if b1 && b2 && b3 && b4
+     then struct_eq addr1 addr2
+     else RET false
+  | _,_ => RET false
+  end.
+
+Lemma hsmem_hash_eq_correct x y:
+  WHEN hsmem_hash_eq x y ~> b THEN 
+   b = true -> hsmem_set_hid x unknown_hid = hsmem_set_hid y unknown_hid.
+Proof.
+  destruct x, y; wlp_simplify; try (rewrite !and_true_split in *); intuition; subst; try congruence.
+Qed.
+Global Opaque hsmem_hash_eq.
+Local Hint Resolve hsmem_hash_eq_correct: wlp.
+
+
+Definition hSVAL: hashP hsval := {| hash_eq := hsval_hash_eq; get_hid:=hsval_get_hid; set_hid:=hsval_set_hid |}. 
+Definition hLSVAL: hashP list_hsval := {| hash_eq := list_hsval_hash_eq; get_hid:= list_hsval_get_hid; set_hid:= list_hsval_set_hid |}.
+Definition hSMEM: hashP hsmem := {| hash_eq := hsmem_hash_eq; get_hid:= hsmem_get_hid; set_hid:= hsmem_set_hid |}.
+
+Program Definition mk_hash_params: Dict.hash_params hsval :=
+ {|
+    Dict.test_eq := phys_eq;
+    Dict.hashing := fun (ht: hsval) => RET (hsval_get_hid ht);
+    Dict.log := fun hv =>
+         DO hv_name <~ string_of_hashcode (hsval_get_hid hv);;
+         println ("unexpected undef behavior of hashcode:" +; (CamlStr hv_name)) |}.
+Obligation 1.
+  wlp_simplify.
+Qed.
+
+(** ** various auxiliary (trivial lemmas) *)
+Lemma hsilocal_refines_sreg ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst -> forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (si_sreg st r) rs0 m0.
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_sreg: core.
+
+Lemma hsilocal_refines_valid_pointer ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> forall m b ofs, seval_smem ge sp st.(si_smem) rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs.
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_valid_pointer: core.
+
+Lemma hsilocal_refines_smem_refines ge sp rs0 m0 hst st:
+  hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 (hsi_smem hst) (st.(si_smem)).
+Proof.
+  unfold hsilocal_refines; intuition.
+Qed.
+Local Hint Resolve hsilocal_refines_smem_refines: core.
+
+Lemma hsistate_refines_dyn_exits ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> hsiexits_refines_dyn ge sp rs0 m0 (hsi_exits hst) (si_exits st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_exits: core.
+
+Lemma hsistate_refines_dyn_local ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> hsilocal_refines ge sp rs0 m0 (hsi_local hst) (si_local st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_local: core.
+
+Lemma hsistate_refines_dyn_nested ge sp rs0 m0 hst st:
+  hsistate_refines_dyn ge sp rs0 m0 hst st -> nested_sok ge sp rs0 m0 (si_local st) (si_exits st).
+Proof.
+  unfold hsistate_refines_dyn; intuition.
+Qed.
+Local Hint Resolve hsistate_refines_dyn_nested: core.
+
+(** * Implementation of symbolic execution *)
+Section CanonBuilding.
+
+Variable hC_hsval: hashinfo hsval -> ?? hsval.
+
+Hypothesis hC_hsval_correct: forall hs,
+  WHEN hC_hsval hs ~> hs' THEN forall ge sp rs0 m0,
+    seval_hsval ge sp (hdata hs) rs0 m0 = seval_hsval ge sp hs' rs0 m0.
+
+Variable hC_list_hsval: hashinfo list_hsval -> ?? list_hsval.
+Hypothesis hC_list_hsval_correct: forall lh,
+  WHEN hC_list_hsval lh ~> lh' THEN forall ge sp rs0 m0,
+    seval_list_hsval ge sp (hdata lh) rs0 m0 = seval_list_hsval ge sp lh' rs0 m0.
+
+Variable hC_hsmem: hashinfo hsmem -> ?? hsmem.
+Hypothesis hC_hsmem_correct: forall hm,
+  WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
+    seval_hsmem ge sp (hdata hm) rs0 m0 = seval_hsmem ge sp hm' rs0 m0.
+
+(* First, we wrap constructors for hashed values !*)
+
+Definition reg_hcode := 1.
+Definition op_hcode := 2.
+Definition load_hcode := 3.
+
+Definition hSinput_hcodes (r: reg) :=
+   DO hc <~ hash reg_hcode;;
+   DO hv <~ hash r;;
+   RET [hc;hv].
+Extraction Inline hSinput_hcodes.
+
+Definition hSinput (r:reg): ?? hsval :=
+   DO hv <~ hSinput_hcodes r;;
+   hC_hsval {| hdata:=HSinput r unknown_hid; hcodes :=hv; |}.
+
+Lemma hSinput_correct r:
+  WHEN hSinput r ~> hv THEN forall ge sp rs0 m0,
+    sval_refines ge sp rs0 m0 hv (Sinput r).
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSinput.
+Local Hint Resolve hSinput_correct: wlp.
+
+Definition hSop_hcodes (op:operation) (lhsv: list_hsval) :=
+   DO hc <~ hash op_hcode;;
+   DO hv <~ hash op;;
+   RET [hc;hv;list_hsval_get_hid lhsv].
+Extraction Inline hSop_hcodes.
+
+Definition hSop (op:operation) (lhsv: list_hsval): ?? hsval :=
+   DO hv <~ hSop_hcodes op lhsv;;
+   hC_hsval {| hdata:=HSop op lhsv unknown_hid; hcodes :=hv |}.
+
+Lemma hSop_fSop_correct op lhsv:
+  WHEN hSop op lhsv ~> hv THEN forall ge sp rs0 m0,
+    seval_hsval ge sp hv rs0 m0 = seval_hsval ge sp (fSop op lhsv) rs0 m0.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSop.
+Local Hint Resolve hSop_fSop_correct: wlp_raw.
+
+Lemma hSop_correct op lhsv:
+  WHEN hSop op lhsv ~> hv THEN forall ge sp rs0 m0 lsv sm m
+   (MEM: seval_smem ge sp sm rs0 m0 = Some m)
+   (MVALID: forall b ofs, Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+   (LR: list_sval_refines ge sp rs0 m0 lhsv lsv),
+   sval_refines ge sp rs0 m0 hv (Sop op lsv sm).
+Proof.
+  generalize fSop_correct; simpl.
+  intros X.
+  wlp_xsimplify ltac:(intuition eauto with wlp wlp_raw).
+  erewrite H, X; eauto.
+Qed.
+Local Hint Resolve hSop_correct: wlp.
+
+Definition hSload_hcodes (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval):=
+   DO hc <~ hash load_hcode;;
+   DO hv1 <~ hash trap;;
+   DO hv2 <~ hash chunk;;
+   DO hv3 <~ hash addr;;
+   RET [hc; hsmem_get_hid hsm; hv1; hv2; hv3; list_hsval_get_hid lhsv].
+Extraction Inline hSload_hcodes.
+
+Definition hSload (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval): ?? hsval :=
+   DO hv <~ hSload_hcodes hsm trap chunk addr lhsv;;
+   hC_hsval {| hdata := HSload hsm trap chunk addr lhsv unknown_hid; hcodes := hv |}.
+
+Lemma hSload_correct hsm trap chunk addr lhsv:
+  WHEN hSload hsm trap chunk addr lhsv ~> hv THEN forall ge sp rs0 m0 lsv sm
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv)
+    (MR: smem_refines ge sp rs0 m0 hsm sm),
+    sval_refines ge sp rs0 m0 hv (Sload sm trap chunk addr lsv).
+Proof.
+  wlp_simplify.
+  rewrite <- LR, <- MR.
+  auto.
+Qed.
+Global Opaque hSload.
+Local Hint Resolve hSload_correct: wlp.
+
+Definition hSnil (_: unit): ?? list_hsval :=
+   hC_list_hsval {| hdata := HSnil unknown_hid; hcodes := nil |}.
+
+Lemma hSnil_correct:
+  WHEN hSnil() ~> hv THEN forall ge sp rs0 m0,
+    list_sval_refines ge sp rs0 m0 hv Snil.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSnil.
+Local Hint Resolve hSnil_correct: wlp.
+
+Definition hScons (hsv: hsval) (lhsv: list_hsval): ?? list_hsval :=
+   hC_list_hsval {| hdata := HScons hsv lhsv unknown_hid; hcodes := [hsval_get_hid hsv; list_hsval_get_hid lhsv] |}.
+
+Lemma hScons_correct hsv lhsv:
+  WHEN hScons hsv lhsv ~> lhsv' THEN forall ge sp rs0 m0 sv lsv
+    (VR: sval_refines ge sp rs0 m0 hsv sv)
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv),
+    list_sval_refines ge sp rs0 m0 lhsv' (Scons sv lsv).
+Proof.
+  wlp_simplify.
+  rewrite <- VR, <- LR.
+  auto.
+Qed.
+Global Opaque hScons.
+Local Hint Resolve hScons_correct: wlp.
+
+Definition hSinit (_: unit): ?? hsmem :=
+   hC_hsmem {| hdata := HSinit unknown_hid; hcodes := nil |}.
+
+Lemma hSinit_correct:
+  WHEN hSinit() ~> hm THEN forall ge sp rs0 m0,
+    smem_refines ge sp rs0 m0 hm Sinit.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque hSinit.
+Local Hint Resolve hSinit_correct: wlp.
+
+Definition hSstore_hcodes (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval):=
+   DO hv1 <~ hash chunk;;
+   DO hv2 <~ hash addr;;
+   RET [hsmem_get_hid hsm; hv1; hv2; list_hsval_get_hid lhsv; hsval_get_hid srce].
+Extraction Inline hSstore_hcodes.
+
+Definition hSstore (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval): ?? hsmem :=
+   DO hv <~ hSstore_hcodes hsm chunk addr lhsv srce;;
+   hC_hsmem {| hdata := HSstore hsm chunk addr lhsv srce unknown_hid; hcodes := hv |}.
+
+Lemma hSstore_correct hsm chunk addr lhsv hsv:
+  WHEN hSstore hsm chunk addr lhsv hsv ~> hsm' THEN forall ge sp rs0 m0 lsv sm sv
+    (LR: list_sval_refines ge sp rs0 m0 lhsv lsv)
+    (MR: smem_refines ge sp rs0 m0 hsm sm)
+    (VR: sval_refines ge sp rs0 m0 hsv sv),
+    smem_refines ge sp rs0 m0 hsm' (Sstore sm chunk addr lsv sv).
+Proof.
+  wlp_simplify.
+  rewrite <- LR, <- MR, <- VR.
+  auto.
+Qed.
+Global Opaque hSstore.
+Local Hint Resolve hSstore_correct: wlp.
+
+Definition hsi_sreg_get (hst: PTree.t hsval) r: ?? hsval :=
+   match PTree.get r hst with 
+   | None => hSinput r
+   | Some sv => RET sv
+   end.
+
+Lemma hsi_sreg_get_correct hst r:
+  WHEN hsi_sreg_get hst r ~> hsv THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    sval_refines ge sp rs0 m0 hsv (f r).
+Proof.
+  unfold hsi_sreg_eval, hsi_sreg_proj; wlp_simplify; rewrite <- RR; try_simplify_someHyps.
+Qed.
+Global Opaque hsi_sreg_get.
+Local Hint Resolve hsi_sreg_get_correct: wlp.
+
+Fixpoint hlist_args (hst: PTree.t hsval) (l: list reg): ?? list_hsval :=
+  match l with
+  | nil => hSnil()
+  | r::l =>
+    DO v <~ hsi_sreg_get hst r;;
+    DO lhsv <~ hlist_args hst l;;
+    hScons v lhsv
+  end.
+
+Lemma hlist_args_correct hst l:
+  WHEN hlist_args hst l ~> lhsv THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    list_sval_refines ge sp rs0 m0 lhsv (list_sval_inj (List.map f l)).
+Proof.
+  induction l; wlp_simplify.
+Qed.
+Global Opaque hlist_args.
+Local Hint Resolve hlist_args_correct: wlp.
+
+(** Convert a "fake" hash-consed term into a "real" hash-consed term *)
+
+Fixpoint fsval_proj hsv: ?? hsval :=
+  match hsv with
+  | HSinput r hc => 
+    DO b <~ phys_eq hc unknown_hid;;
+    if b 
+    then hSinput r (* was not yet really hash-consed *)
+    else RET hsv (* already hash-consed *)
+  | HSop op hl hc => 
+    DO b <~ phys_eq hc unknown_hid;;
+    if b 
+    then (* was not yet really hash-consed *) 
+      DO hl' <~ fsval_list_proj hl;;
+      hSop op hl'
+    else RET hsv (* already hash-consed *)
+  | HSload hm t chk addr hl _ => RET hsv (* FIXME ? *)
+  end
+with fsval_list_proj hsl: ?? list_hsval :=
+  match hsl with
+  | HSnil hc => 
+    DO b <~ phys_eq hc unknown_hid;;
+    if b 
+    then hSnil() (* was not yet really hash-consed *)
+    else RET hsl (* already hash-consed *)
+  | HScons hv hl hc => 
+    DO b <~ phys_eq hc unknown_hid;;
+    if b 
+    then (* was not yet really hash-consed *)
+      DO hv' <~ fsval_proj hv;;
+      DO hl' <~ fsval_list_proj hl;;
+      hScons hv' hl' 
+    else RET hsl (* already hash-consed *)
+  end.
+
+Lemma fsval_proj_correct hsv:
+  WHEN fsval_proj hsv ~> hsv' THEN forall ge sp rs0 m0,
+  seval_hsval ge sp hsv rs0 m0 = seval_hsval ge sp hsv' rs0 m0.
+Proof.
+ induction hsv using hsval_mut 
+ with (P0 := fun lhsv => 
+       WHEN fsval_list_proj lhsv ~> lhsv' THEN forall ge sp rs0 m0,
+         seval_list_hsval ge sp lhsv rs0 m0 = seval_list_hsval ge sp lhsv' rs0 m0)
+       (P1 := fun sm => True); try (wlp_simplify; tauto).
+ - wlp_xsimplify ltac:(intuition eauto with wlp_raw wlp).
+   rewrite H, H0; auto.
+ - wlp_simplify; erewrite H0, H1; eauto.
+Qed.
+Global Opaque fsval_proj.
+Local Hint Resolve fsval_proj_correct: wlp.
+
+Lemma fsval_list_proj_correct lhsv:
+  WHEN fsval_list_proj lhsv ~> lhsv' THEN forall ge sp rs0 m0,
+  seval_list_hsval ge sp lhsv rs0 m0 = seval_list_hsval ge sp lhsv' rs0 m0.
+Proof.
+  induction lhsv; wlp_simplify.
+  erewrite H0, H1; eauto.
+Qed.
+Global Opaque fsval_list_proj.
+Local Hint Resolve fsval_list_proj_correct: wlp.
+
+
+(** ** Assignment of memory *)
+Definition hslocal_set_smem (hst:hsistate_local) hm :=
+  {| hsi_smem := hm;
+     hsi_ok_lsval := hsi_ok_lsval hst;
+     hsi_sreg:= hsi_sreg hst
+  |}.
+
+Lemma sok_local_set_mem ge sp rs0 m0 st sm:
+  sok_local ge sp rs0 m0 (slocal_set_smem st sm)
+  <-> (sok_local ge sp rs0 m0 st /\ seval_smem ge sp sm rs0 m0 <> None).
+Proof.
+  unfold slocal_set_smem, sok_local; simpl; intuition (subst; eauto).
+Qed.
+
+Lemma hsok_local_set_mem ge sp rs0 m0 hst hsm:
+  (seval_hsmem ge sp (hsi_smem hst) rs0 m0 = None -> seval_hsmem ge sp hsm rs0 m0 = None) ->
+  hsok_local ge sp rs0 m0 (hslocal_set_smem hst hsm)
+  <-> (hsok_local ge sp rs0 m0 hst /\ seval_hsmem ge sp hsm rs0 m0 <> None).
+Proof.
+  unfold hslocal_set_smem, hsok_local; simpl; intuition.
+Qed.
+
+Lemma hslocal_set_mem_correct ge sp rs0 m0 hst st hsm sm:
+  (seval_hsmem ge sp (hsi_smem hst) rs0 m0 = None -> seval_hsmem ge sp hsm rs0 m0 = None) ->
+  (forall m b ofs, seval_smem ge sp sm rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs) ->
+  hsilocal_refines ge sp rs0 m0 hst st ->
+  (hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 hsm sm) ->
+  hsilocal_refines ge sp rs0 m0 (hslocal_set_smem hst hsm) (slocal_set_smem st sm).
+Proof.
+  intros PRESERV SMVALID (OKEQ & SMEMEQ' & REGEQ & MVALID) SMEMEQ.
+  split; rewrite! hsok_local_set_mem; simpl; eauto; try tauto.
+  rewrite sok_local_set_mem.
+  intuition congruence.
+Qed.
+
+Definition hslocal_store (hst: hsistate_local) chunk addr args src: ?? hsistate_local :=
+   let pt := hst.(hsi_sreg) in
+   DO hargs <~ hlist_args pt args;;
+   DO hsrc <~ hsi_sreg_get pt src;;
+   DO hm <~ hSstore hst chunk addr hargs hsrc;;
+   RET (hslocal_set_smem hst hm).
+
+Lemma hslocal_store_correct hst chunk addr args src:
+  WHEN hslocal_store hst chunk addr args src ~> hst' THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st),
+    hsilocal_refines ge sp rs0 m0 hst' (slocal_store st chunk addr args src).
+Proof.
+  wlp_simplify.
+  eapply hslocal_set_mem_correct; simpl; eauto.
+  + intros X; erewrite H1; eauto.
+    rewrite X. simplify_SOME z.
+  + unfold hsilocal_refines in *; 
+    simplify_SOME z; intuition. 
+    erewrite <- Mem.storev_preserv_valid; [| eauto].
+    eauto.
+  + unfold hsilocal_refines in *; intuition eauto.
+Qed.
+Global Opaque hslocal_store.
+Local Hint Resolve hslocal_store_correct: wlp.
+
+(** ** Assignment of local state *)
+
+Definition hsist_set_local (hst: hsistate) (pc: node) (hnxt: hsistate_local): hsistate :=
+   {| hsi_pc := pc; hsi_exits := hst.(hsi_exits); hsi_local:= hnxt |}.
+
+Lemma hsist_set_local_correct_stat hst st pc hnxt nxt:
+  hsistate_refines_stat hst st ->
+  hsistate_refines_stat (hsist_set_local hst pc hnxt) (sist_set_local st pc nxt).
+Proof.
+  unfold hsistate_refines_stat; simpl; intuition.
+Qed.
+
+Lemma hsist_set_local_correct_dyn ge sp rs0 m0 hst st pc hnxt nxt:
+  hsistate_refines_dyn ge sp rs0 m0 hst st ->
+  hsilocal_refines ge sp rs0 m0 hnxt nxt ->
+  (sok_local ge sp rs0 m0 nxt -> sok_local ge sp rs0 m0 (si_local st)) ->
+  hsistate_refines_dyn ge sp rs0 m0 (hsist_set_local hst pc hnxt) (sist_set_local st pc nxt).
+Proof.
+  unfold hsistate_refines_dyn; simpl.
+  intros (EREF & LREF & NESTED) LREFN SOK; intuition.
+  destruct NESTED as [|st0 se lse TOP NEST]; econstructor; simpl; auto.
+Qed.
+
+(** ** Assignment of registers *)
+
+(** locally new symbolic values during symbolic execution *)
+Inductive root_sval: Type :=
+| Rop (op: operation)
+| Rload (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing)
+.
+
+Definition root_apply (rsv: root_sval) (lr: list reg) (st: sistate_local): sval :=
+  let lsv := list_sval_inj (List.map (si_sreg st) lr) in
+  let sm := si_smem st in
+  match rsv with
+  | Rop op => Sop op lsv sm
+  | Rload trap chunk addr => Sload sm trap chunk addr lsv
+  end.
+Coercion root_apply: root_sval >-> Funclass.
+
+Definition root_happly (rsv: root_sval) (lr: list reg) (hst: hsistate_local) : ?? hsval :=
+  DO lhsv <~ hlist_args hst lr;;
+  match rsv with
+  | Rop op => hSop op lhsv
+  | Rload trap chunk addr => hSload hst trap chunk addr lhsv
+  end.
+
+Lemma root_happly_correct (rsv: root_sval) lr hst:
+  WHEN root_happly rsv lr hst ~> hv' THEN forall ge sp rs0 m0 st
+    (REF:hsilocal_refines ge sp rs0 m0 hst st)
+    (OK:hsok_local ge sp rs0 m0 hst),
+    sval_refines ge sp rs0 m0 hv' (rsv lr st).
+Proof.
+   unfold hsilocal_refines, root_apply, root_happly; destruct rsv; wlp_simplify.
+   unfold sok_local in *.
+   generalize (H0 ge sp rs0 m0 (list_sval_inj (map (si_sreg st) lr)) (si_smem st)); clear H0.
+   destruct (seval_smem ge sp (si_smem st) rs0 m0) as [m|] eqn:X; eauto.
+   intuition congruence.
+Qed.
+Global Opaque root_happly.
+Hint Resolve root_happly_correct: wlp.
+
+Local Open Scope lazy_bool_scope.
+
+(* NB: return [false] if the rsv cannot fail *)
+Definition may_trap (rsv: root_sval) (lr: list reg): bool :=
+  match rsv with 
+  | Rop op => is_trapping_op op ||| negb (Nat.eqb (length lr) (args_of_operation op))  (* cf. lemma is_trapping_op_sound *)
+  | Rload TRAP _ _  => true
+  | _ => false
+  end.
+
+Lemma lazy_orb_negb_false (b1 b2:bool):
+  (b1 ||| negb b2) = false <-> (b1 = false /\ b2 = true).
+Proof.
+  unfold negb; explore; simpl; intuition (try congruence).
+Qed.
+
+Lemma seval_list_sval_length ge sp rs0 m0 (f: reg -> sval) (l:list reg):
+  forall l', seval_list_sval ge sp (list_sval_inj (List.map f l)) rs0 m0 = Some l' ->
+  Datatypes.length l = Datatypes.length l'.
+Proof.
+  induction l.
+  - simpl. intros. inv H. reflexivity.
+  - simpl. intros. destruct (seval_sval _ _ _ _ _); [|discriminate].
+    destruct (seval_list_sval _ _ _ _ _) eqn:SLS; [|discriminate]. inv H. simpl.
+    erewrite IHl; eauto.
+Qed.
+
+Lemma may_trap_correct (ge: RTL.genv) (sp:val) (rsv: root_sval) (rs0: regset) (m0: mem) (lr: list reg) st:
+  may_trap rsv lr = false -> 
+  seval_list_sval ge sp (list_sval_inj (List.map (si_sreg st) lr)) rs0 m0 <> None ->
+  seval_smem ge sp (si_smem st) rs0 m0 <> None ->
+  seval_sval ge sp (rsv lr st) rs0 m0 <> None.
+Proof.
+  destruct rsv; simpl; try congruence.
+  - rewrite lazy_orb_negb_false. intros (TRAP1 & TRAP2) OK1 OK2.
+    explore; try congruence.
+    eapply is_trapping_op_sound; eauto.
+    erewrite <- seval_list_sval_length; eauto.
+    apply Nat.eqb_eq in TRAP2.
+    assumption.
+  - intros X OK1 OK2.
+    explore; try congruence.
+Qed.
+
+(** simplify a symbolic value before assignment to a register *)
+Definition simplify (rsv: root_sval) (lr: list reg) (hst: hsistate_local): ?? hsval :=
+  match rsv with
+  | Rop op =>
+     match is_move_operation op lr with
+     | Some arg => hsi_sreg_get hst arg (* optimization of Omove *)
+     | None =>
+       match target_op_simplify op lr hst with
+       | Some fhv => fsval_proj fhv
+       | None =>
+         DO lhsv <~ hlist_args hst lr;;
+         hSop op lhsv
+       end
+     end
+  | Rload _ chunk addr => 
+       DO lhsv <~ hlist_args hst lr;;
+       hSload hst NOTRAP chunk addr lhsv
+  end.
+
+Lemma simplify_correct rsv lr hst:
+  WHEN simplify rsv lr hst ~> hv THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st)
+    (OK0: hsok_local ge sp rs0 m0 hst)
+    (OK1: seval_sval ge sp (rsv lr st) rs0 m0 <> None),
+    sval_refines ge sp rs0 m0 hv (rsv lr st).
+Proof.
+  destruct rsv; simpl; auto.
+  - (* Rop *)
+    destruct (is_move_operation _ _) eqn: Hmove.
+    { wlp_simplify; exploit is_move_operation_correct; eauto.
+      intros (Hop & Hlsv); subst; simpl in *.
+      simplify_SOME z.
+      * erewrite H; eauto.
+      * try_simplify_someHyps; congruence.
+      * congruence. }
+    destruct (target_op_simplify _ _ _) eqn: Htarget_op_simp; wlp_simplify.
+    { destruct (seval_list_sval _ _ _) eqn: OKlist; try congruence.
+      destruct (seval_smem _ _ _ _ _) eqn: OKmem; try congruence.
+      rewrite <- H; exploit target_op_simplify_correct; eauto. }
+    clear Htarget_op_simp.
+    generalize (H0 ge sp rs0 m0 (list_sval_inj (map (si_sreg st) lr)) (si_smem st)); clear H0.
+    destruct (seval_smem ge sp (si_smem st) rs0 m0) as [m|] eqn:X; eauto.
+    intro H0; clear H0; simplify_SOME z; congruence. (* absurd case *)
+  - (* Rload *)
+    destruct trap; wlp_simplify.
+    erewrite H0; eauto.
+    erewrite H; eauto.
+    erewrite hsilocal_refines_smem_refines; eauto.
+    destruct (seval_list_sval _ _ _ _) as [args|] eqn: Hargs; try congruence.
+    destruct (eval_addressing _ _ _ _) as [a|] eqn: Ha; try congruence.
+    destruct (seval_smem _ _ _ _) as [m|] eqn: Hm; try congruence.
+    destruct (Mem.loadv _ _ _); try congruence.
+Qed.
+Global Opaque simplify.
+Local Hint Resolve simplify_correct: wlp.
+
+Definition red_PTree_set (r: reg) (hsv: hsval) (hst: PTree.t hsval): PTree.t hsval :=
+  match hsv with
+  | HSinput r' _ =>
+     if Pos.eq_dec r r' 
+     then PTree.remove r' hst
+     else PTree.set r hsv hst
+  | _ => PTree.set r hsv hst
+  end.
+
+Lemma red_PTree_set_correct (r r0:reg) hsv hst ge sp rs0 m0:
+  hsi_sreg_eval ge sp (red_PTree_set r hsv hst) r0 rs0 m0 = hsi_sreg_eval ge sp (PTree.set r hsv hst) r0 rs0 m0.
+Proof.
+  destruct hsv; simpl; auto.
+  destruct (Pos.eq_dec r r1); auto.
+  subst; unfold hsi_sreg_eval, hsi_sreg_proj.
+  destruct (Pos.eq_dec r0 r1); auto.
+  - subst; rewrite PTree.grs, PTree.gss; simpl; auto.
+  - rewrite PTree.gro, PTree.gso; simpl; auto.
+Qed.
+
+Lemma red_PTree_set_refines (r r0:reg) hsv hst sv st ge sp rs0 m0:
+ hsilocal_refines ge sp rs0 m0 hst st ->
+ sval_refines ge sp rs0 m0 hsv sv ->
+ hsok_local ge sp rs0 m0 hst ->
+ hsi_sreg_eval ge sp (red_PTree_set r hsv hst) r0 rs0 m0 = seval_sval ge sp (if Pos.eq_dec r r0 then sv else si_sreg st r0) rs0 m0.
+Proof.
+  intros; rewrite red_PTree_set_correct.
+  exploit hsilocal_refines_sreg; eauto.
+  unfold hsi_sreg_eval, hsi_sreg_proj.
+  destruct (Pos.eq_dec r r0); auto.
+  - subst. rewrite PTree.gss; simpl; auto.
+  - rewrite PTree.gso; simpl; eauto.
+Qed.
+
+Lemma sok_local_set_sreg (rsv:root_sval) ge sp rs0 m0 st r lr:
+  sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st))
+  <-> (sok_local ge sp rs0 m0 st /\ seval_sval ge sp (rsv lr st) rs0 m0 <> None).
+Proof.
+  unfold slocal_set_sreg, sok_local; simpl; split.
+  + intros ((SVAL0 & PRE) & SMEM & SVAL).
+    repeat (split; try tauto).
+    - intros r0; generalize (SVAL r0); clear SVAL; destruct (Pos.eq_dec r r0); try congruence.
+    - generalize (SVAL r); clear SVAL; destruct (Pos.eq_dec r r); try congruence.
+  + intros ((PRE & SMEM & SVAL0) & SVAL).
+    repeat (split; try tauto; eauto).
+    intros r0;  destruct (Pos.eq_dec r r0); try congruence.
+Qed.
+
+Definition hslocal_set_sreg (hst: hsistate_local) (r: reg) (rsv: root_sval) (lr: list reg): ?? hsistate_local :=
+  DO ok_lhsv <~
+   (if may_trap rsv lr
+    then DO hv <~ root_happly rsv lr hst;;
+         XDEBUG hv (fun hv => DO hv_name <~ string_of_hashcode (hsval_get_hid hv);; RET ("-- insert undef behavior of hashcode:" +; (CamlStr hv_name))%string);;
+         RET (hv::(hsi_ok_lsval hst))
+    else RET (hsi_ok_lsval hst));;
+  DO simp <~ simplify rsv lr hst;;
+  RET {| hsi_smem := hst;
+         hsi_ok_lsval := ok_lhsv;
+         hsi_sreg := red_PTree_set r simp (hsi_sreg hst) |}.
+
+Lemma hslocal_set_sreg_correct hst r rsv lr:
+  WHEN hslocal_set_sreg hst r rsv lr ~> hst' THEN forall ge sp rs0 m0 st
+    (REF: hsilocal_refines ge sp rs0 m0 hst st),
+    hsilocal_refines ge sp rs0 m0 hst' (slocal_set_sreg st r (rsv lr st)).
+Proof.
+  wlp_simplify.
+  + (* may_trap ~> true *)
+    assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
+               hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := exta :: hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta0 hst |}).
+    { rewrite sok_local_set_sreg; generalize REF.
+      intros (OKeq & MEM & REG & MVALID); rewrite OKeq; clear OKeq.
+      unfold hsok_local; simpl; intuition (subst; eauto);
+      erewrite <- H0 in *; eauto; unfold hsok_local; simpl; intuition eauto.
+    }
+    unfold hsilocal_refines; simpl; split; auto.
+    rewrite <- X, sok_local_set_sreg. intuition eauto.
+    - destruct REF; intuition eauto.
+    - generalize REF; intros (OKEQ & _). rewrite OKEQ in * |-; erewrite red_PTree_set_refines; eauto.
+  + (* may_trap ~> false *)
+    assert (X: sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st)) <->
+               hsok_local ge sp rs0 m0 {| hsi_smem := hst; hsi_ok_lsval := hsi_ok_lsval hst; hsi_sreg := red_PTree_set r exta hst |}).
+    { 
+      rewrite sok_local_set_sreg; generalize REF.
+      intros (OKeq & MEM & REG & MVALID); rewrite OKeq.
+      unfold hsok_local; simpl; intuition (subst; eauto).
+      assert (X0:hsok_local ge sp rs0 m0 hst). { unfold hsok_local; intuition. }
+      exploit may_trap_correct; eauto.
+      * intro X1; eapply seval_list_sval_inj_not_none; eauto.
+        assert (X2: sok_local ge sp rs0 m0 st). { intuition. }
+        unfold sok_local in X2; intuition eauto.
+      * rewrite <- MEM; eauto.
+    }
+    unfold hsilocal_refines; simpl; split; auto.
+    rewrite <- X, sok_local_set_sreg. intuition eauto.
+    - destruct REF; intuition eauto.
+    - generalize REF; intros (OKEQ & _). rewrite OKEQ in * |-; erewrite red_PTree_set_refines; eauto.
+Qed.
+Global Opaque hslocal_set_sreg.
+Local Hint Resolve hslocal_set_sreg_correct: wlp.
+
+(** ** Execution of one instruction *)
+
+(* TODO gourdinl
+ * This is just useful for debugging fake values hashcode projection *)
+Fixpoint check_no_uhid lhsv :=
+  match lhsv with
+  | HSnil hc =>
+      DO b <~ phys_eq hc unknown_hid;;
+      assert_b (negb b) "fail no uhid";;
+      RET tt
+  | HScons hsv lhsv' hc =>
+      DO b <~ phys_eq hc unknown_hid;;
+      assert_b (negb b) "fail no uhid";;
+      check_no_uhid lhsv'
+  end.
+
+Definition cbranch_expanse (prev: hsistate_local) (cond: condition) (args: list reg): ?? (condition * list_hsval) :=
+    match target_cbranch_expanse prev cond args with
+    | Some (cond', vargs) => 
+      DO vargs' <~ fsval_list_proj vargs;;
+      RET (cond', vargs')
+    | None =>
+      DO vargs <~ hlist_args prev args ;;
+      RET (cond, vargs)
+    end.
+
+Lemma cbranch_expanse_correct hst c l:
+ WHEN cbranch_expanse hst c l ~> r THEN forall ge sp rs0 m0 st
+  (LREF : hsilocal_refines ge sp rs0 m0 hst st)
+  (OK: hsok_local ge sp rs0 m0 hst),
+  seval_condition ge sp (fst r) (hsval_list_proj (snd r)) (si_smem st) rs0 m0 =
+  seval_condition ge sp c (list_sval_inj (map (si_sreg st) l)) (si_smem st) rs0 m0.
+Proof.
+  unfold cbranch_expanse.
+  destruct (target_cbranch_expanse _ _ _) eqn: TARGET; wlp_simplify;
+  unfold seval_condition; erewrite <- H; eauto.
+  destruct p as [c' l']; simpl.
+  exploit target_cbranch_expanse_correct; eauto.
+Qed.
+Local Hint Resolve cbranch_expanse_correct: wlp.
+Global Opaque cbranch_expanse.
+
+Definition hsiexec_inst (i: instruction) (hst: hsistate): ?? (option hsistate) := 
+  match i with
+  | Inop pc' => 
+      RET (Some (hsist_set_local hst pc' hst.(hsi_local)))
+  | Iop op args dst pc' =>
+      DO next <~ hslocal_set_sreg hst.(hsi_local) dst (Rop op) args;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Iload trap chunk addr args dst pc' =>
+      DO next <~ hslocal_set_sreg hst.(hsi_local) dst (Rload trap chunk addr) args;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Istore chunk addr args src pc' =>
+      DO next <~ hslocal_store hst.(hsi_local) chunk addr args src;;
+      RET (Some (hsist_set_local hst pc' next))
+  | Icond cond args ifso ifnot _ =>
+      let prev := hst.(hsi_local) in
+      DO res <~ cbranch_expanse prev cond args;;
+      let (cond, vargs) := res in
+      let ex := {| hsi_cond:=cond; hsi_scondargs:=vargs; hsi_elocal := prev; hsi_ifso := ifso |} in
+      RET (Some {| hsi_pc := ifnot; hsi_exits := ex::hst.(hsi_exits); hsi_local := prev |})
+  | _ => RET None
+  end.
+
+Remark hsiexec_inst_None_correct i hst:
+  WHEN hsiexec_inst i hst ~> o THEN forall st, o = None -> siexec_inst i st = None.
+Proof.
+  destruct i; wlp_simplify; congruence.
+Qed.
+
+Lemma seval_condition_refines hst st ge sp cond hargs args rs m:
+  hsok_local ge sp rs m hst -> 
+  hsilocal_refines ge sp rs m hst st ->
+  list_sval_refines ge sp rs m hargs args ->
+  hseval_condition ge sp cond hargs (hsi_smem hst) rs m
+  = seval_condition ge sp cond args (si_smem st) rs m.
+ Proof.
+  intros HOK (_ & MEMEQ & _) LR. unfold hseval_condition, seval_condition.
+  rewrite LR, <- MEMEQ; auto.
+Qed.
+
+Lemma sok_local_set_sreg_simp (rsv:root_sval) ge sp rs0 m0 st r lr:
+  sok_local ge sp rs0 m0 (slocal_set_sreg st r (rsv lr st))
+  -> sok_local ge sp rs0 m0 st.
+Proof.
+  rewrite sok_local_set_sreg; intuition.
+Qed.
+
+Local Hint Resolve hsist_set_local_correct_stat: core.
+
+Lemma hsiexec_cond_noexp (hst: hsistate): forall l c0 n n0,
+  WHEN DO res <~
+       (DO vargs <~ hlist_args (hsi_local hst) l;; RET ((c0, vargs)));;
+       (let (cond, vargs) := res in
+        RET (Some
+               {|
+               hsi_pc := n0;
+               hsi_exits := {|
+                            hsi_cond := cond;
+                            hsi_scondargs := vargs;
+                            hsi_elocal := hsi_local hst;
+                            hsi_ifso := n |} :: hsi_exits hst;
+               hsi_local := hsi_local hst |})) ~> o0
+  THEN (forall (hst' : hsistate) (st : sistate),
+        o0 = Some hst' ->
+        exists st' : sistate,
+          Some
+            {|
+            si_pc := n0;
+            si_exits := {|
+                        si_cond := c0;
+                        si_scondargs := list_sval_inj
+                                          (map (si_sreg (si_local st)) l);
+                        si_elocal := si_local st;
+                        si_ifso := n |} :: si_exits st;
+            si_local := si_local st |} = Some st' /\
+          (hsistate_refines_stat hst st -> hsistate_refines_stat hst' st') /\
+          (forall (ge : RTL.genv) (sp : val) (rs0 : regset) (m0 : mem),
+           hsistate_refines_dyn ge sp rs0 m0 hst st ->
+           hsistate_refines_dyn ge sp rs0 m0 hst' st')).
+Proof.
+  intros.
+  wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+  - unfold hsistate_refines_stat, hsiexits_refines_stat in *; simpl; intuition.
+    constructor; simpl; eauto.
+    constructor.
+  - destruct H0 as (EXREF & LREF & NEST).
+    split.
+    + constructor; simpl; auto.
+      constructor; simpl; auto.
+      intros; erewrite seval_condition_refines; eauto.
+    + split; simpl; auto.
+      destruct NEST as [|st0 se lse TOP NEST];
+      econstructor; simpl; auto; constructor; auto.
+Qed.
+
+Lemma hsiexec_inst_correct i hst:
+  WHEN hsiexec_inst i hst ~> o THEN forall hst' st,
+   o = Some hst' ->
+   exists st', siexec_inst i st = Some st'
+    /\ (forall (REF:hsistate_refines_stat hst st), hsistate_refines_stat hst' st')
+    /\ (forall ge sp rs0 m0 (REF:hsistate_refines_dyn ge sp rs0 m0 hst st), hsistate_refines_dyn ge sp rs0 m0 hst' st').
+Proof.
+  destruct i; simpl;
+  try (wlp_simplify; try_simplify_someHyps; eexists; intuition eauto; fail).
+  - (* refines_dyn Iop *)
+    wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+    eapply hsist_set_local_correct_dyn; eauto.
+    generalize (sok_local_set_sreg_simp (Rop o)); simpl; eauto.
+  - (* refines_dyn Iload *)
+    wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+    eapply hsist_set_local_correct_dyn; eauto.
+    generalize (sok_local_set_sreg_simp (Rload t0 m a)); simpl; eauto.
+  - (* refines_dyn Istore *)
+    wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+    eapply hsist_set_local_correct_dyn; eauto.
+    unfold sok_local; simpl; intuition.
+  - (* refines_stat Icond *)
+    wlp_simplify; try_simplify_someHyps; eexists; intuition eauto.
+    + unfold hsistate_refines_stat, hsiexits_refines_stat in *; simpl; intuition.
+      constructor; simpl; eauto.
+      constructor.
+    + destruct REF as (EXREF & LREF & NEST).
+      split.
+      * constructor; simpl; auto.
+        constructor; simpl; auto.
+        intros; erewrite seval_condition_refines; eauto.
+      * split; simpl; auto.
+        destruct NEST as [|st0 se lse TOP NEST];
+        econstructor; simpl; auto; constructor; auto.
+Qed.
+Global Opaque hsiexec_inst.
+Local Hint Resolve hsiexec_inst_correct: wlp.
+
+
+Definition some_or_fail {A} (o: option A) (msg: pstring): ?? A :=
+  match o with
+  | Some x => RET x
+  | None => FAILWITH msg
+  end.
+
+Fixpoint hsiexec_path (path:nat) (f: function) (hst: hsistate): ?? hsistate :=
+  match path with
+  | O => RET hst
+  | S p =>
+    let pc := hst.(hsi_pc) in
+    XDEBUG pc (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("- sym exec node: " +; name_pc)%string);;
+    DO i <~ some_or_fail ((fn_code f)!pc) "hsiexec_path.internal_error.1";;
+    DO ohst1 <~ hsiexec_inst i hst;;
+    DO hst1 <~ some_or_fail ohst1 "hsiexec_path.internal_error.2";;
+    hsiexec_path p f hst1
+  end.
+
+Lemma hsiexec_path_correct path f: forall hst,
+  WHEN hsiexec_path path f hst ~> hst' THEN forall st
+  (RSTAT:hsistate_refines_stat hst st),
+  exists st', siexec_path path f st = Some st'
+    /\ hsistate_refines_stat hst' st'
+    /\ (forall ge sp rs0 m0 (REF:hsistate_refines_dyn ge sp rs0 m0 hst st), hsistate_refines_dyn ge sp rs0 m0 hst' st').
+Proof.
+  induction path; wlp_simplify; try_simplify_someHyps. clear IHpath.
+  generalize RSTAT; intros (PCEQ & _) INSTEQ.
+  rewrite <- PCEQ, INSTEQ; simpl.
+  exploit H0; eauto. clear H0.
+  intros (st0 & SINST & ISTAT & IDYN); erewrite SINST.
+  exploit H1; eauto. clear H1.
+  intros (st' & SPATH & PSTAT & PDYN).
+  eexists; intuition eauto.
+Qed.
+Global Opaque hsiexec_path.
+Local Hint Resolve hsiexec_path_correct: wlp.
+
+Fixpoint hbuiltin_arg (hst: PTree.t hsval) (arg : builtin_arg reg): ?? builtin_arg hsval := 
+  match arg with
+  | BA r => 
+         DO v <~ hsi_sreg_get hst r;;
+         RET (BA v)
+  | BA_int n => RET (BA_int n)
+  | BA_long n => RET (BA_long n)
+  | BA_float f0 => RET (BA_float f0)
+  | BA_single s => RET (BA_single s)
+  | BA_loadstack chunk ptr => RET (BA_loadstack chunk ptr)
+  | BA_addrstack ptr => RET (BA_addrstack ptr)
+  | BA_loadglobal chunk id ptr => RET (BA_loadglobal chunk id ptr)
+  | BA_addrglobal id ptr => RET (BA_addrglobal id ptr)
+  | BA_splitlong ba1 ba2 => 
+    DO v1 <~ hbuiltin_arg hst ba1;;
+    DO v2 <~ hbuiltin_arg hst ba2;;
+    RET (BA_splitlong v1 v2)
+  | BA_addptr ba1 ba2 => 
+    DO v1 <~ hbuiltin_arg hst ba1;;
+    DO v2 <~ hbuiltin_arg hst ba2;;
+    RET (BA_addptr v1 v2)
+  end.
+
+Lemma hbuiltin_arg_correct hst arg:
+  WHEN hbuiltin_arg hst arg ~> hargs THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    seval_builtin_sval ge sp (builtin_arg_map hsval_proj hargs) rs0 m0 = seval_builtin_sval ge sp (builtin_arg_map f arg) rs0 m0.
+Proof.
+  induction arg; wlp_simplify.
+  + erewrite H; eauto.
+  + erewrite H; eauto.
+    erewrite H0; eauto.
+  + erewrite H; eauto.
+    erewrite H0; eauto.
+Qed.
+Global Opaque hbuiltin_arg.
+Local Hint Resolve hbuiltin_arg_correct: wlp.
+
+Fixpoint hbuiltin_args (hst: PTree.t hsval) (args: list (builtin_arg reg)): ?? list (builtin_arg hsval) :=
+  match args with
+  | nil => RET nil
+  | a::l =>
+    DO ha <~ hbuiltin_arg hst a;;
+    DO hl <~ hbuiltin_args hst l;;
+    RET (ha::hl)
+    end.
+
+Lemma hbuiltin_args_correct hst args:
+  WHEN hbuiltin_args hst args ~> hargs THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    bargs_refines ge sp rs0 m0 hargs (List.map (builtin_arg_map f) args).
+Proof.
+  unfold bargs_refines, seval_builtin_args; induction args; wlp_simplify.
+  erewrite H; eauto.
+  erewrite H0; eauto.
+Qed.
+Global Opaque hbuiltin_args.
+Local Hint Resolve hbuiltin_args_correct: wlp.
+
+Definition hsum_left (hst: PTree.t hsval) (ros: reg + ident): ?? (hsval + ident) :=
+  match ros with
+  | inl r => DO hr <~ hsi_sreg_get hst r;; RET (inl hr) 
+  | inr s => RET (inr s)
+  end.
+
+Lemma hsum_left_correct hst ros:
+  WHEN hsum_left hst ros ~> hsi THEN forall ge sp rs0 m0 (f: reg -> sval)
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    sum_refines ge sp rs0 m0 hsi (sum_left_map f ros).
+Proof.
+  unfold sum_refines; destruct ros; wlp_simplify.
+Qed.
+Global Opaque hsum_left.
+Local Hint Resolve hsum_left_correct: wlp.
+
+Definition hsexec_final (i: instruction) (hst: PTree.t hsval): ?? hsfval :=
+  match i with
+  | Icall sig ros args res pc =>
+    DO svos <~ hsum_left hst ros;;
+    DO sargs <~ hlist_args hst args;;
+    RET (HScall sig svos sargs res pc)
+  | Itailcall sig ros args =>
+    DO svos <~ hsum_left hst ros;;
+    DO sargs <~ hlist_args hst args;;
+    RET (HStailcall sig svos sargs)
+  | Ibuiltin ef args res pc =>
+    DO sargs <~ hbuiltin_args hst args;;
+    RET (HSbuiltin ef sargs res pc)
+  | Ijumptable reg tbl =>
+    DO sv <~ hsi_sreg_get hst reg;;
+    RET (HSjumptable sv tbl)
+  | Ireturn or =>
+    match or with
+    | Some r => DO hr <~ hsi_sreg_get hst r;; RET (HSreturn (Some hr))
+    | None => RET (HSreturn None)
+    end
+  | _ => RET (HSnone)
+  end.
+
+Lemma hsexec_final_correct (hsl: hsistate_local) i:
+  WHEN hsexec_final i hsl ~> hsf THEN forall ge sp rs0 m0 sl
+   (OK:  hsok_local ge sp rs0 m0 hsl)
+   (REF: hsilocal_refines ge sp rs0 m0 hsl sl),
+   hfinal_refines ge sp rs0 m0 hsf (sexec_final i sl).
+Proof.
+  destruct i; wlp_simplify; try econstructor; simpl; eauto.
+Qed.
+Global Opaque hsexec_final.
+Local Hint Resolve hsexec_final_correct: wlp.
+
+Definition init_hsistate_local (_:unit): ?? hsistate_local
+  := DO hm <~ hSinit ();;
+     RET {| hsi_smem := hm; hsi_ok_lsval := nil; hsi_sreg := PTree.empty hsval |}.
+
+Lemma init_hsistate_local_correct:
+  WHEN init_hsistate_local () ~> hsl THEN forall ge sp rs0 m0,
+  hsilocal_refines ge sp rs0 m0 hsl init_sistate_local.
+Proof.
+  unfold hsilocal_refines; wlp_simplify.
+  - unfold hsok_local; simpl; intuition. erewrite H in *; congruence.
+  - unfold hsok_local, sok_local; simpl in *; intuition; try congruence.
+  - unfold hsi_sreg_eval, hsi_sreg_proj. rewrite PTree.gempty. reflexivity.
+  - try_simplify_someHyps.
+Qed.
+Global Opaque init_hsistate_local.
+Local Hint Resolve init_hsistate_local_correct: wlp.
+
+Definition init_hsistate pc: ?? hsistate
+  := DO hst <~ init_hsistate_local ();;
+     RET {| hsi_pc := pc; hsi_exits := nil; hsi_local := hst |}.
+
+Lemma init_hsistate_correct pc:
+  WHEN init_hsistate pc ~> hst THEN
+      hsistate_refines_stat hst (init_sistate pc)
+   /\ forall ge sp rs0 m0, hsistate_refines_dyn ge sp rs0 m0 hst (init_sistate pc).
+Proof.
+  unfold hsistate_refines_stat, hsistate_refines_dyn, hsiexits_refines_dyn; wlp_simplify; constructor.
+Qed.
+Global Opaque init_hsistate.
+Local Hint Resolve init_hsistate_correct: wlp.
+
+Definition hsexec (f: function) (pc:node): ?? hsstate :=
+  DO path <~ some_or_fail ((fn_path f)!pc) "hsexec.internal_error.1";;
+  DO hinit <~ init_hsistate pc;;
+  DO hst <~ hsiexec_path path.(psize) f hinit;;
+  DO i <~ some_or_fail ((fn_code f)!(hst.(hsi_pc))) "hsexec.internal_error.2";;
+  DO ohst <~ hsiexec_inst i hst;;
+  match ohst with
+  | Some hst' => RET {| hinternal := hst'; hfinal := HSnone |}
+  | None => DO hsvf <~ hsexec_final i hst.(hsi_local);;
+            RET {| hinternal := hst; hfinal := hsvf |}
+  end.
+
+Lemma hsexec_correct_aux f pc:
+  WHEN hsexec f pc ~> hst THEN
+  exists st, sexec f pc = Some st /\ hsstate_refines hst st.
+Proof.
+  unfold hsstate_refines, sexec; wlp_simplify.
+  - (* Some *)
+   rewrite H; clear H.
+   exploit H0; clear H0; eauto.
+   intros (st0 & EXECPATH & SREF & DREF).
+   rewrite EXECPATH; clear EXECPATH.
+   generalize SREF. intros (EQPC & _).
+   rewrite <- EQPC, H3; clear H3.
+   exploit H4; clear H4; eauto.
+   intros (st' & EXECL & SREF' & DREF').
+   try_simplify_someHyps.
+   eexists; intuition (simpl; eauto).
+   constructor.
+  - (* None *)
+   rewrite H; clear H H4.
+   exploit H0; clear H0; eauto.
+   intros (st0 & EXECPATH & SREF & DREF).
+   rewrite EXECPATH; clear EXECPATH.
+   generalize SREF. intros (EQPC & _).
+   rewrite <- EQPC, H3; clear H3.
+   erewrite hsiexec_inst_None_correct; eauto.
+   eexists; intuition (simpl; eauto).
+Qed.
+
+Global Opaque hsexec.
+
+End CanonBuilding.
+
+(** Correction of concrete symbolic execution wrt abstract symbolic execution *)
+Theorem hsexec_correct
+  (hC_hsval : hashinfo hsval -> ?? hsval)
+  (hC_list_hsval : hashinfo list_hsval -> ?? list_hsval)
+  (hC_hsmem : hashinfo hsmem -> ?? hsmem)
+  (f : function) 
+  (pc : node):
+       WHEN hsexec hC_hsval hC_list_hsval hC_hsmem f pc ~> hst THEN forall
+        (hC_hsval_correct: forall hs,
+            WHEN hC_hsval hs ~> hs' THEN forall ge sp rs0 m0,
+                seval_sval ge sp (hsval_proj (hdata hs)) rs0 m0 =
+                seval_sval ge sp (hsval_proj hs') rs0 m0)
+        (hC_list_hsval_correct: forall lh,
+            WHEN hC_list_hsval lh ~> lh' THEN forall ge sp rs0 m0,
+              seval_list_sval ge sp (hsval_list_proj (hdata lh)) rs0 m0 =
+              seval_list_sval ge sp (hsval_list_proj lh') rs0 m0)
+         (hC_hsmem_correct: forall hm,
+            WHEN hC_hsmem hm ~> hm' THEN forall ge sp rs0 m0,
+              seval_smem ge sp (hsmem_proj (hdata hm)) rs0 m0 =
+              seval_smem ge sp (hsmem_proj hm') rs0 m0),
+         exists st : sstate, sexec f pc = Some st /\ hsstate_refines hst st.
+Proof.
+  wlp_simplify.
+  eapply hsexec_correct_aux; eauto.
+Qed.
+Local Hint Resolve hsexec_correct: wlp.
+
+(** * Implementing the simulation test with concrete hash-consed symbolic execution *)
+
+Definition phys_check {A} (x y:A) (msg: pstring): ?? unit :=
+  DO b <~ phys_eq x y;;
+  assert_b b msg;;
+  RET tt.
+
+Definition struct_check {A} (x y: A) (msg: pstring): ?? unit :=
+  DO b <~ struct_eq x y;;
+  assert_b b msg;;
+  RET tt.
+
+Lemma struct_check_correct {A} (a b: A) msg:
+  WHEN struct_check a b msg ~> _ THEN
+  a = b.
+Proof. wlp_simplify. Qed.
+Global Opaque struct_check.
+Hint Resolve struct_check_correct: wlp.
+
+Definition option_eq_check {A} (o1 o2: option A): ?? unit :=
+  match o1, o2 with
+  | Some x1, Some x2 => phys_check x1 x2 "option_eq_check: data physically differ"
+  | None, None => RET tt
+  | _, _ => FAILWITH "option_eq_check: structure differs"
+  end.
+
+Lemma option_eq_check_correct A (o1 o2: option A): WHEN option_eq_check o1 o2 ~> _ THEN o1=o2.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque option_eq_check.
+Hint Resolve option_eq_check_correct:wlp.
+
+Import PTree.
+
+Fixpoint PTree_eq_check {A} (d1 d2: PTree.t A): ?? unit :=
+  match d1, d2 with
+  | Leaf, Leaf => RET tt
+  | Node l1 o1 r1, Node l2 o2 r2 =>
+      option_eq_check o1 o2;;
+      PTree_eq_check l1 l2;;
+      PTree_eq_check r1 r2
+  | _, _ => FAILWITH "PTree_eq_check: some key is absent"
+  end.
+
+Lemma PTree_eq_check_correct A d1: forall (d2: t A),
+ WHEN PTree_eq_check d1 d2 ~> _ THEN forall x, PTree.get x d1 = PTree.get x d2.
+Proof.
+  induction d1 as [|l1 Hl1 o1 r1 Hr1]; destruct d2 as [|l2 o2 r2]; simpl; 
+  wlp_simplify. destruct x; simpl; auto.
+Qed.
+Global Opaque PTree_eq_check.
+Local Hint Resolve PTree_eq_check_correct: wlp.
+
+Fixpoint PTree_frame_eq_check {A} (frame: list positive) (d1 d2: PTree.t A): ?? unit :=
+  match frame with
+  | nil => RET tt
+  | k::l => 
+    option_eq_check (PTree.get k d1) (PTree.get k d2);;
+    PTree_frame_eq_check l d1 d2
+  end.
+
+Lemma PTree_frame_eq_check_correct A l (d1 d2: t A):
+ WHEN PTree_frame_eq_check l d1 d2 ~> _ THEN forall x, List.In x l -> PTree.get x d1 = PTree.get x d2.
+Proof.
+  induction l as [|k l]; simpl; wlp_simplify.
+  subst; auto.
+Qed.
+Global Opaque PTree_frame_eq_check.
+Local Hint Resolve PTree_frame_eq_check_correct: wlp.
+
+Definition hsilocal_frame_simu_check frame hst1 hst2 : ?? unit :=
+  DEBUG("? frame check");;
+  phys_check (hsi_smem hst2) (hsi_smem hst1) "hsilocal_frame_simu_check: hsi_smem sets aren't equiv";;
+  PTree_frame_eq_check frame (hsi_sreg hst1) (hsi_sreg hst2);;
+  Sets.assert_list_incl mk_hash_params (hsi_ok_lsval hst2) (hsi_ok_lsval hst1);;
+  DEBUG("=> frame check: OK").
+
+Lemma setoid_in {A: Type} (a: A): forall l,
+  SetoidList.InA (fun x y => x = y) a l ->
+  In a l.
+Proof.
+  induction l; intros; inv H.
+  - constructor. reflexivity.
+  - right. auto.
+Qed.
+
+Lemma regset_elements_in r rs:
+  Regset.In r rs ->
+  In r (Regset.elements rs).
+Proof.
+  intros. exploit Regset.elements_1; eauto. intro SIN.
+  apply setoid_in. assumption.
+Qed.
+Local Hint Resolve regset_elements_in: core.
+
+Lemma hsilocal_frame_simu_check_correct hst1 hst2 alive:
+  WHEN hsilocal_frame_simu_check (Regset.elements alive) hst1 hst2 ~> _ THEN
+  hsilocal_simu_spec alive hst1 hst2.
+Proof.
+  unfold hsilocal_simu_spec; wlp_simplify. symmetry; eauto.
+Qed.
+Hint Resolve hsilocal_frame_simu_check_correct: wlp.
+Global Opaque hsilocal_frame_simu_check.
+
+Definition revmap_check_single (dm: PTree.t node) (n tn: node) : ?? unit :=
+  DO res <~ some_or_fail (dm ! tn) "revmap_check_single: no mapping for tn";;
+  struct_check n res "revmap_check_single: n and res are physically different".
+
+Lemma revmap_check_single_correct dm pc1 pc2:
+  WHEN revmap_check_single dm pc1 pc2 ~> _ THEN
+  dm ! pc2 = Some pc1.
+Proof.
+  wlp_simplify. congruence.
+Qed.
+Hint Resolve revmap_check_single_correct: wlp.
+Global Opaque revmap_check_single.
+
+Definition hsiexit_simu_check (dm: PTree.t node) (f: RTLpath.function) (hse1 hse2: hsistate_exit): ?? unit :=
+  struct_check (hsi_cond hse1) (hsi_cond hse2) "hsiexit_simu_check: conditions do not match";;
+  phys_check (hsi_scondargs hse1) (hsi_scondargs hse2) "hsiexit_simu_check: args do not match";;
+  revmap_check_single dm (hsi_ifso hse1) (hsi_ifso hse2);;
+  DO path <~ some_or_fail ((fn_path f) ! (hsi_ifso hse1)) "hsiexit_simu_check: internal error";;
+  hsilocal_frame_simu_check (Regset.elements path.(input_regs)) (hsi_elocal hse1) (hsi_elocal hse2).
+
+Lemma hsiexit_simu_check_correct dm f hse1 hse2:
+  WHEN hsiexit_simu_check dm f hse1 hse2 ~> _ THEN
+  hsiexit_simu_spec dm f hse1 hse2.
+Proof.
+  unfold hsiexit_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsiexit_simu_check_correct: wlp.
+Global Opaque hsiexit_simu_check.
+
+Fixpoint hsiexits_simu_check (dm: PTree.t node) (f: RTLpath.function) (lhse1 lhse2: list hsistate_exit) :=
+  match lhse1,lhse2 with
+  | nil, nil => RET tt
+  | hse1 :: lhse1, hse2 :: lhse2 =>
+    hsiexit_simu_check dm f hse1 hse2;;
+    hsiexits_simu_check dm f lhse1 lhse2
+  | _, _ => FAILWITH "siexists_simu_check:  lengths do not match"
+  end.
+
+Lemma hsiexits_simu_check_correct dm f: forall le1 le2,
+  WHEN hsiexits_simu_check dm f le1 le2 ~> _ THEN
+  hsiexits_simu_spec dm f le1 le2.
+Proof.
+  unfold hsiexits_simu_spec; induction le1; simpl; destruct le2; wlp_simplify; constructor; eauto.
+Qed.
+Hint Resolve hsiexits_simu_check_correct: wlp.
+Global Opaque hsiexits_simu_check.
+
+Definition hsistate_simu_check (dm: PTree.t node) (f: RTLpath.function) outframe (hst1 hst2: hsistate) :=
+  hsiexits_simu_check dm f (hsi_exits hst1) (hsi_exits hst2);;
+  hsilocal_frame_simu_check (Regset.elements outframe) (hsi_local hst1) (hsi_local hst2).
+
+Lemma hsistate_simu_check_correct dm f outframe hst1 hst2:
+  WHEN hsistate_simu_check dm f outframe hst1 hst2 ~> _ THEN
+  hsistate_simu_spec dm f outframe hst1 hst2.
+Proof.
+  unfold hsistate_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsistate_simu_check_correct: wlp.
+Global Opaque hsistate_simu_check.
+
+
+Fixpoint revmap_check_list (dm: PTree.t node) (ln ln': list node): ?? unit :=
+  match ln, ln' with
+  | nil, nil => RET tt
+  | n::ln, n'::ln' => 
+      revmap_check_single dm n n';;
+      revmap_check_list dm ln ln'
+  | _, _ => FAILWITH "revmap_check_list: lists have different lengths"
+  end.
+
+Lemma revmap_check_list_correct dm: forall lpc lpc',
+  WHEN revmap_check_list dm lpc lpc' ~> _ THEN
+  ptree_get_list dm lpc' = Some lpc.
+Proof.
+  induction lpc.
+  - destruct lpc'; wlp_simplify.
+  - destruct lpc'; wlp_simplify. try_simplify_someHyps.
+Qed.
+Global Opaque revmap_check_list.
+Hint Resolve revmap_check_list_correct: wlp.
+
+
+Definition svos_simu_check (svos1 svos2: hsval + ident) :=
+  match svos1, svos2 with
+  | inl sv1, inl sv2 => phys_check sv1 sv2 "svos_simu_check: sval mismatch"
+  | inr id1, inr id2 => phys_check id1 id2 "svos_simu_check: symbol mismatch"
+  | _, _ => FAILWITH "svos_simu_check: type mismatch"
+  end.
+
+Lemma svos_simu_check_correct svos1 svos2:
+  WHEN svos_simu_check svos1 svos2 ~> _ THEN
+  svos1 = svos2.
+Proof.
+  destruct svos1; destruct svos2; wlp_simplify.
+Qed.
+Global Opaque svos_simu_check.
+Hint Resolve svos_simu_check_correct: wlp.
+
+
+Fixpoint builtin_arg_simu_check (bs bs': builtin_arg hsval) :=
+  match bs with
+  | BA sv =>
+    match bs' with
+    | BA sv' => phys_check sv sv' "builtin_arg_simu_check: sval mismatch"
+    | _ => FAILWITH "builtin_arg_simu_check: BA mismatch"
+    end
+  | BA_splitlong lo hi =>
+    match bs' with
+    | BA_splitlong lo' hi' =>
+        builtin_arg_simu_check lo lo';;
+        builtin_arg_simu_check hi hi'
+    | _ => FAILWITH "builtin_arg_simu_check: BA_splitlong mismatch"
+    end
+  | BA_addptr b1 b2 =>
+    match bs' with
+    | BA_addptr b1' b2' =>
+        builtin_arg_simu_check b1 b1';;
+        builtin_arg_simu_check b2 b2'
+    | _ => FAILWITH "builtin_arg_simu_check: BA_addptr mismatch"
+    end
+  | bs => struct_check bs bs' "builtin_arg_simu_check: basic mismatch"
+  end.
+
+Lemma builtin_arg_simu_check_correct: forall bs1 bs2,
+  WHEN builtin_arg_simu_check bs1 bs2 ~> _ THEN
+  builtin_arg_map hsval_proj bs1 = builtin_arg_map hsval_proj bs2.
+Proof.
+  induction bs1.
+  all: try (wlp_simplify; subst; reflexivity).
+  all: destruct bs2; wlp_simplify; congruence.
+Qed.
+Global Opaque builtin_arg_simu_check.
+Hint Resolve builtin_arg_simu_check_correct: wlp.
+
+Fixpoint list_builtin_arg_simu_check lbs1 lbs2 :=
+  match lbs1, lbs2 with
+  | nil, nil => RET tt
+  | bs1::lbs1, bs2::lbs2 =>
+    builtin_arg_simu_check bs1 bs2;;
+    list_builtin_arg_simu_check lbs1 lbs2
+  | _, _ => FAILWITH "list_builtin_arg_simu_check: length mismatch"
+  end.
+
+Lemma list_builtin_arg_simu_check_correct: forall lbs1 lbs2,
+  WHEN list_builtin_arg_simu_check lbs1 lbs2 ~> _ THEN
+  List.map (builtin_arg_map hsval_proj) lbs1 = List.map (builtin_arg_map hsval_proj) lbs2.
+Proof.
+  induction lbs1; destruct lbs2; wlp_simplify. congruence.
+Qed.
+Global Opaque list_builtin_arg_simu_check.
+Hint Resolve list_builtin_arg_simu_check_correct: wlp.
+
+Definition sfval_simu_check (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (fv1 fv2: hsfval) :=
+  match fv1, fv2 with
+  | HSnone, HSnone => revmap_check_single dm pc1 pc2
+  | HScall sig1 svos1 lsv1 res1 pc1, HScall sig2 svos2 lsv2 res2 pc2 =>
+      revmap_check_single dm pc1 pc2;;
+      phys_check sig1 sig2 "sfval_simu_check: Scall different signatures";;
+      phys_check res1 res2 "sfval_simu_check: Scall res do not match";;
+      svos_simu_check svos1 svos2;;
+      phys_check lsv1 lsv2 "sfval_simu_check: Scall args do not match"
+  | HStailcall sig1 svos1 lsv1, HStailcall sig2 svos2 lsv2 =>
+      phys_check sig1 sig2 "sfval_simu_check: Stailcall different signatures";;
+      svos_simu_check svos1 svos2;;
+      phys_check lsv1 lsv2 "sfval_simu_check: Stailcall args do not match"
+  | HSbuiltin ef1 lbs1 br1 pc1, HSbuiltin ef2 lbs2 br2 pc2 =>
+      revmap_check_single dm pc1 pc2;;
+      phys_check ef1 ef2 "sfval_simu_check: builtin ef do not match";;
+      phys_check br1 br2 "sfval_simu_check: builtin br do not match";;
+      list_builtin_arg_simu_check lbs1 lbs2
+  | HSjumptable sv ln, HSjumptable sv' ln' =>
+      revmap_check_list dm ln ln';;
+      phys_check sv sv' "sfval_simu_check: Sjumptable sval do not match"
+  | HSreturn osv1, HSreturn osv2 =>
+      option_eq_check osv1 osv2
+  | _, _ => FAILWITH "sfval_simu_check: structure mismatch"
+  end.
+
+Lemma sfval_simu_check_correct dm f opc1 opc2 fv1 fv2:
+  WHEN sfval_simu_check dm f opc1 opc2 fv1 fv2 ~> _ THEN
+  hfinal_simu_spec dm f opc1 opc2 fv1 fv2.
+Proof.
+  unfold hfinal_simu_spec; destruct fv1; destruct fv2; wlp_simplify; try congruence.
+Qed.
+Hint Resolve sfval_simu_check_correct: wlp.
+Global Opaque sfval_simu_check.
+
+Definition hsstate_simu_check (dm: PTree.t node) (f: RTLpath.function) outframe (hst1 hst2: hsstate) :=
+  hsistate_simu_check dm f outframe (hinternal hst1) (hinternal hst2);;
+  sfval_simu_check dm f (hsi_pc hst1) (hsi_pc hst2) (hfinal hst1) (hfinal hst2).
+
+Lemma hsstate_simu_check_correct dm f outframe hst1 hst2:
+  WHEN hsstate_simu_check dm f outframe hst1 hst2 ~> _ THEN
+  hsstate_simu_spec dm f outframe hst1 hst2.
+Proof.
+  unfold hsstate_simu_spec; wlp_simplify.
+Qed.
+Hint Resolve hsstate_simu_check_correct: wlp.
+Global Opaque hsstate_simu_check.
+
+Definition simu_check_single (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) (m: node * node): ?? unit :=
+  let (pc2, pc1) := m in
+  (* creating the hash-consing tables *)
+  DO hC_sval <~ hCons hSVAL;;
+  DO hC_list_hsval <~ hCons hLSVAL;;
+  DO hC_hsmem <~ hCons hSMEM;;
+  let hsexec := hsexec hC_sval.(hC) hC_list_hsval.(hC) hC_hsmem.(hC) in
+  (* performing the hash-consed executions *)
+  XDEBUG pc1 (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("entry-point of input superblock: " +; name_pc)%string);;
+  DO hst1 <~ hsexec f pc1;;
+  XDEBUG pc2 (fun pc => DO name_pc <~ string_of_Z (Zpos pc);; RET ("entry-point of output superblock: " +; name_pc)%string);;
+  DO hst2 <~ hsexec tf pc2;;
+  DO path <~ some_or_fail ((fn_path f)!pc1) "simu_check_single.internal_error.1";;
+  let outframe := path.(pre_output_regs) in
+  (* comparing the executions *)
+  hsstate_simu_check dm f outframe hst1 hst2.
+
+Lemma simu_check_single_correct dm tf f pc1 pc2:
+  WHEN simu_check_single dm f tf (pc2, pc1) ~> _ THEN
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold sexec_simu; wlp_simplify.
+  exploit H2; clear H2. 1-3: wlp_simplify.
+  intros (st2 & SEXEC2 & REF2). try_simplify_someHyps.
+  exploit H3; clear H3. 1-3: wlp_simplify.
+  intros (st3 & SEXEC3 & REF3). try_simplify_someHyps.
+  eexists. eexists. split; eauto. split; eauto.
+  intros ctx.
+  eapply hsstate_simu_spec_correct; eauto.
+Qed.
+Global Opaque simu_check_single.
+Global Hint Resolve simu_check_single_correct: wlp.
+
+Fixpoint simu_check_rec (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) lm : ?? unit :=
+  match lm with
+  | nil => RET tt
+  | m :: lm => 
+    simu_check_single dm f tf m;;
+    simu_check_rec dm f tf lm
+  end.
+
+Lemma simu_check_rec_correct dm f tf lm:
+  WHEN simu_check_rec dm f tf lm ~> _ THEN
+  forall pc1 pc2, In (pc2, pc1) lm -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  induction lm; wlp_simplify.
+  match goal with
+  | X: (_,_) = (_,_) |- _ => inversion X; subst
+  end.
+  subst; eauto.
+Qed.
+Global Opaque simu_check_rec.
+Global Hint Resolve simu_check_rec_correct: wlp.
+
+Definition imp_simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function): ?? unit :=
+   simu_check_rec dm f tf (PTree.elements dm);;
+   DEBUG("simu_check OK!").
+
+Local Hint Resolve PTree.elements_correct: core.
+Lemma imp_simu_check_correct dm f tf:
+  WHEN imp_simu_check dm f tf ~> _ THEN
+  forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  wlp_simplify.
+Qed.
+Global Opaque imp_simu_check.
+Global Hint Resolve imp_simu_check_correct: wlp.
+
+Program Definition aux_simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function): ?? bool :=
+   DO r <~ 
+     (TRY 
+       imp_simu_check dm f tf;; 
+       RET true
+      CATCH_FAIL s, _ =>
+       println ("simu_check_failure:" +; s);;
+       RET false
+      ENSURE (fun b => b=true -> forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2));;
+   RET (`r).
+Obligation 1.
+  split; wlp_simplify. discriminate.
+Qed.
+
+Lemma aux_simu_check_correct dm f tf:
+  WHEN aux_simu_check dm f tf ~> b THEN
+  b=true -> forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold aux_simu_check; wlp_simplify.
+  destruct exta; simpl; auto.
+Qed.
+
+(* Coerce aux_simu_check into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition simu_check (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) : res unit := 
+  match unsafe_coerce (aux_simu_check dm f tf) with
+  | Some true => OK tt
+  | _ => Error (msg "simu_check has failed")
+  end.
+
+Lemma simu_check_correct dm f tf:
+  simu_check dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold simu_check.
+  destruct (unsafe_coerce (aux_simu_check dm f tf)) as [[|]|] eqn:Hres; simpl; try discriminate.
+  intros; eapply aux_simu_check_correct; eauto.
+  eapply unsafe_coerce_not_really_correct; eauto.
+Qed.
diff --git a/scheduling/RTLpathSE_simu_specs.v b/scheduling/RTLpathSE_simu_specs.v
new file mode 100644
index 00000000..c3266db9
--- /dev/null
+++ b/scheduling/RTLpathSE_simu_specs.v
@@ -0,0 +1,937 @@
+(** Low-level specifications of the simulation tests by symbolic execution with hash-consing *)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors.
+Require Import RTLpathSE_theory RTLpathLivegenproof.
+Require Import Axioms.
+
+Local Open Scope error_monad_scope.
+Local Open Scope option_monad_scope.
+
+Require Export Impure.ImpHCons.
+Import HConsing.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+(** * Auxilary notions on simulation tests *)
+
+Definition silocal_simu (dm: PTree.t node) (f: RTLpath.function) outframe (sl1 sl2: sistate_local) (ctx: simu_proof_context f): Prop :=
+    forall is1, ssem_local (the_ge1 ctx) (the_sp ctx) sl1 (the_rs0 ctx) (the_m0 ctx) (irs is1) (imem is1) ->
+    exists is2, ssem_local (the_ge2 ctx) (the_sp ctx) sl2 (the_rs0 ctx) (the_m0 ctx) (irs is2) (imem is2)
+                /\ istate_simu f dm outframe is1 is2.
+
+(* a kind of negation of sabort_local *)
+Definition sok_local (ge: RTL.genv) (sp:val) (rs0: regset) (m0: mem) (st: sistate_local): Prop :=
+  (st.(si_pre) ge sp rs0 m0)
+  /\ seval_smem ge sp st.(si_smem) rs0 m0 <> None
+  /\ forall (r: reg), seval_sval ge sp (si_sreg st r) rs0 m0 <> None.
+
+Lemma ssem_local_sok ge sp rs0 m0 st rs m:
+  ssem_local ge sp st rs0 m0 rs m -> sok_local ge sp rs0 m0 st.
+Proof.
+  unfold sok_local, ssem_local. 
+  intuition congruence.
+Qed.
+
+Definition siexit_simu (dm: PTree.t node) (f: RTLpath.function) outframe (ctx: simu_proof_context f) (se1 se2: sistate_exit) :=
+  (sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1) ->
+          (seval_condition (the_ge1 ctx) (the_sp ctx) (si_cond se1) (si_scondargs se1) 
+                             (si_smem (si_elocal se1)) (the_rs0 ctx) (the_m0 ctx)) =
+          (seval_condition (the_ge2 ctx) (the_sp ctx) (si_cond se2) (si_scondargs se2)
+                             (si_smem (si_elocal se2)) (the_rs0 ctx) (the_m0 ctx)))
+  /\ forall is1,
+      icontinue is1 = false ->
+      ssem_exit (the_ge1 ctx) (the_sp ctx) se1 (the_rs0 ctx) (the_m0 ctx) (irs is1) (imem is1) (ipc is1) ->
+      exists is2,
+          ssem_exit (the_ge2 ctx) (the_sp ctx) se2 (the_rs0 ctx) (the_m0 ctx) (irs is2) (imem is2) (ipc is2)
+      /\  istate_simu f dm outframe is1 is2.
+
+Definition siexits_simu (dm: PTree.t node) (f: RTLpath.function) outframe (lse1 lse2: list sistate_exit) (ctx: simu_proof_context f) :=
+  list_forall2 (siexit_simu dm f outframe ctx) lse1 lse2.
+
+
+(** * Implementation of Data-structure use in Hash-consing *)
+
+(** ** Implementation of symbolic values/symbolic memories with hash-consing data *)
+
+Inductive hsval :=
+  | HSinput (r: reg) (hid: hashcode)
+  | HSop (op: operation) (lhsv: list_hsval) (hid: hashcode) (** NB: does not depend on the memory ! *)
+  | HSload (hsm: hsmem) (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (hid: hashcode)
+with list_hsval :=
+  | HSnil (hid: hashcode)
+  | HScons (hsv: hsval) (lhsv: list_hsval) (hid: hashcode)
+with hsmem :=
+  | HSinit (hid: hashcode)
+  | HSstore (hsm: hsmem) (chunk: memory_chunk) (addr: addressing) (lhsv: list_hsval) (srce: hsval) (hid:hashcode).
+
+Scheme hsval_mut := Induction for hsval Sort Prop
+with list_hsval_mut := Induction for list_hsval Sort Prop
+with hsmem_mut := Induction for hsmem Sort Prop.
+
+
+
+(** Symbolic final value -- from hash-consed values
+  It does not seem useful to hash-consed these final values (because they are final).
+*)
+Inductive hsfval :=
+  | HSnone
+  | HScall (sig: signature) (svos: hsval + ident) (lsv: list_hsval) (res: reg) (pc: node)
+  | HStailcall (sig: signature) (svos: hsval + ident) (lsv: list_hsval)
+  | HSbuiltin (ef: external_function) (sargs: list (builtin_arg hsval)) (res: builtin_res reg) (pc: node)
+  | HSjumptable (sv: hsval) (tbl: list node)
+  | HSreturn (res: option hsval)
+.
+
+(** * gives the semantics of hash-consed symbolic values *)
+Fixpoint hsval_proj hsv :=
+  match hsv with
+  | HSinput r _ => Sinput r
+  | HSop op hl _ => Sop op (hsval_list_proj hl) Sinit (** NB: use the initial memory of the path ! *)
+  | HSload hm t chk addr hl _ => Sload (hsmem_proj hm) t chk addr (hsval_list_proj hl)
+  end
+with hsval_list_proj hl :=
+  match hl with
+  | HSnil _ => Snil
+  | HScons hv hl _ => Scons (hsval_proj hv) (hsval_list_proj hl)
+  end
+with hsmem_proj hm :=
+  match hm with
+  | HSinit _ => Sinit
+  | HSstore hm chk addr hl hv _ => Sstore (hsmem_proj hm) chk addr (hsval_list_proj hl) (hsval_proj hv)
+  end.
+
+Declare Scope hse.
+Local Open Scope hse.
+
+
+(** We use a Notation instead a Definition, in order to get more automation "for free" *)
+Notation "'seval_hsval' ge sp hsv" := (seval_sval ge sp (hsval_proj hsv))
+  (only parsing, at level 0, ge at next level, sp at next level, hsv at next level): hse.
+Notation "'seval_list_hsval' ge sp lhv" := (seval_list_sval ge sp (hsval_list_proj lhv))
+  (only parsing, at level 0, ge at next level, sp at next level, lhv at next level): hse.
+Notation "'seval_hsmem' ge sp hsm" := (seval_smem ge sp (hsmem_proj hsm))
+  (only parsing, at level 0, ge at next level, sp at next level, hsm at next level): hse.
+
+Notation "'sval_refines' ge sp rs0 m0 hv sv" := (seval_hsval ge sp hv rs0 m0 = seval_sval ge sp sv rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, hv at next level, sv at next level): hse.
+Notation "'list_sval_refines' ge sp rs0 m0 lhv lsv" := (seval_list_hsval ge sp lhv rs0 m0 = seval_list_sval ge sp lsv rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, lhv at next level, lsv at next level): hse.
+Notation "'smem_refines' ge sp rs0 m0 hm sm" := (seval_hsmem ge sp hm rs0 m0 = seval_smem ge sp sm rs0 m0)
+  (only parsing, at level 0, ge at next level, sp at next level, rs0 at next level, m0 at next level, hm at next level, sm at next level): hse.
+
+
+(** ** Implementation of symbolic states (with hash-consing) *)
+
+(** *** Syntax and semantics of symbolic internal local states 
+
+The semantics is given by the refinement relation [hsilocal_refines] wrt to (abstract) symbolic internal local states
+
+*)
+
+(* NB: "h" stands for hash-consing *)
+Record hsistate_local := 
+  { 
+    (** [hsi_smem] represents the current smem symbolic evaluations.
+        (we also recover the history of smem in hsi_smem)  *)
+    hsi_smem:> hsmem;
+    (** For the values in registers:
+        1) we store a list of sval evaluations
+        2) we encode the symbolic regset by a PTree *)
+    hsi_ok_lsval: list hsval;
+    hsi_sreg:> PTree.t hsval
+  }.
+
+Definition hsi_sreg_proj (hst: PTree.t hsval) r: sval :=
+   match PTree.get r hst with
+   | None => Sinput r
+   | Some hsv => hsval_proj hsv
+   end.
+
+Definition hsi_sreg_eval ge sp hst r := seval_sval ge sp (hsi_sreg_proj hst r).
+
+Definition hsok_local ge sp rs0 m0 (hst: hsistate_local) : Prop :=
+     (forall hsv, List.In hsv (hsi_ok_lsval hst) -> seval_hsval ge sp hsv rs0 m0 <> None)
+  /\ (seval_hsmem ge sp (hst.(hsi_smem)) rs0 m0 <> None).
+
+(* refinement link between a (st: sistate_local) and (hst: hsistate_local) *)
+Definition hsilocal_refines ge sp rs0 m0 (hst: hsistate_local) (st: sistate_local) :=
+      (sok_local ge sp rs0 m0 st <-> hsok_local ge sp rs0 m0 hst)
+  /\  (hsok_local ge sp rs0 m0 hst -> smem_refines ge sp rs0 m0 (hsi_smem hst) (st.(si_smem)))
+  /\  (hsok_local ge sp rs0 m0 hst -> forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (si_sreg st r) rs0 m0)
+  /\  (* the below invariant allows to evaluate operations in the initial memory of the path instead of the current memory *)
+      (forall m b ofs, seval_smem ge sp st.(si_smem) rs0 m0 = Some m -> Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  .
+
+(** *** Syntax and semantics of symbolic exit states *)
+Record hsistate_exit := mk_hsistate_exit
+  { hsi_cond: condition; hsi_scondargs: list_hsval; hsi_elocal: hsistate_local; hsi_ifso: node }.
+
+(** NB: we split the refinement relation between a "static" part -- independendent of the initial context
+   and a "dynamic" part -- that depends on it
+*)
+Definition hsiexit_refines_stat (hext: hsistate_exit) (ext: sistate_exit): Prop :=
+  hsi_ifso hext = si_ifso ext.
+
+Definition hseval_condition ge sp cond hcondargs hmem rs0 m0 :=
+  seval_condition ge sp cond (hsval_list_proj hcondargs) (hsmem_proj hmem) rs0 m0.
+
+Lemma hseval_condition_preserved ge ge' sp cond args mem rs0 m0:
+  (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+  hseval_condition ge sp cond args mem rs0 m0 = hseval_condition ge' sp cond args mem rs0 m0.
+Proof.
+  intros. unfold hseval_condition. erewrite seval_condition_preserved; [|eapply H].
+  reflexivity.
+Qed.
+
+Definition hsiexit_refines_dyn ge sp rs0 m0 (hext: hsistate_exit) (ext: sistate_exit): Prop :=
+   hsilocal_refines ge sp rs0 m0 (hsi_elocal hext) (si_elocal ext)
+   /\ (hsok_local ge sp rs0 m0 (hsi_elocal hext) ->
+        hseval_condition ge sp (hsi_cond hext) (hsi_scondargs hext) (hsi_smem (hsi_elocal hext)) rs0 m0
+         = seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs0 m0).
+
+Definition hsiexits_refines_stat lhse lse :=
+  list_forall2 hsiexit_refines_stat lhse lse.
+
+Definition hsiexits_refines_dyn ge sp rs0 m0 lhse se :=
+  list_forall2 (hsiexit_refines_dyn ge sp rs0 m0) lhse se.
+
+
+(** *** Syntax and Semantics of symbolic internal state *)
+
+Record hsistate := { hsi_pc: node; hsi_exits: list hsistate_exit; hsi_local: hsistate_local }.
+
+(* expresses the "monotony" of sok_local along sequences *)
+Inductive nested_sok ge sp rs0 m0: sistate_local -> list sistate_exit -> Prop :=
+    nsok_nil st: nested_sok ge sp rs0 m0 st nil
+  | nsok_cons st se lse:
+     (sok_local ge sp rs0 m0 st -> sok_local ge sp rs0 m0 (si_elocal se)) ->
+     nested_sok ge sp rs0 m0 (si_elocal se) lse ->
+     nested_sok ge sp rs0 m0 st (se::lse).
+
+Lemma nested_sok_prop ge sp st sle rs0 m0:
+  nested_sok ge sp rs0 m0 st sle ->
+  sok_local ge sp rs0 m0 st ->
+  forall se, In se sle -> sok_local ge sp rs0 m0 (si_elocal se).
+Proof.
+  induction 1; simpl; intuition (subst; eauto).
+Qed.
+
+Lemma nested_sok_elocal ge sp rs0 m0 st2 exits:
+  nested_sok ge sp rs0 m0 st2 exits ->
+  forall st1, (sok_local ge sp rs0 m0 st1 -> sok_local ge sp rs0 m0 st2) ->
+  nested_sok ge sp rs0 m0 st1 exits.
+Proof.
+  induction 1; [intros; constructor|].
+  intros. constructor; auto.
+Qed.
+
+Lemma nested_sok_tail ge sp rs0 m0 st lx exits:
+  is_tail lx exits ->
+  nested_sok ge sp rs0 m0 st exits ->
+  nested_sok ge sp rs0 m0 st lx.
+Proof.
+  induction 1; [auto|].
+  intros. inv H0. eapply IHis_tail. eapply nested_sok_elocal; eauto.
+Qed.
+
+Definition hsistate_refines_stat (hst: hsistate) (st:sistate): Prop :=
+  hsi_pc hst = si_pc st
+  /\ hsiexits_refines_stat (hsi_exits hst) (si_exits st).
+
+Definition hsistate_refines_dyn ge sp rs0 m0 (hst: hsistate) (st:sistate): Prop :=
+     hsiexits_refines_dyn ge sp rs0 m0 (hsi_exits hst) (si_exits st)
+  /\ hsilocal_refines ge sp rs0 m0 (hsi_local hst) (si_local st)
+  /\ nested_sok ge sp rs0 m0 (si_local st) (si_exits st) (* invariant necessary to prove "monotony" of sok_local along execution *)
+  .
+
+(** *** Syntax and Semantics of symbolic state *)
+
+Definition hfinal_proj (hfv: hsfval) : sfval := 
+  match hfv with
+  | HSnone => Snone
+  | HScall s hvi hlv r pc => Scall s (sum_left_map hsval_proj hvi) (hsval_list_proj hlv) r pc
+  | HStailcall s hvi hlv => Stailcall s (sum_left_map hsval_proj hvi) (hsval_list_proj hlv)
+  | HSbuiltin ef lbh br pc => Sbuiltin ef (List.map (builtin_arg_map hsval_proj) lbh) br pc
+  | HSjumptable hv ln => Sjumptable (hsval_proj hv) ln
+  | HSreturn oh => Sreturn (option_map hsval_proj oh)
+  end.
+
+Section HFINAL_REFINES.
+
+Variable ge: RTL.genv.
+Variable sp: val.
+Variable rs0: regset.
+Variable m0: mem.
+
+Definition option_refines (ohsv: option hsval) (osv: option sval) :=
+  match ohsv, osv with
+  | Some hsv, Some sv => sval_refines ge sp rs0 m0 hsv sv
+  | None, None => True
+  | _, _ => False
+  end.
+
+Definition sum_refines (hsi: hsval + ident) (si: sval + ident) :=
+  match hsi, si with
+  | inl hv, inl sv => sval_refines ge sp rs0 m0 hv sv
+  | inr id, inr id' => id = id'
+  | _, _ => False
+  end.
+
+Definition bargs_refines (hargs: list (builtin_arg hsval)) (args: list (builtin_arg sval)): Prop :=
+  seval_list_builtin_sval ge sp (List.map (builtin_arg_map hsval_proj) hargs) rs0 m0 = seval_list_builtin_sval ge sp args rs0 m0.
+
+Inductive hfinal_refines: hsfval -> sfval -> Prop :=
+  | hsnone_ref: hfinal_refines HSnone Snone
+  | hscall_ref: forall hros ros hargs args s r pc,
+      sum_refines hros ros ->
+      list_sval_refines ge sp rs0 m0 hargs args ->
+      hfinal_refines (HScall s hros hargs r pc) (Scall s ros args r pc)
+  | hstailcall_ref: forall hros ros hargs args s,
+      sum_refines hros ros ->
+      list_sval_refines ge sp rs0 m0 hargs args ->
+      hfinal_refines (HStailcall s hros hargs) (Stailcall s ros args)
+  | hsbuiltin_ref: forall ef lbha lba br pc,
+      bargs_refines lbha lba ->
+      hfinal_refines (HSbuiltin ef lbha br pc) (Sbuiltin ef lba br pc)
+  | hsjumptable_ref: forall hsv sv lpc,
+      sval_refines ge sp rs0 m0 hsv sv -> hfinal_refines (HSjumptable hsv lpc) (Sjumptable sv lpc)
+  | hsreturn_ref: forall ohsv osv,
+      option_refines ohsv osv -> hfinal_refines (HSreturn ohsv) (Sreturn osv).
+
+End HFINAL_REFINES.
+
+(* TODO gourdinl Leave this here ? *)
+Section FAKE_HSVAL.
+(* BEGIN "fake" hsval without real hash-consing *)
+(* TODO: 
+  2) reuse these definitions in hSinput, hSop, etc 
+     in order to factorize proofs ?
+*)
+
+Definition fSinput (r: reg): hsval :=
+  HSinput r unknown_hid.
+
+Lemma fSinput_correct r ge sp rs0 m0: (* useless trivial lemma ? *)
+  sval_refines ge sp rs0 m0 (fSinput r) (Sinput r).
+Proof.
+  auto.
+Qed.
+
+Definition fSop (op:operation) (lhsv: list_hsval): hsval :=
+   HSop op lhsv unknown_hid.
+
+Lemma fSop_correct op lhsv ge sp rs0 m0 lsv sm m: forall
+   (MEM: seval_smem ge sp sm rs0 m0 = Some m)
+   (MVALID: forall b ofs, Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+   (LR: list_sval_refines ge sp rs0 m0 lhsv lsv),
+   sval_refines ge sp rs0 m0 (fSop op lhsv) (Sop op lsv sm).
+Proof.
+  intros; simpl. rewrite <- LR, MEM.
+  destruct (seval_list_sval _ _ _ _); try congruence.
+  eapply op_valid_pointer_eq; eauto.
+Qed.
+
+Definition fsi_sreg_get (hst: PTree.t hsval) r: hsval :=
+   match PTree.get r hst with 
+   | None => fSinput r
+   | Some sv => sv
+   end.
+
+Lemma fsi_sreg_get_correct hst r ge sp rs0 m0 (f: reg -> sval): forall
+    (RR: forall r, hsi_sreg_eval ge sp hst r rs0 m0 = seval_sval ge sp (f r) rs0 m0),
+    sval_refines ge sp rs0 m0 (fsi_sreg_get hst r) (f r).
+Proof.
+   unfold hsi_sreg_eval, hsi_sreg_proj, fsi_sreg_get; intros; simpl.
+   rewrite <- RR. destruct (hst ! r); simpl; auto.
+Qed.
+
+Definition fSnil: list_hsval :=
+   HSnil unknown_hid.
+
+(* TODO: Lemma fSnil_correct *)
+
+Definition fScons (hsv: hsval) (lhsv: list_hsval): list_hsval :=
+   HScons hsv lhsv unknown_hid.
+
+(* TODO: Lemma fScons_correct *)
+
+(* END "fake" hsval ... *)
+
+End FAKE_HSVAL.
+
+
+Record hsstate := { hinternal:> hsistate; hfinal: hsfval }.
+
+Definition hsstate_refines (hst: hsstate) (st:sstate): Prop :=
+   hsistate_refines_stat (hinternal hst) (internal st)
+  /\ (forall ge sp rs0 m0, hsistate_refines_dyn ge sp rs0 m0 (hinternal hst) (internal st))
+  /\ (forall ge sp rs0 m0, hsok_local ge sp rs0 m0 (hsi_local (hinternal hst)) -> hfinal_refines ge sp rs0 m0 (hfinal hst) (final st))
+  .
+
+(** * Intermediate specifications of the simulation tests *)
+
+(** ** Specification of the simulation test on [hsistate_local].
+       It is motivated by [hsilocal_simu_spec_correct theorem] below
+*)
+Definition hsilocal_simu_spec (alive: Regset.t) (hst1 hst2: hsistate_local) :=
+     List.incl (hsi_ok_lsval hst2) (hsi_ok_lsval hst1)
+  /\ (forall r, Regset.In r alive -> PTree.get r hst2 = PTree.get r hst1)
+  /\ hsi_smem hst1 = hsi_smem hst2.
+
+Definition seval_sval_partial ge sp rs0 m0 hsv :=
+  match seval_hsval ge sp hsv rs0 m0 with
+  | Some v => v
+  | None => Vundef
+  end.
+
+Definition select_first (ox oy: option val) :=
+  match ox with
+  | Some v => Some v
+  | None => oy
+  end.
+
+(** If the register was computed by hrs, evaluate the symbolic value from hrs.
+    Else, take the value directly from rs0 *)
+Definition seval_partial_regset ge sp rs0 m0 hrs :=
+  let hrs_eval := PTree.map1 (seval_sval_partial ge sp rs0 m0) hrs in
+  (fst rs0, PTree.combine select_first hrs_eval (snd rs0)).
+
+Lemma seval_partial_regset_get ge sp rs0 m0 hrs r:
+  (seval_partial_regset ge sp rs0 m0 hrs) # r =
+  match (hrs ! r) with Some sv => seval_sval_partial ge sp rs0 m0 sv | None => (rs0 # r) end.
+Proof.
+  unfold seval_partial_regset. unfold Regmap.get. simpl.
+  rewrite PTree.gcombine; [| simpl; reflexivity]. rewrite PTree.gmap1.
+  destruct (hrs ! r); simpl; [reflexivity|].
+  destruct ((snd rs0) ! r); reflexivity.
+Qed.
+
+Lemma ssem_local_refines_hok ge sp rs0 m0 hst st rs m:
+  ssem_local ge sp st rs0 m0 rs m -> hsilocal_refines ge sp rs0 m0 hst st -> hsok_local ge sp rs0 m0 hst.
+Proof.
+  intros H0 (H1 & _ & _). apply H1. eapply ssem_local_sok. eauto.
+Qed.
+
+Lemma hsilocal_simu_spec_nofail ge1 ge2 of sp rs0 m0 hst1 hst2:
+  hsilocal_simu_spec of hst1 hst2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  hsok_local ge1 sp rs0 m0 hst1 ->
+  hsok_local ge2 sp rs0 m0 hst2.
+Proof.
+  intros (RSOK & _ & MEMOK) GFS (OKV & OKM). constructor.
+  - intros sv INS. apply RSOK in INS. apply OKV in INS. erewrite seval_preserved; eauto.
+  - erewrite MEMOK in OKM. erewrite smem_eval_preserved; eauto.
+Qed.
+
+Theorem hsilocal_simu_spec_correct hst1 hst2 alive ge1 ge2 sp rs0 m0 rs m st1 st2:
+  hsilocal_simu_spec alive hst1 hst2 ->
+  hsilocal_refines ge1 sp rs0 m0 hst1 st1 ->
+  hsilocal_refines ge2 sp rs0 m0 hst2 st2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  ssem_local ge1 sp st1 rs0 m0 rs m ->
+  let rs' := seval_partial_regset ge2 sp rs0 m0 (hsi_sreg hst2)
+  in ssem_local ge2 sp st2 rs0 m0 rs' m /\ eqlive_reg (fun r => Regset.In r alive) rs rs'.
+Proof.
+  intros CORE HREF1 HREF2 GFS SEML.
+  refine (modusponens _ _ (ssem_local_refines_hok _ _ _ _ _ _ _ _ _ _) _); eauto.
+  intro HOK1.
+  refine (modusponens _ _ (hsilocal_simu_spec_nofail _ _ _ _ _ _ _ _ _ _ _) _); eauto.
+  intro HOK2.
+  destruct SEML as (PRE & MEMEQ & RSEQ).
+  assert (SIPRE: si_pre st2 ge2 sp rs0 m0). { destruct HREF2 as (OKEQ & _ & _). rewrite <- OKEQ in HOK2. apply HOK2. }
+  assert (SMEMEVAL: seval_smem ge2 sp (si_smem st2) rs0 m0 = Some m). {
+    destruct HREF2 as (_ & MEMEQ2 & _). destruct HREF1 as (_ & MEMEQ1 & _).
+    destruct CORE as (_ & _ & MEMEQ3).
+    rewrite <- MEMEQ2; auto. rewrite <- MEMEQ3.
+    erewrite smem_eval_preserved; [| eapply GFS].
+    rewrite MEMEQ1; auto. }
+   constructor.
+   + constructor; [assumption | constructor; [assumption|]].
+      destruct HREF2 as (B & _ & A & _).
+      (** B is used for the auto below. *)
+      assert (forall r : positive, hsi_sreg_eval ge2 sp hst2 r rs0 m0 = seval_sval ge2 sp (si_sreg st2 r) rs0 m0) by auto.
+      intro r. rewrite <- H. clear H. 
+      generalize (A HOK2 r). unfold hsi_sreg_eval.
+      rewrite seval_partial_regset_get.
+      unfold hsi_sreg_proj.
+      destruct (hst2 ! r) eqn:HST2; [| simpl; reflexivity].
+      unfold seval_sval_partial. generalize HOK2; rewrite <- B; intros (_ & _ & C) D.
+      assert (seval_sval ge2 sp (hsval_proj h) rs0 m0 <> None) by congruence.
+      destruct (seval_sval ge2 sp _ rs0 m0); [reflexivity | contradiction].
+    + intros r ALIVE. destruct HREF2 as (_ & _ & A & _). destruct HREF1 as (_ & _ & B & _).
+      destruct CORE as (_ & C & _). rewrite seval_partial_regset_get.
+      assert (OPT: forall (x y: val), Some x = Some y -> x = y) by congruence.
+      destruct (hst2 ! r) eqn:HST2; apply OPT; clear OPT.
+      ++ unfold seval_sval_partial.
+         assert (seval_sval ge2 sp (hsval_proj h) rs0 m0 = hsi_sreg_eval ge2 sp hst2 r rs0 m0). {
+           unfold hsi_sreg_eval, hsi_sreg_proj. rewrite HST2. reflexivity. }
+         rewrite H. clear H. unfold hsi_sreg_eval, hsi_sreg_proj. rewrite C; [|assumption].
+         erewrite seval_preserved; [| eapply GFS].
+         unfold hsi_sreg_eval, hsi_sreg_proj in B; rewrite B; [|assumption]. rewrite RSEQ. reflexivity.
+      ++ rewrite <- RSEQ. rewrite <- B; [|assumption]. unfold hsi_sreg_eval, hsi_sreg_proj.
+         rewrite <- C; [|assumption]. rewrite HST2. reflexivity.
+Qed.
+
+(** ** Specification of the simulation test on [hsistate_exit].
+       It is motivated by [hsiexit_simu_spec_correct theorem] below
+*)
+Definition hsiexit_simu_spec dm f (hse1 hse2: hsistate_exit) :=
+  (exists path, (fn_path f) ! (hsi_ifso hse1) = Some path
+    /\ hsilocal_simu_spec path.(input_regs) (hsi_elocal hse1) (hsi_elocal hse2))
+  /\ dm ! (hsi_ifso hse2) = Some (hsi_ifso hse1)
+  /\ hsi_cond hse1 = hsi_cond hse2
+  /\ hsi_scondargs hse1 = hsi_scondargs hse2.
+
+Definition hsiexit_simu dm f outframe (ctx: simu_proof_context f) hse1 hse2: Prop := forall se1 se2,
+  hsiexit_refines_stat hse1 se1 ->
+  hsiexit_refines_stat hse2 se2 ->
+  hsiexit_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse1 se1 ->
+  hsiexit_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse2 se2 ->
+  siexit_simu dm f outframe ctx se1 se2.
+
+Lemma hsiexit_simu_spec_nofail dm f hse1 hse2 ge1 ge2 sp rs m:
+  hsiexit_simu_spec dm f hse1 hse2 ->
+  (forall s, Genv.find_symbol ge1 s = Genv.find_symbol ge2 s) ->
+  hsok_local ge1 sp rs m (hsi_elocal hse1) ->
+  hsok_local ge2 sp rs m (hsi_elocal hse2).
+Proof.
+  intros CORE GFS HOK1.
+  destruct CORE as ((p & _ & CORE') & _ & _ & _).
+  eapply hsilocal_simu_spec_nofail; eauto.
+Qed.
+
+Theorem hsiexit_simu_spec_correct dm f outframe hse1 hse2 ctx:
+  hsiexit_simu_spec dm f hse1 hse2 ->
+  hsiexit_simu dm f outframe ctx hse1 hse2.
+Proof.
+  intros SIMUC st1 st2 HREF1 HREF2 HDYN1 HDYN2.
+  assert (SEVALC:
+   sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal st1) ->
+    (seval_condition (the_ge1 ctx) (the_sp ctx) (si_cond st1) (si_scondargs st1) (si_smem (si_elocal st1)) 
+      (the_rs0 ctx) (the_m0 ctx)) =
+    (seval_condition (the_ge2 ctx) (the_sp ctx) (si_cond st2) (si_scondargs st2) (si_smem (si_elocal st2)) 
+      (the_rs0 ctx) (the_m0 ctx))).
+  { destruct HDYN1 as ((OKEQ1 & _) & SCOND1).
+    rewrite OKEQ1; intro OK1. rewrite <- SCOND1 by assumption. clear SCOND1.
+    generalize (genv_match ctx).
+    intro GFS; exploit hsiexit_simu_spec_nofail; eauto.
+    destruct HDYN2 as (_ & SCOND2). intro OK2. rewrite <- SCOND2 by assumption. clear OK1 OK2 SCOND2.
+    destruct SIMUC as ((path & _ & LSIMU) & _ & CONDEQ & ARGSEQ). destruct LSIMU as (_ & _ & MEMEQ).
+    rewrite CONDEQ. rewrite ARGSEQ. rewrite MEMEQ. erewrite <- hseval_condition_preserved; eauto.
+  }
+  constructor; [assumption|]. intros is1 ICONT SSEME.
+  assert (OK1: sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal st1)). {
+    destruct SSEME as (_ & SSEML & _). eapply ssem_local_sok; eauto. }
+  assert (HOK1: hsok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (hsi_elocal hse1)). {
+    destruct HDYN1 as (LREF & _). destruct LREF as (OKEQ & _ & _). rewrite <- OKEQ. assumption. }
+  exploit hsiexit_simu_spec_nofail. 2: eapply ctx. all: eauto. intro HOK2.
+  destruct SSEME as (SCOND & SLOC & PCEQ). destruct SIMUC as ((path & PATH & LSIMU) & REVEQ & _ & _); eauto.
+  destruct HDYN1 as (LREF1 & _). destruct HDYN2 as (LREF2 & _).
+  exploit hsilocal_simu_spec_correct; eauto; [apply ctx|]. simpl.
+  intros (SSEML & EQREG).
+  eexists (mk_istate (icontinue is1) (si_ifso st2) _ (imem is1)). simpl. constructor.
+  - constructor; intuition congruence || eauto.
+  - unfold istate_simu. rewrite ICONT.
+    simpl. assert (PCEQ': hsi_ifso hse1 = ipc is1) by congruence.
+    exists path. constructor; [|constructor]; [congruence| |congruence].
+    constructor; [|constructor]; simpl; auto.
+Qed.
+
+Remark hsiexit_simu_siexit dm f outframe ctx hse1 hse2 se1 se2:
+  hsiexit_simu dm f outframe ctx hse1 hse2 ->
+  hsiexit_refines_stat hse1 se1 ->
+  hsiexit_refines_stat hse2 se2 ->
+  hsiexit_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse1 se1 ->
+  hsiexit_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hse2 se2 ->
+  siexit_simu dm f outframe ctx se1 se2.
+Proof.
+  auto.
+Qed.
+
+(** ** Specification of the simulation test on [list hsistate_exit].
+       It is motivated by [hsiexit_simu_spec_correct theorem] below
+*)
+
+Definition hsiexits_simu dm f outframe (ctx: simu_proof_context f) (lhse1 lhse2: list hsistate_exit): Prop :=
+  list_forall2 (hsiexit_simu dm f outframe ctx) lhse1 lhse2.
+
+Definition hsiexits_simu_spec dm f lhse1 lhse2: Prop :=
+  list_forall2 (hsiexit_simu_spec dm f) lhse1 lhse2.
+
+Theorem hsiexits_simu_spec_correct dm f outframe lhse1 lhse2 ctx:
+  hsiexits_simu_spec dm f lhse1 lhse2 ->
+  hsiexits_simu dm f outframe ctx lhse1 lhse2.
+Proof.
+  induction 1; [constructor|].
+  constructor; [|apply IHlist_forall2; assumption].
+  apply hsiexit_simu_spec_correct; assumption.
+Qed.
+
+
+Lemma siexits_simu_all_fallthrough dm f outframe ctx: forall lse1 lse2,
+  siexits_simu dm f outframe lse1 lse2 ctx ->
+  all_fallthrough (the_ge1 ctx) (the_sp ctx) lse1 (the_rs0 ctx) (the_m0 ctx) ->
+  (forall se1, In se1 lse1 -> sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1)) ->
+  all_fallthrough (the_ge2 ctx) (the_sp ctx) lse2 (the_rs0 ctx) (the_m0 ctx).
+Proof.
+  induction 1; [unfold all_fallthrough; contradiction|]; simpl.
+  intros X OK ext INEXT. eapply all_fallthrough_revcons in X. destruct X as (SEVAL & ALLFU).
+  apply IHlist_forall2 in ALLFU.
+  - destruct H as (CONDSIMU & _).
+    inv INEXT; [|eauto].
+    erewrite <- CONDSIMU; eauto.
+  - intros; intuition.
+Qed.
+
+
+Lemma siexits_simu_all_fallthrough_upto dm f outframe ctx lse1 lse2:
+  siexits_simu dm f outframe lse1 lse2 ctx ->
+  forall ext1 lx1,
+  (forall se1, In se1 lx1 -> sok_local (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) (si_elocal se1)) ->
+  all_fallthrough_upto_exit (the_ge1 ctx) (the_sp ctx) ext1 lx1 lse1 (the_rs0 ctx) (the_m0 ctx) ->
+  exists ext2 lx2,
+    all_fallthrough_upto_exit (the_ge2 ctx) (the_sp ctx) ext2 lx2 lse2 (the_rs0 ctx) (the_m0 ctx)
+  /\ length lx1 = length lx2.
+Proof.
+  induction 1.
+  - intros ext lx1. intros OK H. destruct H as (ITAIL & ALLFU). eapply is_tail_false in ITAIL. contradiction.
+  - simpl; intros ext lx1 OK ALLFUE.
+    destruct ALLFUE as (ITAIL & ALLFU). inv ITAIL.
+    + eexists; eexists.
+      constructor; [| eapply list_forall2_length; eauto].
+      constructor; [econstructor | eapply siexits_simu_all_fallthrough; eauto].
+    + exploit IHlist_forall2.
+      * intuition. apply OK. eassumption.
+      * constructor; eauto.
+      * intros (ext2 & lx2 & ALLFUE2 & LENEQ).
+        eexists; eexists. constructor; eauto.
+        eapply all_fallthrough_upto_exit_cons; eauto.
+Qed.
+
+
+Lemma hsiexits_simu_siexits dm f outframe ctx lhse1 lhse2:
+  hsiexits_simu dm f outframe ctx lhse1 lhse2 ->
+  forall lse1 lse2,
+  hsiexits_refines_stat lhse1 lse1 ->
+  hsiexits_refines_stat lhse2 lse2 ->
+  hsiexits_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) lhse1 lse1 ->
+  hsiexits_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) lhse2 lse2 ->
+  siexits_simu dm f outframe lse1 lse2 ctx.
+Proof.
+  induction 1.
+  - intros. inv H. inv H0. constructor.
+  - intros lse1 lse2 SREF1 SREF2 DREF1 DREF2. inv SREF1. inv SREF2. inv DREF1. inv DREF2.
+    constructor; [| eapply IHlist_forall2; eauto].
+    eapply hsiexit_simu_siexit; eauto.
+Qed.
+
+
+(** ** Specification of the simulation test on [hsistate].
+       It is motivated by [hsistate_simu_spec_correct theorem] below
+*)
+
+Definition hsistate_simu_spec dm f outframe (hse1 hse2: hsistate) :=
+     list_forall2 (hsiexit_simu_spec dm f) (hsi_exits hse1) (hsi_exits hse2)
+  /\ hsilocal_simu_spec outframe (hsi_local hse1) (hsi_local hse2).
+
+Definition hsistate_simu dm f outframe (hst1 hst2: hsistate) (ctx: simu_proof_context f): Prop := forall st1 st2,
+  hsistate_refines_stat hst1 st1 ->
+  hsistate_refines_stat hst2 st2 ->
+  hsistate_refines_dyn (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hst1 st1 ->
+  hsistate_refines_dyn (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hst2 st2 ->
+  sistate_simu dm f outframe st1 st2 ctx.
+
+Lemma list_forall2_nth_error {A} (l1 l2: list A) P:
+  list_forall2 P l1 l2 ->
+  forall x1 x2 n,
+  nth_error l1 n = Some x1 ->
+  nth_error l2 n = Some x2 ->
+  P x1 x2.
+Proof.
+  induction 1.
+  - intros. rewrite nth_error_nil in H. discriminate.
+  - intros x1 x2 n. destruct n as [|n]; simpl.
+    + intros. inv H1. inv H2. assumption.
+    + apply IHlist_forall2.
+Qed.
+
+Lemma is_tail_length {A} (l1 l2: list A):
+  is_tail l1 l2 ->
+  (length l1 <= length l2)%nat.
+Proof.
+  induction l2.
+  - intro. destruct l1; auto. apply is_tail_false in H. contradiction.
+  - intros ITAIL. inv ITAIL; auto.
+    apply IHl2 in H1. clear IHl2. simpl. omega.
+Qed.
+
+Lemma is_tail_nth_error {A} (l1 l2: list A) x:
+  is_tail (x::l1) l2 ->
+  nth_error l2 ((length l2) - length l1 - 1) = Some x.
+Proof.
+  induction l2.
+  - intro ITAIL. apply is_tail_false in ITAIL. contradiction.
+  - intros ITAIL. assert (length (a::l2) = S (length l2)) by auto. rewrite H. clear H.
+    assert (forall n n', ((S n) - n' - 1)%nat = (n - n')%nat) by (intros; omega). rewrite H. clear H.
+    inv ITAIL.
+    + assert (forall n, (n - n)%nat = 0%nat) by (intro; omega). rewrite H.
+      simpl. reflexivity.
+    + exploit IHl2; eauto. intros. clear IHl2.
+      assert (forall n n', (n > n')%nat -> (n - n')%nat = S (n - n' - 1)%nat) by (intros; omega).
+      exploit (is_tail_length (x::l1)); eauto. intro. simpl in H2.
+      assert ((length l2 > length l1)%nat) by omega. clear H2.
+      rewrite H0; auto.
+Qed.
+
+Theorem hsistate_simu_spec_correct dm f outframe hst1 hst2 ctx:
+  hsistate_simu_spec dm f outframe hst1 hst2 ->
+  hsistate_simu dm f outframe hst1 hst2 ctx.
+Proof.
+  intros (ESIMU & LSIMU) st1 st2 (PCREF1 & EREF1) (PCREF2 & EREF2) DREF1 DREF2 is1 SEMI.
+  destruct DREF1 as (DEREF1 & LREF1 & NESTED). destruct DREF2 as (DEREF2 & LREF2 & _).
+  exploit hsiexits_simu_spec_correct; eauto. intro HESIMU.
+  unfold ssem_internal in SEMI. destruct (icontinue _) eqn:ICONT.
+  - destruct SEMI as (SSEML & PCEQ & ALLFU).
+    exploit hsilocal_simu_spec_correct; eauto; [apply ctx|]. simpl. intro SSEML2.
+    exists (mk_istate (icontinue is1) (si_pc st2) (seval_partial_regset (the_ge2 ctx) (the_sp ctx) 
+              (the_rs0 ctx) (the_m0 ctx) (hsi_local hst2)) (imem is1)). constructor.
+    + unfold ssem_internal. simpl. rewrite ICONT.
+      destruct SSEML2 as [SSEMLP EQLIVE].
+      constructor; [assumption | constructor; [reflexivity |]].
+      eapply siexits_simu_all_fallthrough; eauto.
+      * eapply hsiexits_simu_siexits; eauto.
+      * eapply nested_sok_prop; eauto.
+        eapply ssem_local_sok; eauto.
+    + unfold istate_simu. rewrite ICONT.
+      destruct SSEML2 as [SSEMLP EQLIVE].
+      constructor; simpl; auto.
+  - destruct SEMI as (ext & lx & SSEME & ALLFU).
+    assert (SESIMU: siexits_simu dm f outframe (si_exits st1) (si_exits st2) ctx) by (eapply hsiexits_simu_siexits; eauto).
+    exploit siexits_simu_all_fallthrough_upto; eauto.
+    * destruct ALLFU as (ITAIL & ALLF).
+      exploit nested_sok_tail; eauto. intros NESTED2.
+      inv NESTED2. destruct SSEME as (_ & SSEML & _). eapply ssem_local_sok in SSEML.
+      eapply nested_sok_prop; eauto.
+    * intros (ext2 & lx2 & ALLFU2 & LENEQ).
+      assert (EXTSIMU: siexit_simu dm f outframe ctx ext ext2). {
+        eapply list_forall2_nth_error; eauto.
+        - destruct ALLFU as (ITAIL & _). eapply is_tail_nth_error; eauto.
+        - destruct ALLFU2 as (ITAIL & _). eapply is_tail_nth_error in ITAIL.
+          assert (LENEQ': length (si_exits st1) = length (si_exits st2)) by (eapply list_forall2_length; eauto).
+          congruence. }
+      destruct EXTSIMU as (CONDEVAL & EXTSIMU).
+      apply EXTSIMU in SSEME; [|assumption]. clear EXTSIMU. destruct SSEME as (is2 & SSEME2 & ISIMU).
+      exists (mk_istate (icontinue is1) (ipc is2) (irs is2) (imem is2)). constructor.
+      + unfold ssem_internal. simpl. rewrite ICONT. exists ext2, lx2. constructor; assumption.
+      + unfold istate_simu in *. rewrite ICONT in *. destruct ISIMU as (path & PATHEQ & ISIMULIVE & DMEQ).
+        destruct ISIMULIVE as (CONTEQ & REGEQ & MEMEQ).
+        exists path. repeat (constructor; auto).
+Qed.
+
+
+(** ** Specification of the simulation test on [sfval].
+       It is motivated by [hfinal_simu_spec_correct theorem] below
+*)
+
+
+Definition final_simu_spec (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (f1 f2: sfval): Prop :=
+  match f1 with
+  | Scall sig1 svos1 lsv1 res1 pc1 =>
+      match f2 with
+      | Scall sig2 svos2 lsv2 res2 pc2 =>
+          dm ! pc2 = Some pc1 /\ sig1 = sig2 /\ svos1 = svos2 /\ lsv1 = lsv2 /\ res1 = res2
+      | _ => False
+      end
+  | Sbuiltin ef1 lbs1 br1 pc1 =>
+      match f2 with
+      | Sbuiltin ef2 lbs2 br2 pc2 =>
+          dm ! pc2 = Some pc1 /\ ef1 = ef2 /\ lbs1 = lbs2 /\ br1 = br2
+      | _ => False
+      end
+  | Sjumptable sv1 lpc1 =>
+      match f2 with
+      | Sjumptable sv2 lpc2 =>
+          ptree_get_list dm lpc2 = Some lpc1 /\ sv1 = sv2
+      | _ => False
+      end
+  | Snone =>
+      match f2 with
+      | Snone => dm ! pc2 = Some pc1
+      | _ => False
+      end
+  (* Stailcall, Sreturn *)
+  | _ => f1 = f2
+  end.
+
+Definition hfinal_simu_spec (dm: PTree.t node) (f: RTLpath.function) (pc1 pc2: node) (hf1 hf2: hsfval): Prop :=
+  final_simu_spec dm f pc1 pc2 (hfinal_proj hf1) (hfinal_proj hf2).
+
+Lemma svident_simu_refl f ctx s:
+  svident_simu f ctx s s.
+Proof.
+  destruct s; constructor; [| reflexivity].
+  erewrite <- seval_preserved; [| eapply ctx]. constructor.
+Qed.
+
+Lemma list_proj_refines_eq ge ge' sp rs0 m0 lsv lhsv:
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  list_sval_refines ge sp rs0 m0 lhsv lsv ->
+  forall lhsv' lsv',
+  list_sval_refines ge' sp rs0 m0 lhsv' lsv' ->
+  hsval_list_proj lhsv = hsval_list_proj lhsv' ->
+  seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv' rs0 m0.
+Proof.
+  intros GFS H lhsv' lsv' H' H0.
+  erewrite <- H, H0.
+  erewrite list_sval_eval_preserved; eauto.
+Qed.
+
+Lemma seval_builtin_sval_preserved ge ge' sp sv rs0 m0:
+   (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+   seval_builtin_sval ge sp sv rs0 m0 =
+   seval_builtin_sval ge' sp sv rs0 m0.
+Proof.
+  induction sv; intro FIND; cbn.
+  all: try (erewrite seval_preserved by eauto); trivial.
+  all: erewrite IHsv1 by eauto; erewrite IHsv2 by eauto; reflexivity.
+Qed.
+
+Lemma seval_list_builtin_sval_preserved ge ge' sp lsv rs0 m0:
+   (forall s : ident, Genv.find_symbol ge' s = Genv.find_symbol ge s) ->
+   seval_list_builtin_sval ge sp lsv rs0 m0 =
+   seval_list_builtin_sval ge' sp lsv rs0 m0.
+Proof.
+  induction lsv; intro FIND; cbn. { trivial. }
+  erewrite seval_builtin_sval_preserved by eauto.
+  erewrite IHlsv by eauto.
+  reflexivity.
+Qed.                              
+
+Lemma barg_proj_refines_eq ge ge' sp rs0 m0:
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  forall lhsv lsv, bargs_refines ge sp rs0 m0 lhsv lsv ->
+  forall lhsv' lsv', bargs_refines ge' sp rs0 m0 lhsv' lsv' ->
+  List.map (builtin_arg_map hsval_proj) lhsv = List.map (builtin_arg_map hsval_proj) lhsv' ->
+  seval_list_builtin_sval ge sp lsv rs0 m0 = seval_list_builtin_sval ge' sp lsv' rs0 m0.
+Proof.
+  unfold bargs_refines; intros GFS lhsv lsv H lhsv' lsv' H' H0.
+  erewrite <- H, H0.
+  erewrite seval_list_builtin_sval_preserved; eauto.
+Qed.
+
+Lemma sval_refines_proj ge ge' sp rs m hsv sv hsv' sv':
+  (forall s, Genv.find_symbol ge s = Genv.find_symbol ge' s) ->
+  sval_refines ge sp rs m hsv sv ->
+  sval_refines ge' sp rs m hsv' sv' ->
+  hsval_proj hsv = hsval_proj hsv' ->
+  seval_sval ge sp sv rs m = seval_sval ge' sp sv' rs m.
+Proof.
+  intros GFS REF REF' PROJ.
+  rewrite <- REF, PROJ.
+  erewrite <- seval_preserved; eauto.
+Qed.
+
+Theorem hfinal_simu_spec_correct dm f ctx opc1 opc2 hf1 hf2 f1 f2:
+  hfinal_simu_spec dm f opc1 opc2 hf1 hf2 ->
+  hfinal_refines (the_ge1 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hf1 f1 ->
+  hfinal_refines (the_ge2 ctx) (the_sp ctx) (the_rs0 ctx) (the_m0 ctx) hf2 f2 ->
+  sfval_simu dm f opc1 opc2 ctx f1 f2.
+Proof.
+  assert (GFS: forall s : ident, Genv.find_symbol (the_ge1 ctx) s = Genv.find_symbol (the_ge2 ctx) s) by apply ctx.
+  intros CORE FREF1 FREF2.
+  destruct hf1; inv FREF1.
+  (* Snone *)
+  - destruct hf2; try contradiction. inv FREF2.
+    inv CORE. constructor. assumption.
+  (* Scall *)
+  - rename H5 into SREF1. rename H6 into LREF1.
+    destruct hf2; try contradiction. inv FREF2.
+    rename H5 into SREF2. rename H6 into LREF2.
+    destruct CORE as (PCEQ & ? & ? & ? & ?). subst.
+    rename H0 into SVOSEQ. rename H1 into LSVEQ.
+    constructor; [assumption | |].
+    + destruct svos.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ.
+        simpl in SREF1. simpl in SREF2.
+        rewrite <- SREF1. rewrite <- SREF2.
+        erewrite <- seval_preserved; [| eapply GFS]. congruence.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ. congruence.
+    + erewrite list_proj_refines_eq; eauto.
+  (* Stailcall *)
+  - rename H3 into SREF1. rename H4 into LREF1.
+    destruct hf2; try (inv CORE; fail). inv FREF2.
+    rename H4 into LREF2. rename H3 into SREF2.
+    inv CORE. rename H1 into SVOSEQ. rename H2 into LSVEQ.
+    constructor.
+    + destruct svos. (** Copy-paste from Scall *)
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ.
+        simpl in SREF1. simpl in SREF2.
+        rewrite <- SREF1. rewrite <- SREF2.
+        erewrite <- seval_preserved; [| eapply GFS]. congruence.
+      * destruct svos0; try discriminate. destruct ros; try contradiction.
+        destruct ros0; try contradiction. constructor.
+        simpl in SVOSEQ. inv SVOSEQ. congruence.
+    + erewrite list_proj_refines_eq; eauto.
+  (* Sbuiltin *)
+  - rename H4 into BREF1. destruct hf2; try (inv CORE; fail). inv FREF2.
+    rename H4 into BREF2. inv CORE. destruct H0 as (? & ? & ?). subst.
+    rename H into PCEQ. rename H1 into ARGSEQ. constructor; [assumption|].
+    erewrite barg_proj_refines_eq; eauto. constructor.
+  (* Sjumptable *)
+  - rename H2 into SREF1. destruct hf2; try contradiction. inv FREF2.
+    rename H2 into SREF2. destruct CORE as (A & B). constructor; [assumption|].
+    erewrite sval_refines_proj; eauto.
+  (* Sreturn *)
+  - rename H0 into SREF1.
+    destruct hf2; try discriminate. inv CORE.
+    inv FREF2. destruct osv; destruct res; inv SREF1.
+    + destruct res0; try discriminate. destruct osv0; inv H1.
+      constructor. simpl in H0. inv H0. erewrite sval_refines_proj; eauto.
+    + destruct res0; try discriminate. destruct osv0; inv H1. constructor.
+Qed.
+
+
+(** ** Specification of the simulation test on [hsstate].
+       It is motivated by [hsstate_simu_spec_correct theorem] below
+*)
+
+Definition hsstate_simu_spec (dm: PTree.t node) (f: RTLpath.function) outframe (hst1 hst2: hsstate) :=
+     hsistate_simu_spec dm f outframe (hinternal hst1) (hinternal hst2)
+  /\ hfinal_simu_spec dm f (hsi_pc (hinternal hst1)) (hsi_pc (hinternal hst2)) (hfinal hst1) (hfinal hst2).
+
+Definition hsstate_simu dm f outframe (hst1 hst2: hsstate) ctx: Prop :=
+  forall st1 st2,
+  hsstate_refines hst1 st1 ->
+  hsstate_refines hst2 st2 -> sstate_simu dm f outframe st1 st2 ctx.
+
+Theorem hsstate_simu_spec_correct dm f outframe ctx hst1 hst2:
+  hsstate_simu_spec dm f outframe hst1 hst2 ->
+  hsstate_simu dm f outframe hst1 hst2 ctx.
+Proof.
+  intros (SCORE & FSIMU) st1 st2 (SREF1 & DREF1 & FREF1) (SREF2 & DREF2 & FREF2).
+  generalize SCORE. intro SIMU; eapply hsistate_simu_spec_correct in SIMU; eauto.
+  constructor; auto.
+  intros is1 SEM1 CONT1.
+  unfold hsistate_simu in SIMU. exploit SIMU; clear SIMU; eauto.
+  unfold istate_simu, ssem_internal in *; intros (is2 & SEM2 & SIMU).
+  rewrite! CONT1 in *. destruct SIMU as (CONT2 & _).
+  rewrite! CONT1, <- CONT2 in *.
+  destruct SEM1 as (SEM1 & _ & _).
+  destruct SEM2 as (SEM2 & _ & _).
+  eapply hfinal_simu_spec_correct in FSIMU; eauto.
+  - destruct SREF1 as (PC1 & _). destruct SREF2 as (PC2 & _). rewrite <- PC1. rewrite <- PC2.
+    eapply FSIMU.
+  - eapply FREF1. exploit DREF1. intros (_ & (OK & _) & _). rewrite <- OK. eapply ssem_local_sok; eauto.
+  - eapply FREF2. exploit DREF2. intros (_ & (OK & _) & _). rewrite <- OK. eapply ssem_local_sok; eauto.
+Qed.
diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v
new file mode 100644
index 00000000..aa8db342
--- /dev/null
+++ b/scheduling/RTLpathSE_theory.v
@@ -0,0 +1,1876 @@
+(* A theory of symbolic execution on RTLpath
+
+NB: an efficient implementation with hash-consing will be defined in RTLpathSE_impl.v
+
+*)
+
+Require Import Coqlib Maps Floats.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL RTLpath.
+Require Import Errors Duplicate.
+
+Local Open Scope error_monad_scope.
+
+(* Enhanced from kvx/Asmblockgenproof.v *)
+Ltac explore_hyp :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => (let EQ1 := fresh "EQ" in (destruct var eqn:EQ1; try discriminate))
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  | [ H : Some _ = Some _ |- _ ] => inv H
+  | [ x : unit |- _ ] => destruct x
+  end.
+
+Ltac explore := explore_hyp;
+  repeat match goal with
+  | [ |- context[if ?b then _ else _] ] => (let EQ1 := fresh "IEQ" in destruct b eqn:EQ1)
+  | [ |- context[match ?m with | _ => _ end] ] => (let DEQ1 := fresh "DEQ" in destruct m eqn:DEQ1)
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => (let DREQ1 := fresh "DREQ" in destruct m eqn:DREQ1)
+  end.
+
+(* Ltac explore :=
+  repeat match goal with
+  | [ H : match ?var with | _ => _ end = _ |- _ ] => (let EQ1 := fresh "EQ" in (destruct var eqn:EQ1; try discriminate))
+  | [ H : OK _ = OK _ |- _ ] => monadInv H
+  | [ |- context[if ?b then _ else _] ] => (let EQ1 := fresh "IEQ" in destruct b eqn:EQ1)
+  | [ |- context[match ?m with | _ => _ end] ] => (let DEQ1 := fresh "DEQ" in destruct m eqn:DEQ1)
+  | [ |- context[match ?m as _ return _ with | _ => _ end]] => (let DREQ1 := fresh "DREQ" in destruct m eqn:DREQ1)
+  | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H : Error _ = OK _ |- _ ] => inversion H
+  | [ H : Some _ = Some _ |- _ ] => inv H
+  | [ x : unit |- _ ] => destruct x
+  end. *)
+
+(** * Syntax and semantics of symbolic values *)
+
+(* symbolic value *)
+Inductive sval :=
+  | Sinput (r: reg)
+  | Sop (op:operation) (lsv: list_sval)  (sm: smem)
+  | Sload (sm: smem) (trap: trapping_mode) (chunk:memory_chunk) (addr:addressing) (lsv:list_sval) 
+with list_sval := 
+  | Snil
+  | Scons (sv: sval) (lsv: list_sval)
+(* symbolic memory *)
+with smem :=
+  | Sinit 
+  | Sstore (sm: smem) (chunk:memory_chunk) (addr:addressing) (lsv:list_sval) (srce: sval).
+
+Scheme sval_mut := Induction for sval Sort Prop
+with list_sval_mut := Induction for list_sval Sort Prop
+with smem_mut := Induction for smem Sort Prop.
+
+Fixpoint list_sval_inj (l: list sval): list_sval :=
+  match l with
+  | nil => Snil
+  | v::l => Scons v (list_sval_inj l)
+  end.
+
+Local Open Scope option_monad_scope.
+
+Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): option val :=
+  match sv with
+  | Sinput r => Some (rs0#r)
+  | Sop op l sm =>
+     SOME args <- seval_list_sval ge sp l rs0 m0 IN
+     SOME m <- seval_smem ge sp sm rs0 m0 IN
+     eval_operation ge sp op args m
+  | Sload sm trap chunk addr lsv =>
+      match trap with
+      | TRAP =>
+          SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+          SOME a <- eval_addressing ge sp addr args IN
+          SOME m <- seval_smem ge sp sm rs0 m0 IN
+          Mem.loadv chunk m a
+      | NOTRAP =>
+          SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+          match (eval_addressing ge sp addr args) with
+          | None => Some (default_notrap_load_value chunk)
+          | Some a =>
+              SOME m <- seval_smem ge sp sm rs0 m0 IN
+              match (Mem.loadv chunk m a) with
+              | None => Some (default_notrap_load_value chunk)
+              | Some val => Some val
+              end
+          end
+      end
+  end
+with seval_list_sval (ge: RTL.genv) (sp:val) (lsv: list_sval) (rs0: regset) (m0: mem): option (list val) :=
+  match lsv with
+  | Snil => Some nil
+  | Scons sv lsv' => 
+    SOME v <- seval_sval ge sp sv rs0 m0 IN
+    SOME lv <- seval_list_sval ge sp lsv' rs0 m0 IN
+    Some (v::lv)
+  end
+with seval_smem (ge: RTL.genv) (sp:val) (sm: smem) (rs0: regset) (m0: mem): option mem :=
+  match sm with
+  | Sinit => Some m0
+  | Sstore sm chunk addr lsv srce =>
+     SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+     SOME a <- eval_addressing ge sp addr args IN
+     SOME m <- seval_smem ge sp sm rs0 m0 IN
+     SOME sv <- seval_sval ge sp srce rs0 m0 IN
+     Mem.storev chunk m a sv
+  end.
+
+(* Syntax and Semantics of local symbolic internal states *)
+(* [si_pre] is a precondition on initial ge, sp, rs0, m0 *)
+Record sistate_local := { si_pre: RTL.genv -> val -> regset -> mem -> Prop; si_sreg: reg -> sval; si_smem: smem }.
+
+(* Predicate on which (rs, m) is a possible final state after evaluating [st] on (rs0, m0) *)
+Definition ssem_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset) (m0: mem) (rs: regset) (m: mem): Prop :=
+  st.(si_pre) ge sp rs0 m0
+  /\ seval_smem ge sp st.(si_smem) rs0 m0 = Some m
+  /\ forall (r:reg), seval_sval ge sp (st.(si_sreg) r) rs0 m0 = Some (rs#r).
+
+Definition sabort_local (ge: RTL.genv) (sp:val) (st: sistate_local) (rs0: regset) (m0: mem): Prop :=
+  ~(st.(si_pre) ge sp rs0 m0)
+  \/ seval_smem ge sp st.(si_smem) rs0 m0 = None
+  \/ exists (r: reg), seval_sval ge sp (st.(si_sreg) r) rs0 m0 = None.
+
+(* Syntax and semantics of symbolic exit states *)
+Record sistate_exit := mk_sistate_exit
+  { si_cond: condition; si_scondargs: list_sval; si_elocal: sistate_local; si_ifso: node }.
+
+Definition seval_condition ge sp (cond: condition) (lsv: list_sval) (sm: smem) rs0 m0 : option bool :=
+  SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
+  SOME m <- seval_smem ge sp sm rs0 m0 IN
+  eval_condition cond args m.
+
+Definition all_fallthrough ge sp (lx: list sistate_exit) rs0 m0: Prop :=
+  forall ext, List.In ext lx ->
+  seval_condition ge sp ext.(si_cond) ext.(si_scondargs) ext.(si_elocal).(si_smem) rs0 m0 = Some false.
+
+Lemma all_fallthrough_revcons ge sp ext rs m lx:
+  all_fallthrough ge sp (ext::lx) rs m ->
+  seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs m = Some false
+  /\ all_fallthrough ge sp lx rs m.
+Proof.
+  intros ALLFU. constructor.
+  - assert (In ext (ext::lx)) by (constructor; auto). apply ALLFU in H. assumption.
+  - intros ext' INEXT. assert (In ext' (ext::lx)) by (apply in_cons; auto).
+    apply ALLFU in H. assumption.
+Qed.
+
+(** Semantic of an exit in pseudo code:
+     if si_cond (si_condargs)
+       si_elocal; goto if_so
+     else ()
+*)
+
+Definition ssem_exit (ge: RTL.genv) (sp: val) (ext: sistate_exit) (rs: regset) (m: mem) rs' m' (pc': node) : Prop :=
+    seval_condition ge sp (si_cond ext) (si_scondargs ext) ext.(si_elocal).(si_smem) rs m = Some true
+ /\ ssem_local ge sp (si_elocal ext) rs m rs' m'
+ /\ (si_ifso ext) = pc'.
+
+(* Either an abort on the condition evaluation OR an abort on the sistate_local IF the condition was true *)
+Definition sabort_exit (ge: RTL.genv) (sp: val) (ext: sistate_exit) (rs: regset) (m: mem) : Prop :=
+  let sev_cond := seval_condition ge sp (si_cond ext) (si_scondargs ext) ext.(si_elocal).(si_smem) rs m in
+  sev_cond = None
+  \/ (sev_cond = Some true /\ sabort_local ge sp ext.(si_elocal) rs m).
+
+(** * Syntax and Semantics of symbolic internal state *)
+Record sistate := { si_pc: node; si_exits: list sistate_exit; si_local: sistate_local }. 
+
+Definition all_fallthrough_upto_exit ge sp ext lx' lx rs m : Prop :=
+  is_tail (ext::lx') lx /\ all_fallthrough ge sp lx' rs m.
+
+(** Semantic of a sistate in pseudo code:
+     si_exit1; si_exit2; ...; si_exitn;
+     si_local; goto si_pc *)
+
+(* Note: in RTLpath, is.(icontinue) = false iff we took an early exit *)
+
+Definition ssem_internal (ge: RTL.genv) (sp:val) (st: sistate) (rs: regset) (m: mem) (is: istate): Prop :=
+  if (is.(icontinue)) 
+  then 
+    ssem_local ge sp st.(si_local) rs m is.(irs) is.(imem) 
+    /\ st.(si_pc) = is.(ipc)
+    /\ all_fallthrough ge sp st.(si_exits) rs m
+  else exists ext lx,
+    ssem_exit ge sp ext rs m is.(irs) is.(imem) is.(ipc)
+    /\ all_fallthrough_upto_exit ge sp ext lx st.(si_exits) rs m.
+
+Definition sabort (ge: RTL.genv) (sp: val) (st: sistate) (rs: regset) (m: mem): Prop :=
+  (* No early exit was met but we aborted on the si_local *)
+  (all_fallthrough ge sp st.(si_exits) rs m /\ sabort_local ge sp st.(si_local) rs m)
+  (* OR we aborted on an evaluation of one of the early exits *)
+  \/ (exists ext lx, all_fallthrough_upto_exit ge sp ext lx st.(si_exits) rs m /\ sabort_exit ge sp ext rs m).
+
+Definition ssem_internal_opt ge sp (st: sistate) rs0 m0 (ois: option istate): Prop :=
+  match ois with
+  | Some is => ssem_internal ge sp st rs0 m0 is
+  | None => sabort ge sp st rs0 m0
+  end.
+
+Definition ssem_internal_opt2 ge sp (ost: option sistate) rs0 m0 (ois: option istate) : Prop :=
+  match ost with
+  | Some st => ssem_internal_opt ge sp st rs0 m0 ois
+  | None => ois=None
+  end.
+
+(** * An internal state represents a parallel program !
+
+     We prove below that the semantics [ssem_internal_opt] is deterministic.
+
+ *)
+
+Definition istate_eq ist1 ist2 :=
+  ist1.(icontinue) = ist2.(icontinue) /\
+  ist1.(ipc) = ist2.(ipc) /\
+  (forall r, (ist1.(irs)#r) = ist2.(irs)#r) /\ 
+  ist1.(imem) = ist2.(imem).
+
+Lemma all_fallthrough_noexit ge sp ext lx rs0 m0 rs m pc:
+  ssem_exit ge sp ext rs0 m0 rs m pc ->
+  In ext lx ->
+  all_fallthrough ge sp lx rs0 m0 ->
+  False.
+Proof.
+  Local Hint Resolve is_tail_in: core.
+  intros SSEM INE ALLF.
+  destruct SSEM as (SSEM & SSEM').
+  unfold all_fallthrough in ALLF. rewrite ALLF in SSEM; eauto.
+  discriminate.
+Qed.
+
+Lemma ssem_internal_exclude_incompatible_continue ge sp st rs m is1 is2:
+  is1.(icontinue) = true ->
+  is2.(icontinue) = false ->
+  ssem_internal ge sp st rs m is1 ->
+  ssem_internal ge sp st rs m is2 ->
+  False.
+Proof.
+  Local Hint Resolve all_fallthrough_noexit: core.
+  unfold ssem_internal.
+  intros CONT1 CONT2.
+  rewrite CONT1, CONT2; simpl.
+  intuition eauto.
+  destruct H0 as (ext & lx & SSEME & ALLFU).
+  destruct ALLFU as (ALLFU & ALLFU').
+  eapply all_fallthrough_noexit; eauto.
+Qed.
+
+Lemma ssem_internal_determ_continue ge sp st rs m is1 is2:
+   ssem_internal ge sp st rs m is1 ->
+   ssem_internal ge sp st rs m is2 ->
+   is1.(icontinue) = is2.(icontinue).
+Proof.
+   Local Hint Resolve ssem_internal_exclude_incompatible_continue: core.
+   destruct (Bool.bool_dec is1.(icontinue) is2.(icontinue)) as [|H]; auto.
+   intros H1 H2. assert (absurd: False); intuition.
+   destruct (icontinue is1) eqn: His1, (icontinue is2) eqn: His2; eauto.
+Qed.
+
+Lemma ssem_local_determ ge sp st rs0 m0 rs1 m1 rs2 m2:
+  ssem_local ge sp st rs0 m0 rs1 m1 ->
+  ssem_local ge sp st rs0 m0 rs2 m2 ->
+  (forall r, rs1#r = rs2#r) /\ m1 = m2.
+Proof.
+  unfold ssem_local. intuition try congruence.
+  generalize (H5 r); rewrite H4; congruence.
+Qed.
+
+(* TODO: lemma to move in Coqlib *)
+Lemma is_tail_bounded_total {A} (l1 l2 l3: list A): is_tail l1 l3 -> is_tail l2 l3
+  -> is_tail l1 l2 \/ is_tail l2 l1.
+Proof.
+  Local Hint Resolve is_tail_cons: core.
+  induction 1 as [|i l1 l3 T1 IND]; simpl; auto.
+  intros T2; inversion T2; subst; auto.
+Qed.
+
+Lemma exit_cond_determ ge sp rs0 m0 l1 l2: 
+  is_tail l1 l2 -> forall ext1 lx1 ext2 lx2, 
+  l1=(ext1 :: lx1) -> 
+  l2=(ext2 :: lx2) ->
+  all_fallthrough ge sp lx1 rs0 m0 ->
+  seval_condition ge sp (si_cond ext1) (si_scondargs ext1) (si_smem (si_elocal ext1)) rs0 m0 = Some true ->
+  all_fallthrough ge sp lx2 rs0 m0 ->
+  ext1=ext2.
+Proof.
+  destruct 1 as [l1|i l1 l3 T1]; intros ext1 lx1 ext2 lx2 EQ1 EQ2; subst; 
+  inversion EQ2; subst; auto.
+  intros D1 EVAL NYE.
+  Local Hint Resolve is_tail_in: core.
+  unfold all_fallthrough in NYE.
+  rewrite NYE in EVAL; eauto.
+  try congruence.
+Qed.
+
+Lemma ssem_exit_determ ge sp ext rs0 m0 rs1 m1 pc1 rs2 m2 pc2:
+  ssem_exit ge sp ext rs0 m0 rs1 m1 pc1 ->
+  ssem_exit ge sp ext rs0 m0 rs2 m2 pc2 ->
+  pc1 = pc2 /\ (forall r, rs1#r = rs2#r) /\ m1 = m2.
+Proof.
+  Local Hint Resolve exit_cond_determ eq_sym: core.
+  intros SSEM1 SSEM2. destruct SSEM1 as (SEVAL1 & SLOC1 & PCEQ1). destruct SSEM2 as (SEVAL2 & SLOC2 & PCEQ2). subst.
+  destruct (ssem_local_determ ge sp (si_elocal ext) rs0 m0 rs1 m1 rs2 m2); auto.
+Qed.
+
+Remark is_tail_inv_left {A: Type} (a a': A) l l':
+  is_tail (a::l) (a'::l') ->
+  (a = a' /\ l = l') \/ (In a l' /\ is_tail l (a'::l')).
+Proof.
+  intros. inv H.
+  - left. eauto.
+  - right. econstructor.
+    + eapply is_tail_in; eauto.
+    + eapply is_tail_cons_left; eauto.
+Qed.
+
+Lemma ssem_internal_determ ge sp st rs m is1 is2:
+  ssem_internal ge sp st rs m is1 ->
+  ssem_internal ge sp st rs m is2 ->
+  istate_eq is1 is2.
+Proof.
+  unfold istate_eq.
+  intros SEM1 SEM2. 
+  exploit (ssem_internal_determ_continue ge sp st rs m is1 is2); eauto.
+  intros CONTEQ. unfold ssem_internal in * |-. rewrite CONTEQ in * |- *.
+  destruct (icontinue is2).
+  - destruct (ssem_local_determ ge sp (si_local st) rs m (irs is1) (imem is1) (irs is2) (imem is2)); 
+    intuition (try congruence).
+  - destruct SEM1 as (ext1 & lx1 & SSEME1 & ALLFU1). destruct SEM2 as (ext2 & lx2 & SSEME2 & ALLFU2).
+    destruct ALLFU1 as (ALLFU1 & ALLFU1'). destruct ALLFU2 as (ALLFU2 & ALLFU2').
+    destruct SSEME1 as (SSEME1 & SSEME1' & SSEME1''). destruct SSEME2 as (SSEME2 & SSEME2' & SSEME2'').
+    assert (X:ext1=ext2).
+    { destruct (is_tail_bounded_total (ext1 :: lx1) (ext2 :: lx2) (si_exits st)) as [TAIL|TAIL]; eauto. }
+    subst. destruct (ssem_local_determ ge sp (si_elocal ext2) rs m (irs is1) (imem is1) (irs is2) (imem is2)); auto.
+    intuition. congruence.
+Qed.
+
+Lemma ssem_local_exclude_sabort_local ge sp loc rs m rs' m':
+  ssem_local ge sp loc rs m rs' m' ->
+  sabort_local ge sp loc rs m ->
+  False.
+Proof.
+  intros SIML ABORT. inv SIML. destruct H0 as (H0 & H0').
+  inversion ABORT as [ABORT1 | [ABORT2 | ABORT3]]; [ | | inv ABORT3]; congruence.
+Qed.
+
+Lemma ssem_local_exclude_sabort ge sp st rs m rs' m':
+  ssem_local ge sp (si_local st) rs m rs' m' ->
+  all_fallthrough ge sp (si_exits st) rs m ->
+  sabort ge sp st rs m ->
+  False.
+Proof.
+  intros SIML ALLF ABORT.
+  inv ABORT.
+  - intuition; eapply ssem_local_exclude_sabort_local; eauto.
+  - destruct H as (ext & lx & ALLFU & SABORT).
+    destruct ALLFU as (TAIL & _). eapply is_tail_in in TAIL.
+    eapply ALLF in TAIL.
+    destruct SABORT as [CONDFAIL | (CONDTRUE & ABORTL)]; congruence.
+Qed.
+
+Lemma ssem_exit_fallthrough_upto_exit ge sp ext ext' lx lx' exits rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  all_fallthrough_upto_exit ge sp ext lx exits rs m ->
+  all_fallthrough_upto_exit ge sp ext' lx' exits rs m ->
+  is_tail (ext'::lx') (ext::lx).
+Proof.
+  intros SSEME ALLFU ALLFU'.
+  destruct ALLFU as (ISTAIL & ALLFU). destruct ALLFU' as (ISTAIL' & ALLFU').
+  destruct (is_tail_bounded_total (ext::lx) (ext'::lx') exits); eauto.
+  inv H.
+  - econstructor; eauto.
+  - eapply is_tail_in in H2. eapply ALLFU' in H2.
+    destruct SSEME as (SEVAL & _). congruence.
+Qed.
+
+Lemma ssem_exit_exclude_sabort_exit ge sp ext rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  sabort_exit ge sp ext rs m ->
+  False.
+Proof.
+  intros A B. destruct A as (A & A' & A''). inv B.
+  - congruence.
+  - destruct H as (_ & H). eapply ssem_local_exclude_sabort_local; eauto.
+Qed.
+
+Lemma ssem_exit_exclude_sabort ge sp ext st lx rs m rs' m' pc':
+  ssem_exit ge sp ext rs m rs' m' pc' ->
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs m ->
+  sabort ge sp st rs m ->
+  False.
+Proof.
+  intros SSEM ALLFU ABORT.
+  inv ABORT.
+  - destruct H as (ALLF & _). destruct ALLFU as (TAIL & _).
+    eapply is_tail_in in TAIL.
+    destruct SSEM as (SEVAL & _ & _).
+    eapply ALLF in TAIL. congruence.
+  - destruct H as (ext' & lx' & ALLFU' & ABORT).
+    exploit ssem_exit_fallthrough_upto_exit; eauto. intros ITAIL.
+    destruct ALLFU as (ALLFU1 & ALLFU2). destruct ALLFU' as (ALLFU1' & ALLFU2').
+    exploit (is_tail_inv_left ext' ext lx' lx); eauto. intro. inv H.
+    + inv H0. eapply ssem_exit_exclude_sabort_exit; eauto.
+    + destruct H0 as (INE & TAIL). eapply ALLFU2 in INE. destruct ABORT as [ABORT | (ABORT & ABORT')]; congruence.
+Qed.
+
+Lemma ssem_internal_exclude_sabort ge sp st rs m is:
+  sabort ge sp st rs m ->
+  ssem_internal ge sp st rs m is -> False.
+Proof.
+  intros ABORT SEM.
+  unfold ssem_internal in SEM. destruct icontinue.
+  - destruct SEM as (SEM1 & SEM2 & SEM3).
+    eapply ssem_local_exclude_sabort; eauto.
+  - destruct SEM as (ext & lx & SEM1 & SEM2). eapply ssem_exit_exclude_sabort; eauto.
+Qed.
+
+Definition istate_eq_opt ist1 oist :=
+  exists ist2, oist = Some ist2 /\ istate_eq ist1 ist2.
+
+Lemma ssem_internal_opt_determ ge sp st rs m ois is:
+  ssem_internal_opt ge sp st rs m ois ->
+  ssem_internal ge sp st rs m is ->
+  istate_eq_opt is ois.
+Proof.
+  destruct ois as [is1|]; simpl; eauto.
+  - intros; eexists; intuition; eapply ssem_internal_determ; eauto.
+  - intros; exploit ssem_internal_exclude_sabort; eauto. destruct 1.
+Qed.
+
+(** * Symbolic execution of one internal step *)
+
+Definition slocal_set_sreg (st:sistate_local) (r:reg) (sv:sval) :=
+  {| si_pre:=(fun ge sp rs m => seval_sval ge sp (st.(si_sreg) r) rs m <> None /\ (st.(si_pre) ge sp rs m));
+     si_sreg:=fun y => if Pos.eq_dec r y then sv else st.(si_sreg) y;
+     si_smem:= st.(si_smem)|}.
+
+Definition slocal_set_smem (st:sistate_local) (sm:smem) :=
+  {| si_pre:=(fun ge sp rs m => seval_smem ge sp st.(si_smem) rs m <> None /\ (st.(si_pre) ge sp rs m));
+     si_sreg:= st.(si_sreg);
+     si_smem:= sm |}.
+
+Definition sist_set_local (st: sistate) (pc: node) (nxt: sistate_local): sistate :=
+   {| si_pc := pc; si_exits := st.(si_exits); si_local:= nxt |}.
+
+Definition slocal_store st chunk addr args src : sistate_local :=
+   let args := list_sval_inj (List.map (si_sreg st) args) in
+   let src := si_sreg st src in
+   let sm := Sstore (si_smem st) chunk addr args src
+   in slocal_set_smem st sm.
+
+Definition siexec_inst (i: instruction) (st: sistate): option sistate := 
+  match i with
+  | Inop pc' => 
+      Some (sist_set_local st pc' st.(si_local))
+  | Iop op args dst pc' =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let next := slocal_set_sreg prev dst (Sop op vargs prev.(si_smem)) in
+      Some (sist_set_local st pc' next)
+  | Iload trap chunk addr args dst pc' =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let next := slocal_set_sreg prev dst (Sload prev.(si_smem) trap chunk addr vargs) in
+      Some (sist_set_local st pc' next)
+  | Istore chunk addr args src pc' =>
+      let next := slocal_store st.(si_local) chunk addr args src in
+      Some (sist_set_local st pc' next)
+   | Icond cond args ifso ifnot _ =>
+      let prev := st.(si_local) in
+      let vargs := list_sval_inj (List.map prev.(si_sreg) args) in
+      let ex := {| si_cond:=cond; si_scondargs:=vargs; si_elocal := prev; si_ifso := ifso |} in
+      Some {| si_pc := ifnot; si_exits := ex::st.(si_exits); si_local := prev |}
+  | _ => None
+  end.
+
+Lemma seval_list_sval_inj ge sp l rs0 m0 (sreg: reg -> sval) rs: 
+   (forall r : reg, seval_sval ge sp (sreg r) rs0 m0 = Some (rs # r)) ->
+   seval_list_sval ge sp (list_sval_inj (map sreg l)) rs0 m0 = Some (rs ## l).
+Proof.
+  intros H; induction l as [|r l]; simpl; auto.
+  inversion_SOME v.
+  inversion_SOME lv.
+  generalize (H r).
+  try_simplify_someHyps.
+Qed.
+
+Lemma slocal_set_sreg_preserves_sabort_local ge sp st rs0 m0 r sv:
+  sabort_local ge sp st rs0 m0 ->
+  sabort_local ge sp (slocal_set_sreg st r sv) rs0 m0.
+Proof.
+  unfold sabort_local. simpl; intuition.
+  destruct H as [r1 H]. destruct (Pos.eq_dec r r1) as [TEST|TEST] eqn: HTEST.
+  - subst; rewrite H; intuition.
+  - right. right. exists r1. rewrite HTEST. auto.
+Qed.
+
+Lemma slocal_set_smem_preserves_sabort_local ge sp st rs0 m0 m:
+  sabort_local ge sp st rs0 m0 ->
+  sabort_local ge sp (slocal_set_smem st m) rs0 m0.
+Proof.
+  unfold sabort_local. simpl; intuition.
+Qed.
+
+Lemma all_fallthrough_upto_exit_cons ge sp ext lx ext' exits rs m:
+  all_fallthrough_upto_exit ge sp ext lx exits rs m ->
+  all_fallthrough_upto_exit ge sp ext lx (ext'::exits) rs m.
+Proof.
+  intros. inv H. econstructor; eauto.
+Qed.
+
+Lemma all_fallthrough_cons ge sp exits rs m ext:
+  all_fallthrough ge sp exits rs m ->
+  seval_condition ge sp (si_cond ext) (si_scondargs ext) (si_smem (si_elocal ext)) rs m = Some false ->
+  all_fallthrough ge sp (ext::exits) rs m.
+Proof.
+  intros. unfold all_fallthrough in *. intros.
+  inv H1; eauto.
+Qed.
+
+Lemma siexec_inst_preserves_sabort i ge sp rs m st st': 
+  siexec_inst i st = Some st' ->
+  sabort ge sp st rs m -> sabort ge sp st' rs m.
+Proof.
+  intros SISTEP ABORT.
+  destruct i; simpl in SISTEP; try discriminate; inv SISTEP; unfold sabort; simpl.
+  (* NOP *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* OP *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_sreg_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* LOAD *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_sreg_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* STORE *)
+  * destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - left. constructor; eauto. eapply slocal_set_smem_preserves_sabort_local; eauto.
+    - right. exists ext0, lx0. constructor; eauto.
+  (* COND *)
+  * remember ({| si_cond := _; si_scondargs := _; si_elocal := _; si_ifso := _ |}) as ext.
+    destruct ABORT as [(ALLF & ABORTL) | (ext0 & lx0 & ALLFU & ABORTE)].
+    - destruct (seval_condition ge sp (si_cond ext) (si_scondargs ext)
+        (si_smem (si_elocal ext)) rs m) eqn:SEVAL; [destruct b|].
+      (* case true *)
+      + right. exists ext, (si_exits st).
+        constructor.
+        ++ constructor. econstructor; eauto. eauto.
+        ++ unfold sabort_exit. right. constructor; eauto.
+           subst. simpl. eauto.
+      (* case false *)
+      + left. constructor; eauto. eapply all_fallthrough_cons; eauto.
+      (* case None *)
+      + right. exists ext, (si_exits st). constructor.
+        ++ constructor. econstructor; eauto. eauto.
+        ++ unfold sabort_exit. left. eauto.
+    - right. exists ext0, lx0. constructor; eauto. eapply all_fallthrough_upto_exit_cons; eauto.
+Qed.
+
+Lemma siexec_inst_WF i st:
+  siexec_inst i st = None -> default_succ i = None.
+Proof.
+  destruct i; simpl; unfold sist_set_local; simpl; congruence.
+Qed.
+
+Lemma siexec_inst_default_succ i st st':
+  siexec_inst i st = Some st' -> default_succ i = Some (st'.(si_pc)).
+Proof.
+  destruct i; simpl; unfold sist_set_local; simpl; try congruence;
+  intro H; inversion_clear H; simpl; auto.
+Qed.
+
+
+Lemma seval_list_sval_inj_not_none ge sp st rs0 m0: forall l,
+  (forall r, List.In r l -> seval_sval ge sp (si_sreg st r) rs0 m0 = None -> False) ->
+  seval_list_sval ge sp (list_sval_inj (map (si_sreg st) l)) rs0 m0 = None -> False.
+Proof.
+  induction l.
+  - intuition discriminate.
+  - intros ALLR. simpl.
+    inversion_SOME v.
+    + intro SVAL. inversion_SOME lv; [discriminate|].
+      assert (forall r : reg, In r l -> seval_sval ge sp (si_sreg st r) rs0 m0 = None -> False).
+      { intros r INR. eapply ALLR. right. assumption. }
+      intro SVALLIST. intro. eapply IHl; eauto.
+    + intros. exploit (ALLR a); simpl; eauto.
+Qed.
+
+Lemma siexec_inst_correct ge sp i st rs0 m0 rs m:
+  ssem_local ge sp st.(si_local) rs0 m0 rs m ->
+  all_fallthrough ge sp st.(si_exits) rs0 m0 ->
+  ssem_internal_opt2 ge sp (siexec_inst i st) rs0 m0 (istep ge i sp rs m).
+Proof.
+  intros (PRE & MEM & REG) NYE.
+  destruct i; simpl; auto.
+  + (* Nop *)
+    constructor; [|constructor]; simpl; auto.
+    constructor; auto.
+  + (* Op *)
+    inversion_SOME v; intros OP; simpl.
+    - constructor; [|constructor]; simpl; auto.
+      constructor; simpl; auto.
+      * constructor; auto. congruence.
+      * constructor; auto.
+        intro r0. destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+        subst. rewrite Regmap.gss; simpl; auto.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps.
+    - left. constructor; simpl; auto.
+      unfold sabort_local. right. right.
+      simpl. exists r. destruct (Pos.eq_dec r r); try congruence.
+      simpl. erewrite seval_list_sval_inj; simpl; auto.
+      try_simplify_someHyps.
+  + (* LOAD *) 
+    inversion_SOME a0; intro ADD.
+    { inversion_SOME v; intros LOAD; simpl. 
+      - explore_destruct; unfold ssem_internal, ssem_local; simpl; intuition.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          erewrite seval_list_sval_inj; simpl; auto.
+          try_simplify_someHyps.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          inversion_SOME args; intros ARGS.
+          2: { exploit seval_list_sval_inj_not_none; eauto; intuition congruence. }
+          exploit seval_list_sval_inj; eauto. intro ARGS'. erewrite ARGS in ARGS'. inv ARGS'. rewrite ADD.
+          inversion_SOME m2. intro SMEM.
+          assert (m = m2) by congruence. subst. rewrite LOAD. reflexivity.
+      - explore_destruct; unfold sabort, sabort_local; simpl.
+        * unfold sabort. simpl. left. constructor; auto.
+          right. right. exists r. simpl. destruct (Pos.eq_dec r r); try congruence.
+          simpl. erewrite seval_list_sval_inj; simpl; auto.
+          rewrite ADD; simpl; auto. try_simplify_someHyps.
+        * unfold ssem_internal. simpl. constructor; [|constructor]; auto.
+          constructor; constructor; simpl; auto. congruence. intro r0.
+          destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+          subst; rewrite Regmap.gss; simpl.
+          erewrite seval_list_sval_inj; simpl; auto.
+          try_simplify_someHyps.
+     } { rewrite ADD. destruct t.
+          - simpl. left; eauto. simpl. econstructor; eauto.
+            right. right. simpl. exists r. destruct (Pos.eq_dec r r); [|contradiction].
+            simpl. inversion_SOME args. intro SLS.
+            eapply seval_list_sval_inj in REG. rewrite REG in SLS. inv SLS.
+            rewrite ADD. reflexivity.
+          - simpl. constructor; [|constructor]; simpl; auto.
+            constructor; simpl; constructor; auto; [congruence|].
+            intro r0. destruct (Pos.eq_dec r r0); [|rewrite Regmap.gso; auto].
+            subst. simpl. rewrite Regmap.gss.
+            erewrite seval_list_sval_inj; simpl; auto.
+            try_simplify_someHyps.
+     }
+  + (* STORE *)
+    inversion_SOME a0; intros ADD.
+    { inversion_SOME m'; intros STORE; simpl.
+      - unfold ssem_internal, ssem_local; simpl; intuition.
+        * congruence.
+        * erewrite seval_list_sval_inj; simpl; auto.
+          erewrite REG.
+          try_simplify_someHyps.
+      - unfold sabort, sabort_local; simpl.
+        left. constructor; auto. right. left.
+        erewrite seval_list_sval_inj; simpl; auto.
+        erewrite REG.
+        try_simplify_someHyps. }
+    { unfold sabort, sabort_local; simpl.
+      left. constructor; auto. right. left.
+      erewrite seval_list_sval_inj; simpl; auto.
+      erewrite ADD; simpl; auto. }
+  + (* COND *)
+    Local Hint Resolve is_tail_refl: core.
+    Local Hint Unfold ssem_local: core.
+    inversion_SOME b; intros COND.
+    { destruct b; simpl; unfold ssem_internal, ssem_local; simpl.
+      - remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+        constructor; constructor; subst; simpl; auto.
+        unfold seval_condition. subst; simpl.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps.
+      - intuition. unfold all_fallthrough in * |- *. simpl.
+        intuition. subst. simpl.
+        unfold seval_condition.
+        erewrite seval_list_sval_inj; simpl; auto.
+        try_simplify_someHyps. }
+    { unfold sabort. simpl. right.
+      remember (mk_sistate_exit _ _ _ _) as ext. exists ext, (si_exits st).
+      constructor; [constructor; subst; simpl; auto|].
+      left. subst; simpl; auto.
+      unfold seval_condition.
+      erewrite seval_list_sval_inj; simpl; auto.
+      try_simplify_someHyps. }
+Qed.
+
+
+Lemma siexec_inst_correct_None ge sp i st rs0 m0 rs m:
+  ssem_local ge sp (st.(si_local)) rs0 m0 rs m ->
+  siexec_inst i st = None -> 
+  istep ge i sp rs m = None.
+Proof.
+  intros (PRE & MEM & REG).
+  destruct i; simpl; unfold sist_set_local, ssem_internal, ssem_local; simpl; try_simplify_someHyps.
+Qed.
+
+(** * Symbolic execution of the internal steps of a path *)
+Fixpoint siexec_path (path:nat) (f: function) (st: sistate): option sistate :=
+  match path with
+  | O => Some st
+  | S p =>
+    SOME i <- (fn_code f)!(st.(si_pc)) IN
+    SOME st1 <- siexec_inst i st IN
+    siexec_path p f st1
+  end.
+
+Lemma siexec_inst_add_exits i st st':
+  siexec_inst i st = Some st' ->
+  ( si_exits st' = si_exits st \/ exists ext, si_exits st' = ext :: si_exits st ).
+Proof.
+  destruct i; simpl; intro SISTEP; inversion_clear SISTEP; unfold siexec_inst; simpl; (discriminate || eauto).
+Qed.
+
+Lemma siexec_inst_preserves_allfu ge sp ext lx rs0 m0 st st' i:
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs0 m0 ->
+  siexec_inst i st = Some st' ->
+  all_fallthrough_upto_exit ge sp ext lx (si_exits st') rs0 m0.
+Proof.
+  intros ALLFU SISTEP. destruct ALLFU as (ISTAIL & ALLF).
+  constructor; eauto.
+  destruct i; simpl in SISTEP; inversion_clear SISTEP; simpl; (discriminate || eauto).
+Qed.
+
+Lemma siexec_path_correct_false ge sp f rs0 m0 st' is:
+  forall path,
+  is.(icontinue)=false ->
+  forall st, ssem_internal ge sp st rs0 m0 is ->
+  siexec_path path f st = Some st' ->
+  ssem_internal ge sp st' rs0 m0 is.
+Proof.
+  induction path; simpl.
+  - intros. congruence.
+  - intros ICF st SSEM STEQ'.
+    destruct ((fn_code f) ! (si_pc st)) eqn:FIC; [|discriminate].
+    destruct (siexec_inst _ _) eqn:SISTEP; [|discriminate].
+    eapply IHpath. 3: eapply STEQ'. eauto.
+    unfold ssem_internal in SSEM. rewrite ICF in SSEM.
+    destruct SSEM as (ext & lx & SEXIT & ALLFU).
+    unfold ssem_internal. rewrite ICF. exists ext, lx.
+    constructor; auto. eapply siexec_inst_preserves_allfu; eauto.
+Qed.
+
+Lemma siexec_path_preserves_sabort ge sp path f rs0 m0 st': forall st, 
+  siexec_path path f st = Some st' ->
+  sabort ge sp st rs0 m0 -> sabort ge sp st' rs0 m0.
+Proof.
+  Local Hint Resolve siexec_inst_preserves_sabort: core.
+  induction path; simpl.
+  + unfold sist_set_local; try_simplify_someHyps.
+  + intros st; inversion_SOME i.
+    inversion_SOME st1; eauto.
+Qed.
+
+Lemma siexec_path_WF path f: forall st,
+  siexec_path path f st = None -> nth_default_succ (fn_code f) path st.(si_pc) = None.
+Proof.
+  induction path; simpl.
+  + unfold sist_set_local. intuition congruence.
+  + intros st; destruct ((fn_code f) ! (si_pc st)); simpl; try tauto.
+    destruct (siexec_inst i st) as [st1|] eqn: Hst1; simpl.
+    - intros; erewrite siexec_inst_default_succ; eauto.
+    - intros; erewrite siexec_inst_WF; eauto.
+Qed.
+
+Lemma siexec_path_default_succ path f st': forall st,
+  siexec_path path f st = Some st' -> nth_default_succ (fn_code f) path st.(si_pc) = Some st'.(si_pc).
+Proof.
+  induction path; simpl.
+  + unfold sist_set_local. intros st H. inversion_clear H; simpl; try congruence.
+  + intros st; destruct ((fn_code f) ! (si_pc st)); simpl; try congruence.
+    destruct (siexec_inst i st) as [st1|] eqn: Hst1; simpl; try congruence.
+    intros; erewrite siexec_inst_default_succ; eauto.
+Qed.
+
+Lemma siexec_path_correct_true ge sp path (f:function) rs0 m0: forall st is,
+  is.(icontinue)=true ->
+  ssem_internal ge sp st rs0 m0 is -> 
+  nth_default_succ (fn_code f) path st.(si_pc) <> None ->
+  ssem_internal_opt2 ge sp (siexec_path path f st) rs0 m0
+                         (isteps ge path f sp is.(irs) is.(imem) is.(ipc))
+  .
+Proof.
+  Local Hint Resolve siexec_path_correct_false siexec_path_preserves_sabort siexec_path_WF: core.
+  induction path; simpl.
+  + intros st is CONT INV WF;
+    unfold ssem_internal, sist_set_local in * |- *;
+    try_simplify_someHyps. simpl.
+    destruct is; simpl in * |- *; subst; intuition auto.
+  + intros st is CONT; unfold ssem_internal at 1; rewrite CONT.
+    intros (LOCAL & PC & NYE) WF.
+    rewrite <- PC.
+    inversion_SOME i; intro Hi; rewrite Hi in WF |- *; simpl; auto.
+    exploit siexec_inst_correct; eauto. 
+    inversion_SOME st1; intros Hst1; erewrite Hst1; simpl.
+    - inversion_SOME is1; intros His1;rewrite His1; simpl. 
+      * destruct (icontinue is1) eqn:CONT1.
+        (* icontinue is0 = true *)
+        intros; eapply IHpath; eauto.
+        destruct i; simpl in * |- *; unfold sist_set_local in * |- *; try_simplify_someHyps.
+        (* icontinue is0 = false -> EARLY EXIT *)
+        destruct (siexec_path path f st1) as [st2|] eqn: Hst2; simpl; eauto.
+        destruct WF. erewrite siexec_inst_default_succ; eauto.
+        (* try_simplify_someHyps; eauto. *)
+      * destruct (siexec_path path f st1) as [st2|] eqn: Hst2; simpl; eauto.
+    - intros His1;rewrite His1; simpl; auto.
+Qed.
+
+(** REM: in the following two unused lemmas *)
+
+Lemma siexec_path_right_assoc_decompose f path: forall st st',
+  siexec_path (S path) f st = Some st' ->
+  exists st0, siexec_path path f st = Some st0 /\ siexec_path 1%nat f st0 = Some st'.
+Proof.
+  induction path; simpl; eauto.
+  intros st st'.
+  inversion_SOME i1.
+  inversion_SOME st1.
+  try_simplify_someHyps; eauto.
+Qed.
+
+Lemma siexec_path_right_assoc_compose f path: forall st st0 st',
+  siexec_path path f st = Some st0 ->
+  siexec_path 1%nat f st0 = Some st' ->
+  siexec_path (S path) f st = Some st'.
+Proof.
+  induction path.
+  + intros st st0 st' H. simpl in H.
+    try_simplify_someHyps; auto.
+  + intros st st0 st'.
+    assert (X:exists x, x=(S path)); eauto.
+    destruct X as [x X]. 
+    intros H1 H2. rewrite <- X.
+    generalize H1; clear H1. simpl.
+    inversion_SOME i1. intros Hi1; rewrite Hi1.
+    inversion_SOME st1. intros Hst1; rewrite Hst1.
+    subst; eauto.
+Qed.
+
+(** * Symbolic (final) value of a path *)
+Inductive sfval :=
+  | Snone
+  | Scall (sig:signature) (svos: sval + ident) (lsv:list_sval) (res:reg) (pc:node)
+    (* NB: [res] the return register is hard-wired ! Is it restrictive ? *)
+  | Stailcall: signature -> sval + ident -> list_sval -> sfval
+  | Sbuiltin (ef:external_function) (sargs: list (builtin_arg sval)) (res: builtin_res reg) (pc:node)
+  | Sjumptable (sv: sval) (tbl: list node)
+  | Sreturn: option sval -> sfval
+.
+
+Definition sfind_function (pge: RTLpath.genv) (ge: RTL.genv) (sp: val) (svos : sval + ident) (rs0: regset) (m0: mem): option fundef :=
+  match svos with
+  | inl sv => SOME v <- seval_sval ge sp sv rs0 m0 IN Genv.find_funct pge v
+  | inr symb => SOME b <- Genv.find_symbol pge symb IN Genv.find_funct_ptr pge b
+  end.
+
+Section SEVAL_BUILTIN_ARG. (* adapted from Events.v *)
+
+Variable ge: RTL.genv.
+Variable sp: val.
+Variable m: mem.
+Variable rs0: regset.
+Variable m0: mem.
+
+Inductive seval_builtin_arg: builtin_arg sval -> val -> Prop :=
+  | seval_BA: forall x v,
+      seval_sval ge sp x rs0 m0 = Some v ->
+      seval_builtin_arg (BA x) v
+  | seval_BA_int: forall n,
+      seval_builtin_arg (BA_int n) (Vint n)
+  | seval_BA_long: forall n,
+      seval_builtin_arg (BA_long n) (Vlong n)
+  | seval_BA_float: forall n,
+      seval_builtin_arg (BA_float n) (Vfloat n)
+  | seval_BA_single: forall n,
+      seval_builtin_arg (BA_single n) (Vsingle n)
+  | seval_BA_loadstack: forall chunk ofs v,
+      Mem.loadv chunk m (Val.offset_ptr sp ofs) = Some v ->
+      seval_builtin_arg (BA_loadstack chunk ofs) v
+  | seval_BA_addrstack: forall ofs,
+      seval_builtin_arg (BA_addrstack ofs) (Val.offset_ptr sp ofs)
+  | seval_BA_loadglobal: forall chunk id ofs v,
+      Mem.loadv chunk m (Senv.symbol_address ge id ofs) = Some v ->
+      seval_builtin_arg (BA_loadglobal chunk id ofs) v
+  | seval_BA_addrglobal: forall id ofs,
+      seval_builtin_arg (BA_addrglobal id ofs) (Senv.symbol_address ge id ofs)
+  | seval_BA_splitlong: forall hi lo vhi vlo,
+      seval_builtin_arg hi vhi -> seval_builtin_arg lo vlo ->
+      seval_builtin_arg (BA_splitlong hi lo) (Val.longofwords vhi vlo)
+  | seval_BA_addptr: forall a1 a2 v1 v2,
+      seval_builtin_arg a1 v1 -> seval_builtin_arg a2 v2 ->
+      seval_builtin_arg (BA_addptr a1 a2)
+                       (if Archi.ptr64 then Val.addl v1 v2 else Val.add v1 v2).
+
+Definition seval_builtin_args (al: list (builtin_arg sval)) (vl: list val) : Prop :=
+  list_forall2 seval_builtin_arg al vl.
+
+Lemma seval_builtin_arg_determ:
+  forall a v, seval_builtin_arg a v -> forall v', seval_builtin_arg a v' -> v' = v.
+Proof.
+  induction 1; intros v' EV; inv EV; try congruence.
+  f_equal; eauto.
+  apply IHseval_builtin_arg1 in H3. apply IHseval_builtin_arg2 in H5. subst; auto. 
+Qed.
+
+Lemma eval_builtin_args_determ:
+  forall al vl, seval_builtin_args al vl -> forall vl', seval_builtin_args al vl' -> vl' = vl.
+Proof.
+  induction 1; intros v' EV; inv EV; f_equal; eauto using seval_builtin_arg_determ.
+Qed.
+
+End SEVAL_BUILTIN_ARG.
+
+Inductive ssem_final (pge: RTLpath.genv) (ge: RTL.genv) (sp:val) (npc: node) stack (f: function) (rs0: regset) (m0: mem): sfval -> regset -> mem -> trace -> state -> Prop :=
+  | exec_Snone rs m:
+      ssem_final pge ge sp npc stack f rs0 m0 Snone rs m E0 (State stack f sp npc rs m)
+  | exec_Scall rs m sig svos lsv args res pc fd:
+      sfind_function pge ge sp svos rs0 m0 = Some fd ->
+      funsig fd = sig ->
+      seval_list_sval ge sp lsv rs0 m0 = Some args ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Scall sig svos lsv res pc) rs m
+        E0 (Callstate (Stackframe res f sp pc rs :: stack) fd args m)
+  | exec_Stailcall stk rs m sig svos args fd m' lsv:
+      sfind_function pge ge sp svos rs0 m0 = Some fd ->
+      funsig fd = sig ->
+      sp = Vptr stk Ptrofs.zero ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      seval_list_sval ge sp lsv rs0 m0 = Some args ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Stailcall sig svos lsv) rs m
+        E0 (Callstate stack fd args m')
+  | exec_Sbuiltin m' rs m vres res pc t sargs ef vargs:
+      seval_builtin_args ge sp m rs0 m0 sargs vargs ->
+      external_call ef ge vargs m t vres m' ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sbuiltin ef sargs res pc) rs m
+        t (State stack f sp pc (regmap_setres res vres rs) m')
+  | exec_Sjumptable sv tbl pc' n rs m:
+      seval_sval ge sp sv rs0 m0 = Some (Vint n) ->
+      list_nth_z tbl (Int.unsigned n) = Some pc' ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sjumptable sv tbl) rs m
+        E0 (State stack f sp pc' rs m)
+  | exec_Sreturn stk osv rs m m' v:
+      sp = (Vptr stk Ptrofs.zero) ->
+      Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+      match osv with Some sv => seval_sval ge sp sv rs0 m0 | None => Some Vundef end = Some v ->
+      ssem_final pge ge sp npc stack f rs0 m0 (Sreturn osv) rs m 
+         E0 (Returnstate stack v m')
+.
+
+Record sstate := { internal:> sistate; final: sfval }.
+
+Inductive ssem pge (ge: RTL.genv) (sp:val) (st: sstate) stack f (rs0: regset) (m0: mem): trace -> state -> Prop :=
+  | ssem_early is:
+     is.(icontinue) = false ->
+     ssem_internal ge sp st rs0 m0 is -> 
+     ssem pge ge sp st stack f rs0 m0 E0 (State stack f sp is.(ipc) is.(irs) is.(imem))
+  | ssem_normal is t s:
+     is.(icontinue) = true ->
+     ssem_internal ge sp st rs0 m0 is ->  
+     ssem_final pge ge sp st.(si_pc) stack f rs0 m0 st.(final) is.(irs) is.(imem) t s ->
+     ssem pge ge sp st stack f rs0 m0 t s 
+  .
+
+(* NB: generic function that could be put into [AST] file *)
+Fixpoint builtin_arg_map {A B} (f: A -> B) (arg: builtin_arg A) : builtin_arg B :=
+  match arg with
+  | BA x => BA (f x)
+  | BA_int n => BA_int n
+  | BA_long n => BA_long n
+  | BA_float f => BA_float f
+  | BA_single s => BA_single s
+  | BA_loadstack chunk ptr => BA_loadstack chunk ptr
+  | BA_addrstack ptr => BA_addrstack ptr
+  | BA_loadglobal chunk id ptr => BA_loadglobal chunk id ptr
+  | BA_addrglobal id ptr => BA_addrglobal id ptr
+  | BA_splitlong ba1 ba2 => BA_splitlong (builtin_arg_map f ba1) (builtin_arg_map f ba2)
+  | BA_addptr ba1 ba2 => BA_addptr (builtin_arg_map f ba1) (builtin_arg_map f ba2)
+  end.
+
+Lemma seval_builtin_arg_correct ge sp rs m rs0 m0 sreg: forall arg varg,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  eval_builtin_arg ge (fun r => rs # r) sp m arg varg ->
+  seval_builtin_arg ge sp m rs0 m0 (builtin_arg_map sreg arg) varg.
+Proof.
+  induction arg.
+  all: try (intros varg SEVAL BARG; inv BARG; constructor; congruence).
+  - intros varg SEVAL BARG. inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+  - intros varg SEVAL BARG. inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+Qed.
+
+Lemma seval_builtin_args_correct ge sp rs m rs0 m0 sreg args vargs:
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  eval_builtin_args ge (fun r => rs # r) sp m args vargs ->
+  seval_builtin_args ge sp m rs0 m0 (map (builtin_arg_map sreg) args) vargs.
+Proof.
+  induction 2.
+  - constructor.
+  - simpl. constructor; [| assumption].
+    eapply seval_builtin_arg_correct; eauto.
+Qed.
+
+Lemma seval_builtin_arg_complete ge sp rs m rs0 m0 sreg: forall arg varg,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  seval_builtin_arg ge sp m rs0 m0 (builtin_arg_map sreg arg) varg ->
+  eval_builtin_arg ge (fun r => rs # r) sp m arg varg.
+Proof.
+  induction arg.
+  all: intros varg SEVAL BARG; try (inv BARG; constructor; congruence).
+  - inv BARG. rewrite SEVAL in H0. inv H0. constructor.
+  - inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+  - inv BARG. simpl. constructor.
+    eapply IHarg1; eauto. eapply IHarg2; eauto.
+Qed.
+
+Lemma seval_builtin_args_complete ge sp rs m rs0 m0 sreg: forall args vargs,
+  (forall r, seval_sval ge sp (sreg r) rs0 m0 = Some rs # r) ->
+  seval_builtin_args ge sp m rs0 m0 (map (builtin_arg_map sreg) args) vargs ->
+  eval_builtin_args ge (fun r => rs # r) sp m args vargs.
+Proof.
+  induction args.
+  - simpl. intros. inv H0. constructor.
+  - intros vargs SEVAL BARG. simpl in BARG. inv BARG.
+    constructor; [| eapply IHargs; eauto].
+    eapply seval_builtin_arg_complete; eauto.
+Qed.
+
+(** * Symbolic execution of final step *)
+Definition sexec_final (i: instruction) (prev: sistate_local): sfval := 
+  match i with
+  | Icall sig ros args res pc => 
+    let svos := sum_left_map prev.(si_sreg) ros in
+    let sargs := list_sval_inj (List.map prev.(si_sreg) args) in
+    Scall sig svos sargs res pc
+  | Itailcall sig ros args =>
+    let svos := sum_left_map prev.(si_sreg) ros in
+    let sargs := list_sval_inj (List.map prev.(si_sreg) args) in
+    Stailcall sig svos sargs
+  | Ibuiltin ef args res pc =>
+    let sargs := List.map (builtin_arg_map prev.(si_sreg)) args in
+    Sbuiltin ef sargs res pc
+  | Ireturn or => 
+    let sor := SOME r <- or IN Some (prev.(si_sreg) r) in
+    Sreturn sor
+  | Ijumptable reg tbl =>
+    let sv := prev.(si_sreg) reg in
+    Sjumptable sv tbl
+  | _ => Snone
+  end.
+
+Lemma sexec_final_correct pge ge sp i (f:function) pc st stack rs0 m0 t rs m s:
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  ssem_local ge sp (si_local st) rs0 m0 rs m ->
+  path_last_step ge pge stack f sp pc rs m t s ->
+  siexec_inst i st = None -> 
+  ssem_final pge ge sp pc stack f rs0 m0 (sexec_final i (si_local st)) rs m t s.
+Proof.
+  intros PC1 PC2 (PRE&MEM&REG) LAST. destruct LAST; subst; try_simplify_someHyps; simpl.
+  + (* Snone *) intro Hi; destruct i; simpl in Hi |- *; unfold sist_set_local in Hi; try congruence.
+  + (* Icall *) intros; eapply exec_Scall; auto.
+    - destruct ros; simpl in * |- *; auto.
+      rewrite REG; auto.
+    - erewrite seval_list_sval_inj; simpl; auto.
+  + (* Itailcall *) intros. eapply exec_Stailcall; auto.
+    - destruct ros; simpl in * |- *; auto.
+      rewrite REG; auto.
+    - erewrite seval_list_sval_inj; simpl; auto.
+  + (* Ibuiltin *) intros. eapply exec_Sbuiltin; eauto.
+    eapply seval_builtin_args_correct; eauto.
+  + (* Ijumptable *) intros. eapply exec_Sjumptable; eauto. congruence.
+  + (* Ireturn *) intros; eapply exec_Sreturn; simpl; eauto.
+    destruct or; simpl; auto.
+Qed.
+
+Lemma sexec_final_complete i (f:function) pc st ge pge sp stack rs0 m0 t rs m s:
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  ssem_local ge sp (si_local st) rs0 m0 rs m ->
+  ssem_final pge ge sp pc stack f rs0 m0 (sexec_final i (si_local st)) rs m t s ->
+  siexec_inst i st = None -> 
+  path_last_step ge pge stack f sp pc rs m t s.
+Proof.
+  intros PC1 PC2 (PRE&MEM&REG) LAST HSIS.
+  destruct i as [ (* Inop *) | (* Iop *) | (* Iload *) | (* Istore *)
+    | (* Icall *) sig ros args res pc'
+    | (* Itailcall *) sig ros args
+    | (* Ibuiltin *) ef bargs br pc'
+    | (* Icond *)
+    | (* Ijumptable *) jr tbl
+    | (*Ireturn*) or]; 
+    subst; try_simplify_someHyps; try (unfold sist_set_local in HSIS; try congruence);
+    inversion LAST; subst; clear LAST; simpl in * |- *.
+  + (* Icall *)
+    erewrite seval_list_sval_inj in * |- ; simpl; try_simplify_someHyps; auto.
+    intros; eapply exec_Icall; eauto.
+    destruct ros; simpl in * |- *; auto.
+    rewrite REG in * |- ; auto.
+  + (* Itailcall *)
+    intros HPC SMEM. erewrite seval_list_sval_inj in H10; auto. inv H10. 
+    eapply exec_Itailcall; eauto.
+    destruct ros; simpl in * |- *; auto.
+    rewrite REG in * |- ; auto.
+  + (* Ibuiltin *) intros HPC SMEM.
+    eapply exec_Ibuiltin; eauto.
+    eapply seval_builtin_args_complete; eauto.
+  + (* Ijumptable *) intros HPC SMEM.
+    eapply exec_Ijumptable; eauto.
+    congruence.
+  + (* Ireturn *)
+    intros; subst. enough (v=regmap_optget or Vundef rs) as ->.
+    * eapply exec_Ireturn; eauto.
+    * intros; destruct or; simpl; congruence.
+Qed.
+
+(** * Main function of the symbolic execution *)
+
+Definition init_sistate_local := {| si_pre:= fun _ _ _ _ => True; si_sreg:= fun r => Sinput r; si_smem:= Sinit |}.
+
+Definition init_sistate pc := {| si_pc:= pc; si_exits:=nil; si_local:= init_sistate_local |}.
+
+Lemma init_ssem_internal ge sp pc rs m: ssem_internal ge sp (init_sistate pc) rs m (mk_istate true pc rs m).
+Proof.
+  unfold ssem_internal, ssem_local, all_fallthrough; simpl. intuition.
+Qed.
+
+Definition sexec (f: function) (pc:node): option sstate :=
+  SOME path <- (fn_path f)!pc IN
+  SOME st <- siexec_path path.(psize) f (init_sistate pc) IN
+  SOME i <- (fn_code f)!(st.(si_pc)) IN
+  Some (match siexec_inst i st with
+       | Some st' => {| internal := st'; final := Snone |}
+       | None => {| internal := st; final := sexec_final i st.(si_local) |}
+       end).
+
+Lemma final_node_path_simpl f path pc:
+   (fn_path f)!pc = Some path -> nth_default_succ_inst (fn_code f) path.(psize) pc <> None. 
+Proof.
+  intros; exploit final_node_path; eauto.
+  intros (i & NTH & DUM).
+  congruence.
+Qed.
+
+Lemma symb_path_last_step i st st' ge pge stack (f:function) sp pc rs m t s: 
+  (fn_code f) ! pc = Some i ->
+  pc = st.(si_pc) ->
+  siexec_inst i st = Some st' ->
+  path_last_step ge pge stack f sp pc rs m t s ->
+  exists mk_istate, 
+     istep ge i sp rs m = Some mk_istate 
+  /\ t = E0 
+  /\ s = (State stack f sp mk_istate.(ipc) mk_istate.(RTLpath.irs) mk_istate.(imem)).
+Proof.
+  intros PC1 PC2 Hst' LAST; destruct LAST; subst; try_simplify_someHyps; simpl.
+Qed.
+
+(* NB: each concrete execution can be executed on the symbolic state (produced from [sexec]) 
+(sexec is a correct over-approximation)
+*)
+Theorem sexec_correct f pc pge ge sp path stack rs m t s: 
+  (fn_path f)!pc = Some path ->
+  path_step ge pge path.(psize) stack f sp rs m pc t s ->
+  exists st, sexec f pc = Some st /\ ssem pge ge sp st stack f rs m t s.
+Proof.
+  Local Hint Resolve init_ssem_internal: core.
+  intros PATH STEP; unfold sexec; rewrite PATH; simpl.
+  lapply (final_node_path_simpl f path pc); eauto. intro WF.
+  exploit (siexec_path_correct_true ge sp path.(psize) f rs m (init_sistate pc) (mk_istate true pc rs m)); simpl; eauto.
+  { intros ABS. apply WF; unfold nth_default_succ_inst. rewrite ABS; auto. }
+  (destruct (nth_default_succ_inst (fn_code f) path.(psize) pc) as [i|] eqn: Hi; [clear WF|congruence]).
+  destruct STEP as [sti STEPS CONT|sti t s STEPS CONT LAST];
+  (* intro Hst *)
+  (rewrite STEPS; unfold ssem_internal_opt2; destruct (siexec_path _ _ _) as [st|] eqn: Hst; try congruence);
+  (* intro SEM *)
+  (simpl; unfold ssem_internal; simpl; rewrite CONT; intro SEM);
+  (* intro Hi' *)
+  ( assert (Hi': (fn_code f) ! (si_pc st) = Some i); 
+    [ unfold nth_default_succ_inst in Hi; 
+      exploit siexec_path_default_succ; eauto; simpl;
+      intros DEF; rewrite DEF in Hi; auto 
+      | clear Hi; rewrite Hi' ]);
+  (* eexists *)
+  (eexists; constructor; eauto).
+  - (* early *)
+    eapply ssem_early; eauto.
+    unfold ssem_internal; simpl; rewrite CONT.
+    destruct (siexec_inst i st) as [st'|] eqn: Hst'; simpl; eauto.
+    destruct SEM as (ext & lx & SEM & ALLFU). exists ext, lx.
+    constructor; auto. eapply siexec_inst_preserves_allfu; eauto.
+  - destruct SEM as (SEM & PC & HNYE).
+    destruct (siexec_inst i st) as [st'|] eqn: Hst'; simpl.
+    + (* normal on Snone *)
+      rewrite <- PC in LAST.
+      exploit symb_path_last_step; eauto; simpl.
+      intros (mk_istate & ISTEP & Ht & Hs); subst.
+      exploit siexec_inst_correct; eauto. simpl.
+      erewrite Hst', ISTEP; simpl.
+      clear LAST CONT STEPS PC SEM HNYE Hst Hi' Hst' ISTEP st sti i.
+      intro SEM; destruct (mk_istate.(icontinue)) eqn: CONT.
+      { (* icontinue mk_istate = true *)
+        eapply ssem_normal; simpl; eauto.
+        unfold ssem_internal in SEM.
+        rewrite CONT in SEM.
+        destruct SEM as (SEM & PC & HNYE).
+        rewrite <- PC.
+        eapply exec_Snone. }
+      { eapply ssem_early; eauto. }
+    + (* normal non-Snone instruction *) 
+      eapply ssem_normal; eauto.
+      * unfold ssem_internal; simpl; rewrite CONT; intuition.
+      * simpl. eapply sexec_final_correct; eauto.
+        rewrite PC; auto.
+Qed.
+
+(* TODO: déplacer les trucs sur equiv_stackframe dans RTLpath ? *)
+Inductive equiv_stackframe: stackframe -> stackframe -> Prop :=
+  | equiv_stackframe_intro res f sp pc rs1 rs2
+      (EQUIV: forall r : positive, rs1 !! r = rs2 !! r):
+      equiv_stackframe (Stackframe res f sp pc rs1) (Stackframe res f sp pc rs2).
+
+Inductive equiv_state: state -> state -> Prop :=
+  | State_equiv stack f sp pc rs1 m rs2
+     (EQUIV: forall r, rs1#r = rs2#r): 
+     equiv_state (State stack f sp pc rs1 m) (State stack f sp pc rs2 m)
+  | Call_equiv stk stk' f args m
+      (STACKS: list_forall2 equiv_stackframe stk stk'):
+      equiv_state (Callstate stk f args m) (Callstate stk' f args m)
+  | Return_equiv stk stk' v m
+      (STACKS: list_forall2 equiv_stackframe stk stk'):
+      equiv_state (Returnstate stk v m) (Returnstate stk' v m).
+
+Lemma equiv_stackframe_refl stf: equiv_stackframe stf stf.
+Proof.
+  destruct stf. constructor; auto.
+Qed.
+
+Lemma equiv_stack_refl stk: list_forall2 equiv_stackframe stk stk.
+Proof.
+  Local Hint Resolve equiv_stackframe_refl: core.
+  induction stk; simpl; constructor; auto.
+Qed.
+
+Lemma equiv_state_refl s: equiv_state s s.
+Proof.
+  Local Hint Resolve equiv_stack_refl: core.
+  induction s; simpl; constructor; auto.
+Qed.
+
+(*
+Lemma equiv_stackframe_trans stf1 stf2 stf3:
+  equiv_stackframe stf1 stf2 -> equiv_stackframe stf2 stf3 -> equiv_stackframe stf1 stf3.
+Proof.
+  destruct 1; intros EQ; inv EQ; try econstructor; eauto.
+  intros; eapply eq_trans; eauto.
+Qed.
+
+Lemma equiv_stack_trans stk1 stk2:
+  list_forall2 equiv_stackframe stk1 stk2 -> 
+  forall stk3, list_forall2 equiv_stackframe stk2 stk3 -> 
+  list_forall2 equiv_stackframe stk1 stk3.
+Proof.
+  Local Hint Resolve equiv_stackframe_trans.
+  induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
+Qed.
+
+Lemma equiv_state_trans s1 s2 s3: equiv_state s1 s2 -> equiv_state s2 s3 -> equiv_state s1 s3.
+Proof.
+  Local Hint Resolve equiv_stack_trans.
+  destruct 1; intros EQ; inv EQ; econstructor; eauto.
+  intros; eapply eq_trans; eauto.
+Qed.
+*)
+
+Lemma regmap_setres_eq (rs rs': regset) res vres:
+  (forall r, rs # r = rs' # r) ->
+  forall r, (regmap_setres res vres rs) # r = (regmap_setres res vres rs') # r.
+Proof.
+  intros RSEQ r. destruct res; simpl; try congruence.
+  destruct (peq x r).
+  - subst. repeat (rewrite Regmap.gss). reflexivity.
+  - repeat (rewrite Regmap.gso); auto.
+Qed.
+
+Lemma ssem_final_equiv pge ge sp (f:function) st sv stack rs0 m0 t rs1 rs2 m s:
+  ssem_final pge ge sp st stack f rs0 m0 sv rs1 m t s ->
+  (forall r, rs1#r = rs2#r) -> 
+  exists s', equiv_state s s' /\ ssem_final pge ge sp st stack f rs0 m0 sv rs2 m t s'.
+Proof. 
+  Local Hint Resolve equiv_stack_refl: core.
+  destruct 1.
+  - (* Snone *) intros; eexists; econstructor.
+    + eapply State_equiv; eauto.
+    + eapply exec_Snone.
+  - (* Scall *)
+    intros; eexists; econstructor.
+    2: { eapply exec_Scall; eauto. }
+    apply Call_equiv; auto.
+    repeat (constructor; auto).
+  - (* Stailcall *)
+    intros; eexists; econstructor; [| eapply exec_Stailcall; eauto].
+    apply Call_equiv; auto.
+  - (* Sbuiltin *)
+    intros; eexists; econstructor; [| eapply exec_Sbuiltin; eauto].
+    constructor. eapply regmap_setres_eq; eauto.
+  - (* Sjumptable *)
+    intros; eexists; econstructor; [| eapply exec_Sjumptable; eauto].
+    constructor. assumption.
+  - (* Sreturn *)
+    intros; eexists; econstructor; [| eapply exec_Sreturn; eauto].
+    eapply equiv_state_refl; eauto.
+Qed.
+
+Lemma siexec_inst_early_exit_absurd i st st' ge sp rs m rs' m' pc':
+  siexec_inst i st = Some st' ->
+  (exists ext lx, ssem_exit ge sp ext rs m rs' m' pc' /\
+     all_fallthrough_upto_exit ge sp ext lx (si_exits st) rs m) ->
+  all_fallthrough ge sp (si_exits st') rs m ->
+  False.
+Proof.
+  intros SIEXEC (ext & lx & SSEME & ALLFU) ALLF. destruct ALLFU as (TAIL & _).
+  exploit siexec_inst_add_exits; eauto. destruct 1 as [SIEQ | (ext0 & SIEQ)].
+  - rewrite SIEQ in *. eapply all_fallthrough_noexit. eauto. 2: eapply ALLF. eapply is_tail_in. eassumption.
+  - rewrite SIEQ in *. eapply all_fallthrough_noexit. eauto. 2: eapply ALLF. eapply is_tail_in.
+    constructor. eassumption.
+Qed.
+
+Lemma is_tail_false {A: Type}: forall (l: list A) a, is_tail (a::l) nil -> False.
+Proof.
+  intros. eapply is_tail_incl in H. unfold incl in H. pose (H a).
+  assert (In a (a::l)) by (constructor; auto). assert (In a nil) by auto. apply in_nil in H1.
+  contradiction.
+Qed.
+
+Lemma cons_eq_false {A: Type}: forall (l: list A) a,
+  a :: l = l -> False.
+Proof.
+  induction l; intros.
+  - discriminate.
+  - inv H. apply IHl in H2. contradiction.
+Qed.
+
+Lemma app_cons_nil_eq {A: Type}: forall l' l (a:A),
+  (l' ++ a :: nil) ++ l = l' ++ a::l.
+Proof.
+  induction l'; intros.
+  - simpl. reflexivity.
+  - simpl. rewrite IHl'. reflexivity. 
+Qed.
+
+Lemma app_eq_false {A: Type}: forall l (l': list A) a,
+  l' ++ a :: l = l -> False.
+Proof.
+  induction l; intros.
+  - apply app_eq_nil in H. destruct H as (_ & H). apply cons_eq_false in H. contradiction.
+  - destruct l' as [|a' l'].
+    + simpl in H. apply cons_eq_false in H. contradiction.
+    + rewrite <- app_comm_cons in H. inv H.
+      apply (IHl (l' ++ (a0 :: nil)) a). rewrite app_cons_nil_eq. assumption.
+Qed.
+
+Lemma is_tail_false_gen {A: Type}: forall (l: list A) l' a, is_tail (l'++(a::l)) l -> False.
+Proof.
+  induction l.
+  - intros. destruct l' as [|a' l'].
+    + simpl in H. apply is_tail_false in H. contradiction.
+    + rewrite <- app_comm_cons in H. apply is_tail_false in H. contradiction.
+  - intros. inv H.
+    + apply app_eq_false in H2. contradiction.
+    + apply (IHl (l' ++ (a0 :: nil)) a). rewrite app_cons_nil_eq. assumption.
+Qed.
+
+Lemma is_tail_eq {A: Type}: forall (l l': list A),
+  is_tail l' l ->
+  is_tail l l' ->
+  l = l'.
+Proof.
+  destruct l as [|a l]; intros l' ITAIL ITAIL'.
+  - destruct l' as [|i' l']; auto. apply is_tail_false in ITAIL. contradiction.
+  - inv ITAIL; auto.
+    destruct l' as [|i' l']. { apply is_tail_false in ITAIL'. contradiction. }
+    exploit is_tail_trans. eapply ITAIL'. eauto. intro ABSURD.
+    apply (is_tail_false_gen l nil a) in ABSURD. contradiction.
+Qed.
+
+(* NB: each execution of a symbolic state (produced from [sexec]) represents a concrete execution
+  (sexec is exact).
+*)
+Theorem sexec_exact f pc pge ge sp path stack st rs m t s1: 
+  (fn_path f)!pc = Some path ->
+  sexec f pc = Some st -> 
+  ssem pge ge sp st stack f rs m t s1 ->
+  exists s2, path_step ge pge path.(psize) stack f sp rs m pc t s2 /\ 
+             equiv_state s1 s2.
+Proof.
+  Local Hint Resolve init_ssem_internal: core.
+  unfold sexec; intros PATH SSTEP SEM; rewrite PATH in SSTEP.
+  lapply (final_node_path_simpl f path pc); eauto. intro WF.
+  exploit (siexec_path_correct_true ge sp path.(psize) f rs m (init_sistate pc) (mk_istate true pc rs m)); simpl; eauto.
+  { intros ABS. apply WF; unfold nth_default_succ_inst. rewrite ABS; auto. }
+  (destruct (nth_default_succ_inst (fn_code f) path.(psize) pc) as [i|] eqn: Hi; [clear WF|congruence]).
+  unfold nth_default_succ_inst in Hi.
+  destruct (siexec_path path.(psize) f (init_sistate pc)) as [st0|] eqn: Hst0; simpl.
+  2:{ (* absurd case *)
+      exploit siexec_path_WF; eauto.
+      simpl; intros NDS; rewrite NDS in Hi; congruence. }
+  exploit siexec_path_default_succ; eauto; simpl.
+  intros NDS; rewrite NDS in Hi.
+  rewrite Hi in SSTEP.
+  intros ISTEPS. try_simplify_someHyps.
+  destruct (siexec_inst i st0) as [st'|] eqn:Hst'; simpl.
+  + (* exit on Snone instruction *)
+    assert (SEM': t = E0 /\ exists is, ssem_internal ge sp st' rs m is
+           /\ s1 = (State stack f sp (if (icontinue is) then (si_pc st') else (ipc is)) (irs is) (imem is))).
+    {  destruct SEM as [is CONT SEM|is t s CONT SEM1 SEM2]; simpl in * |- *.
+       - repeat (econstructor; eauto).
+         rewrite CONT; eauto.
+       - inversion SEM2. repeat (econstructor; eauto).
+         rewrite CONT; eauto. }
+    clear SEM; subst. destruct SEM' as [X (is & SEM & X')]; subst.
+    intros.
+    destruct (isteps ge (psize path) f sp rs m pc) as [is0|] eqn:RISTEPS; simpl in *.
+    * unfold ssem_internal in ISTEPS. destruct (icontinue is0) eqn: ICONT0. 
+      ** (* icontinue is0=true: path_step by normal_exit *)
+         destruct ISTEPS as (SEMis0&H1&H2).
+         rewrite H1 in * |-.
+         exploit siexec_inst_correct; eauto.
+         rewrite Hst'; simpl.
+         intros; exploit ssem_internal_opt_determ; eauto.
+         destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+         eexists. econstructor 1.
+         *** eapply exec_normal_exit; eauto.
+             eapply exec_istate; eauto.
+         *** rewrite EQ1.
+             enough ((ipc st) = (if icontinue st then si_pc st' else ipc is)) as ->.
+             { rewrite EQ2, EQ4. eapply State_equiv; auto. }
+             destruct (icontinue st) eqn:ICONT; auto.
+             exploit siexec_inst_default_succ; eauto.
+             erewrite istep_normal_exit; eauto.
+             try_simplify_someHyps.
+      ** (* The concrete execution has not reached "i" => early exit *) 
+         unfold ssem_internal in SEM.
+         destruct (icontinue is) eqn:ICONT.
+         { destruct SEM as (SEML & SIPC & ALLF).
+           exploit siexec_inst_early_exit_absurd; eauto. contradiction. }
+         
+         eexists. econstructor 1.
+         *** eapply exec_early_exit; eauto.
+         *** destruct ISTEPS as (ext & lx & SSEME & ALLFU). destruct SEM as (ext' & lx' & SSEME' & ALLFU').
+             eapply siexec_inst_preserves_allfu in ALLFU; eauto.
+             exploit ssem_exit_fallthrough_upto_exit; eauto.
+             exploit ssem_exit_fallthrough_upto_exit. eapply SSEME. eapply ALLFU. eapply ALLFU'.
+             intros ITAIL ITAIL'. apply is_tail_eq in ITAIL; auto. clear ITAIL'.
+             inv ITAIL. exploit ssem_exit_determ. eapply SSEME. eapply SSEME'. intros (IPCEQ & IRSEQ & IMEMEQ).
+             rewrite <- IPCEQ. rewrite <- IMEMEQ. constructor. congruence. 
+    * (* The concrete execution has not reached "i" => abort case *)
+      eapply siexec_inst_preserves_sabort in ISTEPS; eauto.
+      exploit ssem_internal_exclude_sabort; eauto. contradiction.
+  + destruct SEM as [is CONT SEM|is t s CONT SEM1 SEM2]; simpl in * |- *.
+    - (* early exit *)
+      intros.
+      exploit ssem_internal_opt_determ; eauto.
+      destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+      eexists. econstructor 1.
+      * eapply exec_early_exit; eauto.
+      * rewrite EQ2, EQ4; eapply State_equiv. auto.
+    - (* normal exit non-Snone instruction *)
+      intros.
+      exploit ssem_internal_opt_determ; eauto.
+      destruct 1 as (st & Hst & EQ1 & EQ2 & EQ3 & EQ4).
+      unfold ssem_internal in SEM1.
+      rewrite CONT in SEM1. destruct SEM1 as (SEM1 & PC0 & NYE0).
+      exploit ssem_final_equiv; eauto.
+      clear SEM2; destruct 1 as (s' & Ms' & SEM2).
+      rewrite ! EQ4 in * |-; clear EQ4.
+      rewrite ! EQ2 in * |-; clear EQ2.
+      exists s'; intuition.
+      eapply exec_normal_exit; eauto.
+      eapply sexec_final_complete; eauto.
+      * congruence.
+      * unfold ssem_local in * |- *. 
+        destruct SEM1 as (A & B & C). constructor; [|constructor]; eauto.
+        intro r. congruence.
+      * congruence.
+Qed.
+
+(** * Simulation of RTLpath code w.r.t symbolic execution *)
+
+Section SymbValPreserved.
+
+Variable ge ge': RTL.genv.
+
+Hypothesis symbols_preserved_RTL: forall s, Genv.find_symbol ge' s = Genv.find_symbol ge s.
+
+Hypothesis senv_preserved_RTL: Senv.equiv ge ge'.
+
+Lemma senv_find_symbol_preserved id:
+  Senv.find_symbol ge id = Senv.find_symbol ge' id.
+Proof.
+  destruct senv_preserved_RTL as (A & B & C). congruence.
+Qed.
+
+Lemma senv_symbol_address_preserved id ofs:
+  Senv.symbol_address ge id ofs = Senv.symbol_address ge' id ofs.
+Proof.
+  unfold Senv.symbol_address. rewrite senv_find_symbol_preserved.
+  reflexivity.
+Qed.
+
+Lemma seval_preserved sp sv rs0 m0:
+  seval_sval ge sp sv rs0 m0 = seval_sval ge' sp sv rs0 m0.
+Proof.
+  Local Hint Resolve symbols_preserved_RTL: core.
+  induction sv using sval_mut with (P0 := fun lsv => seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv rs0 m0)
+                                   (P1 := fun sm => seval_smem ge sp sm rs0 m0 = seval_smem ge' sp sm rs0 m0); simpl; auto.
+  + rewrite IHsv; clear IHsv. destruct (seval_list_sval _ _ _ _); auto.
+    rewrite IHsv0; clear IHsv0. destruct (seval_smem _ _ _ _); auto.
+    erewrite eval_operation_preserved; eauto.
+  + rewrite IHsv0; clear IHsv0. destruct (seval_list_sval _ _ _ _); auto.
+    erewrite <- eval_addressing_preserved; eauto.
+    destruct (eval_addressing _ sp _ _); auto.
+    rewrite IHsv; auto.
+  + rewrite IHsv; clear IHsv. destruct (seval_sval _ _ _ _); auto.
+    rewrite IHsv0; auto.
+  + rewrite IHsv0; clear IHsv0. destruct (seval_list_sval _ _ _ _); auto.
+    erewrite <- eval_addressing_preserved; eauto.
+    destruct (eval_addressing _ sp _ _); auto.
+    rewrite IHsv; clear IHsv. destruct (seval_smem _ _ _ _); auto.
+    rewrite IHsv1; auto.
+Qed.
+
+Lemma seval_builtin_arg_preserved sp m rs0 m0:
+  forall bs varg,
+  seval_builtin_arg ge sp m rs0 m0 bs varg ->
+  seval_builtin_arg ge' sp m rs0 m0 bs varg.
+Proof.
+  induction 1.
+  all: try (constructor; auto).
+  - rewrite <- seval_preserved. assumption.
+  - rewrite <- senv_symbol_address_preserved. assumption.
+  - rewrite senv_symbol_address_preserved. eapply seval_BA_addrglobal.
+Qed.
+
+Lemma seval_builtin_args_preserved sp m rs0 m0 lbs vargs:
+  seval_builtin_args ge sp m rs0 m0 lbs vargs ->
+  seval_builtin_args ge' sp m rs0 m0 lbs vargs.
+Proof.
+  induction 1; constructor; eauto.
+  eapply seval_builtin_arg_preserved; auto.
+Qed.
+
+Lemma list_sval_eval_preserved sp lsv rs0 m0: 
+  seval_list_sval ge sp lsv rs0 m0 = seval_list_sval ge' sp lsv rs0 m0.
+Proof.
+  induction lsv; simpl; auto.
+  rewrite seval_preserved. destruct (seval_sval _ _ _ _); auto.
+  rewrite IHlsv; auto.
+Qed.
+
+Lemma smem_eval_preserved sp sm rs0 m0: 
+  seval_smem ge sp sm rs0 m0 = seval_smem ge' sp sm rs0 m0.
+Proof.
+  induction sm; simpl; auto.
+  rewrite list_sval_eval_preserved. destruct (seval_list_sval _ _ _ _); auto.
+  erewrite <- eval_addressing_preserved; eauto.
+  destruct (eval_addressing _ sp _ _); auto.
+  rewrite IHsm; clear IHsm. destruct (seval_smem _ _ _ _); auto.
+  rewrite seval_preserved; auto.
+Qed.
+
+Lemma seval_condition_preserved sp cond lsv sm rs0 m0:
+ seval_condition ge sp cond lsv sm rs0 m0 = seval_condition ge' sp cond lsv sm rs0 m0.
+Proof.
+  unfold seval_condition.
+  rewrite list_sval_eval_preserved. destruct (seval_list_sval _ _ _ _); auto.
+  rewrite smem_eval_preserved; auto.
+Qed.
+
+End SymbValPreserved.
+
+Require Import RTLpathLivegen RTLpathLivegenproof.
+
+(** * DEFINITION OF SIMULATION BETWEEN (ABSTRACT) SYMBOLIC EXECUTIONS
+*)
+
+Definition istate_simulive alive (srce: PTree.t node) (is1 is2: istate): Prop :=
+     is1.(icontinue) = is2.(icontinue)
+     /\ eqlive_reg alive is1.(irs) is2.(irs)
+     /\ is1.(imem) = is2.(imem).
+
+Definition istate_simu f (srce: PTree.t node) outframe is1 is2: Prop :=
+  if is1.(icontinue) then
+     istate_simulive (fun r => Regset.In r outframe) srce is1 is2
+  else
+     exists path, f.(fn_path)!(is1.(ipc)) = Some path 
+     /\ istate_simulive (fun r => Regset.In r path.(input_regs)) srce is1 is2
+     /\ srce!(is2.(ipc)) = Some is1.(ipc).
+
+Record simu_proof_context {f1: RTLpath.function} := {
+   liveness_hyps: liveness_ok_function f1;
+   the_ge1: RTL.genv;
+   the_ge2: RTL.genv;
+   genv_match: forall s, Genv.find_symbol the_ge1 s = Genv.find_symbol the_ge2 s;
+   the_sp: val;
+   the_rs0: regset; 
+   the_m0: mem
+}.
+Arguments simu_proof_context: clear implicits.
+
+(* NOTE: a pure semantic definition on [sistate], for a total freedom in refinements *)
+Definition sistate_simu (dm: PTree.t node) (f: RTLpath.function) outframe (st1 st2: sistate) (ctx: simu_proof_context f): Prop :=
+  forall is1, ssem_internal (the_ge1 ctx) (the_sp ctx) st1 (the_rs0 ctx) (the_m0 ctx) is1 ->
+  exists is2, ssem_internal (the_ge2 ctx) (the_sp ctx) st2 (the_rs0 ctx) (the_m0 ctx) is2
+              /\ istate_simu f dm outframe is1 is2.
+
+Inductive svident_simu (f: RTLpath.function) (ctx: simu_proof_context f): (sval + ident) -> (sval + ident) -> Prop :=
+  | Sleft_simu sv1 sv2:
+     (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx)) = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx))
+     -> svident_simu f ctx (inl sv1) (inl sv2)
+  | Sright_simu id1 id2:
+     id1 = id2
+     -> svident_simu f ctx (inr id1) (inr id2)
+  .
+
+
+Fixpoint ptree_get_list (pt: PTree.t node) (lp: list positive) : option (list positive) :=
+  match lp with
+  | nil => Some nil
+  | p1::lp => SOME p2 <- pt!p1 IN
+              SOME lp2 <- (ptree_get_list pt lp) IN
+              Some (p2 :: lp2)
+  end.
+
+Lemma ptree_get_list_nth dm p2: forall lp2 lp1,
+  ptree_get_list dm lp2 = Some lp1 ->
+  forall n, list_nth_z lp2 n = Some p2 ->
+  exists p1,
+    list_nth_z lp1 n = Some p1 /\ dm ! p2 = Some p1.
+Proof.
+  induction lp2.
+  - simpl. intros. inv H. simpl in *. discriminate.
+  - intros lp1 PGL n LNZ. simpl in PGL. explore.
+    inv LNZ. destruct (zeq n 0) eqn:ZEQ.
+    + subst. inv H0. exists n0. simpl; constructor; auto.
+    + exploit IHlp2; eauto. intros (p1 & LNZ & DMEQ).
+      eexists. simpl. rewrite ZEQ.
+      constructor; eauto.
+Qed.
+
+Lemma ptree_get_list_nth_rev dm p1: forall lp2 lp1,
+  ptree_get_list dm lp2 = Some lp1 ->
+  forall n, list_nth_z lp1 n = Some p1 ->
+  exists p2,
+    list_nth_z lp2 n = Some p2 /\ dm ! p2 = Some p1.
+Proof.
+  induction lp2.
+  - simpl. intros. inv H. simpl in *. discriminate.
+  - intros lp1 PGL n LNZ. simpl in PGL. explore.
+    inv LNZ. destruct (zeq n 0) eqn:ZEQ.
+    + subst. inv H0. exists a. simpl; constructor; auto.
+    + exploit IHlp2; eauto. intros (p2 & LNZ & DMEQ).
+      eexists. simpl. rewrite ZEQ.
+      constructor; eauto. congruence.
+Qed.
+
+Fixpoint seval_builtin_sval ge sp bsv rs0 m0 :=
+  match bsv with
+  | BA sv => SOME v <- seval_sval ge sp sv rs0 m0 IN Some (BA v)
+  | BA_splitlong sv1 sv2 =>
+      SOME v1 <- seval_builtin_sval ge sp sv1 rs0 m0 IN
+      SOME v2 <- seval_builtin_sval ge sp sv2 rs0 m0 IN
+      Some (BA_splitlong v1 v2)
+  | BA_addptr sv1 sv2 =>
+      SOME v1 <- seval_builtin_sval ge sp sv1 rs0 m0 IN
+      SOME v2 <- seval_builtin_sval ge sp sv2 rs0 m0 IN
+      Some (BA_addptr v1 v2)
+  | BA_int i => Some (BA_int i)
+  | BA_long l => Some (BA_long l)
+  | BA_float f => Some (BA_float f)
+  | BA_single s => Some (BA_single s)
+  | BA_loadstack chk ptr => Some (BA_loadstack chk ptr)
+  | BA_addrstack ptr => Some (BA_addrstack ptr)
+  | BA_loadglobal chk id ptr => Some (BA_loadglobal chk id ptr)
+  | BA_addrglobal id ptr => Some (BA_addrglobal id ptr)
+  end.
+
+
+Fixpoint seval_list_builtin_sval ge sp lbsv rs0 m0 :=
+  match lbsv with
+  | nil => Some nil
+  | bsv::lbsv => SOME v <- seval_builtin_sval ge sp bsv rs0 m0 IN
+                 SOME lv <- seval_list_builtin_sval ge sp lbsv rs0 m0 IN
+                 Some (v::lv)
+  end.
+
+Lemma seval_list_builtin_sval_nil ge sp rs0 m0 lbs2:
+  seval_list_builtin_sval ge sp lbs2 rs0 m0 = Some nil ->
+  lbs2 = nil.
+Proof.
+  destruct lbs2; simpl; auto.
+  intros. destruct (seval_builtin_sval _ _ _ _ _);
+    try destruct (seval_list_builtin_sval _ _ _ _ _); discriminate.
+Qed.
+
+Lemma seval_builtin_sval_arg (ge:RTL.genv) sp rs0 m0 bs:
+   forall ba m v, 
+   seval_builtin_sval ge sp bs rs0 m0 = Some ba ->
+   eval_builtin_arg ge (fun id => id) sp m ba v ->
+   seval_builtin_arg ge sp m rs0 m0 bs v.
+Proof.
+   induction bs; simpl; 
+   try (intros ba m v H; inversion H; subst; clear H;
+        intros H; inversion H; subst;
+        econstructor; auto; fail).
+   - intros ba m v; destruct (seval_sval _ _ _ _ _) eqn: SV;
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; auto.
+   - intros ba m v. 
+     destruct (seval_builtin_sval _ _ bs1 _ _) eqn: SV1; try congruence.
+     destruct (seval_builtin_sval _ _ bs2 _ _) eqn: SV2; try congruence.
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; eauto.
+   - intros ba m v. 
+     destruct (seval_builtin_sval _ _ bs1 _ _) eqn: SV1; try congruence.
+     destruct (seval_builtin_sval _ _ bs2 _ _) eqn: SV2; try congruence.
+     intros H; inversion H; subst; clear H.
+     intros H; inversion H; subst.
+     econstructor; eauto.
+Qed.
+
+Lemma seval_builtin_arg_sval ge sp m rs0 m0 v: forall bs,
+  seval_builtin_arg ge sp m rs0 m0 bs v ->
+  exists ba,
+    seval_builtin_sval ge sp bs rs0 m0 = Some ba
+    /\ eval_builtin_arg ge (fun id => id) sp m ba v.
+Proof.
+  induction 1.
+  all: try (eexists; constructor; [simpl; reflexivity | constructor]).
+  2-3: try assumption.
+  - eexists. constructor.
+    + simpl. rewrite H. reflexivity.
+    + constructor.
+  - destruct IHseval_builtin_arg1 as (ba1 & A1 & B1).
+    destruct IHseval_builtin_arg2 as (ba2 & A2 & B2).
+    eexists. constructor.
+    + simpl. rewrite A1. rewrite A2. reflexivity.
+    + constructor; assumption. 
+  - destruct IHseval_builtin_arg1 as (ba1 & A1 & B1).
+    destruct IHseval_builtin_arg2 as (ba2 & A2 & B2).
+    eexists. constructor.
+    + simpl. rewrite A1. rewrite A2. reflexivity.
+    + constructor; assumption.
+Qed.
+
+Lemma seval_builtin_sval_args (ge:RTL.genv) sp rs0 m0 lbs:
+   forall lba m v, 
+   seval_list_builtin_sval ge sp lbs rs0 m0 = Some lba ->
+   list_forall2 (eval_builtin_arg ge (fun id => id) sp m) lba v ->
+   seval_builtin_args ge sp m rs0 m0 lbs v.
+Proof.
+  unfold seval_builtin_args; induction lbs; simpl; intros lba m v.
+  - intros H; inversion H; subst; clear H.
+    intros H; inversion H. econstructor.
+  - destruct (seval_builtin_sval _ _ _ _ _) eqn:SV; try congruence.
+    destruct (seval_list_builtin_sval _ _ _ _ _) eqn: SVL; try congruence.
+    intros H; inversion H; subst; clear H.
+    intros H; inversion H; subst; clear H. 
+    econstructor; eauto.
+    eapply seval_builtin_sval_arg; eauto.
+Qed.
+
+Lemma seval_builtin_args_sval ge sp m rs0 m0 lv: forall lbs,
+  seval_builtin_args ge sp m rs0 m0 lbs lv ->
+  exists lba,
+    seval_list_builtin_sval ge sp lbs rs0 m0 = Some lba
+    /\ list_forall2 (eval_builtin_arg ge (fun id => id) sp m) lba lv.
+Proof.
+  induction 1.
+  - eexists. constructor.
+    + simpl. reflexivity.
+    + constructor.
+  - destruct IHlist_forall2 as (lba & A & B).
+    apply seval_builtin_arg_sval in H. destruct H as (ba & A' & B').
+    eexists. constructor.
+    + simpl. rewrite A'. rewrite A. reflexivity.
+    + constructor; assumption.
+Qed.
+
+Lemma seval_builtin_sval_correct ge sp m rs0 m0: forall bs1 v bs2,
+  seval_builtin_arg ge sp m rs0 m0 bs1 v ->
+  (seval_builtin_sval ge sp bs1 rs0 m0) = (seval_builtin_sval ge sp bs2 rs0 m0) ->
+  seval_builtin_arg ge sp m rs0 m0 bs2 v.
+Proof.
+  intros. exploit seval_builtin_arg_sval; eauto.
+  intros (ba & X1 & X2).
+  eapply seval_builtin_sval_arg; eauto.
+  congruence.
+Qed.
+
+Lemma seval_list_builtin_sval_correct ge sp m rs0 m0 vargs: forall lbs1,
+  seval_builtin_args ge sp m rs0 m0 lbs1 vargs ->
+  forall lbs2, (seval_list_builtin_sval ge sp lbs1 rs0 m0) = (seval_list_builtin_sval ge sp lbs2 rs0 m0) ->
+  seval_builtin_args ge sp m rs0 m0 lbs2 vargs.
+Proof.
+  intros. exploit seval_builtin_args_sval; eauto.
+  intros (ba & X1 & X2).
+  eapply seval_builtin_sval_args; eauto.
+  congruence.
+Qed.
+
+(* NOTE: we need to mix semantical simulation and syntactic definition on [sfval] in order to abstract the [match_states] *)
+Inductive sfval_simu (dm: PTree.t node) (f: RTLpath.function) (opc1 opc2: node) (ctx: simu_proof_context f): sfval -> sfval -> Prop :=
+  | Snone_simu: 
+      dm!opc2 = Some opc1 -> 
+      sfval_simu dm f opc1 opc2 ctx Snone Snone
+  | Scall_simu sig svos1 svos2 lsv1 lsv2 res pc1 pc2:
+      dm!pc2 = Some pc1 ->
+      svident_simu f ctx svos1 svos2 ->
+      (seval_list_sval (the_ge1 ctx) (the_sp ctx) lsv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_sval (the_ge2 ctx) (the_sp ctx) lsv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Scall sig svos1 lsv1 res pc1) (Scall sig svos2 lsv2 res pc2)
+  | Stailcall_simu sig svos1 svos2 lsv1 lsv2:
+      svident_simu f ctx svos1 svos2 ->
+      (seval_list_sval (the_ge1 ctx) (the_sp ctx) lsv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_sval (the_ge2 ctx) (the_sp ctx) lsv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Stailcall sig svos1 lsv1) (Stailcall sig svos2 lsv2)
+  | Sbuiltin_simu ef lbs1 lbs2 br pc1 pc2:
+      dm!pc2 = Some pc1 ->
+      (seval_list_builtin_sval (the_ge1 ctx) (the_sp ctx) lbs1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_list_builtin_sval (the_ge2 ctx) (the_sp ctx) lbs2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sbuiltin ef lbs1 br pc1) (Sbuiltin ef lbs2 br pc2)
+  | Sjumptable_simu sv1 sv2 lpc1 lpc2:
+      ptree_get_list dm lpc2 = Some lpc1 ->
+      (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sjumptable sv1 lpc1) (Sjumptable sv2 lpc2)
+  | Sreturn_simu_none: sfval_simu dm f opc1 opc2 ctx (Sreturn None) (Sreturn None)
+  | Sreturn_simu_some sv1 sv2:
+      (seval_sval (the_ge1 ctx) (the_sp ctx) sv1 (the_rs0 ctx) (the_m0 ctx))
+      = (seval_sval (the_ge2 ctx) (the_sp ctx) sv2 (the_rs0 ctx) (the_m0 ctx)) ->
+      sfval_simu dm f opc1 opc2 ctx (Sreturn (Some sv1)) (Sreturn (Some sv2)).
+
+Definition sstate_simu dm f outframe (s1 s2: sstate) (ctx: simu_proof_context f): Prop :=
+       sistate_simu dm f outframe s1.(internal) s2.(internal) ctx
+    /\ forall is1,
+           ssem_internal (the_ge1 ctx) (the_sp ctx) s1 (the_rs0 ctx) (the_m0 ctx) is1 -> 
+           is1.(icontinue) = true ->
+           sfval_simu dm f s1.(si_pc) s2.(si_pc) ctx s1.(final) s2.(final).
+
+Definition sexec_simu dm (f1 f2: RTLpath.function) pc1 pc2: Prop :=
+    forall st1, sexec f1 pc1 = Some st1 -> 
+    exists path st2, (fn_path f1)!pc1 = Some path /\ sexec f2 pc2 = Some st2 
+     /\ forall ctx, sstate_simu dm f1 path.(pre_output_regs) st1 st2 ctx.
diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v
new file mode 100644
index 00000000..31680256
--- /dev/null
+++ b/scheduling/RTLpathScheduler.v
@@ -0,0 +1,329 @@
+(** RTLpath Scheduling from an external oracle. 
+
+This module is inspired from [Duplicate] and [Duplicateproof]
+
+*)
+
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory RTLpathSE_impl.
+Require RTLpathWFcheck.
+
+Notation "'ASSERT' A 'WITH' MSG 'IN' B" := (if A then B else Error (msg MSG))
+         (at level 200, A at level 100, B at level 200)
+         : error_monad_scope.
+
+Local Open Scope error_monad_scope.
+Local Open Scope positive_scope.
+
+(** External oracle returning the new RTLpath function and a mapping of new path_entries to old path_entries 
+
+NB: the new RTLpath function is generated from the fn_code, the fn_entrypoint and the fn_path
+It requires to check that the path structure is wf !
+
+*)
+
+(* Returns: new code, new entrypoint, new pathmap, revmap
+ * Indeed, the entrypoint might not be the same if the entrypoint node is moved further down
+ * a path ; same reasoning for the pathmap *)
+Axiom untrusted_scheduler: RTLpath.function -> code * node * path_map * (PTree.t node).
+
+Extract Constant untrusted_scheduler => "RTLpathScheduleraux.scheduler".
+
+Program Definition function_builder (tfr: RTL.function) (tpm: path_map) : 
+  { r : res RTLpath.function | forall f', r = OK f' -> fn_RTL f' = tfr} :=
+  match RTLpathWFcheck.function_checker tfr tpm with
+  | false => Error (msg "In function_builder: (tfr, tpm) is not wellformed")
+  | true => OK {| fn_RTL := tfr; fn_path := tpm |}
+  end.
+Next Obligation.
+  apply RTLpathWFcheck.function_checker_path_entry. auto.
+Defined. Next Obligation.
+  apply RTLpathWFcheck.function_checker_wellformed_path_map. auto.
+Defined.
+
+Definition entrypoint_check (dm: PTree.t node) (fr tfr: RTL.function) : res unit :=
+  match dm ! (fn_entrypoint tfr) with
+  | None => Error (msg "No mapping for (entrypoint tfr)")
+  | Some etp => if (Pos.eq_dec (fn_entrypoint fr) etp) then OK tt
+                else Error (msg "Entrypoints do not match")
+  end.
+
+Lemma entrypoint_check_correct fr tfr dm:
+  entrypoint_check dm fr tfr = OK tt ->
+  dm ! (fn_entrypoint tfr) = Some (fn_entrypoint fr).
+Proof.
+  unfold entrypoint_check. explore; try discriminate. congruence.
+Qed.
+
+Definition path_entry_check_single (pm tpm: path_map) (m: node * node) :=
+  let (pc2, pc1) := m in
+  match (tpm ! pc2) with
+  | None => Error (msg "pc2 isn't an entry of tpm")
+  | Some _ =>
+      match (pm ! pc1) with
+      | None => Error (msg "pc1 isn't an entry of pm")
+      | Some _ => OK tt
+      end
+  end.
+
+Lemma path_entry_check_single_correct pm tpm pc1 pc2:
+  path_entry_check_single pm tpm (pc2, pc1) = OK tt ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  unfold path_entry_check_single. intro. explore.
+  constructor; congruence.
+Qed.
+
+(* Inspired from Duplicate.verify_mapping_rec *)
+Fixpoint path_entry_check_rec (pm tpm: path_map) lm :=
+  match lm with
+  | nil => OK tt
+  | m :: lm => do u1 <- path_entry_check_single pm tpm m;
+               do u2 <- path_entry_check_rec pm tpm lm;
+               OK tt
+  end.
+
+Lemma path_entry_check_rec_correct pm tpm pc1 pc2: forall lm,
+  path_entry_check_rec pm tpm lm = OK tt ->
+  In (pc2, pc1) lm ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  induction lm.
+  - simpl. intuition.
+  - simpl. intros. explore. destruct H0.
+    + subst. eapply path_entry_check_single_correct; eauto.
+    + eapply IHlm; assumption.
+Qed.
+
+Definition path_entry_check (dm: PTree.t node) (pm tpm: path_map) := path_entry_check_rec pm tpm (PTree.elements dm).
+
+Lemma path_entry_check_correct dm pm tpm:
+  path_entry_check dm pm tpm = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry tpm pc2 /\ path_entry pm pc1.
+Proof.
+  unfold path_entry_check. intros. eapply PTree.elements_correct in H0.
+  eapply path_entry_check_rec_correct; eassumption.
+Qed.
+
+Definition function_equiv_checker (dm: PTree.t node) (f: RTLpath.function) (tf: RTLpath.function) : res unit :=
+  let pm := fn_path f in
+  let fr := fn_RTL f in
+  let tpm := fn_path tf in
+  let tfr := fn_RTL tf in
+  do _ <- entrypoint_check dm fr tfr;
+  do _ <- path_entry_check dm pm tpm;
+  do _ <- simu_check dm f tf;
+  OK tt.
+
+Lemma function_equiv_checker_entrypoint f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  dm ! (fn_entrypoint tf) = Some (fn_entrypoint f).
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  eapply entrypoint_check_correct; eauto.
+Qed.
+
+Lemma function_equiv_checker_pathentry1 f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry (fn_path tf) pc2.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_pathentry2 f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  path_entry (fn_path f) pc1.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  exploit path_entry_check_correct. eassumption. all: eauto. intuition.
+Qed.
+
+Lemma function_equiv_checker_correct f tf dm:
+  function_equiv_checker dm f tf = OK tt ->
+  forall pc1 pc2, dm ! pc2 = Some pc1 ->
+  sexec_simu dm f tf pc1 pc2.
+Proof.
+  unfold function_equiv_checker. intros. explore.
+  eapply simu_check_correct; eauto.
+Qed.
+
+Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (PTree.t node)) :=
+  let (tctetpm, dm) := untrusted_scheduler f in
+  let (tcte, tpm) := tctetpm in
+  let (tc, te) := tcte in
+  let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in
+  do tf <- proj1_sig (function_builder tfr tpm);
+  do tt <- function_equiv_checker dm f tf; 
+  OK (tf, dm).
+
+Theorem verified_scheduler_correct f tf dm:
+  verified_scheduler f = OK (tf, dm) ->
+  fn_sig f = fn_sig tf
+  /\ fn_params f = fn_params tf
+  /\ fn_stacksize f = fn_stacksize tf
+  /\ dm ! (fn_entrypoint tf) = Some (fn_entrypoint f)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path f) pc1)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> path_entry (fn_path tf) pc2)
+  /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2)
+.
+Proof.
+  intros VERIF. unfold verified_scheduler in VERIF. explore.
+  Local Hint Resolve function_equiv_checker_entrypoint
+    function_equiv_checker_pathentry1 function_equiv_checker_pathentry2
+    function_equiv_checker_correct: core.
+  destruct (function_builder _ _) as [res H]; simpl in * |- *; auto.
+    apply H in EQ2. rewrite EQ2. simpl.
+  repeat (constructor; eauto).
+  exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition.
+Qed.
+
+Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := {
+  preserv_fnsig: fn_sig f1 = fn_sig f2;
+  preserv_fnparams: fn_params f1 = fn_params f2;
+  preserv_fnstacksize: fn_stacksize f1 = fn_stacksize f2;
+  preserv_entrypoint: dupmap!(f2.(fn_entrypoint)) = Some f1.(fn_entrypoint);
+  dupmap_path_entry1: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f1) pc1;
+  dupmap_path_entry2: forall pc1 pc2, dupmap!pc2 = Some pc1 -> path_entry (fn_path f2) pc2;
+  dupmap_correct: forall pc1 pc2, dupmap!pc2 = Some pc1 -> sexec_simu dupmap f1 f2 pc1 pc2;
+}.
+
+Program Definition transf_function (f: RTLpath.function):
+  { r : res RTLpath.function | forall f', r = OK f' -> exists dm, match_function dm f f'} :=
+  match (verified_scheduler f) with
+  | Error e => Error e
+  | OK (tf, dm) => OK tf
+  end.
+Next Obligation.
+  exploit verified_scheduler_correct; eauto.
+  intros (A & B & C & D & E & F & G (* & H *)).
+  exists dm. econstructor; eauto.
+Defined.
+
+Theorem match_function_preserves f f' dm:
+  match_function dm f f' ->
+  fn_sig f = fn_sig f' /\ fn_params f = fn_params f' /\ fn_stacksize f = fn_stacksize f'.
+Proof.
+  intros.
+  destruct H as [SIG PARAM SIZE ENTRY CORRECT].
+  intuition.
+Qed.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+  transf_partial_fundef (fun f => proj1_sig (transf_function f)) f.
+
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
+
+(** * Preservation proof *)
+
+Local Notation ext alive := (fun r => Regset.In r alive).
+
+Inductive match_fundef: RTLpath.fundef -> RTLpath.fundef -> Prop :=
+  | match_Internal dupmap f f': match_function dupmap f f' -> match_fundef (Internal f) (Internal f')
+  | match_External ef: match_fundef (External ef) (External ef).
+
+Inductive match_stackframes: stackframe -> stackframe -> Prop :=
+  | match_stackframe_intro dupmap res f sp pc rs1 rs2 f' pc' path
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: forall v, eqlive_reg (ext path.(input_regs)) (rs1 # res <- v) (rs2 # res <- v)):
+      match_stackframes (Stackframe res f sp pc rs1) (Stackframe res f' sp pc' rs2).
+
+Inductive match_states: state -> state -> Prop :=
+  | match_states_intro dupmap st f sp pc rs1 rs2 m st' f' pc' path
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_function dupmap f f')
+      (DUPLIC: dupmap!pc' = Some pc)
+      (LIVE: liveness_ok_function f)
+      (PATH: f.(fn_path)!pc = Some path)
+      (EQUIV: eqlive_reg (ext path.(input_regs)) rs1 rs2):
+      match_states (State st f sp pc rs1 m) (State st' f' sp pc' rs2 m)
+  | match_states_call st st' f f' args m
+      (STACKS: list_forall2 match_stackframes st st')
+      (TRANSF: match_fundef f f')
+      (LIVE: liveness_ok_fundef f):
+      match_states (Callstate st f args m) (Callstate st' f' args m)
+  | match_states_return st st' v m
+      (STACKS: list_forall2 match_stackframes st st'):
+      match_states (Returnstate st v m) (Returnstate st' v m).
+
+Lemma match_stackframes_equiv stf1 stf2 stf3:
+  match_stackframes stf1 stf2 -> equiv_stackframe stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+  destruct 1; intros EQ; inv EQ; try econstructor; eauto.
+  intros; eapply eqlive_reg_trans; eauto.
+  rewrite eqlive_reg_triv in * |-.
+  eapply eqlive_reg_update.
+  eapply eqlive_reg_monotonic; eauto.
+  simpl; auto.
+Qed.
+
+Lemma match_stack_equiv stk1 stk2:
+  list_forall2 match_stackframes stk1 stk2 -> 
+  forall stk3, list_forall2 equiv_stackframe stk2 stk3 -> 
+  list_forall2 match_stackframes stk1 stk3.
+Proof.
+  Local Hint Resolve match_stackframes_equiv: core.
+  induction 1; intros stk3 EQ; inv EQ; econstructor; eauto.
+Qed.
+
+Lemma match_states_equiv s1 s2 s3: match_states s1 s2 -> equiv_state s2 s3 -> match_states s1 s3.
+Proof.
+  Local Hint Resolve match_stack_equiv: core.
+  destruct 1; intros EQ; inv EQ; econstructor; eauto.
+  intros; eapply eqlive_reg_triv_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stackframes stf1 stf2 stf3:
+  eqlive_stackframes stf1 stf2 -> match_stackframes stf2 stf3 -> match_stackframes stf1 stf3.
+Proof.
+  destruct 1; intros MS; inv MS; try econstructor; eauto.
+  try_simplify_someHyps. intros; eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_match_stack stk1 stk2:
+  list_forall2 eqlive_stackframes stk1 stk2 -> 
+  forall stk3, list_forall2 match_stackframes stk2 stk3 -> 
+  list_forall2 match_stackframes stk1 stk3.
+Proof.
+  induction 1; intros stk3 MS; inv MS; econstructor; eauto.
+  eapply eqlive_match_stackframes; eauto.
+Qed.
+
+Lemma eqlive_match_states s1 s2 s3: eqlive_states s1 s2 -> match_states s2 s3 -> match_states s1 s3.
+Proof.
+  Local Hint Resolve eqlive_match_stack: core.
+  destruct 1; intros MS; inv MS; try_simplify_someHyps; econstructor; eauto.
+  eapply eqlive_reg_trans; eauto.
+Qed.
+
+Lemma eqlive_stackframes_refl stf1 stf2: match_stackframes stf1 stf2 -> eqlive_stackframes stf1 stf1.
+Proof.
+  destruct 1; econstructor; eauto.
+  intros; eapply eqlive_reg_refl; eauto.
+Qed.
+
+Lemma eqlive_stacks_refl stk1 stk2:
+  list_forall2 match_stackframes stk1 stk2 -> list_forall2 eqlive_stackframes stk1 stk1.
+Proof.
+  induction 1; simpl; econstructor; eauto.
+  eapply eqlive_stackframes_refl; eauto.
+Qed.
+
+Lemma transf_fundef_correct f f':
+  transf_fundef f = OK f' -> match_fundef f f'.
+Proof.
+  intros TRANSF; destruct f; simpl; monadInv TRANSF.
+  + destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto.
+    eapply match_Internal; eauto.
+  + eapply match_External.
+Qed.
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
new file mode 100644
index 00000000..aeed39df
--- /dev/null
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -0,0 +1,328 @@
+open DebugPrint
+open Machine
+open RTLpathLivegenaux
+open RTLpath
+open RTLpathCommon
+open RTL
+open Maps
+open Registers
+open ExpansionOracle
+
+let config = Machine.config
+
+let print_superblock (sb: superblock) code =
+  let insts = sb.instructions in
+  let li = sb.liveins in
+  let outs = sb.s_output_regs in
+  begin
+    debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
+    debug "  liveins = "; print_ptree_regset li; debug "\n";
+    debug "  output_regs = "; print_regset outs; debug "}"
+  end
+
+let print_superblocks lsb code =
+  let rec f = function
+    | [] -> ()
+    | sb :: lsb -> (print_superblock sb code; debug ",\n"; f lsb)
+  in begin
+    debug "[\n";
+    f lsb;
+    debug "]"
+  end
+
+let get_superblocks code entry pm typing =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec get_superblocks_rec pc =
+    let liveins = ref (PTree.empty) in
+    let rec follow pc n =
+      let inst = get_some @@ PTree.get pc code in
+      if (n == 0) then begin
+        (match (non_predicted_successors inst) with
+          | [pcout] ->
+              let live = (get_some @@ PTree.get pcout pm).input_regs in
+                liveins := PTree.set pc live !liveins
+          | _ -> ());
+        ([pc], successors_inst inst)
+      end else
+        let nexts_from_exit = match (non_predicted_successors inst) with
+          | [pcout] -> 
+              let live = (get_some @@ PTree.get pcout pm).input_regs in begin
+                liveins := PTree.set pc live !liveins;
+                [pcout]
+              end
+          | [] -> []
+          | _ -> failwith "Having more than one non_predicted_successor is not handled"
+        in match (predicted_successor inst) with
+          | None -> failwith "Incorrect path"
+          | Some succ ->
+              let (insts, nexts) = follow succ (n-1) in (pc :: insts, nexts_from_exit @ nexts)
+    in if (get_some @@ PTree.get pc !visited) then []
+    else begin
+      visited := PTree.set pc true !visited;
+      let pi = get_some @@ PTree.get pc pm in
+      let (insts, nexts) = follow pc (Camlcoq.Nat.to_int pi.psize) in
+      let superblock = { instructions = Array.of_list insts; liveins = !liveins;
+        s_output_regs = pi.output_regs; typing = typing } in
+      superblock :: (List.concat @@ List.map get_superblocks_rec nexts)
+    end
+  in let lsb = get_superblocks_rec entry in begin
+    (* debug_flag := true; *)
+    debug "Superblocks identified:"; print_superblocks lsb code; debug "\n";
+    (* debug_flag := false; *)
+    lsb
+end
+
+(* TODO David *)
+let schedule_superblock sb code =
+  if not !Clflags.option_fprepass
+  then sb.instructions
+  else
+    (* let old_flag = !debug_flag in
+    debug_flag := true;
+    print_endline "ORIGINAL SUPERBLOCK";
+    print_superblock sb code;
+    debug_flag := old_flag; *)
+    let nr_instr = Array.length sb.instructions in
+    let trailer_length =
+      match PTree.get (sb.instructions.(nr_instr-1)) code with
+      | None -> 0
+      | Some ii ->
+         match predicted_successor ii with
+         | Some _ -> 0
+         | None -> 1 in
+    match PrepassSchedulingOracle.schedule_sequence
+            (Array.map (fun i ->
+                 (match PTree.get i code with
+                 | Some ii -> ii
+                 | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+                 (match PTree.get i sb.liveins with
+                 | Some s -> s
+                 | None -> Regset.empty))
+               (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+    | None -> sb.instructions
+    | Some order ->
+       let ins' =
+         Array.append 
+           (Array.map (fun i -> sb.instructions.(i)) order)
+           (Array.sub sb.instructions (nr_instr-trailer_length) trailer_length) in
+       (* Printf.printf "REORDERED SUPERBLOCK %d\n" (Array.length ins');
+       debug_flag := true;
+       print_instructions (Array.to_list ins') code;
+       debug_flag := old_flag;
+       flush stdout; *)
+       assert ((Array.length sb.instructions) = (Array.length ins'));
+       (*sb.instructions; *)
+       ins';;
+
+  (* stub2: reverse function *)
+  (*
+  let reversed = Array.of_list @@ List.rev @@ Array.to_list (sb.instructions) in
+  let tmp = reversed.(0) in
+  let last_index = Array.length reversed - 1 in
+  begin
+    reversed.(0) <- reversed.(last_index);
+    reversed.(last_index) <- tmp;
+    reversed
+  end *)
+  (* stub: identity function *)
+
+(**
+ * Perform basic checks on the new order :
+ * - must have the same length as the old order
+ * - non basic instructions (call, tailcall, return, jumptable, non predicted CB) must not move
+ *)
+let check_order code old_order new_order = begin
+  assert ((Array.length old_order) == (Array.length new_order));
+  let length = Array.length new_order in
+  if length > 0 then
+    let last_inst = Array.get old_order (length - 1) in
+    let instr = get_some @@ PTree.get last_inst code in
+    match predicted_successor instr with
+    | None ->
+        if (last_inst != Array.get new_order (length - 1)) then
+          failwith "The last instruction of the superblock is not basic, but was moved"
+    | _ -> ()
+end
+
+type sinst =
+  (* Each middle instruction has a direct successor *)
+  (* A Smid can be the last instruction of a superblock, but a Send cannot be moved *)
+  | Smid of RTL.instruction * node
+  | Send of RTL.instruction
+
+let rinst_to_sinst inst =
+  match inst with
+  | Inop n -> Smid(inst, n)
+  | Iop (_,_,_,n) -> Smid(inst, n)
+  | Iload (_,_,_,_,_,n) -> Smid(inst, n)
+  | Istore (_,_,_,_,n) -> Smid(inst, n)
+  | Icond (_,_,n1,n2,p) -> (
+      match p with
+      | Some true -> Smid(inst, n1)
+      | Some false -> Smid(inst, n2)
+      | None -> Send(inst)
+    )
+  | Icall _ | Ibuiltin _ | Ijumptable _ | Itailcall _ | Ireturn _ -> Send(inst)
+
+let change_predicted_successor s = function
+  | Smid(i, n) -> Smid(i, s)
+  | Send _ -> failwith "Called change_predicted_successor on Send. Are you trying to move a non-basic instruction in the middle of the block?"
+
+(* Forwards the successor changes into an RTL instruction *)
+let sinst_to_rinst = function
+  | Smid(inst, s) -> (
+      match inst with
+      | Inop n -> Inop s
+      | Iop (a,b,c,n) -> Iop (a,b,c,s)
+      | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
+      | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
+      | Icond (a,b,n1,n2,p) -> (
+        match p with
+        | Some true -> Icond(a, b, s, n2, p)
+        | Some false -> Icond(a, b, n1, s, p)
+        | None -> failwith "Non predicted Icond as a middle instruction!"
+        )
+      | _ -> failwith "That instruction shouldn't be a middle instruction"
+      )
+  | Send i -> i
+
+let is_a_cb = function Icond _ -> true | _ -> false
+let is_a_load = function Iload _ -> true | _ -> false
+
+let find_array arr n =
+  let index = ref None in
+  begin
+    Array.iteri (fun i n' ->
+      if n = n' then
+        match !index with
+        | Some _ -> failwith "More than one element present"
+        | None -> index := Some i
+    ) arr;
+    !index
+  end
+
+let rec hashedset_from_list = function
+  | [] -> HashedSet.PSet.empty
+  | n::ln -> HashedSet.PSet.add n (hashedset_from_list ln)
+
+let hashedset_map f hs = hashedset_from_list @@ List.map f @@ HashedSet.PSet.elements hs
+
+let apply_schedule code sb new_order =
+  let tc = ref code in
+  let old_order = sb.instructions in
+  let count_cbs order code =
+    let current_cbs = ref HashedSet.PSet.empty in
+    let cbs_above = ref PTree.empty in
+    Array.iter (fun n ->
+      let inst = get_some @@ PTree.get n code in
+      if is_a_cb inst then current_cbs := HashedSet.PSet.add n !current_cbs
+      else if is_a_load inst then cbs_above := PTree.set n !current_cbs !cbs_above
+    ) order;
+    !cbs_above
+  in let fmap n =
+    let index = get_some @@ find_array new_order n in
+    old_order.(index)
+  in begin
+    check_order code old_order new_order;
+    (* First pass - modify the positions, nothing else *)
+    Array.iteri (fun i n' ->
+      let inst' = get_some @@ PTree.get n' code in
+      let iend = Array.length old_order - 1 in
+      let new_inst =
+        if (i == iend) then
+          let final_inst_node = Array.get old_order iend in
+          let sinst' = rinst_to_sinst inst' in
+          match sinst' with
+          (* The below assert fails if a Send is in the middle of the original superblock *)
+          | Send i -> (assert (final_inst_node == n'); i)
+          | Smid _ ->
+              let final_inst = get_some @@ PTree.get final_inst_node code in
+              match rinst_to_sinst final_inst with
+              | Smid (_, s') -> sinst_to_rinst @@ change_predicted_successor s' sinst'
+              | Send _ -> assert(false) (* should have failed earlier *)
+        else
+          sinst_to_rinst
+          (* this will fail if the moved instruction is a Send *)
+          @@ change_predicted_successor (Array.get old_order (i+1))
+          @@ rinst_to_sinst inst'
+      in tc := PTree.set (Array.get old_order i) new_inst !tc
+    ) new_order;
+    (* Second pass - turn the loads back into trapping when it was not needed *)
+    (* 1) We remember which CBs are "above" a given load *)
+    let cbs_above = count_cbs old_order code in
+    (* 2) We do the same for new_order *)
+    let cbs_above' = count_cbs (Array.map fmap new_order) !tc in
+    (* 3) We examine each load, turn it back into trapping if cbs_above is included in cbs_above' *)
+    Array.iter (fun n ->
+      let n' = fmap n in
+      let inst' = get_some @@ PTree.get n' !tc in
+      match inst' with
+      | Iload (t,a,b,c,d,s) ->
+          let pset = hashedset_map fmap @@ get_some @@ PTree.get n cbs_above in
+          let pset' = get_some @@ PTree.get n' cbs_above' in
+          if HashedSet.PSet.is_subset pset pset' then tc := PTree.set n' (Iload (AST.TRAP,a,b,c,d,s)) !tc
+          else assert !config.has_non_trapping_loads
+      | _ -> ()
+    ) old_order;
+    !tc
+  end
+
+let turn_all_loads_nontrap sb code =
+  if not !config.has_non_trapping_loads then code
+  else begin
+    let code' = ref code in
+    Array.iter (fun n ->
+      let inst = get_some @@ PTree.get n code in
+      match inst with
+      | Iload (t,a,b,c,d,s) -> code' := PTree.set n (Iload (AST.NOTRAP,a,b,c,d,s)) !code'
+      | _ -> ()
+    ) sb.instructions;
+    !code'
+  end
+
+let rec do_schedule code pm = function
+  | [] -> (code, pm)
+  | sb :: lsb ->
+      (*debug_flag := true;*)
+      let (code_exp, pm) = expanse sb code pm in
+      (*debug_flag := false;*)
+      (* Trick: instead of turning loads into non trap as needed..
+       * First, we turn them all into non-trap.
+       * Then, we turn back those who didn't need to be turned, into TRAP again
+       * This is because the scheduler (rightfully) refuses to schedule ahead of a branch
+       * operations that might trap *)
+      let code' = turn_all_loads_nontrap sb code_exp in
+      let schedule = schedule_superblock sb code' in
+      let new_code = apply_schedule code' sb schedule in
+      begin
+        (*debug_flag := true;*)
+        if code != code_exp then (
+        debug "Old Code: "; print_code code;
+        debug "Exp Code: "; print_code code_exp);
+        debug "\nSchedule to apply: "; print_arrayp schedule;
+        debug "\nNew Code: "; print_code new_code;
+        debug "\n";
+        do_schedule new_code pm lsb
+      end
+
+let get_ok r = match r with Errors.OK x -> x | _ -> failwith "Did not get OK"
+
+let scheduler f =
+  let code = f.fn_RTL.fn_code in
+  let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in
+  let entry = f.fn_RTL.fn_entrypoint in
+  let pm = f.fn_path in
+  let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in
+  let lsb = get_superblocks code entry pm typing in
+  begin
+    (* debug_flag := true; *)
+    debug "Pathmap:\n"; debug "\n";
+    print_path_map pm;
+    debug "Superblocks:\n";
+    (*print_code code; flush stdout; flush stderr;*)
+    (*debug_flag := false;*)
+    (*print_superblocks lsb code; debug "\n";*)
+    find_last_node_reg (PTree.elements code);
+    let (tc, pm) = do_schedule code pm lsb in
+    (((tc, entry), pm), id_ptree)
+  end
diff --git a/scheduling/RTLpathSchedulerproof.v b/scheduling/RTLpathSchedulerproof.v
new file mode 100644
index 00000000..a9c2fa76
--- /dev/null
+++ b/scheduling/RTLpathSchedulerproof.v
@@ -0,0 +1,509 @@
+Require Import AST Linking Values Maps Globalenvs Smallstep Registers.
+Require Import Coqlib Maps Events Errors Op.
+Require Import RTL RTLpath RTLpathLivegen RTLpathLivegenproof RTLpathSE_theory.
+Require Import RTLpathScheduler.
+
+Definition match_prog (p tp: program) :=
+  match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+  forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+  intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: program.
+Variable tprog: program.
+
+Hypothesis TRANSL: match_prog prog tprog.
+
+Let pge := Genv.globalenv prog.
+Let tpge := Genv.globalenv tprog.
+
+Hypothesis all_fundef_liveness_ok: forall b fd, Genv.find_funct_ptr pge b = Some fd -> liveness_ok_fundef fd.
+
+Lemma symbols_preserved s: Genv.find_symbol tpge s = Genv.find_symbol pge s.
+Proof.
+  rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
+
+Lemma senv_preserved:
+  Senv.equiv pge tpge.
+Proof.
+  eapply (Genv.senv_match TRANSL).
+Qed.
+
+Lemma functions_preserved:
+  forall (v: val) (f: fundef),
+  Genv.find_funct pge v = Some f ->
+  exists tf cunit, transf_fundef f = OK tf /\ Genv.find_funct tpge v = Some tf /\ linkorder cunit prog.
+Proof.
+  intros. exploit (Genv.find_funct_match TRANSL); eauto.
+  intros (cu & tf & A & B & C).
+  repeat eexists; intuition eauto.
+  + unfold incl; auto.
+  + eapply linkorder_refl.
+Qed.
+
+Lemma function_ptr_preserved:
+  forall v f,
+  Genv.find_funct_ptr pge v = Some f ->
+  exists tf,
+  Genv.find_funct_ptr tpge v = Some tf /\ transf_fundef f = OK tf.
+Proof.
+  intros.
+  exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
+
+Lemma function_sig_preserved:
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
+Proof.
+  intros. destruct f.
+  - simpl in H. monadInv H.
+    destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+    destruct (H _ EQ).
+    intuition subst; auto.
+    symmetry.
+    eapply match_function_preserves.
+    eassumption.
+  - simpl in H. monadInv H. reflexivity.
+Qed.
+
+Theorem transf_initial_states:
+  forall s1, initial_state prog s1 ->
+  exists s2, initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+  intros. inv H.
+  exploit function_ptr_preserved; eauto. intros (tf & FIND &  TRANSF).
+  exists (Callstate nil tf nil m0).
+  split.
+  - econstructor; eauto.
+    + intros; apply (Genv.init_mem_match TRANSL); assumption.
+    + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto.
+      symmetry. eapply match_program_main. eauto.
+    + destruct f.
+      * monadInv TRANSF. rewrite <- H3.
+        destruct (transf_function f) as [res H]; simpl in * |- *; auto.
+        destruct (H _ EQ).
+        intuition subst; auto.
+        symmetry; eapply match_function_preserves. eassumption.
+      * monadInv TRANSF. assumption.
+  - constructor; eauto.
+    + constructor.
+    + apply transf_fundef_correct; auto.
+(*     + eapply all_fundef_liveness_ok; eauto. *)
+Qed.
+
+Theorem transf_final_states s1 s2 r:
+  final_state s1 r -> match_states s1 s2 -> final_state s2 r.
+Proof.
+  unfold final_state.
+  intros H; inv H.
+  intros H; inv H; simpl in * |- *; try congruence.
+  inv H1.
+  destruct st; simpl in * |- *; try congruence.
+  inv STACKS. constructor.
+Qed.
+
+
+Let ge := Genv.globalenv (RTLpath.transf_program prog).
+Let tge := Genv.globalenv (RTLpath.transf_program tprog).
+
+Lemma senv_sym x y: Senv.equiv x y -> Senv.equiv y x.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_transitivity x y z: Senv.equiv x y -> Senv.equiv y z -> Senv.equiv x z.
+Proof.
+  unfold Senv.equiv. intuition congruence.
+Qed.
+
+Lemma senv_preserved_RTL:
+  Senv.equiv ge tge.
+Proof.
+  eapply senv_transitivity. { eapply senv_sym; eapply RTLpath.senv_preserved. }
+  eapply senv_transitivity. { eapply senv_preserved. }
+  eapply RTLpath.senv_preserved.
+Qed.
+
+Lemma symbols_preserved_RTL s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+  unfold tge, ge. erewrite RTLpath.symbols_preserved; eauto.
+  rewrite symbols_preserved.
+  erewrite RTLpath.symbols_preserved; eauto.
+Qed.
+
+Program Definition mkctx sp rs0 m0 {f1: RTLpath.function} (hyp: liveness_ok_function f1)
+   :  simu_proof_context f1
+   := {| the_ge1:= ge; the_ge2 := tge; the_sp:=sp; the_rs0:=rs0; the_m0:=m0 |}.
+Obligation 2.
+  erewrite symbols_preserved_RTL. eauto.
+Qed.
+
+Lemma s_find_function_fundef f sp svos rs0 m0 fd
+  (LIVE: liveness_ok_function f):
+  sfind_function pge ge sp svos rs0 m0 = Some fd ->
+  liveness_ok_fundef fd.
+Proof.
+  unfold sfind_function. destruct svos; simpl.
+  + destruct (seval_sval _ _ _ _); try congruence.
+    eapply find_funct_liveness_ok; eauto.
+  + destruct (Genv.find_symbol _ _); try congruence.
+    intros. eapply all_fundef_liveness_ok; eauto.
+Qed.
+Local Hint Resolve s_find_function_fundef: core.
+
+Lemma s_find_function_preserved f sp svos1 svos2 rs0 m0 fd
+  (LIVE: liveness_ok_function f):
+  (svident_simu f (mkctx sp rs0 m0 LIVE) svos1 svos2) ->
+  sfind_function pge ge sp svos1 rs0 m0 = Some fd ->
+  exists fd', sfind_function tpge tge sp svos2 rs0 m0 = Some fd'
+              /\ transf_fundef fd = OK fd'.
+Proof.
+  Local Hint Resolve symbols_preserved_RTL: core.
+  unfold sfind_function. intros [sv1 sv2 SIMU|]; simpl in *.
+  + rewrite !(seval_preserved ge tge) in *; eauto.
+    destruct (seval_sval _ _ _ _); try congruence.
+    erewrite <- SIMU; try congruence. clear SIMU.
+    intros; exploit functions_preserved; eauto.
+    intros (fd' & cunit & (X1 & X2 & X3)). eexists.
+    repeat split; eauto.
+  + subst. rewrite symbols_preserved. destruct (Genv.find_symbol _ _); try congruence.
+    intros; exploit function_ptr_preserved; eauto.
+Qed.
+
+Lemma sistate_simu f dupmap outframe sp st st' rs m is 
+  (LIVE: liveness_ok_function f):
+  ssem_internal ge sp st rs m is ->
+  sistate_simu dupmap f outframe st st' (mkctx sp rs m LIVE)->
+  exists is',
+    ssem_internal tge sp st' rs m is' /\ istate_simu f dupmap outframe is is'.
+Proof.
+  intros SEM X; eapply X; eauto.
+Qed.
+
+Lemma seval_builtin_sval_preserved sp rs m:
+  forall bs, seval_builtin_sval ge sp bs rs m = seval_builtin_sval tge sp bs rs m.
+Proof.
+  induction bs.
+  all: try (simpl; try reflexivity; erewrite seval_preserved by eapply symbols_preserved_RTL; reflexivity).
+  all: simpl; rewrite IHbs1; rewrite IHbs2; reflexivity.
+Qed.
+
+Lemma seval_list_builtin_sval_preserved sp rs m:
+  forall lbs,
+  seval_list_builtin_sval ge sp lbs rs m = seval_list_builtin_sval tge sp lbs rs m.
+Proof.
+  induction lbs; [simpl; reflexivity|].
+  simpl. rewrite seval_builtin_sval_preserved. rewrite IHlbs.
+  reflexivity.
+Qed.
+
+Lemma ssem_final_simu dm f f' stk stk' sp st st' rs0 m0 sv sv' rs m t s
+  (LIVE: liveness_ok_function f):
+  match_function dm f f' ->
+  list_forall2 match_stackframes stk stk' ->
+  sfval_simu dm f st.(si_pc) st'.(si_pc) (mkctx sp rs0 m0 LIVE) sv sv' ->
+  ssem_final pge ge sp st.(si_pc) stk f rs0 m0 sv rs m t s ->
+  exists s', ssem_final tpge tge sp st'.(si_pc) stk' f' rs0 m0 sv' rs m t s' /\ match_states s s'.
+Proof.
+  Local Hint Resolve transf_fundef_correct: core.
+  intros FUN STK SFV. destruct SFV; intros SEM; inv SEM; simpl in *.
+  - (* Snone *)
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    eapply eqlive_reg_refl.
+  - (* Scall *)
+    exploit s_find_function_preserved; eauto.
+    intros (fd' & FIND & TRANSF).
+    erewrite <- function_sig_preserved; eauto.
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply eq_trans; try eassumption; auto.
+    + simpl. repeat (econstructor; eauto).
+  - (* Stailcall *)
+    exploit s_find_function_preserved; eauto.
+    intros (fd' & FIND & TRANSF).
+    erewrite <- function_sig_preserved; eauto.
+    eexists; split; econstructor; eauto.
+    + erewrite <- preserv_fnstacksize; eauto.
+    + eapply eq_trans; try eassumption; auto.
+  - (* Sbuiltin *)
+    pose senv_preserved_RTL as SRTL.
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply seval_builtin_args_preserved; eauto.
+      eapply seval_list_builtin_sval_correct; eauto.
+      rewrite H0.
+      erewrite seval_list_builtin_sval_preserved; eauto.
+    + eapply external_call_symbols_preserved; eauto.
+    + eapply eqlive_reg_refl.
+  - (* Sjumptable *)
+    exploit ptree_get_list_nth_rev; eauto. intros (p2 & LNZ & DM).
+    exploit initialize_path. { eapply dupmap_path_entry1; eauto. }
+    intros (path & PATH).
+    eexists; split; econstructor; eauto.
+    + eapply eq_trans; try eassumption; auto.
+    + eapply eqlive_reg_refl.
+  - (* Sreturn *)
+    eexists; split; econstructor; eauto.
+    erewrite <- preserv_fnstacksize; eauto.
+  - (* Sreturn bis *)
+    eexists; split; econstructor; eauto.
+    + erewrite <- preserv_fnstacksize; eauto.
+    + rewrite <- H. erewrite <- seval_preserved; eauto.
+Qed.
+
+Lemma siexec_snone_por_correct rs' is t s alive path0 i sp s0 st0 stk stk' f rs0 m0: forall
+  (SSEM2 : ssem_final pge ge sp (si_pc s0) stk f rs0 m0 Snone
+          (irs is) (imem is) t s)
+  (SIEXEC : siexec_inst i st0 = Some s0)
+  (ICHK : inst_checker (fn_path f) alive (pre_output_regs path0) i = Some tt),
+  (liveness_ok_function f) ->
+  list_forall2 match_stackframes stk stk' ->
+  eqlive_reg (fun r : Regset.elt => Regset.In r (pre_output_regs path0)) (irs is) rs' ->
+  exists s' : state,
+    ssem_final pge ge sp (si_pc s0) stk f rs0 m0 Snone rs' (imem is) t s' /\
+    eqlive_states s s'.
+Proof.
+  Local Hint Resolve eqlive_stacks_refl: core.
+  intros ? ? ? LIVE STK EQLIVE.
+  inversion SSEM2; subst; clear SSEM2.
+  eexists; split.
+  * econstructor.
+  * generalize ICHK.
+    unfold inst_checker. destruct i; simpl in *;
+    unfold exit_checker; try discriminate.
+    all:
+      try destruct (list_mem _ _); simpl;
+      try (destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence; fail).
+    4,5:
+      destruct (Regset.mem _ _); destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+    1,2,3,4: assert (NPC: n=(si_pc s0)).
+    all: try (inv SIEXEC; simpl; auto; fail).
+    1,2,3,4:
+      try (destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence);
+      simpl; inversion_SOME p;
+      destruct (Regset.subset (input_regs p) (pre_output_regs path0)) eqn:SUB_PATH; try congruence;
+      intros NPATH _; econstructor; eauto;
+      try (instantiate (1:=p); rewrite <- NPC; auto; fail).
+    1,2,3,4:
+      eapply eqlive_reg_monotonic; eauto; simpl;
+      intros; apply Regset.subset_2 in SUB_PATH;
+      unfold Regset.Subset in SUB_PATH;
+      apply SUB_PATH in H; auto.
+    assert (NPC: n0=(si_pc s0)). { inv SIEXEC; simpl; auto. }
+    inversion_SOME p.
+    2: { destruct (Regset.subset _ _) eqn:?; try congruence. }
+    destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+    2: { destruct (Regset.subset (pre_output_regs path0) alive) eqn:?; try congruence. }
+    simpl.
+    destruct (Regset.subset (pre_output_regs path0) alive) eqn:SUB_ALIVE'; try congruence.
+    inversion_SOME p'.
+    destruct (Regset.subset (input_regs p') (pre_output_regs path0)) eqn:SUB_PATH; try congruence.
+    intros NPATH NPATH' _. econstructor; eauto.
+    instantiate (1:=p'). rewrite <- NPC; auto.
+    eapply eqlive_reg_monotonic; eauto; simpl.
+    intros. apply Regset.subset_2 in SUB_PATH.
+    unfold Regset.Subset in SUB_PATH.
+    apply SUB_PATH in H; auto.
+Qed.
+
+Lemma pre_output_regs_correct f pc0 path0 stk stk' sp (st:sstate) rs0 m0 t s is rs':
+  (liveness_ok_function f) ->
+  (fn_path f) ! pc0 = Some path0 ->
+  sexec f pc0 = Some st -> 
+  list_forall2 match_stackframes stk stk' ->
+  ssem_final pge ge sp (si_pc st) stk f rs0 m0 (final st) (irs is) (imem is) t s ->
+  eqlive_reg (fun r : Regset.elt => Regset.In r (pre_output_regs path0)) (irs is) rs' ->
+  exists s', ssem_final pge ge sp (si_pc st) stk f rs0 m0 (final st) rs' (imem is) t s' /\ eqlive_states s s'.
+Proof.
+  Local Hint Resolve eqlive_stacks_refl: core.
+  intros LIVE PATH0 SEXEC STK SSEM2 EQLIVE.
+  (* start decomposing path_checker *)
+  generalize (LIVE pc0 path0 PATH0).
+  unfold path_checker.
+  inversion_SOME res; intros IPCHK.
+  inversion_SOME i; intros INST ICHK.
+  exploit ipath_checker_default_succ; eauto. intros DEFSUCC.
+  (* start decomposing SEXEC *)
+  generalize SEXEC; clear SEXEC.
+  unfold sexec; rewrite PATH0.
+  inversion_SOME st0; intros SEXEC_PATH.
+  exploit siexec_path_default_succ; eauto.
+  simpl. rewrite DEFSUCC.
+  clear DEFSUCC. destruct res as [alive pc1]. simpl in *.
+  try_simplify_someHyps.
+  destruct (siexec_inst i st0) eqn: SIEXEC; try_simplify_someHyps; intros.
+  (* Snone *)
+  eapply siexec_snone_por_correct; eauto.
+  destruct i; try_simplify_someHyps; try congruence;
+  inversion SSEM2; subst; clear SSEM2; simpl in *.
+ + (* Scall *)
+    eexists; split.
+    * econstructor; eauto.
+    * econstructor; eauto.
+      econstructor; eauto.
+      (* wf *)
+      generalize ICHK.
+      unfold inst_checker; simpl in *.
+      destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+      destruct (list_mem _ _); try congruence.
+      destruct (reg_sum_mem _ _); try congruence.
+      intros EXIT.
+      exploit exit_checker_eqlive_ext1; eauto.
+      intros. destruct H as [p [PATH EQLIVE']].
+      econstructor; eauto.
+ + (* Stailcall *)
+    eexists; split.
+    * econstructor; eauto.
+    * econstructor; eauto.
+ + (* Sbuiltin *)
+    eexists; split.
+    * econstructor; eauto.
+    * (* wf *)
+      generalize ICHK.
+      unfold inst_checker; simpl in *.
+      destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+      destruct (list_mem _ _); try congruence.
+      intros EXIT.
+      exploit exit_checker_eqlive_builtin_res; eauto.
+      intros. destruct H as [p [PATH EQLIVE']].
+      econstructor; eauto.
+ + (* Sjumptable *)
+    eexists; split.
+    * econstructor; eauto.
+    * (* wf *)
+      generalize ICHK.
+      unfold inst_checker; simpl in *.
+      destruct (Regset.subset _ _) eqn:SUB_ALIVE; try congruence.
+      destruct (Regset.mem _ _); try congruence.
+      destruct (exit_list_checker _ _ _) eqn:EQL; try congruence.
+      exploit exit_list_checker_eqlive; eauto.
+      intros. destruct H as [p [PATH EQLIVE']].
+      econstructor; eauto.
+ + (* Sreturn *)
+    eexists; split.
+    * econstructor; eauto.
+    * econstructor; eauto.
+Qed.
+
+(* The main theorem on simulation of symbolic states ! *)
+Theorem ssem_sstate_simu dm f f' pc0 path0 stk stk' sp st st' rs m t s:
+  (fn_path f) ! pc0 = Some path0 ->
+  sexec f pc0 = Some st -> 
+  match_function dm f f' ->
+  liveness_ok_function f ->
+  list_forall2 match_stackframes stk stk' ->
+  ssem pge ge sp st stk f rs m t s ->
+  (forall ctx: simu_proof_context f, sstate_simu dm f (pre_output_regs path0) st st' ctx) ->
+  exists s', ssem tpge tge sp st' stk' f' rs m t s' /\ match_states s s'.
+Proof.
+  intros PATH0 SEXEC MFUNC LIVE STACKS SEM SIMU.
+  destruct (SIMU (mkctx sp rs m LIVE)) as (SIMU1 & SIMU2); clear SIMU.
+  destruct SEM as [is CONT SEM|is t s' CONT SEM1 SEM2]; simpl in *.
+  - (* sem_early *)
+    exploit sistate_simu; eauto.
+    unfold istate_simu; rewrite CONT.
+    intros (is' & SEM' & (path & PATH & (CONT' & RS' & M') & PC')).
+    exists (State stk' f' sp (ipc is') (irs is') (imem is')).
+    split.
+    + eapply ssem_early; auto. congruence.
+    + rewrite M'. econstructor; eauto.
+  - (* sem_normal *)
+    exploit sistate_simu; eauto.
+    unfold istate_simu; rewrite CONT.
+    intros (is' & SEM' & (CONT' & RS' & M')).
+    exploit pre_output_regs_correct; eauto.
+    clear SEM2; intros (s0 & SEM2 & EQLIVE).
+    exploit ssem_final_simu; eauto.
+    clear SEM2; intros (s1 & SEM2 & MATCH0).
+    exploit ssem_final_equiv; eauto.
+    clear SEM2; rewrite M'; rewrite CONT' in CONT; intros (s2 & EQ & SEM2).
+    exists s2; split.
+    + eapply ssem_normal; eauto.
+    + eapply eqlive_match_states; eauto.
+      eapply match_states_equiv; eauto.
+Qed.
+
+Lemma exec_path_simulation dupmap path stk stk' f f' sp rs m pc pc' t s:
+  (fn_path f)!pc = Some path ->
+  path_step ge pge path.(psize) stk f sp rs m pc t s ->
+  list_forall2 match_stackframes stk stk' ->
+  dupmap ! pc' = Some pc ->
+  match_function dupmap f f' ->
+  liveness_ok_function f ->
+  exists path' s', (fn_path f')!pc' = Some path' /\ path_step tge tpge path'.(psize) stk' f' sp rs m pc' t s' /\ match_states s s'.
+Proof.
+  intros PATH STEP STACKS DUPPC MATCHF LIVE.
+  exploit initialize_path. { eapply dupmap_path_entry2; eauto. }
+  intros (path' & PATH').
+  exists path'.
+  exploit (sexec_correct f pc pge ge sp path stk rs m t s); eauto.
+  intros (st & SYMB & SEM).
+  exploit dupmap_correct; eauto.
+  intros (path0 & st' & PATH0 & SYMB' & SIMU).
+  rewrite PATH0 in PATH; inversion PATH; subst.
+  exploit ssem_sstate_simu; eauto.
+  intros (s0 & SEM0 & MATCH). 
+  exploit (sexec_exact f'); eauto.
+  intros (s' & STEP' & EQ).
+  exists s'; intuition.
+  eapply match_states_equiv; eauto.
+Qed.
+
+Lemma step_simulation s1 t s1' s2:
+  step ge pge s1 t s1' ->
+  match_states s1 s2 ->
+  exists s2',
+     step tge tpge s2 t s2'
+  /\ match_states s1' s2'.
+Proof.
+  Local Hint Resolve eqlive_stacks_refl transf_fundef_correct: core.
+  destruct 1 as [path stack f sp rs m pc t s PATH STEP | | | ]; intros MS; inv MS.
+(* exec_path *)
+  - try_simplify_someHyps. intros.
+    exploit path_step_eqlive; eauto. (* { intros. eapply all_fundef_liveness_ok; eauto. } *)
+    clear STEP EQUIV rs; intros (s2 & STEP & EQLIVE).
+    exploit exec_path_simulation; eauto.
+    clear STEP; intros (path' & s' & PATH' & STEP' & MATCH').
+    exists s'; split.
+    + eapply exec_path; eauto.
+    + eapply eqlive_match_states; eauto.
+(* exec_function_internal *)
+  - inv LIVE.
+    exploit initialize_path. { eapply (fn_entry_point_wf f). }
+    destruct 1 as (path & PATH).
+    inversion TRANSF as [f0 xf tf MATCHF|]; subst. eexists. split.
+    + eapply exec_function_internal. erewrite <- preserv_fnstacksize; eauto.
+    + erewrite preserv_fnparams; eauto.
+      econstructor; eauto. 
+      { apply preserv_entrypoint; auto. }
+      { apply eqlive_reg_refl. }
+(* exec_function_external *)
+  - inversion TRANSF as [|]; subst. eexists. split.
+    + econstructor. eapply external_call_symbols_preserved; eauto. apply senv_preserved_RTL.
+    + constructor. assumption.
+(* exec_return *)
+  - inv STACKS. destruct b1 as [res' f' sp' pc' rs']. eexists. split.
+    + constructor.
+    + inv H1. econstructor; eauto.
+Qed.
+
+Theorem transf_program_correct:
+  forward_simulation (semantics prog) (semantics tprog).
+Proof.
+  eapply forward_simulation_step with match_states.
+  - eapply senv_preserved.
+  - eapply transf_initial_states.
+  - intros; eapply transf_final_states; eauto.
+  - intros; eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/scheduling/RTLpathWFcheck.v b/scheduling/RTLpathWFcheck.v
new file mode 100644
index 00000000..63b914ec
--- /dev/null
+++ b/scheduling/RTLpathWFcheck.v
@@ -0,0 +1,187 @@
+Require Import Coqlib.
+Require Import Maps.
+Require Import Lattice.
+Require Import AST.
+Require Import Op.
+Require Import Registers.
+Require Import Globalenvs Smallstep RTL RTLpath.
+Require Import Bool Errors.
+Require Import Program.
+Require RTLpathLivegen.
+
+Local Open Scope lazy_bool_scope.
+
+Local Open Scope option_monad_scope.
+
+Definition exit_checker {A} (pm: path_map) (pc: node) (v:A): option A :=
+   SOME path <- pm!pc IN
+   Some v.
+
+Lemma exit_checker_path_entry A (pm: path_map) (pc: node) (v:A) res:
+  exit_checker pm pc v = Some res -> path_entry pm pc.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; simpl; congruence.
+Qed.
+
+Lemma exit_checker_res A (pm: path_map) (pc: node) (v:A) res:
+  exit_checker pm pc v = Some res -> v=res.
+Proof.
+  unfold exit_checker, path_entry.
+  inversion_SOME path; try_simplify_someHyps.
+Qed.
+
+Definition iinst_checker (pm: path_map) (i: instruction): option (node)  :=
+  match i with
+  | Inop pc' | Iop _ _ _ pc' | Iload _ _ _ _ _ pc'
+  | Istore _ _ _ _ pc' => Some (pc')
+  | Icond cond args ifso ifnot _ =>
+      exit_checker pm ifso ifnot
+  | _ => None
+  end.
+
+Local Hint Resolve exit_checker_path_entry: core.
+
+Lemma iinst_checker_path_entry (pm: path_map) (i: instruction) res pc:
+  iinst_checker pm i = Some res -> 
+  early_exit i = Some pc -> path_entry pm pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst.
+Qed.
+
+Lemma iinst_checker_default_succ (pm: path_map) (i: instruction) res pc:
+  iinst_checker pm i = Some res -> 
+  pc = res ->
+  default_succ i = Some pc.
+Proof.
+  destruct i; simpl; try_simplify_someHyps; subst;
+  repeat (inversion_ASSERT); try_simplify_someHyps.
+  intros; exploit exit_checker_res; eauto.
+  intros; subst. simpl; auto.
+Qed.
+
+Fixpoint ipath_checker (ps:nat) (f: RTL.function) (pm: path_map) (pc:node): option (node) :=
+  match ps with
+  | O => Some (pc)
+  | S p =>
+    SOME i <- f.(fn_code)!pc IN
+    SOME res <- iinst_checker pm i IN
+    ipath_checker p f pm res
+  end.
+
+Lemma ipath_checker_wellformed f pm ps: forall pc res,
+   ipath_checker ps f pm pc = Some res -> 
+   wellformed_path f.(fn_code) pm 0 res ->
+   wellformed_path f.(fn_code) pm ps pc.
+Proof.
+  induction ps; simpl; try_simplify_someHyps.
+  inversion_SOME i; inversion_SOME res'.
+  intros. eapply wf_internal_node; eauto.
+  * eapply iinst_checker_default_succ; eauto.
+  * intros; eapply iinst_checker_path_entry; eauto.
+Qed.
+
+Fixpoint exit_list_checker (pm: path_map) (l: list node): bool :=
+   match l with
+   | nil => true
+   | pc::l' => exit_checker pm pc tt &&& exit_list_checker pm l'
+   end.
+
+Lemma exit_list_checker_correct pm l pc:
+  exit_list_checker pm l = true -> List.In pc l -> exit_checker pm pc tt = Some tt.
+Proof.
+  intros EXIT PC; induction l; intuition.
+  simpl in * |-. rewrite RTLpathLivegen.lazy_and_Some_tt_true in EXIT.
+  firstorder (subst; eauto).
+Qed.
+
+Local Hint Resolve exit_list_checker_correct: core.
+
+Definition inst_checker (pm: path_map) (i: instruction): option unit :=
+   match i with
+   | Icall sig ros args res pc' =>
+      exit_checker pm pc' tt
+   | Itailcall sig ros args =>
+      Some tt
+   | Ibuiltin ef args res pc' =>
+      exit_checker pm pc' tt
+   | Ijumptable arg tbl =>
+      ASSERT exit_list_checker pm tbl IN
+      Some tt
+   | Ireturn optarg =>
+      Some tt
+   | _ => 
+      SOME res <- iinst_checker pm i IN
+      exit_checker pm res tt
+   end.
+
+Lemma inst_checker_wellformed (c:code) pc (pm: path_map) (i: instruction):
+  inst_checker pm i = Some tt ->
+  c!pc = Some i -> wellformed_path c pm 0 pc.
+Proof.
+  intros CHECK PC. eapply wf_last_node; eauto.
+  clear c pc PC. intros pc PC.
+  destruct i; simpl in * |- *; intuition (subst; eauto);
+  try (generalize CHECK; clear CHECK; try (inversion_SOME path); repeat inversion_ASSERT; try_simplify_someHyps).
+  intros X; exploit exit_checker_res; eauto.
+  clear X. intros; subst; eauto.
+Qed.
+
+Definition path_checker (f: RTL.function) pm (pc: node) (path:path_info): option unit :=
+   SOME res <- ipath_checker (path.(psize)) f pm pc  IN
+   SOME i <- f.(fn_code)!res IN
+   inst_checker pm i.
+
+Lemma path_checker_wellformed f pm pc path:
+   path_checker f pm pc path = Some tt -> wellformed_path (f.(fn_code)) pm (path.(psize)) pc.
+Proof.
+  unfold path_checker.
+  inversion_SOME res.
+  inversion_SOME i.
+  intros; eapply ipath_checker_wellformed; eauto.
+  eapply inst_checker_wellformed; eauto.
+Qed.
+
+Fixpoint list_path_checker f pm (l:list (node*path_info)): bool :=
+  match l with
+  | nil => true
+  | (pc, path)::l' =>
+      path_checker f pm pc path &&& list_path_checker f pm l'
+  end.
+
+Lemma list_path_checker_correct f pm l: 
+  list_path_checker f pm l = true -> forall e, List.In e l -> path_checker f pm (fst e) (snd e) = Some tt.
+Proof.
+  intros CHECKER e H; induction l as [|(pc & path) l]; intuition.
+  simpl in * |- *. rewrite RTLpathLivegen.lazy_and_Some_tt_true in CHECKER. intuition (subst; auto).
+Qed.
+
+Definition function_checker (f: RTL.function) (pm: path_map): bool := 
+  pm!(f.(fn_entrypoint)) &&& list_path_checker f pm (PTree.elements pm).
+
+Lemma function_checker_correct f pm pc path: 
+  function_checker f pm = true -> 
+  pm!pc = Some path -> 
+  path_checker f pm pc path = Some tt.
+Proof.
+  unfold function_checker; rewrite RTLpathLivegen.lazy_and_Some_true.
+  intros (ENTRY & PATH) PC.
+  exploit list_path_checker_correct; eauto.
+  - eapply PTree.elements_correct; eauto.
+  - simpl; auto.
+Qed.
+
+Lemma function_checker_wellformed_path_map f pm:
+  function_checker f pm = true -> wellformed_path_map f.(fn_code) pm.
+Proof.
+  unfold wellformed_path_map.
+  intros; eapply path_checker_wellformed; eauto.
+  intros; eapply function_checker_correct; eauto.
+Qed.
+
+Lemma function_checker_path_entry f pm:
+  function_checker f pm = true -> path_entry pm (f.(fn_entrypoint)).
+Proof.
+  unfold function_checker; rewrite RTLpathLivegen.lazy_and_Some_true;
+  unfold path_entry. firstorder congruence.
+Qed.
diff --git a/scheduling/RTLpathproof.v b/scheduling/RTLpathproof.v
new file mode 100644
index 00000000..20eded97
--- /dev/null
+++ b/scheduling/RTLpathproof.v
@@ -0,0 +1,50 @@
+Require Import Coqlib Maps.
+Require Import AST Integers Values Events Memory Globalenvs Smallstep.
+Require Import Op Registers.
+Require Import RTL Linking.
+Require Import RTLpath.
+
+Definition match_prog (p: RTLpath.program) (tp: RTL.program) :=
+  match_program (fun ctx f tf => tf = fundef_RTL f) eq p tp.
+
+Lemma transf_program_match:
+  forall p, match_prog p (transf_program p).
+Proof.
+  intros. eapply match_transform_program; eauto.
+Qed.
+
+Lemma match_program_transf:
+  forall p tp, match_prog p tp -> transf_program p = tp.
+Proof.
+  intros p tp H. inversion_clear H. inv H1.
+  destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. simpl in *.
+  subst. unfold transf_program. unfold transform_program. simpl.
+  apply program_equals; simpl; auto.
+  induction H0; simpl; auto.
+  rewrite IHlist_forall2. apply cons_extract.
+  destruct a1 as [ida gda]. destruct b1 as [idb gdb].
+  simpl in *.
+  inv H. inv H2.
+  - simpl in *. subst. auto.
+  - simpl in *. subst. inv H. auto.
+Qed.
+
+
+Section PRESERVATION.
+
+Variable prog: RTLpath.program.
+Variable tprog: RTL.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+  forward_simulation (RTLpath.semantics prog) (RTL.semantics tprog).
+Proof.
+  pose proof (match_program_transf prog tprog TRANSF) as TR. subst.
+  eapply RTLpath_correct.
+Qed.
+
+End PRESERVATION.
+
+
diff --git a/kvx/abstractbb/AbstractBasicBlocksDef.v b/scheduling/abstractbb/AbstractBasicBlocksDef.v
index 948ed660..34d72de1 100644
--- a/kvx/abstractbb/AbstractBasicBlocksDef.v
+++ b/scheduling/abstractbb/AbstractBasicBlocksDef.v
@@ -170,7 +170,7 @@ Lemma exp_equiv e old1 old2:
    (exp_eval e m1 old1) = (exp_eval e m2 old2).
 Proof.
   intros H1.
-  induction e using exp_mut with (P0:=fun l =>  forall m1 m2, (forall x, m1 x = m2 x) -> list_exp_eval l m1 old1 = list_exp_eval l m2 old2); simpl; try congruence; auto.
+  induction e using exp_mut with (P0:=fun l =>  forall m1 m2, (forall x, m1 x = m2 x) -> list_exp_eval l m1 old1 = list_exp_eval l m2 old2); cbn; try congruence; auto.
   - intros; erewrite IHe; eauto.
   - intros; erewrite IHe, IHe0; auto.
 Qed.
@@ -183,38 +183,38 @@ Lemma inst_equiv_refl i old1 old2:
   forall m1 m2, (forall x, m1 x = m2 x) -> 
   res_eq (inst_run i m1 old1) (inst_run i m2 old2).
 Proof.
-  intro H; induction i as [ | [x e]]; simpl; eauto.
+  intro H; induction i as [ | [x e]]; cbn; eauto.
   intros m1 m2 H1. erewrite exp_equiv; eauto.
-  destruct (exp_eval e m2 old2); simpl; auto.
+  destruct (exp_eval e m2 old2); cbn; auto.
   apply IHi.
   unfold assign; intro y. destruct (R.eq_dec x y); auto. 
 Qed.
 
 Lemma bblock_equiv_refl p: forall m1 m2, (forall x, m1 x = m2 x) -> res_eq (run p m1) (run p m2).
 Proof.
-  induction p as [ | i p']; simpl; eauto.
+  induction p as [ | i p']; cbn; eauto.
   intros m1 m2 H; lapply (inst_equiv_refl i m1 m2); auto.
   intros X; lapply (X m1 m2); auto; clear X.
-  destruct (inst_run i m1 m1); simpl.
-  - intros [m3 [H1 H2]]; rewrite H1; simpl; auto.
-  - intros H1; rewrite H1; simpl; auto.
+  destruct (inst_run i m1 m1); cbn.
+  - intros [m3 [H1 H2]]; rewrite H1; cbn; auto.
+  - intros H1; rewrite H1; cbn; auto.
 Qed.
 
 Lemma res_eq_sym om1 om2: res_eq om1 om2 -> res_eq om2 om1.
 Proof.
-  destruct om1; simpl.
-  - intros [m2 [H1 H2]]; subst; simpl. eauto.
-  - intros; subst; simpl; eauto.
+  destruct om1; cbn.
+  - intros [m2 [H1 H2]]; subst; cbn. eauto.
+  - intros; subst; cbn; eauto.
 Qed.
 
 Lemma res_eq_trans (om1 om2 om3: option mem): 
   (res_eq om1 om2) -> (res_eq om2 om3) -> (res_eq om1 om3).
 Proof.
-  destruct om1; simpl.
-  - intros [m2 [H1 H2]]; subst; simpl.
-    intros [m3 [H3 H4]]; subst; simpl.
+  destruct om1; cbn.
+  - intros [m2 [H1 H2]]; subst; cbn.
+    intros [m3 [H3 H4]]; subst; cbn.
     eapply ex_intro; intuition eauto. rewrite H2; auto.
-  - intro; subst; simpl; auto.
+  - intro; subst; cbn; auto.
 Qed.
 
 Lemma bblock_simu_alt p1 p2: bblock_simu p1 p2 <-> (forall m1 m2,  (forall x, m1 x = m2 x) -> (run p1 m1)<>None -> res_eq (run p1 m1) (run p2 m2)).
@@ -232,8 +232,8 @@ Lemma run_app p1: forall m1 p2,
      | None => None
      end.
 Proof.
-   induction p1; simpl; try congruence.
-   intros; destruct (inst_run _ _ _); simpl; auto.
+   induction p1; cbn; try congruence.
+   intros; destruct (inst_run _ _ _); cbn; auto.
 Qed.
 
 Lemma run_app_None p1 m1 p2:
@@ -273,23 +273,26 @@ with list_term :=
 Scheme term_mut := Induction for term Sort Prop
 with list_term_mut := Induction for list_term Sort Prop.
 
+Declare Scope pattern_scope.
+Declare Scope term_scope.
 Bind Scope pattern_scope with term.
 Delimit Scope term_scope with term.
 Delimit Scope pattern_scope with pattern.
 
+Local Open Scope pattern_scope.
+
 Notation "[ ]" := (LTnil _) (format "[ ]"): pattern_scope.
-Notation "[ x ]" := (LTcons x [] _): pattern_scope.
+Notation "[ x ]" := (LTcons x [ ] _): pattern_scope.
 Notation "[ x ; y ; .. ; z ]" := (LTcons x (LTcons y .. (LTcons z (LTnil _) _) .. _) _): pattern_scope.
 Notation "o @ l" := (App o l _) (at level 50, no associativity): pattern_scope.
 
 Import HConsingDefs.
 
 Notation "[ ]" := (LTnil unknown_hid) (format "[ ]"): term_scope.
-Notation "[ x ]" := (LTcons x [] unknown_hid): term_scope.
+Notation "[ x ]" := (LTcons x []%term unknown_hid): term_scope.
 Notation "[ x ; y ; .. ; z ]" := (LTcons x (LTcons y .. (LTcons z (LTnil unknown_hid) unknown_hid) .. unknown_hid) unknown_hid): term_scope.
 Notation "o @ l" := (App o l unknown_hid) (at level 50, no associativity): term_scope.
 
-Local Open Scope pattern_scope.
 
 Fixpoint term_eval (ge: genv) (t: term) (m: mem): option value :=
   match t with
@@ -334,7 +337,7 @@ Fixpoint allvalid ge (l: list term) m : Prop :=
 Lemma allvalid_extensionality ge (l: list term) m:
   allvalid ge l m <-> (forall t, List.In t l -> term_eval ge t m <> None).
 Proof.
-  induction l as [|t l]; simpl; try (tauto).
+  induction l as [|t l]; cbn; try (tauto).
   destruct l.
   - intuition (congruence || eauto).
   - rewrite IHl; clear IHl. intuition (congruence || eauto).
@@ -365,16 +368,16 @@ Qed.
 Lemma intro_fail_correct (l: list term) (t: term) :
    (forall ge m, term_eval ge t m <> None <-> allvalid ge l m) -> match_pt t (intro_fail l t).
 Proof.
-  unfold match_pt; simpl; intros; intuition congruence.
+  unfold match_pt; cbn; intros; intuition congruence.
 Qed.
 Hint Resolve intro_fail_correct: wlp.
 
 (** The default reduction of a term to a pseudo-term *) 
-Definition identity_fail (t: term):= intro_fail [t] t.
+Definition identity_fail (t: term):= intro_fail (t::nil) t.
 
 Lemma identity_fail_correct (t: term): match_pt t (identity_fail t).
 Proof.
-  eapply intro_fail_correct; simpl; tauto.
+  eapply intro_fail_correct; cbn; tauto.
 Qed.
 Global Opaque identity_fail.
 Hint Resolve identity_fail_correct: wlp.
@@ -382,19 +385,19 @@ Hint Resolve identity_fail_correct: wlp.
 (** The reduction for constant term *) 
 Definition nofail (is_constant: op -> bool) (t: term):=
     match t with
-    | Input x _ => intro_fail ([])%list t
-    | o @ [] => if is_constant o then (intro_fail ([])%list t) else (identity_fail t)
+    | Input x _ => intro_fail nil t
+    | o @ [] => if is_constant o then (intro_fail nil t) else (identity_fail t)
     | _ => identity_fail t
     end.
 
 Lemma nofail_correct (is_constant: op -> bool) t:
  (forall ge o, is_constant o = true -> op_eval ge o nil <> None) -> match_pt t (nofail is_constant t).
 Proof.
-  destruct t; simpl.
-  + intros; eapply intro_fail_correct; simpl; intuition congruence.
-  + intros; destruct l; simpl; auto with wlp.
-    destruct (is_constant o) eqn:Heqo; simpl; intuition eauto with wlp.
-     eapply intro_fail_correct; simpl; intuition eauto with wlp.
+  destruct t; cbn.
+  + intros; eapply intro_fail_correct; cbn; intuition congruence.
+  + intros; destruct l; cbn; auto with wlp.
+    destruct (is_constant o) eqn:Heqo; cbn; intuition eauto with wlp.
+     eapply intro_fail_correct; cbn; intuition eauto with wlp.
 Qed.
 Global Opaque nofail.
 Hint Resolve nofail_correct: wlp.
@@ -425,7 +428,7 @@ Lemma app_fail_allvalid_correct l pt t1 t2: forall
   (V2: forall (ge : genv) (m : mem), term_eval ge t2 m <> None <-> allvalid ge (mayfail {| mayfail := t1 :: l; effect := t1 |}) m)
   (ge : genv) (m : mem), term_eval ge t2 m <> None <-> allvalid ge (mayfail (app_fail l pt)) m.
 Proof.
-  intros; generalize (V1 ge m) (V2 ge m); rewrite !allvalid_extensionality; simpl. clear V1 V2.
+  intros; generalize (V1 ge m) (V2 ge m); rewrite !allvalid_extensionality; cbn. clear V1 V2.
   intuition subst.
   + rewrite rev_append_rev, in_app_iff, <- in_rev in H3. destruct H3; eauto.
   + eapply H3; eauto.
diff --git a/kvx/abstractbb/ImpSimuTest.v b/scheduling/abstractbb/ImpSimuTest.v
index 89260ddb..6b64e1d8 100644
--- a/kvx/abstractbb/ImpSimuTest.v
+++ b/scheduling/abstractbb/ImpSimuTest.v
@@ -23,9 +23,8 @@ Require Export Impure.ImpHCons. (**r Import the Impure library. See https://gith
 Export Notations.
 Import HConsing.
 
-
+Require Import Coq.Bool.Bool.
 Require Export SeqSimuTheory.
-
 Require Import PArith.
 
 
@@ -35,6 +34,8 @@ Import ListNotations.
 Local Open Scope list_scope.
 
 
+Definition FULL_DEBUG_DUMP : bool := false. (* print debug traces, even if the verifier succeeds. *)
+
 (** * Interface of (impure) equality tests for operators *)
 Module Type ImpParam.
 
@@ -160,13 +161,13 @@ Definition list_term_set_hid (l: list_term) (hid: hashcode): list_term :=
 Lemma term_eval_set_hid ge t hid m:
   term_eval ge (term_set_hid t hid) m = term_eval ge t m.
 Proof.
-  destruct t; simpl; auto.
+  destruct t; cbn; auto.
 Qed.
 
 Lemma list_term_eval_set_hid ge l hid m:
   list_term_eval ge (list_term_set_hid l hid) m = list_term_eval ge l m.
 Proof.
-  destruct l; simpl; auto.
+  destruct l; cbn; auto.
 Qed.
 
 (* Local nickname *)
@@ -315,7 +316,7 @@ Proof.
     destruct (DM0 m) as (PRE & VALID0); clear DM0.
     assert (VALID1: allvalid ge hd.(hpre) m -> pre d ge m). { unfold smem_valid in PRE; tauto. }
     assert (VALID2: allvalid ge hd.(hpre) m -> forall x : Dict.R.t, ST.term_eval ge (d x) m <> None). { unfold smem_valid in PRE; tauto. }
-    rewrite !allvalid_extensionality in * |- *; simpl.
+    rewrite !allvalid_extensionality in * |- *; cbn.
     intuition (subst; eauto).
     + eapply smem_valid_set_proof; eauto.
       erewrite <- EQT; eauto.
@@ -323,11 +324,11 @@ Proof.
       intros X1; exploit smem_valid_set_decompose_2; eauto.
       rewrite <- EQT; eauto.
     + exploit smem_valid_set_decompose_1; eauto.
-  - clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; simpl.
+  - clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; cbn.
     Local Hint Resolve smem_valid_set_decompose_1: core.
     intros; case (R.eq_dec x x0).
-    + intros; subst; rewrite !Dict.set_spec_eq; simpl; eauto.
-    + intros; rewrite !Dict.set_spec_diff; simpl; eauto.
+    + intros; subst; rewrite !Dict.set_spec_eq; cbn; eauto.
+    + intros; rewrite !Dict.set_spec_diff; cbn; eauto.
 Qed.
 Local Hint Resolve naive_set_correct: core.
 
@@ -404,10 +405,10 @@ Lemma hterm_append_correct l: forall lh,
   WHEN hterm_append l lh ~> lh' THEN (forall ge m, allvalid ge lh' m <-> (allvalid ge l m /\ allvalid ge lh m)).
 Proof.
   Local Hint Resolve eq_trans: localhint.
-  induction l as [|t l']; simpl; wlp_xsimplify ltac:(eauto with wlp).
+  induction l as [|t l']; cbn; wlp_xsimplify ltac:(eauto with wlp).
   - intros; rewrite! allvalid_extensionality; intuition eauto.
   - intros REC ge m; rewrite REC; clear IHl' REC. rewrite !allvalid_extensionality.
-    simpl; intuition (subst; eauto with wlp localhint).
+    cbn; intuition (subst; eauto with wlp localhint).
 Qed.
 (*Local Hint Resolve hterm_append_correct: wlp.*)
 Global Opaque hterm_append.
@@ -431,8 +432,8 @@ Lemma smart_set_correct hd x ht:
     forall ge m y, hsmem_post_eval ge d y m = hsmem_post_eval ge (Dict.set hd x ht) y m.
 Proof.
   destruct ht; wlp_simplify.
-  unfold hsmem_post_eval; simpl. case (R.eq_dec x0 y).
-  - intros; subst. rewrite Dict.set_spec_eq, Dict.rem_spec_eq. simpl; congruence.
+  unfold hsmem_post_eval; cbn. case (R.eq_dec x0 y).
+  - intros; subst. rewrite Dict.set_spec_eq, Dict.rem_spec_eq. cbn; congruence.
   - intros; rewrite Dict.set_spec_diff, Dict.rem_spec_diff; auto.
 Qed.
 (*Local Hint Resolve smart_set_correct: wlp.*)
@@ -456,17 +457,17 @@ Proof.
   generalize (hterm_append_correct _ _ _ Hexta0); intro APPEND.
   generalize (hterm_lift_correct _ _ Hexta1); intro LIFT.
   generalize (smart_set_correct _ _ _ _ Hexta3); intro SMART.
-  eapply equiv_hsmem_models; eauto; unfold equiv_hsmem; simpl. 
+  eapply equiv_hsmem_models; eauto; unfold equiv_hsmem; cbn. 
   destruct H as (VALID & EFFECT); split.
   - intros; rewrite APPEND, <- VALID.
-    rewrite !allvalid_extensionality in * |- *; simpl; intuition (subst; eauto).
+    rewrite !allvalid_extensionality in * |- *; cbn; intuition (subst; eauto).
   - intros m x0 ALLVALID; rewrite SMART.
     destruct (term_eval ge ht m) eqn: Hht.
     * case (R.eq_dec x x0).
-      + intros; subst. unfold hsmem_post_eval; simpl. rewrite !Dict.set_spec_eq.
+      + intros; subst. unfold hsmem_post_eval; cbn. rewrite !Dict.set_spec_eq.
         erewrite LIFT, EFFECT; eauto.
-      + intros; unfold hsmem_post_eval; simpl. rewrite !Dict.set_spec_diff; auto.
-    * rewrite allvalid_extensionality in ALLVALID; destruct (ALLVALID ht); simpl; auto.
+      + intros; unfold hsmem_post_eval; cbn. rewrite !Dict.set_spec_diff; auto.
+    * rewrite allvalid_extensionality in ALLVALID; destruct (ALLVALID ht); cbn; auto.
 Qed.
 Local Hint Resolve hsmem_set_correct: wlp.
 Global Opaque hsmem_set.
@@ -481,7 +482,7 @@ Proof.
   intro H.
   induction e using exp_mut with (P0:=fun le =>  forall d hd,
      smem_model ge d hd -> forall m, smem_valid ge d m -> smem_valid ge od m -> list_term_eval ge (list_exp_term le hd hod) m = list_term_eval ge (list_exp_term le d od) m); 
-     unfold smem_model in * |- * ; simpl; intuition eauto.
+     unfold smem_model in * |- * ; cbn; intuition eauto.
   - erewrite IHe; eauto.
   - erewrite IHe0, IHe; eauto.
 Qed.
@@ -516,10 +517,10 @@ Lemma exp_hterm_correct_x ge e hod od:
    induction e using exp_mut with (P0:=fun le =>  forall d hd,
      smem_model ge d hd ->
      WHEN list_exp_hterm le hd hod ~> lt THEN forall m, smem_valid ge d m -> smem_valid ge od m -> list_term_eval ge lt m = ST.list_term_eval ge (list_exp_term le d od) m); 
-     unfold smem_model, hsmem_post_eval in * |- * ; simpl; wlp_simplify.
+     unfold smem_model, hsmem_post_eval in * |- * ; cbn; wlp_simplify.
   - rewrite H1, <- H4; auto.
-  - rewrite H4, <- H0; simpl; auto.
-  - rewrite H5, <- H0, <- H4; simpl; auto.
+  - rewrite H4, <- H0; cbn; auto.
+  - rewrite H5, <- H0, <- H4; cbn; auto.
 Qed.
 Global Opaque exp_hterm.
 
@@ -544,7 +545,7 @@ Lemma hinst_smem_correct i: forall hd hod,
     forall ge od d, smem_model ge od hod -> smem_model ge d hd -> (forall m, smem_valid ge d m -> smem_valid ge od m) -> smem_model ge (inst_smem i d od) hd'.
 Proof.
   Local Hint Resolve smem_valid_set_proof: core.
-  induction i; simpl; wlp_simplify; eauto 15 with wlp.
+  induction i; cbn; wlp_simplify; eauto 15 with wlp.
 Qed.
 Global Opaque hinst_smem.
 Local Hint Resolve hinst_smem_correct: wlp.
@@ -564,7 +565,7 @@ Fixpoint bblock_hsmem_rec (p: bblock) (d: hsmem): ?? hsmem :=
 Lemma bblock_hsmem_rec_correct p: forall hd,
   WHEN bblock_hsmem_rec p hd ~> hd' THEN forall ge d, smem_model ge d hd -> smem_model ge (bblock_smem_rec p d) hd'.
 Proof.
-  induction p; simpl; wlp_simplify.
+  induction p; cbn; wlp_simplify.
 Qed.
 Global Opaque bblock_hsmem_rec.
 Local Hint Resolve bblock_hsmem_rec_correct: wlp.
@@ -573,8 +574,8 @@ Definition hsmem_empty: hsmem := {| hpre:= nil ; hpost := Dict.empty |}.
 
 Lemma hsmem_empty_correct ge: smem_model ge smem_empty hsmem_empty.
 Proof.
-  unfold smem_model, smem_valid, hsmem_post_eval; simpl; intuition try congruence.
-  rewrite !Dict.empty_spec; simpl; auto.
+  unfold smem_model, smem_valid, hsmem_post_eval; cbn; intuition try congruence.
+  rewrite !Dict.empty_spec; cbn; auto.
 Qed.
 
 Definition bblock_hsmem: bblock -> ?? hsmem
@@ -673,8 +674,8 @@ Hypothesis hco_term_correct: forall t, WHEN hco_term.(hC) t ~> t' THEN forall ge
 Variable hco_list: hashConsing list_term.
 Hypothesis hco_list_correct: forall t, WHEN hco_list.(hC) t ~> t' THEN forall ge m, list_term_eval ge (hdata t) m = list_term_eval ge t' m.
 
-Variable print_error_end: hsmem -> hsmem -> ?? unit.
-Variable print_error: pstring -> ?? unit.
+Variable print_end_error: hsmem -> hsmem -> ?? unit.
+Variable print_dump: (option pstring) -> ?? unit.
 
 Variable check_failpreserv: bool.
 Variable dbg_failpreserv: term -> ?? unit. (* info of additional failure of the output bbloc p2 wrt the input bbloc p1 *) 
@@ -687,20 +688,23 @@ Program Definition g_bblock_simu_test (p1 p2: bblock): ?? bool :=
     DO d2 <~ bblock_hsmem hco_term.(hC) hco_list.(hC) no_log_assign log_new_term log_inst2 p2;;
     DO b <~ Dict.eq_test d1 d2 ;;
     if b then (
-      if check_failpreserv then (
+      (if check_failpreserv then
           let hp := mk_hash_params dbg_failpreserv in
           failure_in_failpreserv.(set)(true);;
-          Sets.assert_list_incl hp d2.(hpre) d1.(hpre);;
-          RET true
-      ) else RET false
+          Sets.assert_list_incl hp d2.(hpre) d1.(hpre)
+      else RET tt);;
+      (if FULL_DEBUG_DUMP then
+        print_dump None (* not an error... *)
+      else RET tt);;
+      RET check_failpreserv
     ) else (
-      print_error_end d1 d2 ;;
+      print_end_error d1 d2 ;;
       RET false
     )
   CATCH_FAIL s, _ =>
     DO b <~ failure_in_failpreserv.(get)();;
     if b then RET false 
-         else print_error s;; RET false
+         else print_dump (Some s);; RET false
   ENSURE (fun b => b=true -> forall ge, bblock_simu ge p1 p2));;
   RET (`r).
 Obligation 1.
@@ -722,7 +726,7 @@ Theorem g_bblock_simu_test_correct p1 p2:
   WHEN g_bblock_simu_test p1 p2 ~> b THEN b=true -> forall ge, bblock_simu ge p1 p2.
 Proof.
   wlp_simplify.
-  destruct exta0; simpl in * |- *; auto.
+  destruct exta0; cbn in * |- *; auto.
 Qed.
 Global Opaque g_bblock_simu_test.
 
@@ -773,12 +777,16 @@ Definition msg_unknow_term: pstring := "unknown term".
 Definition msg_number: pstring := "on 2nd bblock -- on inst num ".
 Definition msg_notfailpreserv: pstring := "a possible failure of 2nd bblock is absent in 1st bblock (INTERNAL ERROR: this error is expected to be detected before!!!)".
 
-Definition print_error_end (_ _: hsmem): ?? unit
+Definition print_end_error (_ _: hsmem): ?? unit
  := println (msg_prefix +; msg_error_on_end).
 
-Definition print_error (log: logger unit) (s:pstring): ?? unit
- := DO n <~ log_info log ();;
-    println (msg_prefix +; msg_number +; n +; " -- " +; s). 
+Definition print_error (log: logger unit) (os: option pstring): ?? unit
+ := match os with
+    | Some s =>
+      DO n <~ log_info log ();;
+      println (msg_prefix +; msg_number +; n +; " -- " +; s)
+    | None => RET tt
+    end.
 
 Definition failpreserv_error (_: term): ?? unit
   := println (msg_prefix +; msg_notfailpreserv).
@@ -808,7 +816,7 @@ Program Definition bblock_simu_test (p1 p2: bblock): ?? bool :=
     (log_insert log)
     hco_term _
     hco_list _
-    print_error_end
+    print_end_error
     (print_error log)
     true (* check_failpreserv *)
     failpreserv_error
@@ -905,7 +913,6 @@ with string_of_list_term (l: list_term): ?? pstring :=
      RET (st +; ";" +; sl)
   end.
 
-
 End PrettryPrint.
 
 
@@ -948,7 +955,7 @@ Definition print_tables gdi ext exl: ?? unit :=
   iterall exl (fun head _ pl => print_list gdi head pl.(hdata));;
   println "----------------".
 
-Definition print_final_debug gdi (d1 d2: hsmem): ?? unit 
+Definition print_final_debug gdi (d1 d2: hsmem): ?? unit
  := DO b <~ Dict.not_eq_witness d1 d2 ;;
     match b with
     | Some x =>
@@ -986,39 +993,44 @@ Definition print_witness gdi cr (*msg*) :=
   | None => println "Unexpected failure: no witness info (hint: hash-consing bug ?)"
   end.
 
-
-Definition print_error_end1 gdi hct hcl (d1 d2:hsmem): ?? unit
+Definition print_end_error1 gdi hct hcl (d1 d2:hsmem): ?? unit
  := println "- GRAPH of 1st bblock";;
     DO ext <~ export hct ();;
     DO exl <~ export hcl ();;
     print_tables gdi ext exl;;
-    print_error_end d1 d2;;
+    print_end_error d1 d2;;
     print_final_debug gdi d1 d2.
 
-Definition print_error1 gdi hct hcl cr log s : ?? unit
+Definition print_dump1 gdi hct hcl cr log os : ?? unit
  := println "- GRAPH of 1st bblock";;
     DO ext <~ export hct ();;
     DO exl <~ export hcl ();;
     print_tables gdi ext exl;;
-    print_error log s;;
-    print_witness gdi cr (*"1st"*).
-
+    print_error log os;;
+    match os with
+    | Some _ => print_witness gdi cr (*"1st"*)
+    | None => RET tt
+    end.
 
 Definition xmsg_number: pstring := "on 1st bblock -- on inst num ".
 
-Definition print_error_end2 gdi hct hcl (d1 d2:hsmem): ?? unit
+Definition print_end_error2 gdi hct hcl (d1 d2:hsmem): ?? unit
  := println (msg_prefix +; msg_error_on_end);;
     println "- GRAPH of 2nd bblock";;
     DO ext <~ export hct ();;
     DO exl <~ export hcl ();;
     print_tables gdi ext exl.
 
-Definition print_error2 gdi hct hcl cr (log: logger unit) (s:pstring): ?? unit
+Definition print_dump2 gdi hct hcl cr (log: logger unit) (os:option pstring): ?? unit
  := DO n <~ log_info log ();;
     DO ext <~ export hct ();;
     DO exl <~ export hcl ();;
-    println (msg_prefix +; xmsg_number +; n +; " -- " +; s);;
-    print_witness gdi cr (*"2nd"*);;
+    match os with
+    | Some s =>
+      println (msg_prefix +; xmsg_number +; n +; " -- " +; s);;
+      print_witness gdi cr (*"2nd"*)
+    | None => RET tt
+    end;;
     println "- GRAPH of 2nd bblock";;
     print_tables gdi ext exl.
 
@@ -1051,21 +1063,25 @@ Program Definition verb_bblock_simu_test (p1 p2: bblock): ?? bool :=
   DO cr <~ make_cref None;;
   DO hco_term <~ mk_annot (hCons hpt);;
   DO hco_list <~ mk_annot (hCons hplt);;
-  DO result1 <~ g_bblock_simu_test
+  (if FULL_DEBUG_DUMP then
+      println("");;
+      println("-- START simu checker --")
+   else RET tt);;
+  DO result1 <~ (g_bblock_simu_test
      (log_assign dict_info log1)
      (log_new_term (msg_term cr))
      (hlog log1 hco_term hco_list)
      (log_insert log2)
      hco_term _
      hco_list _
-     (print_error_end1 dict_info.(D.get) hco_term hco_list)
-     (print_error1 dict_info.(D.get) hco_term hco_list cr log2)
+     (print_end_error1 dict_info.(D.get) hco_term hco_list)
+     (print_dump1 dict_info.(D.get) hco_term hco_list cr log2)
      true
      failpreserv_error
-     p1 p2;;
-  if result1 
-  then RET true
-  else
+     p1 p2);;
+   if (if FULL_DEBUG_DUMP then false else result1)
+   then RET true
+   else (
     DO dict_info <~ make_dict (mk_hash_params (fun _ => RET tt));;
     DO log1 <~ count_logger ();;
     DO log2 <~ count_logger ();;
@@ -1080,31 +1096,35 @@ Program Definition verb_bblock_simu_test (p1 p2: bblock): ?? bool :=
        (log_insert log2)
        hco_term _
        hco_list _
-       (print_error_end2 dict_info.(D.get) hco_term hco_list)
-       (print_error2 dict_info.(D.get) hco_term hco_list cr log2)
+       (print_end_error2 dict_info.(D.get) hco_term hco_list)
+       (print_dump2 dict_info.(D.get) hco_term hco_list cr log2)
        false
        (fun _ => RET tt)
        p2 p1;;
-    if result2 
-    then (
-      println (msg_prefix +; " OOops - symmetry violation in bblock_simu_test  => this is a bug of bblock_simu_test ??");;
-      RET false
-    ) else RET false
-   .
+     if FULL_DEBUG_DUMP then
+        println("-- END simu checker --");;
+        println("");;
+        RET result1
+     else if result2
+     then (
+       println (msg_prefix +; " OOops - symmetry violation in bblock_simu_test  => this is a bug of bblock_simu_test ??");;
+       RET false
+     ) else RET false
+  ).
 Obligation 1.
-  generalize (hCons_correct _ _ _ H0); clear H0.
+  generalize (hCons_correct _ _ _ H1); clear H1.
   wlp_simplify.
 Qed.
 Obligation 2.
-  generalize (hCons_correct _ _ _ H); clear H.
+  generalize (hCons_correct _ _ _ H0); clear H0.
   wlp_simplify.
 Qed.
 Obligation 3.
-  generalize (hCons_correct _ _ _ H0); clear H0.
+  generalize (hCons_correct _ _ _ H1); clear H1.
   wlp_simplify.
 Qed.
 Obligation 4.
-  generalize (hCons_correct _ _ _ H); clear H.
+  generalize (hCons_correct _ _ _ H0); clear H0.
   wlp_simplify.
 Qed.
 
@@ -1209,8 +1229,8 @@ Lemma eq_test_correct A d1: forall (d2: t A),
  WHEN eq_test d1 d2 ~> b THEN
   b=true -> forall x, get d1 x = get d2 x.
 Proof.
-  unfold get; induction d1 as [|l1 Hl1 [x1|] r1 Hr1]; destruct d2 as [|l2 [x2|] r2]; simpl; 
-  wlp_simplify; (discriminate || (subst; destruct x; simpl; auto)).
+  unfold get; induction d1 as [|l1 Hl1 [x1|] r1 Hr1]; destruct d2 as [|l2 [x2|] r2]; cbn; 
+  wlp_simplify; (discriminate || (subst; destruct x; cbn; auto)).
 Qed.
 Global Opaque eq_test.
 
diff --git a/kvx/abstractbb/Parallelizability.v b/scheduling/abstractbb/Parallelizability.v
index 79ec9038..afa4b9fd 100644
--- a/kvx/abstractbb/Parallelizability.v
+++ b/scheduling/abstractbb/Parallelizability.v
@@ -50,8 +50,8 @@ Fixpoint inst_prun (i: inst) (m tmp old: mem): option mem :=
 Lemma inst_run_prun i: forall m old,
   inst_run ge i m old = inst_prun i m m old.
 Proof.
-  induction i as [|[y e] i']; simpl; auto.
-  intros m old; destruct (exp_eval ge e m old); simpl; auto.
+  induction i as [|[y e] i']; cbn; auto.
+  intros m old; destruct (exp_eval ge e m old); cbn; auto.
 Qed.
 
 
@@ -76,8 +76,8 @@ Lemma inst_prun_equiv i old: forall m1 m2 tmp,
   (forall x, m1 x = m2 x) ->
   res_eq (inst_prun i m1 tmp old) (inst_prun i m2 tmp old).
 Proof.
-  induction i as [|[x e] i']; simpl; eauto.
-  intros m1 m2 tmp H; destruct (exp_eval ge e tmp old); simpl; auto.
+  induction i as [|[x e] i']; cbn; eauto.
+  intros m1 m2 tmp H; destruct (exp_eval ge e tmp old); cbn; auto.
   eapply IHi'; unfold assign. intros; destruct (R.eq_dec x x0); auto.
 Qed.
 
@@ -85,12 +85,12 @@ Lemma prun_iw_equiv p: forall m1 m2 old,
   (forall x, m1 x = m2 x) ->
   res_eq (prun_iw p m1 old) (prun_iw p m2 old).
 Proof.
-  induction p as [|i p']; simpl; eauto.
+  induction p as [|i p']; cbn; eauto.
   - intros m1 m2 old H.
     generalize (inst_prun_equiv i old m1 m2 old H);
-    destruct (inst_prun i m1 old old); simpl.
-    + intros (m3 & H3 & H4); rewrite H3; simpl; eauto.
-    + intros H1; rewrite H1; simpl; auto.
+    destruct (inst_prun i m1 old old); cbn.
+    + intros (m3 & H3 & H4); rewrite H3; cbn; eauto.
+    + intros H1; rewrite H1; cbn; auto.
 Qed.
 
 
@@ -101,8 +101,8 @@ Lemma prun_iw_app p1: forall m1 old p2,
      | None => None
      end.
 Proof.
-   induction p1; simpl; try congruence.
-   intros; destruct (inst_prun _ _ _); simpl; auto.
+   induction p1; cbn; try congruence.
+   intros; destruct (inst_prun _ _ _); cbn; auto.
 Qed.
 
 Lemma prun_iw_app_None p1: forall m1 old p2,
@@ -132,12 +132,12 @@ Fixpoint notIn {A} (x: A) (l:list A): Prop :=
 
 Lemma notIn_iff A (x:A) l: (~List.In x l) <-> notIn x l.
 Proof.
-  induction l; simpl; intuition.
+  induction l; cbn; intuition.
 Qed.
 
 Lemma notIn_app A (x:A) l1: forall l2, notIn x (l1++l2) <-> (notIn x l1 /\ notIn x l2).
 Proof.
-  induction l1; simpl.
+  induction l1; cbn.
   - intuition.
   - intros; rewrite IHl1. intuition.
 Qed.
@@ -145,7 +145,7 @@ Qed.
 
 Lemma In_Permutation A (l1 l2: list A): Permutation l1 l2 -> forall x, In x l1 -> In x l2.
 Proof.
-  induction 1; simpl; intuition.
+  induction 1; cbn; intuition.
 Qed.
 
 Lemma Permutation_incl A (l1 l2: list A): Permutation l1 l2 -> incl l1 l2.
@@ -174,7 +174,7 @@ Qed.
 
 Lemma disjoint_cons_l A (x:A) (l1 l2: list A): disjoint (x::l1) l2 <-> (notIn x l2) /\ (disjoint l1 l2).
 Proof.
-  unfold disjoint. simpl; intuition subst; auto. 
+  unfold disjoint. cbn; intuition subst; auto. 
 Qed.
 
 Lemma disjoint_cons_r A (x:A) (l1 l2: list A): disjoint l1 (x::l2) <-> (notIn x l1) /\ (disjoint l1 l2).
@@ -230,13 +230,13 @@ Fixpoint frame_assign m1 (f: list R.t) m2 :=
 Lemma frame_assign_def f: forall m1 m2 x,
    frame_assign m1 f m2 x = if notIn_dec x f then m1 x else m2 x.
 Proof.
-  induction f as [|y f] ; simpl; auto.
-  - intros; destruct (notIn_dec x []); simpl in *; tauto.
-  - intros; rewrite IHf; destruct (notIn_dec x (y::f)); simpl in *.
-    + destruct (notIn_dec x f); simpl in *; intuition.
+  induction f as [|y f] ; cbn; auto.
+  - intros; destruct (notIn_dec x []); cbn in *; tauto.
+  - intros; rewrite IHf; destruct (notIn_dec x (y::f)); cbn in *.
+    + destruct (notIn_dec x f); cbn in *; intuition.
       rewrite assign_diff; auto.
       rewrite <- notIn_iff in *; intuition.
-    + destruct (notIn_dec x f); simpl in *; intuition subst.
+    + destruct (notIn_dec x f); cbn in *; intuition subst.
       rewrite assign_eq; auto.
       rewrite <- notIn_iff in *; intuition.
 Qed.
@@ -266,7 +266,7 @@ Lemma frame_eq_list_split f1 (f2: R.t -> Prop) om1 om2:
  (forall m1 m2 x, om1 = Some m1 -> om2 = Some m2 -> f2 x -> notIn x f1 -> m1 x = m2 x) ->
  frame_eq f2 om1 om2.
 Proof.
-  unfold frame_eq; destruct om1 as [ m1 | ]; simpl; auto.
+  unfold frame_eq; destruct om1 as [ m1 | ]; cbn; auto.
   intros (m2 & H0 & H1); subst.
   intros H.
   eexists; intuition eauto.
@@ -280,7 +280,7 @@ Lemma frame_eq_res_eq f om1 om2:
  res_eq om1 om2.
 Proof.
   intros H H0; lapply (frame_eq_list_split f (fun _ => True) om1 om2 H); eauto.
-  clear H H0; unfold frame_eq, res_eq. destruct om1; simpl; firstorder.
+  clear H H0; unfold frame_eq, res_eq. destruct om1; cbn; firstorder.
 Qed.
 *)
 
@@ -296,19 +296,19 @@ Lemma inst_wframe_correct i m' old: forall m tmp,
   inst_prun ge i m tmp old = Some m' -> 
   forall x, notIn x (inst_wframe i) -> m' x  = m x.
 Proof.
-  induction i as [|[y e] i']; simpl.
+  induction i as [|[y e] i']; cbn.
   - intros m tmp H x H0; inversion_clear H; auto.
-  - intros m tmp H x (H1 & H2); destruct (exp_eval ge e tmp old); simpl; try congruence.
-    cutrewrite (m x = assign m y v x); eauto.
+  - intros m tmp H x (H1 & H2); destruct (exp_eval ge e tmp old); cbn; try congruence.
+    replace (m x) with (assign m y v x); eauto.
     rewrite assign_diff; auto.
 Qed.
 
 Lemma inst_prun_fequiv i old: forall m1 m2 tmp, 
   frame_eq (fun x => In x (inst_wframe i)) (inst_prun ge i m1 tmp old) (inst_prun ge i m2 tmp old).
 Proof.
-  induction i as [|[y e] i']; simpl.
+  induction i as [|[y e] i']; cbn.
   - intros m1 m2 tmp; eexists; intuition eauto.
-  - intros m1 m2 tmp. destruct (exp_eval ge e tmp old); simpl; auto.
+  - intros m1 m2 tmp. destruct (exp_eval ge e tmp old); cbn; auto.
     eapply frame_eq_list_split; eauto. clear IHi'.
     intros m1' m2' x H1 H2.
     lapply (inst_wframe_correct i' m1' old (assign m1 y v) (assign tmp y v)); eauto.
@@ -323,7 +323,7 @@ Lemma inst_prun_None i m1 m2 tmp old:
   inst_prun ge i m2 tmp old = None.
 Proof.
   intros H; generalize (inst_prun_fequiv i old m1 m2 tmp).
-  rewrite H; simpl; auto.
+  rewrite H; cbn; auto.
 Qed.
 
 Lemma inst_prun_Some i m1 m2 tmp old m1': 
@@ -331,7 +331,7 @@ Lemma inst_prun_Some i m1 m2 tmp old m1':
   res_eq (Some (frame_assign m2 (inst_wframe i) m1')) (inst_prun ge i m2 tmp old).
 Proof.
   intros H; generalize (inst_prun_fequiv i old m1 m2 tmp).
-  rewrite H; simpl.
+  rewrite H; cbn.
   intros (m2' & H1 & H2).
   eexists; intuition eauto.
   rewrite frame_assign_def.
@@ -351,7 +351,7 @@ Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_com
 Lemma bblock_wframe_Permutation p p': 
  Permutation p p' -> Permutation (bblock_wframe p)  (bblock_wframe p').
 Proof.
-  induction 1 as [|i p p'|i1 i2 p|p1 p2 p3]; simpl; auto.
+  induction 1 as [|i p p'|i1 i2 p|p1 p2 p3]; cbn; auto.
   - rewrite! app_assoc; auto.
   - eapply Permutation_trans; eauto.
 Qed.
@@ -361,11 +361,11 @@ Lemma bblock_wframe_correct p m' old: forall m,
   prun_iw p m old = Some m' -> 
   forall x, notIn x (bblock_wframe p) -> m' x = m x.
 Proof.
-  induction p as [|i p']; simpl.
+  induction p as [|i p']; cbn.
   - intros m H; inversion_clear H; auto.
   - intros m H x; rewrite notIn_app; intros (H1 & H2). 
     remember (inst_prun i m old old) as om.
-    destruct om as [m1|]; simpl.
+    destruct om as [m1|]; cbn.
     + eapply eq_trans.
       eapply IHp'; eauto.
       eapply inst_wframe_correct; eauto.
@@ -375,12 +375,12 @@ Qed.
 Lemma prun_iw_fequiv p old: forall m1 m2, 
   frame_eq (fun x => In x (bblock_wframe p)) (prun_iw p m1 old) (prun_iw p m2 old).
 Proof.
-  induction p as [|i p']; simpl.
+  induction p as [|i p']; cbn.
   - intros m1 m2; eexists; intuition eauto.
   - intros m1 m2; generalize (inst_prun_fequiv i old m1 m2 old).
     remember (inst_prun i m1 old old) as om.
-    destruct om as [m1'|]; simpl.
-    + intros (m2' & H1 & H2). rewrite H1; simpl.
+    destruct om as [m1'|]; cbn.
+    + intros (m2' & H1 & H2). rewrite H1; cbn.
     eapply frame_eq_list_split; eauto. clear IHp'.
     intros m1'' m2'' x H3 H4. rewrite in_app_iff.
     intros X X2. assert (X1: In x (inst_wframe i)). { destruct X; auto. rewrite <- notIn_iff in X2; tauto. }
@@ -389,7 +389,7 @@ Proof.
     lapply (bblock_wframe_correct p' m2'' old m2'); eauto.
     intros Xm2' Xm1'. 
     rewrite Xm1', Xm2'; auto.
-    + intro H; rewrite H; simpl; auto.
+    + intro H; rewrite H; cbn; auto.
 Qed.
 
 Lemma prun_iw_equiv p m1 m2 old: 
@@ -418,7 +418,7 @@ Fixpoint is_det (p: bblock): Prop :=
 Lemma is_det_Permutation p p': 
  Permutation p p' -> is_det p -> is_det p'.
 Proof.
-  induction 1; simpl; auto.
+  induction 1; cbn; auto.
   - intros; intuition. eapply disjoint_incl_r. 2: eauto.
     eapply Permutation_incl. eapply Permutation_sym. 
     eapply bblock_wframe_Permutation; auto.
@@ -431,20 +431,20 @@ Theorem is_det_correct p p':
   is_det p -> 
   forall m old, res_eq (prun_iw ge p m old) (prun_iw ge p' m old).
 Proof.
-  induction 1 as [ | i p p' | i1 i2 p | p1 p2 p3 ]; simpl; eauto.
+  induction 1 as [ | i p p' | i1 i2 p | p1 p2 p3 ]; cbn; eauto.
   - intros [H0 H1] m old.
     remember (inst_prun ge i m old old) as om0.
-    destruct om0 as [ m0 | ]; simpl; auto.
+    destruct om0 as [ m0 | ]; cbn; auto.
   - rewrite disjoint_app_r.
     intros ([Z1 Z2] & Z3 & Z4) m old.
     remember (inst_prun ge i2 m old old) as om2.
-    destruct om2 as [ m2 | ]; simpl; auto.
+    destruct om2 as [ m2 | ]; cbn; auto.
     + remember (inst_prun ge i1 m old old) as om1.
-      destruct om1 as [ m1 | ]; simpl; auto.
-      * lapply (inst_prun_Some i2 m m1 old old m2); simpl; auto.
-        lapply (inst_prun_Some i1 m m2 old old m1); simpl; auto.
+      destruct om1 as [ m1 | ]; cbn; auto.
+      * lapply (inst_prun_Some i2 m m1 old old m2); cbn; auto.
+        lapply (inst_prun_Some i1 m m2 old old m1); cbn; auto.
         intros (m1' & Hm1' & Xm1') (m2' & Hm2' & Xm2').
-        rewrite Hm1', Hm2'; simpl.
+        rewrite Hm1', Hm2'; cbn.
         eapply prun_iw_equiv.
         intros x; rewrite <- Xm1', <- Xm2'. clear Xm2' Xm1' Hm1' Hm2' m1' m2'.
         rewrite frame_assign_def.
@@ -455,9 +455,9 @@ Proof.
           erewrite (inst_wframe_correct i1 m1 old m old); eauto.
         }
         rewrite frame_assign_notIn; auto.
-     * erewrite inst_prun_None; eauto. simpl; auto.
+     * erewrite inst_prun_None; eauto. cbn; auto.
    + remember (inst_prun ge i1 m old old) as om1.
-     destruct om1 as [ m1 | ]; simpl; auto.
+     destruct om1 as [ m1 | ]; cbn; auto.
      erewrite inst_prun_None; eauto.
   - intros; eapply res_eq_trans.
     eapply IHPermutation1; eauto.
@@ -486,7 +486,7 @@ Lemma exp_frame_correct e old1 old2:
    (exp_eval ge e m1 old1)=(exp_eval ge e m2 old2).
 Proof.
   induction e using exp_mut with (P0:=fun l => (forall x, In x (list_exp_frame l) -> old1 x = old2 x) -> forall m1 m2, (forall x, In x (list_exp_frame l) -> m1 x = m2 x) ->
-   (list_exp_eval ge l m1 old1)=(list_exp_eval ge l m2 old2)); simpl; auto.
+   (list_exp_eval ge l m1 old1)=(list_exp_eval ge l m2 old2)); cbn; auto.
   - intros H1 m1 m2 H2; rewrite H2; auto.
   - intros H1 m1 m2 H2; erewrite IHe; eauto.
   - intros H1 m1 m2 H2; erewrite IHe, IHe0; eauto; 
@@ -501,7 +501,7 @@ Fixpoint inst_frame (i: inst): list R.t :=
 
 Lemma inst_wframe_frame i x: In x (inst_wframe i) -> In x (inst_frame i).
 Proof.
-  induction i as [ | [y e] i']; simpl; intuition.
+  induction i as [ | [y e] i']; cbn; intuition.
 Qed.
 
 
@@ -511,16 +511,16 @@ Lemma inst_frame_correct i wframe old1 old2: forall m tmp1 tmp2,
   (forall x, notIn x wframe -> tmp1 x = tmp2 x) ->
   inst_prun ge i m tmp1 old1 = inst_prun ge i m tmp2 old2.
 Proof.
-  induction i as [|[x e] i']; simpl; auto.
+  induction i as [|[x e] i']; cbn; auto.
   intros m tmp1 tmp2; rewrite disjoint_cons_l, disjoint_app_l.
   intros (H1 & H2 & H3) H6 H7.
-  cutrewrite (exp_eval ge e tmp1 old1 = exp_eval ge e tmp2 old2).
+  replace (exp_eval ge e tmp1 old1) with (exp_eval ge e tmp2 old2).
   - destruct (exp_eval ge e tmp2 old2); auto.
     eapply IHi'; eauto. 
-    simpl; intros x0 H0; unfold assign. destruct (R.eq_dec x x0); simpl; auto. 
+    cbn; intros x0 H0; unfold assign. destruct (R.eq_dec x x0); cbn; auto. 
   - unfold disjoint in H2; apply exp_frame_correct.
-    intros;apply H6; auto.
-    intros;apply H7; auto.
+    intros;rewrite H6; auto.
+    intros;rewrite H7; auto.
 Qed.
 
 (** * Parallelizability *)
@@ -535,8 +535,8 @@ Fixpoint pararec (p: bblock) (wframe: list R.t): Prop :=
 
 Lemma pararec_disjoint (p: bblock): forall wframe, pararec p wframe -> disjoint (bblock_wframe p) wframe.
 Proof.
-  induction p as [|i p']; simpl.
-  - unfold disjoint; simpl; intuition.
+  induction p as [|i p']; cbn.
+  - unfold disjoint; cbn; intuition.
   - intros wframe [H0 H1]; rewrite disjoint_app_l. 
     generalize (IHp' _ H1).
     rewrite disjoint_app_r. intuition. 
@@ -546,7 +546,7 @@ Qed.
 
 Lemma pararec_det p: forall wframe, pararec p wframe -> is_det p.
 Proof.
-  induction p as [|i p']; simpl; auto.
+  induction p as [|i p']; cbn; auto.
   intros wframe [H0 H1]. generalize (pararec_disjoint _ _ H1). rewrite disjoint_app_r.
   intuition. 
   - apply disjoint_sym; auto.
@@ -558,7 +558,7 @@ Lemma pararec_correct p old: forall wframe m,
   (forall x, notIn x wframe -> m x = old x) ->
   run ge p m = prun_iw ge p m old.
 Proof.
-  elim p; clear p; simpl; auto.
+  elim p; clear p; cbn; auto.
   intros i p' X wframe m [H H0] H1.
   erewrite inst_run_prun, inst_frame_correct; eauto.
   remember (inst_prun ge i m old old) as om0.
@@ -646,7 +646,7 @@ Fixpoint inst_wsframe(i:inst): S.t :=
 
 Lemma inst_wsframe_correct i: S.match_frame (inst_wsframe i) (inst_wframe i).
 Proof.
-  induction i; simpl; auto.
+  induction i; cbn; auto.
 Qed.
 
 Fixpoint exp_sframe (e: exp): S.t :=
@@ -664,7 +664,7 @@ with list_exp_sframe (le: list_exp): S.t :=
 
 Lemma exp_sframe_correct e: S.match_frame (exp_sframe e) (exp_frame e).
 Proof.
-  induction e using exp_mut with (P0:=fun l => S.match_frame (list_exp_sframe l) (list_exp_frame l)); simpl; auto.
+  induction e using exp_mut with (P0:=fun l => S.match_frame (list_exp_sframe l) (list_exp_frame l)); cbn; auto.
 Qed.
 
 Fixpoint inst_sframe (i: inst): S.t :=
@@ -677,7 +677,7 @@ Local Hint Resolve exp_sframe_correct: core.
 
 Lemma inst_sframe_correct i: S.match_frame (inst_sframe i) (inst_frame i).
 Proof.
-  induction i as [|[y e] i']; simpl; auto.
+  induction i as [|[y e] i']; cbn; auto.
 Qed.
 
 Local Hint Resolve inst_wsframe_correct inst_sframe_correct: core.
@@ -692,7 +692,7 @@ Fixpoint is_pararec (p: bblock) (wsframe: S.t): bool :=
 
 Lemma is_pararec_correct (p: bblock): forall s l, S.match_frame s l -> (is_pararec p s)=true -> (pararec p l).
 Proof.
-  induction p; simpl; auto.
+  induction p; cbn; auto.
   intros s l H1 H2; rewrite lazy_andb_bool_true in H2. destruct H2 as [H2 H3].
   constructor 1; eauto.
 Qed.
@@ -739,14 +739,14 @@ Definition empty:=PositiveSet.empty.
 
 Lemma empty_match_frame: match_frame empty nil.
 Proof.
-  unfold match_frame, empty, PositiveSet.In; simpl; intuition.
+  unfold match_frame, empty, PositiveSet.In; cbn; intuition.
 Qed.
 
 Definition add: R.t -> t -> t := PositiveSet.add.
 
 Lemma add_match_frame: forall s x l, match_frame s l -> match_frame (add x s) (x::l).
 Proof.
-  unfold match_frame, add; simpl. 
+  unfold match_frame, add; cbn. 
   intros s x l H y. rewrite PositiveSet.add_spec, H.
   intuition. 
 Qed.
@@ -772,13 +772,13 @@ Fixpoint is_disjoint (s s': PositiveSet.t) : bool :=
 
 Lemma is_disjoint_spec_true s: forall s', is_disjoint s s' = true -> forall x, PositiveSet.In x s -> PositiveSet.In x s' -> False.
 Proof.
-  unfold PositiveSet.In; induction s as [ |l IHl o r IHr]; simpl; try discriminate.
-  destruct s' as [|l' o' r']; simpl; try discriminate.
+  unfold PositiveSet.In; induction s as [ |l IHl o r IHr]; cbn; try discriminate.
+  destruct s' as [|l' o' r']; cbn; try discriminate.
   intros X.
   assert (H: ~(o = true /\ o'=true) /\ is_disjoint l l' = true /\ is_disjoint r r'=true).
-  { destruct o, o', (is_disjoint l l'), (is_disjoint r r'); simpl in X; intuition. }
+  { destruct o, o', (is_disjoint l l'), (is_disjoint r r'); cbn in X; intuition. }
   clear X; destruct H as (H & H1 & H2).
-  destruct x as [i|i|]; simpl; eauto.
+  destruct x as [i|i|]; cbn; eauto.
 Qed.
 
 Lemma is_disjoint_match_frame: forall s1 s2 l1 l2, match_frame s1 l1 -> match_frame s2 l2 -> (is_disjoint s1 s2)=true -> disjoint l1 l2.
diff --git a/kvx/abstractbb/README.md b/scheduling/abstractbb/README.md
index 69e5defc..69e5defc 100644
--- a/kvx/abstractbb/README.md
+++ b/scheduling/abstractbb/README.md
diff --git a/kvx/abstractbb/SeqSimuTheory.v b/scheduling/abstractbb/SeqSimuTheory.v
index a957c50a..df6b9963 100644
--- a/kvx/abstractbb/SeqSimuTheory.v
+++ b/scheduling/abstractbb/SeqSimuTheory.v
@@ -92,13 +92,13 @@ Variable ge: genv.
 Lemma set_spec_eq d x t m:
  term_eval ge (smem_set d x t x) m = term_eval ge t m.
 Proof.
-  unfold smem_set; simpl; case (R.eq_dec x x); try congruence.
+  unfold smem_set; cbn; case (R.eq_dec x x); try congruence.
 Qed.
 
 Lemma set_spec_diff d x y t m:
   x <> y -> term_eval ge (smem_set d x t y) m = term_eval ge (d y) m.
 Proof.
-  unfold smem_set; simpl; case (R.eq_dec x y); try congruence.
+  unfold smem_set; cbn; case (R.eq_dec x y); try congruence.
 Qed.
 
 Fixpoint inst_smem (i: inst) (d old: smem): smem :=
@@ -123,15 +123,15 @@ Definition bblock_smem: bblock -> smem
 Lemma inst_smem_pre_monotonic i old: forall d m,
   (pre (inst_smem i d old) ge m) -> (pre d ge m).
 Proof.
-  induction i as [|[y e] i IHi]; simpl; auto.
+  induction i as [|[y e] i IHi]; cbn; auto.
   intros d a H; generalize (IHi _ _ H); clear H IHi.
-  unfold smem_set; simpl; intuition.
+  unfold smem_set; cbn; intuition.
 Qed.
 
 Lemma bblock_smem_pre_monotonic p: forall d m,
   (pre (bblock_smem_rec p d) ge m) -> (pre d ge m).
 Proof.
-  induction p as [|i p' IHp']; simpl; eauto.
+  induction p as [|i p' IHp']; cbn; eauto.
   intros d a H; eapply inst_smem_pre_monotonic; eauto.
 Qed.
 
@@ -146,7 +146,7 @@ Proof.
   intro H.
   induction e using exp_mut with
     (P0:=fun l => forall (d:smem) m1, (forall x, term_eval ge (d x) m0 = Some (m1 x)) -> list_term_eval ge (list_exp_term l d od) m0 = list_exp_eval ge l m1 old);
-  simpl; auto.
+  cbn; auto.
   - intros; erewrite IHe; eauto.
   - intros. erewrite IHe, IHe0; eauto. 
 Qed.
@@ -156,12 +156,12 @@ Lemma inst_smem_abort i m0 x old: forall (d:smem),
     term_eval ge (d x) m0 = None ->
     term_eval ge (inst_smem i d old x) m0 = None.
 Proof.
-  induction i as [|[y e] i IHi]; simpl; auto.
+  induction i as [|[y e] i IHi]; cbn; auto.
   intros d VALID H; erewrite IHi; eauto. clear IHi.
-  unfold smem_set; simpl; destruct (R.eq_dec y x); auto.
+  unfold smem_set; cbn; destruct (R.eq_dec y x); auto.
   subst;
   generalize (inst_smem_pre_monotonic _ _ _ _ VALID); clear VALID.
-  unfold smem_set; simpl. intuition congruence.
+  unfold smem_set; cbn. intuition congruence.
 Qed.
 
 Lemma block_smem_rec_abort p m0 x: forall d,
@@ -169,7 +169,7 @@ Lemma block_smem_rec_abort p m0 x: forall d,
     term_eval ge (d x) m0 = None ->
     term_eval ge (bblock_smem_rec p d x) m0 = None.
 Proof.
-  induction p; simpl; auto.
+  induction p; cbn; auto.
   intros d VALID H; erewrite IHp; eauto. clear IHp.
   eapply inst_smem_abort; eauto.
 Qed.
@@ -181,13 +181,13 @@ Lemma inst_smem_Some_correct1 i m0 old (od:smem):
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
    forall x, term_eval ge (inst_smem i d od x) m0 = Some (m2 x).
 Proof.
-  intro X; induction i as [|[x e] i IHi]; simpl; intros m1 m2 d H.
+  intro X; induction i as [|[x e] i IHi]; cbn; intros m1 m2 d H.
   - inversion_clear H; eauto.
   - intros H0 x0.
     destruct (exp_eval ge e m1 old) eqn:Heqov; try congruence.
     refine (IHi _ _ _ _ _ _); eauto.
     clear x0; intros x0.
-    unfold assign, smem_set; simpl. destruct (R.eq_dec x x0); auto.
+    unfold assign, smem_set; cbn. destruct (R.eq_dec x x0); auto.
     subst; erewrite term_eval_exp; eauto.
 Qed.
 
@@ -197,7 +197,7 @@ Lemma bblocks_smem_rec_Some_correct1 p m0: forall (m1 m2: mem) (d: smem),
   forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x).
 Proof.
   Local Hint Resolve inst_smem_Some_correct1: core.
-  induction p as [ | i p]; simpl; intros m1 m2 d H.
+  induction p as [ | i p]; cbn; intros m1 m2 d H.
   - inversion_clear H; eauto.
   - intros H0 x0.
     destruct (inst_run ge i m1 m1) eqn: Heqov.
@@ -218,15 +218,15 @@ Lemma inst_smem_None_correct i m0 old (od: smem):
    (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
   inst_run ge i m1 old = None -> exists x, term_eval ge (inst_smem i d od x) m0 = None.
 Proof.
-  intro X; induction i as [|[x e] i IHi]; simpl; intros m1 d.
+  intro X; induction i as [|[x e] i IHi]; cbn; intros m1 d.
   - discriminate.
   - intros VALID H0.
     destruct (exp_eval ge e m1 old) eqn: Heqov.
     + refine (IHi _ _ _ _); eauto.
-      intros x0; unfold assign, smem_set; simpl. destruct (R.eq_dec x x0); auto.
+      intros x0; unfold assign, smem_set; cbn. destruct (R.eq_dec x x0); auto.
       subst; erewrite term_eval_exp; eauto.
     + intuition.
-      constructor 1 with (x:=x); simpl.
+      constructor 1 with (x:=x); cbn.
       apply inst_smem_abort; auto.
       rewrite set_spec_eq. 
       erewrite term_eval_exp; eauto.
@@ -241,14 +241,14 @@ Lemma inst_smem_Some_correct2 i m0 old (od: smem):
   res_eq (Some m2) (inst_run ge i m1 old).
 Proof.
   intro X.
-  induction i as [|[x e] i IHi]; simpl; intros m1 m2 d VALID H0.
+  induction i as [|[x e] i IHi]; cbn; intros m1 m2 d VALID H0.
   - intros H; eapply ex_intro; intuition eauto.
     generalize (H0 x); rewrite H.
     congruence.
   - intros H.
     destruct (exp_eval ge e m1 old) eqn: Heqov.
     + refine (IHi _ _ _ _ _ _); eauto.
-      intros x0; unfold assign, smem_set; simpl; destruct (R.eq_dec x x0); auto.
+      intros x0; unfold assign, smem_set; cbn; destruct (R.eq_dec x x0); auto.
       subst; erewrite term_eval_exp; eauto.
     + generalize (H x).
       rewrite inst_smem_abort; discriminate || auto.
@@ -262,7 +262,7 @@ Lemma bblocks_smem_rec_Some_correct2 p m0: forall (m1 m2: mem) d,
   (forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x)) ->
     res_eq (Some m2) (run ge p m1).
 Proof.
-  induction p as [|i p]; simpl; intros m1 m2 d VALID H0.
+  induction p as [|i p]; cbn; intros m1 m2 d VALID H0.
   - intros H; eapply ex_intro; intuition eauto.
     generalize (H0 x); rewrite H.
     congruence.
@@ -293,13 +293,13 @@ Lemma inst_valid i m0 old (od:smem):
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
    pre (inst_smem i d od) ge m0.
 Proof.
-  induction i as [|[x e] i IHi]; simpl; auto.
+  induction i as [|[x e] i IHi]; cbn; auto.
   intros Hold m1 m2 d VALID0 H Hm1.
-  destruct (exp_eval ge e m1 old) eqn: Heq; simpl; try congruence.
+  destruct (exp_eval ge e m1 old) eqn: Heq; cbn; try congruence.
   eapply IHi; eauto.
-  + unfold smem_set in * |- *; simpl. 
+  + unfold smem_set in * |- *; cbn. 
     rewrite Hm1; intuition congruence.
-  + intros x0. unfold assign, smem_set; simpl; destruct (R.eq_dec x x0); auto.
+  + intros x0. unfold assign, smem_set; cbn; destruct (R.eq_dec x x0); auto.
     subst; erewrite term_eval_exp; eauto.
 Qed.
 
@@ -311,7 +311,7 @@ Lemma block_smem_rec_valid p m0: forall (m1 m2: mem) (d:smem),
   pre (bblock_smem_rec p d) ge m0.
 Proof.
   Local Hint Resolve inst_valid: core.
-  induction p as [ | i p]; simpl; intros m1 d H; auto.
+  induction p as [ | i p]; cbn; intros m1 d H; auto.
   intros H0 H1.
   destruct (inst_run ge i m1 m1) eqn: Heqov; eauto.
   congruence.
@@ -322,7 +322,7 @@ Lemma bblock_smem_valid p m0 m1:
   pre (bblock_smem p) ge m0.
 Proof.
   intros; eapply block_smem_rec_valid; eauto.
-  unfold smem_empty; simpl. auto.
+  unfold smem_empty; cbn. auto.
 Qed.
 
 Definition smem_valid ge d m := pre d ge m /\ forall x, term_eval ge (d x) m <> None.
@@ -339,7 +339,7 @@ Theorem bblock_smem_simu p1 p2:
 Proof.
   Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1: core.
   intros (INCL & EQUIV) m DONTFAIL; unfold smem_valid in * |-.
-  destruct (run ge p1 m) as [m1|] eqn: RUN1; simpl; try congruence.
+  destruct (run ge p1 m) as [m1|] eqn: RUN1; cbn; try congruence.
   assert (X: forall x, term_eval ge (bblock_smem p1 x) m = Some (m1 x)); eauto.
   eapply bblock_smem_Some_correct2; eauto.
   + destruct (INCL m); intuition eauto.
@@ -371,7 +371,7 @@ Lemma smem_valid_set_proof d x t m:
 Proof.
   unfold smem_valid; intros (PRE & VALID) PREt. split.
   + split; auto.
-  + intros x0; unfold smem_set; simpl; case (R.eq_dec x x0); intros; subst; auto.
+  + intros x0; unfold smem_set; cbn; case (R.eq_dec x x0); intros; subst; auto.
 Qed.
 
 
@@ -384,7 +384,7 @@ Definition smem_correct ge (d: smem) (m: mem) (om: option mem): Prop:=
 
 Lemma bblock_smem_correct ge p m: smem_correct ge (bblock_smem p) m (run ge p m).
 Proof.
-  unfold smem_correct; simpl; intros m'; split.
+  unfold smem_correct; cbn; intros m'; split.
   + intros; split.
     * eapply bblock_smem_valid; eauto.
     * eapply bblock_smem_Some_correct1; eauto.
diff --git a/kvx/lib/ForwardSimulationBlock.v b/scheduling/postpass_lib/ForwardSimulationBlock.v
index f79814f2..cc6ecdd8 100644
--- a/kvx/lib/ForwardSimulationBlock.v
+++ b/scheduling/postpass_lib/ForwardSimulationBlock.v
@@ -25,7 +25,7 @@ Require Import Coqlib.
 Require Import Events.
 Require Import Globalenvs.
 Require Import Smallstep.
-
+Require Import Lia.
 
 Local Open Scope nat_scope.
 
@@ -42,11 +42,11 @@ Lemma starN_split n s t s':
   forall m k, n=m+k ->
   exists (t1 t2:trace) s0, starN (step L) (globalenv L) m s t1 s0 /\ starN (step L) (globalenv L) k s0 t2 s' /\ t=t1**t2.
 Proof.
-  induction 1; simpl.
-  + intros m k H; assert (X: m=0); try omega.
-    assert (X0: k=0); try omega.
+  induction 1; cbn.
+  + intros m k H; assert (X: m=0); try lia.
+    assert (X0: k=0); try lia.
     subst; repeat (eapply ex_intro); intuition eauto.
-  + intros m; destruct m as [| m']; simpl.
+  + intros m; destruct m as [| m']; cbn.
     - intros k H2; subst; repeat (eapply ex_intro); intuition eauto.
     - intros k H2. inversion H2.
       exploit (IHstarN m' k); eauto. intro.
@@ -61,7 +61,7 @@ Lemma starN_tailstep n s t1 s':
   forall (t t2:trace) s'',
      Step L s' t2 s'' -> t = t1 ** t2 -> starN (step L) (globalenv L) (S n) s t s''.
 Proof.
-  induction 1; simpl. 
+  induction 1; cbn. 
   + intros t t1 s0; autorewrite with trace_rewrite.
     intros; subst; eapply starN_step; eauto.
     autorewrite with trace_rewrite; auto.
@@ -119,10 +119,10 @@ Proof.
   intros t [H1 H2] H3 H4.
   destruct (simu_mid_block _ _ _ H3 H4) as [H5 H6]; subst. 
   constructor 1.
-  + omega.
-  + cutrewrite (dist_end_block head - dist_end_block next = S (dist_end_block head - dist_end_block previous)).
+  + lia.
+  + replace (dist_end_block head - dist_end_block next) with (S (dist_end_block head - dist_end_block previous)).
     - eapply starN_tailstep; eauto.
-    - omega.
+    - lia.
 Qed.
 
 Lemma follows_in_block_init (head current: state L1):
@@ -131,10 +131,10 @@ Proof.
   intros t H3 H4.
   destruct (simu_mid_block _ _ _ H3 H4) as [H5 H6]; subst. 
   constructor 1.
-  + omega.
-  + cutrewrite (dist_end_block head - dist_end_block current = 1).
+  + lia.
+  + replace (dist_end_block head - dist_end_block current) with 1.
     - eapply starN_tailstep; eauto.
-    - omega.
+    - lia.
 Qed.
 
 
@@ -153,13 +153,13 @@ Definition head (s: memostate): state L1 :=
 
 Lemma head_followed (s: memostate): follows_in_block (head s) (real s).
 Proof.
-  destruct s as [rs ms Hs]. simpl.
-  destruct ms as [ms|]; unfold head; simpl; auto.
+  destruct s as [rs ms Hs]. cbn.
+  destruct ms as [ms|]; unfold head; cbn; auto.
   constructor 1.
-  omega.
-  cutrewrite ((dist_end_block rs - dist_end_block rs)%nat=O).
+  lia.
+  replace (dist_end_block rs - dist_end_block rs)%nat with O.
   + apply starN_refl; auto.
-  + omega.
+  + lia.
 Qed.
 
 Inductive is_well_memorized (s s': memostate): Prop :=
@@ -198,21 +198,21 @@ Definition memoL1 := {|
 Lemma discr_dist_end s:
   {dist_end_block s = O} + {dist_end_block s <> O}.
 Proof.
-  destruct (dist_end_block s); simpl; intuition.
+  destruct (dist_end_block s); cbn; intuition.
 Qed.
 
 Lemma memo_simulation_step:
   forall s1 t s1', Step L1 s1 t s1' ->
   forall s2, s1 = (real s2) -> exists s2', Step memoL1 s2 t s2' /\ s1' = (real s2').
 Proof.
-  intros s1 t s1' H1 [rs2 ms2 Hmoi] H2. simpl in H2; subst.
+  intros s1 t s1' H1 [rs2 ms2 Hmoi] H2. cbn in H2; subst.
   destruct (discr_dist_end rs2) as [H3 | H3].
-  + refine (ex_intro _ {|real:=s1'; memorized:=None |} _); simpl.
+  + refine (ex_intro _ {|real:=s1'; memorized:=None |} _); cbn.
     intuition.
   + destruct ms2 as [s|].
-    - refine (ex_intro _ {|real:=s1'; memorized:=Some s |} _); simpl.
+    - refine (ex_intro _ {|real:=s1'; memorized:=Some s |} _); cbn.
       intuition.
-    - refine (ex_intro _ {|real:=s1'; memorized:=Some rs2 |} _); simpl.
+    - refine (ex_intro _ {|real:=s1'; memorized:=Some rs2 |} _); cbn.
       intuition.
   Unshelve.
   * intros; discriminate.
@@ -228,7 +228,7 @@ Qed.
 Lemma forward_memo_simulation_1: forward_simulation L1 memoL1.
 Proof.
   apply forward_simulation_step with (match_states:=fun s1 s2 => s1 = (real s2)); auto.
-  + intros s1 H; eapply ex_intro with (x:={|real:=s1; memorized:=None |}); simpl.
+  + intros s1 H; eapply ex_intro with (x:={|real:=s1; memorized:=None |}); cbn.
     intuition.
   + intros; subst; auto.
   + intros; exploit memo_simulation_step; eauto.
@@ -239,8 +239,8 @@ Qed.
 
 Lemma forward_memo_simulation_2: forward_simulation memoL1 L2.
 Proof.
-  unfold memoL1; simpl.
-  apply forward_simulation_opt with (measure:=fun s => dist_end_block (real s)) (match_states:=fun s1 s2 => match_states (head s1) s2); simpl; auto.
+  unfold memoL1; cbn.
+  apply forward_simulation_opt with (measure:=fun s => dist_end_block (real s)) (match_states:=fun s1 s2 => match_states (head s1) s2); cbn; auto.
   + intros s1 [H0 H1]; destruct (match_initial_states (real s1) H0).
     unfold head; rewrite H1.
     intuition eauto.
@@ -254,14 +254,14 @@ Proof.
     - (* MidBloc *)
       constructor 2. destruct (simu_mid_block (real s1) t (real s1')) as [H5 H4]; auto.
       unfold head in * |- *. rewrite H3. rewrite H4. intuition.
-      destruct (memorized s1); simpl; auto. tauto.
+      destruct (memorized s1); cbn; auto. tauto.
     - (* EndBloc *)
       constructor 1.
       destruct (simu_end_block (head s1) t (real s1') s2) as (s2' & H3 & H4); auto.
       * destruct (head_followed s1) as [H4 H3].
-        cutrewrite (dist_end_block (head s1) - dist_end_block (real s1) = dist_end_block (head s1)) in H3; try omega.
+        replace (dist_end_block (head s1) - dist_end_block (real s1)) with (dist_end_block (head s1)) in H3; try lia.
         eapply starN_tailstep; eauto.
-      * unfold head; rewrite H2; simpl. intuition eauto. 
+      * unfold head; rewrite H2; cbn. intuition eauto. 
 Qed.
 
 Lemma forward_simulation_block_rel: forward_simulation L1 L2.
diff --git a/kvx/lib/Machblock.v b/scheduling/postpass_lib/Machblock.v
index edae0ed4..404c2a96 100644
--- a/kvx/lib/Machblock.v
+++ b/scheduling/postpass_lib/Machblock.v
@@ -70,7 +70,7 @@ Lemma bblock_eq:
   b1 = b2.
 Proof.
   intros. destruct b1. destruct b2.
-  simpl in *. subst. auto.
+  cbn in *. subst. auto.
 Qed.
 
 Definition length_opt {A} (o: option A) : nat :=
@@ -85,15 +85,15 @@ Lemma size_null b:
   size b = 0%nat ->
   header b = nil /\ body b = nil /\ exit b = None.
 Proof.
-  destruct b as [h b e]. simpl. unfold size. simpl.
+  destruct b as [h b e]. cbn. unfold size. cbn.
   intros H.
   assert (length h = 0%nat) as Hh; [ omega |].
   assert (length b = 0%nat) as Hb; [ omega |].
   assert (length_opt e = 0%nat) as He; [ omega|].
   repeat split.
-  destruct h; try (simpl in Hh; discriminate); auto.
-  destruct b; try (simpl in Hb; discriminate); auto.
-  destruct e; try (simpl in He; discriminate); auto.
+  destruct h; try (cbn in Hh; discriminate); auto.
+  destruct b; try (cbn in Hb; discriminate); auto.
+  destruct e; try (cbn in He; discriminate); auto.
 Qed.
 
 (** ** programs *)
@@ -127,13 +127,13 @@ Definition is_label (lbl: label) (bb: bblock) : bool :=
 Lemma is_label_correct_true lbl bb:
   List.In lbl (header bb) <-> is_label lbl bb = true. 
 Proof.
-  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+  unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
 Qed.
 
 Lemma is_label_correct_false lbl bb:
   ~(List.In lbl (header bb)) <-> is_label lbl bb = false. 
 Proof.
-  unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+  unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
 Qed.
 
 
diff --git a/kvx/lib/Machblockgen.v b/scheduling/postpass_lib/Machblockgen.v
index ab186083..5a2f2a61 100644
--- a/kvx/lib/Machblockgen.v
+++ b/scheduling/postpass_lib/Machblockgen.v
@@ -148,18 +148,18 @@ Lemma add_to_code_is_trans_code i c bl:
   is_trans_code c bl ->
   is_trans_code (i::c) (add_to_code (trans_inst i) bl).
 Proof.
-  destruct bl as [|bh0 bl]; simpl.
+  destruct bl as [|bh0 bl]; cbn.
   - intro H. inversion H. subst. eauto.
   - remember (trans_inst i) as ti.
     destruct ti as [l|bi|cfi].
-    + intros; eapply Tr_add_label; eauto. destruct i; simpl in * |- *; congruence.
+    + intros; eapply Tr_add_label; eauto. destruct i; cbn in * |- *; congruence.
     + intros. remember (header bh0) as hbh0. destruct hbh0 as [|b].
       * eapply Tr_add_basic; eauto.
-      * cutrewrite (add_basic bi empty_bblock = add_to_new_bblock (MB_basic bi)); auto.
+      * replace (add_basic bi empty_bblock) with (add_to_new_bblock (MB_basic bi)); auto.
         rewrite Heqti; eapply Tr_end_block; eauto.
         rewrite <- Heqti. eapply End_basic. congruence.
     + intros.
-      cutrewrite (cfi_bblock cfi = add_to_new_bblock (MB_cfi cfi)); auto.
+      replace (cfi_bblock cfi) with (add_to_new_bblock (MB_cfi cfi)); auto.
       rewrite Heqti. eapply Tr_end_block; eauto.
       rewrite <- Heqti. eapply End_cfi. congruence.
 Qed.
@@ -170,7 +170,7 @@ Lemma trans_code_is_trans_code_rev c1: forall c2 mbi,
   is_trans_code c2 mbi ->
   is_trans_code (rev_append c1 c2) (trans_code_rev c1 mbi).
 Proof.
-  induction c1 as [| i c1]; simpl; auto.
+  induction c1 as [| i c1]; cbn; auto.
 Qed.
 
 Lemma trans_code_is_trans_code c: is_trans_code c (trans_code c).
@@ -186,17 +186,17 @@ Lemma add_to_code_is_trans_code_inv i c bl:
   is_trans_code (i::c) bl -> exists bl0, is_trans_code c bl0 /\ bl = add_to_code (trans_inst i) bl0.
 Proof.
   intro H; inversion H as [|H0 H1 bl0| | H0 bi bh H1 bl0]; clear H; subst; (repeat econstructor); eauto.
-  + (* case Tr_end_block *) inversion H3; subst; simpl; auto.
+  + (* case Tr_end_block *) inversion H3; subst; cbn; auto.
      * destruct (header bh); congruence.
-     * destruct bl0; simpl; congruence.
-  + (* case Tr_add_basic *) rewrite H3. simpl. destruct (header bh); congruence.
+     * destruct bl0; cbn; congruence.
+  + (* case Tr_add_basic *) rewrite H3. cbn. destruct (header bh); congruence.
 Qed. 
 
 Lemma trans_code_is_trans_code_rev_inv c1: forall c2 mbi, 
   is_trans_code (rev_append c1 c2) mbi ->
   exists mbi0, is_trans_code c2 mbi0 /\ mbi=trans_code_rev c1 mbi0.
 Proof.
-  induction c1 as [| i c1]; simpl; eauto.
+  induction c1 as [| i c1]; cbn; eauto.
   intros; exploit IHc1; eauto.
   intros (mbi0 & H1 & H2); subst.
   exploit add_to_code_is_trans_code_inv; eauto.
diff --git a/kvx/lib/Machblockgenproof.v b/scheduling/postpass_lib/Machblockgenproof.v
index dfb97bfe..d121a54b 100644
--- a/kvx/lib/Machblockgenproof.v
+++ b/scheduling/postpass_lib/Machblockgenproof.v
@@ -146,16 +146,16 @@ Lemma parent_sp_preserved:
   forall s,
   Mach.parent_sp s = parent_sp (trans_stack s).
 Proof.
-  unfold parent_sp. unfold Mach.parent_sp. destruct s; simpl; auto.
-  unfold trans_stackframe. destruct s; simpl; auto.
+  unfold parent_sp. unfold Mach.parent_sp. destruct s; cbn; auto.
+  unfold trans_stackframe. destruct s; cbn; auto.
 Qed.
 
 Lemma parent_ra_preserved:
   forall s,
   Mach.parent_ra s = parent_ra (trans_stack s).
 Proof.
-  unfold parent_ra. unfold Mach.parent_ra. destruct s; simpl; auto.
-  unfold trans_stackframe. destruct s; simpl; auto.
+  unfold parent_ra. unfold Mach.parent_ra. destruct s; cbn; auto.
+  unfold trans_stackframe. destruct s; cbn; auto.
 Qed.
 
 Lemma external_call_preserved:
@@ -175,11 +175,11 @@ Proof.
   destruct i; try (constructor 2; split; auto; discriminate ).
   destruct (peq l0 l) as [P|P].
   - constructor. subst l0; split; auto.
-    revert H. unfold Mach.find_label. simpl. rewrite peq_true.
+    revert H. unfold Mach.find_label. cbn. rewrite peq_true.
     intros H; injection H; auto.
   - constructor 2. split.
     + intro F. injection F. intros. contradict P; auto.
-    + revert H. unfold Mach.find_label. simpl. rewrite peq_false; auto.
+    + revert H. unfold Mach.find_label. cbn. rewrite peq_false; auto.
 Qed.
 
 Lemma find_label_is_end_block_not_label i l c bl:
@@ -190,26 +190,26 @@ Proof.
   intros H H0 H1.
   unfold find_label.
   remember (is_label l _) as b.
-  cutrewrite (b = false); auto.
+  replace b with false; auto.
   subst; unfold is_label.
-  destruct i; simpl in * |- *; try (destruct (in_dec l nil); intuition).
+  destruct i; cbn in * |- *; try (destruct (in_dec l nil); intuition).
   inversion H.
   destruct (in_dec l (l0::nil)) as [H6|H6]; auto.
-  simpl in H6; intuition try congruence.
+  cbn in H6; intuition try congruence.
 Qed.
 
 Lemma find_label_at_begin l bh bl:
   In l (header bh)
   -> find_label l (bh :: bl) = Some (bh::bl).
 Proof.
-  unfold find_label; rewrite is_label_correct_true; intro H; rewrite H; simpl; auto.
+  unfold find_label; rewrite is_label_correct_true; intro H; rewrite H; cbn; auto.
 Qed.
 
 Lemma find_label_add_label_diff l bh bl:
       ~(In l (header bh)) -> 
       find_label l (bh::bl) = find_label l bl.
 Proof.
-  unfold find_label; rewrite is_label_correct_false; intro H; rewrite H; simpl; auto.
+  unfold find_label; rewrite is_label_correct_false; intro H; rewrite H; cbn; auto.
 Qed.
 
 Definition concat (h: list label) (c: code): code :=
@@ -227,18 +227,18 @@ Proof.
   rewrite <- is_trans_code_inv in * |-.
   induction Heqbl. 
   + (* Tr_nil *) 
-    intros; exists (l::nil); simpl in * |- *; intuition.
+    intros; exists (l::nil); cbn in * |- *; intuition.
     discriminate.
   + (* Tr_end_block *)
     intros.
     exploit Mach_find_label_split; eauto.
     clear H0; destruct 1 as [(H0&H2)|(H0&H2)].
-    - subst. rewrite find_label_at_begin; simpl; auto.
+    - subst. rewrite find_label_at_begin; cbn; auto.
       inversion H as [mbi H1 H2| | ].
       subst.
       inversion Heqbl.
       subst.
-      exists (l :: nil); simpl; eauto.
+      exists (l :: nil); cbn; eauto.
     - exploit IHHeqbl; eauto.
       destruct 1 as (h & H3 & H4).
       exists h.
@@ -251,21 +251,21 @@ Proof.
     - subst.
       inversion H0 as [H1].
       clear H0.
-      erewrite find_label_at_begin; simpl; eauto.
+      erewrite find_label_at_begin; cbn; eauto.
       subst_is_trans_code Heqbl.
-      exists (l :: nil); simpl; eauto.
+      exists (l :: nil); cbn; eauto.
     - subst; assert (H: l0 <> l); try congruence; clear H0.
       exploit IHHeqbl; eauto.
       clear IHHeqbl Heqbl.
       intros (h & H3 & H4).
-      simpl; unfold is_label, add_label; simpl.
-      destruct (in_dec l (l0::header bh)) as [H5|H5]; simpl in H5.
+      cbn; unfold is_label, add_label; cbn.
+      destruct (in_dec l (l0::header bh)) as [H5|H5]; cbn in H5.
       * destruct H5; try congruence.
-        exists (l0::h); simpl; intuition.
+        exists (l0::h); cbn; intuition.
         rewrite find_label_at_begin in H4; auto.
         apply f_equal. inversion H4 as [H5]. clear H4.
-        destruct (trans_code c'); simpl in * |- *;
-        inversion H5; subst; simpl; auto.
+        destruct (trans_code c'); cbn in * |- *;
+        inversion H5; subst; cbn; auto.
       * exists h. intuition.
         erewrite <- find_label_add_label_diff; eauto.
   + (* Tr_add_basic *)
@@ -286,7 +286,7 @@ Lemma find_label_preserved:
   Mach.find_label l (Mach.fn_code f) = Some c ->
   exists h, In l h /\ find_label l (fn_code (transf_function f)) = Some (concat h (trans_code c)).
 Proof.
-  intros. cutrewrite ((fn_code (transf_function f)) = trans_code (Mach.fn_code f)); eauto.
+  intros. replace (fn_code (transf_function f)) with (trans_code (Mach.fn_code f)); eauto.
   apply find_label_transcode_preserved; auto.
 Qed.
 
@@ -318,12 +318,12 @@ Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exe
 
 Lemma size_add_label l bh: size (add_label l bh) = size bh + 1.
 Proof.
-  unfold add_label, size; simpl; omega.
+  unfold add_label, size; cbn; omega.
 Qed.
 
 Lemma size_add_basic bi bh: header bh = nil -> size (add_basic bi bh) = size bh + 1.
 Proof.
-  intro H. unfold add_basic, size; rewrite H; simpl. omega.
+  intro H. unfold add_basic, size; rewrite H; cbn. omega.
 Qed.
 
 
@@ -418,8 +418,8 @@ Proof.
   + exists lbl. 
     unfold trans_inst in H1. 
     destruct i; congruence.
-  + unfold add_basic in H; simpl in H; congruence.
-  + unfold cfi_bblock in H; simpl in H; congruence.
+  + unfold add_basic in H; cbn in H; congruence.
+  + unfold cfi_bblock in H; cbn in H; congruence.
 Qed.
 
 Local Hint Resolve Mlabel_is_not_basic: core.
@@ -433,11 +433,11 @@ Proof.
   intros b bl H; remember (trans_inst i) as ti.
   destruct ti as [lbl|bi|cfi];
   inversion H as [|d0 d1 d2 H0 H1| |]; subst;
-  try (rewrite <- Heqti in * |- *); simpl in * |- *;
+  try (rewrite <- Heqti in * |- *); cbn in * |- *;
   try congruence.
   + (* label at end block *)
     inversion H1; subst. inversion H0; subst.
-    assert (X:i=Mlabel lbl). { destruct i; simpl in Heqti; congruence. }
+    assert (X:i=Mlabel lbl). { destruct i; cbn in Heqti; congruence. }
     subst. repeat econstructor; eauto.
   + (* label at mid block *)
     exploit IHc; eauto.
@@ -451,12 +451,12 @@ Proof.
       assert (X:(trans_inst i) = MB_basic bi ). { repeat econstructor; congruence. }
       repeat econstructor; congruence.
     - exists (i::c), c, c.
-      repeat econstructor; eauto; inversion H0; subst; repeat econstructor; simpl; try congruence.
+      repeat econstructor; eauto; inversion H0; subst; repeat econstructor; cbn; try congruence.
       * exploit (add_to_new_block_is_label i0); eauto.
-        intros (l & H8); subst; simpl; congruence.
+        intros (l & H8); subst; cbn; congruence.
       * exploit H3; eauto.
       * exploit (add_to_new_block_is_label i0); eauto.
-        intros (l & H8); subst; simpl; congruence.
+        intros (l & H8); subst; cbn; congruence.
   + (* basic at mid block *)
     inversion H1; subst.
     exploit IHc; eauto.
@@ -476,7 +476,7 @@ Lemma step_simu_header st f sp rs m s c h c' t:
  starN (Mach.step (inv_trans_rao rao)) (Genv.globalenv prog) (length h) (Mach.State st f sp c rs m) t s -> 
  s = Mach.State st f sp c' rs m /\ t = E0.
 Proof.
-  induction 1; simpl; intros hs; try (inversion hs; tauto).
+  induction 1; cbn; intros hs; try (inversion hs; tauto).
   inversion hs as [|n1 s1 t1 t2 s2 t3 s3 H1]. inversion H1. subst. auto. 
 Qed.
 
@@ -487,21 +487,21 @@ Lemma step_simu_basic_step (i: Mach.instruction) (bi: basic_inst) (c: Mach.code)
   Mach.step (inv_trans_rao rao) ge (Mach.State s f sp (i::c) rs m) t s' ->
   exists rs' m', s'=Mach.State s f sp c rs' m' /\ t=E0 /\ basic_step tge (trans_stack s) f sp rs m bi rs' m'.
 Proof.
-  destruct i; simpl in * |-;
+  destruct i; cbn in * |-;
    (discriminate
     || (intro H; inversion_clear H; intro X; inversion_clear X; eapply ex_intro; eapply ex_intro; intuition eauto)).
   - eapply exec_MBgetparam; eauto. exploit (functions_translated); eauto. intro.
     destruct H3 as (tf & A & B). subst. eapply A.
-    all: simpl; rewrite <- parent_sp_preserved; auto.
-  - eapply exec_MBop; eauto. rewrite <- H. destruct o; simpl; auto. destruct (rs ## l); simpl; auto.
+    all: cbn; rewrite <- parent_sp_preserved; auto.
+  - eapply exec_MBop; eauto. rewrite <- H. destruct o; cbn; auto. destruct (rs ## l); cbn; auto.
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
-  - eapply exec_MBload; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+  - eapply exec_MBload; eauto; rewrite <- H; destruct a; cbn; auto; destruct (rs ## l); cbn; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
-  - eapply exec_MBload_notrap1; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+  - eapply exec_MBload_notrap1; eauto; rewrite <- H; destruct a; cbn; auto; destruct (rs ## l); cbn; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
-  - eapply exec_MBload_notrap2; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+  - eapply exec_MBload_notrap2; eauto; rewrite <- H; destruct a; cbn; auto; destruct (rs ## l); cbn; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
-  - eapply exec_MBstore; eauto; rewrite <- H; destruct a; simpl; auto; destruct (rs ## l); simpl; auto;
+  - eapply exec_MBstore; eauto; rewrite <- H; destruct a; cbn; auto; destruct (rs ## l); cbn; auto;
     unfold Genv.symbol_address; rewrite symbols_preserved; auto.
 Qed.
 
@@ -511,7 +511,7 @@ Lemma star_step_simu_body_step s f sp c bdy c':
   starN (Mach.step (inv_trans_rao rao)) ge (length bdy) (Mach.State s f sp c rs m) t s' ->
   exists rs' m', s'=Mach.State s f sp c' rs' m' /\ t=E0 /\ body_step tge (trans_stack s) f sp bdy rs m rs' m'.
 Proof.
-  induction 1; simpl.
+  induction 1; cbn.
   + intros. inversion H. exists rs. exists m. auto.
   + intros. inversion H0. exists rs. exists m. auto.
   + intros. inversion H1; subst. 
@@ -531,15 +531,15 @@ Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved f
 Lemma match_states_concat_trans_code st f sp c rs m h: 
   match_states (Mach.State st f sp c rs m) (State (trans_stack st) f sp (concat h (trans_code c)) rs m).
 Proof.
-  intros; constructor 1; simpl.
+  intros; constructor 1; cbn.
   + intros (t0 & s1' & H0) t s'. 
     remember (trans_code _) as bl.
     destruct bl as [|bh bl]. 
     { rewrite <- is_trans_code_inv in Heqbl; inversion Heqbl; inversion H0; congruence. } 
     clear H0.
-    simpl; constructor 1; 
-    intros X; inversion X as [d1 d2 d3 d4 d5 d6 d7 rs' m' d10 d11 X1 X2| | | ]; subst; simpl in * |- *; 
-    eapply exec_bblock; eauto; simpl;
+    cbn; constructor 1; 
+    intros X; inversion X as [d1 d2 d3 d4 d5 d6 d7 rs' m' d10 d11 X1 X2| | | ]; subst; cbn in * |- *; 
+    eapply exec_bblock; eauto; cbn;
     inversion X2 as [cfi d1 d2 d3 H1|]; subst; eauto;
     inversion H1; subst; eauto.
   + intros H r; constructor 1; intro X; inversion X.
@@ -551,7 +551,7 @@ Lemma step_simu_cfi_step (i: Mach.instruction) (cfi: control_flow_inst) (c: Mach
   Mach.step (inv_trans_rao rao) ge (Mach.State stk f sp (i::c) rs m) t s' ->
   exists s2, cfi_step rao tge cfi (State (trans_stack stk) f sp (b::blc) rs m) t s2 /\ match_states s' s2.
 Proof.
-  destruct i; simpl in * |-;
+  destruct i; cbn in * |-;
   (intro H; intro Htc;apply is_trans_code_inv in Htc;rewrite Htc;inversion_clear H;intro X; inversion_clear X).
   * eapply ex_intro.
     intuition auto.
@@ -561,8 +561,8 @@ Proof.
     intuition auto.
     eapply exec_MBtailcall;eauto. 
     - rewrite <-H; exploit (find_function_ptr_same); eauto.
-    - simpl; rewrite <- parent_sp_preserved; auto.
-    - simpl; rewrite <- parent_ra_preserved; auto.
+    - cbn; rewrite <- parent_sp_preserved; auto.
+    - cbn; rewrite <- parent_ra_preserved; auto.
   * eapply ex_intro.
     intuition auto.
     eapply exec_MBbuiltin ;eauto.
@@ -605,7 +605,7 @@ Proof.
     inversion H1; subst. 
     exploit (step_simu_cfi_step); eauto.
     intros [s2 [Hcfi1 Hcfi3]].
-    inversion H4. subst; simpl.
+    inversion H4. subst; cbn.
     autorewrite  with trace_rewrite.
     exists s2.
     split;eauto.
@@ -616,7 +616,7 @@ Lemma simu_end_block:
   starN (Mach.step (inv_trans_rao rao)) ge (Datatypes.S (dist_end_block s1)) s1 t s1' ->
   exists s2', step rao tge (trans_state s1) t s2' /\ match_states s1' s2'.
 Proof.
-  destruct s1; simpl.
+  destruct s1; cbn.
   + (* State *)
     remember (trans_code _) as tc.
     rewrite <- is_trans_code_inv in Heqtc.
@@ -624,7 +624,7 @@ Proof.
     destruct tc as [|b bl].
     { (* nil => absurd *) 
       inversion Heqtc. subst. 
-      unfold dist_end_block_code; simpl.
+      unfold dist_end_block_code; cbn.
       inversion_clear H;
       inversion_clear H0. 
     }
@@ -659,9 +659,9 @@ Proof.
     intros t s1' H; inversion_clear H.
     eapply ex_intro; constructor 1; eauto.
     inversion H1; subst; clear H1.
-    inversion_clear H0; simpl.
+    inversion_clear H0; cbn.
     - (* function_internal*)
-      cutrewrite (trans_code (Mach.fn_code f0) = fn_code (transf_function f0)); eauto.
+      replace (trans_code (Mach.fn_code f0)) with (fn_code (transf_function f0)); eauto.
       eapply exec_function_internal; eauto.
       rewrite <- parent_sp_preserved; eauto.
       rewrite <- parent_ra_preserved; eauto.
@@ -674,7 +674,7 @@ Proof.
     intros t s1' H; inversion_clear H.
     eapply ex_intro; constructor 1; eauto.
     inversion H1; subst; clear H1.
-    inversion_clear H0; simpl.
+    inversion_clear H0; cbn.
     eapply exec_return.
 Qed.
 
@@ -685,10 +685,10 @@ dist_end_block_code (i :: c) = 0.
 Proof.
   unfold dist_end_block_code.
   intro H. destruct H as [cfi H].
-  destruct i;simpl in H;try(congruence); ( 
+  destruct i;cbn in H;try(congruence); ( 
     remember (trans_code _) as bl; 
     rewrite <- is_trans_code_inv in Heqbl;
-    inversion Heqbl; subst; simpl in * |- *; try (congruence)).
+    inversion Heqbl; subst; cbn in * |- *; try (congruence)).
 Qed.
 
 Theorem transf_program_correct: 
@@ -697,23 +697,23 @@ Proof.
   apply forward_simulation_block_trans with (dist_end_block := dist_end_block) (trans_state := trans_state).
 (* simu_mid_block *)
   - intros s1 t s1' H1 H2.
-    destruct H1; simpl in * |- *; omega || (intuition auto);
-    destruct H2; eapply cfi_dist_end_block; simpl; eauto.
+    destruct H1; cbn in * |- *; omega || (intuition auto);
+    destruct H2; eapply cfi_dist_end_block; cbn; eauto.
 (* public_preserved *)
   - apply senv_preserved.
 (* match_initial_states *)
-  - intros. simpl.
+  - intros. cbn.
     eapply ex_intro; constructor 1.
     eapply match_states_trans_state.
     destruct H. split.
     apply init_mem_preserved; auto.
     rewrite prog_main_preserved. rewrite <- H0. apply symbols_preserved.
 (* match_final_states *)
-  - intros. simpl. destruct H. split with (r := r); auto.
+  - intros. cbn. destruct H. split with (r := r); auto.
 (* final_states_end_block *)
-  - intros. simpl in H0.
+  - intros. cbn in H0.
     inversion H0.
-    inversion H; simpl; auto.
+    inversion H; cbn; auto.
     all: try (subst; discriminate).
     apply cfi_dist_end_block; exists MBreturn; eauto.
 (* simu_end_block *)
@@ -733,22 +733,22 @@ Proof.
   intro H; destruct c as [|i' c]. { inversion H. }
   remember (trans_inst i) as ti.
   destruct ti as [lbl|bi|cfi].
-  - (*i=lbl *) cutrewrite (i = Mlabel lbl). 2: ( destruct i; simpl in * |- *; try congruence ).
-    exists nil; simpl; eexists. eapply Tr_add_label; eauto.
+  - (*i=lbl *) replace (i ) with (Mlabel lbl). 2: ( destruct i; cbn in * |- *; try congruence ).
+    exists nil; cbn; eexists. eapply Tr_add_label; eauto.
   - (*i=basic*)
     destruct i'.
     10: { exists (add_to_new_bblock (MB_basic bi)::nil).  exists b. 
-      cutrewrite ((add_to_new_bblock (MB_basic bi) :: nil) ++ (b::l)=(add_to_new_bblock (MB_basic bi) :: (b::l)));eauto.
+      replace ((add_to_new_bblock (MB_basic bi) :: nil) ++ (b::l)) with ((add_to_new_bblock (MB_basic bi) :: (b::l)));eauto.
       rewrite Heqti.        
       eapply  Tr_end_block; eauto.
       rewrite <-Heqti.
-      eapply End_basic. inversion H; try(simpl; congruence).
-      simpl in H5; congruence. }
-    all: try(exists nil; simpl; eexists;  eapply  Tr_add_basic; eauto; inversion H; try(eauto || congruence)).
+      eapply End_basic. inversion H; try(cbn; congruence).
+      cbn in H5; congruence. }
+    all: try(exists nil; cbn; eexists;  eapply  Tr_add_basic; eauto; inversion H; try(eauto || congruence)).
   - (*i=cfi*)
-    destruct i; try(simpl in Heqti; congruence).
+    destruct i; try(cbn in Heqti; congruence).
     all: exists (add_to_new_bblock (MB_cfi cfi)::nil);  exists b; 
-        cutrewrite ((add_to_new_bblock (MB_cfi cfi) :: nil) ++ (b::l)=(add_to_new_bblock (MB_cfi cfi) :: (b::l)));eauto;
+        replace ((add_to_new_bblock (MB_cfi cfi) :: nil) ++ (b::l)) with ((add_to_new_bblock (MB_cfi cfi) :: (b::l)));eauto;
         rewrite Heqti;        
         eapply  Tr_end_block; eauto;
         rewrite <-Heqti;
@@ -765,21 +765,6 @@ Proof.
     eauto.
 Qed.
 
-(* FIXME: these two lemma should go into [Coqlib.v] *) 
-Lemma is_tail_app A (l1: list A): forall l2, is_tail l2 (l1 ++ l2).
-Proof.
-  induction l1; simpl; auto with coqlib.
-Qed.
-Hint Resolve is_tail_app: coqlib.
-
-Lemma is_tail_app_inv A (l1: list A): forall l2 l3, is_tail (l1 ++ l2) l3 -> is_tail l2 l3.
-Proof.
-  induction l1; simpl; auto with coqlib.
-  intros l2 l3 H; inversion H; eauto with coqlib.
-Qed.
-Hint Resolve is_tail_app_inv: coqlib.
-
-
 Lemma Mach_Machblock_tail sg ros c c1 c2: c1=(Mcall sg ros :: c) -> is_tail c1 c2 -> 
   exists b, is_tail (b :: trans_code c) (trans_code c2).
 Proof.
@@ -787,17 +772,17 @@ Proof.
   - intros; subst.
     remember (trans_code (Mcall _ _::c)) as tc2.
     rewrite <- is_trans_code_inv in Heqtc2.
-    inversion Heqtc2; simpl in * |- *; subst; try congruence.
+    inversion Heqtc2; cbn in * |- *; subst; try congruence.
     subst_is_trans_code H1.
     eapply ex_intro; eauto with coqlib.
   - intros; exploit IHis_tail; eauto. clear IHis_tail.
     intros (b & Hb). inversion Hb; clear Hb.
     * exploit (trans_code_monotonic i c2); eauto.
       intros (l' & b' & Hl'); rewrite Hl'.
-      exists b'; simpl; eauto with coqlib.
+      exists b'; cbn; eauto with coqlib.
     * exploit (trans_code_monotonic i c2); eauto.
       intros (l' & b' & Hl'); rewrite Hl'.
-      simpl; eapply ex_intro.
+      cbn; eapply ex_intro.
       eapply is_tail_trans; eauto with coqlib.
 Qed.
 
diff --git a/test/aarch64/README.md b/test/aarch64/README.md
new file mode 100644
index 00000000..f943489c
--- /dev/null
+++ b/test/aarch64/README.md
@@ -0,0 +1,15 @@
+# Testing the Machblock --> Asmblock translation
+1. Get the reference version of compcert-aarch in the father's directory if this repo (checkout `aarch64-ref`)
+2. Compile both repo for aarch64
+3. CD in this folder (`test/aarch64`)
+4. Launch `./asmb_aarch64_gen_test.sh`
+
+## Options
+The script takes following options :
+- `-c` to clear generated files at the end
+- `-w` to suppress warnings from Compcert
+
+## Tests files
+The variable `DIRS` in the script takes the list of directories containing c files.
+The tests under `test/aarch64/c` are simpler and useful to debug only one feature at a time.
+Most of them comes from [here](https://cis.temple.edu/~ingargio/cis71/code/).
diff --git a/test/aarch64/c/add_return.c b/test/aarch64/c/add_return.c
new file mode 100644
index 00000000..c29aeb16
--- /dev/null
+++ b/test/aarch64/c/add_return.c
@@ -0,0 +1 @@
+int main(int r) { return r+1; }
diff --git a/test/aarch64/c/addresses.c b/test/aarch64/c/addresses.c
new file mode 100644
index 00000000..e3cb5201
--- /dev/null
+++ b/test/aarch64/c/addresses.c
@@ -0,0 +1,32 @@
+/* addresses.c -- Playing with addresses of variables and their contents:
+ *                what is done by C with variables, addresses, and values.
+ */
+
+#include <stdio.h>
+
+void moo(int a, int * b);
+
+int main(void) {
+  int x;
+  int *y;
+
+  x=1;
+  y=&x;
+  printf("Address of x = %d, value of x = %d\n", &x, x);
+  printf("Address of y = %d, value of y = %d, value of *y = %d\n", &y, y, *y);
+  moo(9,y);
+}
+
+void moo(int a, int *b){
+  printf("Address of a = %d, value of a = %d\n", &a, a);
+  printf("Address of b = %d, value of b = %d, value of *b = %d\n", &b, b, *b);
+}
+
+/* Output from running this program on my computer:
+
+Address of x = 536869640, value of x = 1
+Address of y = 536869632, value of y = 536869640, value of *y = 1
+Address of a = 536869608, value of a = 9
+Address of b = 536869600, value of b = 536869640, value of *b = 1
+
+ */
diff --git a/test/aarch64/c/arith.c b/test/aarch64/c/arith.c
new file mode 100644
index 00000000..02df141b
--- /dev/null
+++ b/test/aarch64/c/arith.c
@@ -0,0 +1,16 @@
+int main(int num1, int num2)
+{
+  int sum, sub, mult, mod;
+  float div;
+
+  /*
+   * Perform all arithmetic operations
+   */ 
+  sum = num1 + num2;
+  sub = num1 - num2;
+  mult = num1 * num2;
+  div = (float)num1 / num2;
+  mod = num1 % num2;
+
+  return sum + sub + mult + div + mod;
+}
diff --git a/test/aarch64/c/arith_print.c b/test/aarch64/c/arith_print.c
new file mode 100644
index 00000000..d404a151
--- /dev/null
+++ b/test/aarch64/c/arith_print.c
@@ -0,0 +1,19 @@
+int main()
+{
+  int num1 = 2;
+  int num2 = 4;
+  int sum, sub, mult, mod;
+  float div;
+
+  /*
+   * Perform all arithmetic operations
+   */ 
+  sum = num1 + num2;
+  sub = num1 - num2;
+  mult = num1 * num2;
+  div = (float)num1 / num2;
+  mod = num1 % num2;
+
+  printf("%d", sum + sub + mult + div + mod);
+  return;
+}
diff --git a/test/aarch64/c/armstrong.c b/test/aarch64/c/armstrong.c
new file mode 100644
index 00000000..c5d838f9
--- /dev/null
+++ b/test/aarch64/c/armstrong.c
@@ -0,0 +1,21 @@
+int main()
+{
+  int n,sum,i,t,a,z;
+
+  for(i = 1; i <= 500; i++)
+  {
+      t = i;  // as we need to retain the original number
+      sum = 0;
+      while(t != 0)
+      {
+          a = t%10;
+          sum += a*a*a;
+          t = t/10;
+      }
+
+      if(sum == i)
+        z += i;
+  }
+
+  return 0;
+}
diff --git a/test/aarch64/c/array1.c b/test/aarch64/c/array1.c
new file mode 100644
index 00000000..5840ca66
--- /dev/null
+++ b/test/aarch64/c/array1.c
@@ -0,0 +1,64 @@
+/* array1.c -- Simple operations with arrays.
+ */
+
+#include <stdio.h>
+#define N 10
+
+void oneWay(void);
+void anotherWay(void);
+
+int main(void) {
+  printf("\noneWay:\n");
+  oneWay();
+  printf("\nantherWay:\n");
+  anotherWay();
+}
+
+/*Array initialized with aggregate */
+void oneWay(void) {
+  int vect[N] = {1,2,3,4,5,6,7,8,9,0};
+  int i;
+  
+  for (i=0; i<N; i++)
+    printf("i = %2d  vect[i] = %2d\n", i, vect[i]);
+} 
+
+/*Array initialized with loop */
+void anotherWay(void) {
+  int vect[N];
+  int i;
+  
+  for (i=0; i<N; i++)
+    vect[i] = i+1;
+
+  for (i=0; i<N; i++)
+    printf("i = %2d  vect[i] = %2d\n", i, vect[i]);
+} 
+
+/* The output of this program is
+
+oneWay:
+i =  0  vect[i] =  1
+i =  1  vect[i] =  2
+i =  2  vect[i] =  3
+i =  3  vect[i] =  4
+i =  4  vect[i] =  5
+i =  5  vect[i] =  6
+i =  6  vect[i] =  7
+i =  7  vect[i] =  8
+i =  8  vect[i] =  9
+i =  9  vect[i] =  0
+
+antherWay:
+i =  0  vect[i] =  1
+i =  1  vect[i] =  2
+i =  2  vect[i] =  3
+i =  3  vect[i] =  4
+i =  4  vect[i] =  5
+i =  5  vect[i] =  6
+i =  6  vect[i] =  7
+i =  7  vect[i] =  8
+i =  8  vect[i] =  9
+i =  9  vect[i] = 10
+
+ */
diff --git a/test/aarch64/c/array2.c b/test/aarch64/c/array2.c
new file mode 100644
index 00000000..389e1596
--- /dev/null
+++ b/test/aarch64/c/array2.c
@@ -0,0 +1,74 @@
+/* array2.c -- Read/writing/reversing integer arrays
+ */
+
+#include <stdio.h>
+
+#define NMAX 10
+
+void intSwap(int *x, int *y);
+int getIntArray(int a[], int nmax, int sentinel);
+void printIntArray(int a[], int n);
+void reverseIntArray(int a[], int n);
+
+int main(void) {
+  int x[NMAX];
+  int hmny;
+
+  hmny = getIntArray(x, NMAX, 0);
+  printf("The array was: \n");
+  printIntArray(x,hmny);
+  reverseIntArray(x,hmny);
+  printf("after reverse it is:\n");
+  printIntArray(x,hmny);
+}
+
+void intSwap(int *x, int *y)
+     /* It swaps the content of x and y */
+{
+  int temp = *x;
+  *x = *y;
+  *y = temp;
+}
+
+void printIntArray(int a[], int n)
+     /* n is the number of elements in the array a.
+      * These values are printed out, five per line. */
+{
+  int i;
+
+  for (i=0; i<n; ){
+    printf("\t%d ", a[i++]);
+    if (i%5==0)
+      printf("\n");
+  }
+  printf("\n");
+}
+
+int getIntArray(int a[], int nmax, int sentinel)
+     /* It reads up to nmax integers and stores then in a; sentinel 
+      * terminates input. */
+{
+  int n = 0;
+  int temp;
+
+  do {
+    printf("Enter integer [%d to terminate] : ", sentinel);
+    scanf("%d", &temp);
+    if (temp==sentinel) break;
+    if (n==nmax)
+      printf("array is full\n");
+    else 
+      a[n++] = temp;
+  }while (1);
+  return n;
+}
+
+void reverseIntArray(int a[], int n)
+     /* It reverse the order of the first n elements of a */
+{
+  int i;
+
+  for(i=0;i<n/2;i++){
+    intSwap(&a[i],&a[n-i-1]);
+  }
+}
diff --git a/test/aarch64/c/biggest_of_3_int.c b/test/aarch64/c/biggest_of_3_int.c
new file mode 100644
index 00000000..346908fc
--- /dev/null
+++ b/test/aarch64/c/biggest_of_3_int.c
@@ -0,0 +1,10 @@
+int main(int a, int b, int c) {
+  if ((a > b) && (a > c)) {
+    return a;
+  } else if (b > c) {
+    return b;
+  } else {
+    return c;
+  }
+  return 0;
+}
diff --git a/test/aarch64/c/bitwise1.c b/test/aarch64/c/bitwise1.c
new file mode 100644
index 00000000..d20641de
--- /dev/null
+++ b/test/aarch64/c/bitwise1.c
@@ -0,0 +1,8 @@
+int main()
+{
+  int x = 6, y = 4;
+  x = x^y;
+  y = x^876987^x | y << 42;
+
+	return ~x^y;
+}
diff --git a/test/aarch64/c/cpintarray.c b/test/aarch64/c/cpintarray.c
new file mode 100644
index 00000000..8049fdfb
--- /dev/null
+++ b/test/aarch64/c/cpintarray.c
@@ -0,0 +1,108 @@
+/* cpintarray.c -- Example showing how addresses and arrays are alike
+ */
+
+#include <stdio.h>
+#define SIZE 8
+
+void cpIntArray(int *a, int *b, int n)
+/*It copies n integers starting at b into a*/
+{
+  for(;n>0;n--)
+    *a++=*b++;
+}
+
+
+void printIntArray(int a[], int n)
+     /* n is the number of elements in the array a.
+      * These values are printed out, five per line. */
+{
+  int i;
+
+  for (i=0; i<n; ){
+    printf("\t%d ", a[i++]);
+    if (i%5==0)
+      printf("\n");
+  }
+  printf("\n");
+}
+
+int getIntArray(int a[], int nmax, int sentinel)
+     /* It reads up to nmax integers and stores then in a; sentinel 
+      * terminates input. */
+{
+  int n = 0;
+  int temp;
+
+  do {
+    printf("Enter integer [%d to terminate] : ", sentinel);
+    scanf("%d", &temp);
+    if (temp==sentinel) break;
+    if (n==nmax)
+      printf("array is full\n");
+    else 
+      a[n++] = temp;
+  }while (1);
+  return n;
+}
+
+int main(void){
+  int x[SIZE], nx;
+  int y[SIZE], ny;
+
+  printf("Read the x array:\n");
+  nx = getIntArray(x,SIZE,0);
+  printf("The x array is:\n");
+  printIntArray(x,nx);
+
+  printf("Read the y array:\n");
+  ny = getIntArray(y,SIZE,0);
+  printf("The y array is:\n");
+  printIntArray(y,ny);
+
+  cpIntArray(x+2,y+3,4);
+  /*Notice the expression 'x+2'. x is interpreted as the address for
+    the beginning of the x array. +2 sais to increment that address
+    by two units, in accordance with the type of x, which is
+    an integer array. Thus we move from x to two integer locations
+    past it, that is to the location of x[2]. The same reasoning applied
+    to 'y+3'.
+    */
+  printf("Printing x after having copied 4 elements\n"
+         "from y starting at y[3] into x starting at x[2]\n");
+  printIntArray(x,nx);
+}
+
+/* Here is the interaction in a run of this program:
+
+Read the x array:
+Enter integer [0 to terminate] : 1
+Enter integer [0 to terminate] : 3
+Enter integer [0 to terminate] : 5
+Enter integer [0 to terminate] : 7
+Enter integer [0 to terminate] : 9
+Enter integer [0 to terminate] : 11
+Enter integer [0 to terminate] : 13
+Enter integer [0 to terminate] : 15
+Enter integer [0 to terminate] : 0
+The x array is:
+	1 	3 	5 	7 	9 
+	11 	13 	15 
+Read the y array:
+Enter integer [0 to terminate] : 2
+Enter integer [0 to terminate] : 4
+Enter integer [0 to terminate] : 6
+Enter integer [0 to terminate] : 8
+Enter integer [0 to terminate] : 10
+Enter integer [0 to terminate] : 12
+Enter integer [0 to terminate] : 14
+Enter integer [0 to terminate] : 16
+Enter integer [0 to terminate] : 0
+The y array is:
+	2 	4 	6 	8 	10 
+	12 	14 	16 
+Printing x after having copied 4 elements
+from y starting at y[3] into x starting at x[2]
+	1 	3 	8 	10 	12 
+	14 	13 	15 
+
+ */
diff --git a/test/aarch64/c/enum1.c b/test/aarch64/c/enum1.c
new file mode 100644
index 00000000..d1f6b48d
--- /dev/null
+++ b/test/aarch64/c/enum1.c
@@ -0,0 +1,52 @@
+/* enum1.c  -- Starting to use enumerated types: Printing for each 
+ *             day of the week, today, yesterday, and tomorrow, both
+ *             as a string and as a number.
+ */
+
+#include <stdio.h>
+
+/* Introducing an enumerated data type */
+enum days {monday,tuesday,wednesday,thursday,friday,saturday,sunday};
+typedef enum days days; // This allows us to use "days" as an abbreviation 
+                        // for "enum days"
+
+/* Two useful functions */
+days yesterday(days today){
+  return (today+6)%7;
+}
+days tomorrow(days today){
+  return (today+1)%7;
+}
+
+// A useful array: thedays is an array of constant (i.e you cannot
+// modify them) pointers to constant (i.e. you cannot modify them) strings
+const char * const thedays[] = 
+                      {"monday", "tuesday", "wednesday", "thursday",
+		       "friday", "saturday", "sunday"};
+
+int main(void){
+  days today;
+
+  printf("today    \tyesterday  \ttomorrow\n"
+         "============================================\n");
+  for (today=monday;today<=sunday;today++)
+    printf("%s = %d \t %s = %d \t %s = %d\n", 
+	   thedays[today], today, 
+	   thedays[yesterday(today)], yesterday(today),
+	   thedays[tomorrow(today)], tomorrow(today));
+}
+
+/*
+ The output is:
+
+today    	yesterday  	tomorrow
+============================================
+monday = 0 	 sunday = 6 	 tuesday = 1
+tuesday = 1 	 monday = 0 	 wednesday = 2
+wednesday = 2 	 tuesday = 1 	 thursday = 3
+thursday = 3 	 wednesday = 2 	 friday = 4
+friday = 4 	 thursday = 3 	 saturday = 5
+saturday = 5 	 friday = 4 	 sunday = 6
+sunday = 6 	 saturday = 5 	 monday = 0
+
+*/
diff --git a/test/aarch64/c/enum2.c b/test/aarch64/c/enum2.c
new file mode 100644
index 00000000..a18acb80
--- /dev/null
+++ b/test/aarch64/c/enum2.c
@@ -0,0 +1,50 @@
+/* enum2.c  -- Starting to use enumerated types: Printing for each 
+ *             day of the week, today, yesterday, and tomorrow, both
+ *             as a string and as a number. We use typedef
+ */
+
+#include <stdio.h>
+
+/* Introducing an enumerated data type */
+typedef enum {monday,tuesday,wednesday,thursday,friday,saturday,sunday} days;
+
+/* Two useful functions */
+days yesterday(days today);
+days tomorrow(days today);
+
+char *thedays[] = {"monday", "tuesday", "wednesday", "thursday",
+		       "friday", "saturday", "sunday"};
+
+int main(void){
+  days today;
+
+  printf("today    \tyesterday  \ttomorrow\n"
+         "============================================\n");
+  for (today=monday;today<=sunday;today++)
+    printf("%s = %d \t %s = %d \t %s = %d\n", 
+	   thedays[today], today, 
+	   thedays[yesterday(today)], yesterday(today),
+	   thedays[tomorrow(today)], tomorrow(today));
+}
+
+days yesterday(days today){
+  return (today+6)%7;
+}
+days tomorrow(days today){
+  return (today+1)%7;
+}
+
+/*
+ The output is:
+
+today    	yesterday  	tomorrow
+============================================
+monday = 0 	 sunday = 6 	 tuesday = 1
+tuesday = 1 	 monday = 0 	 wednesday = 2
+wednesday = 2 	 tuesday = 1 	 thursday = 3
+thursday = 3 	 wednesday = 2 	 friday = 4
+friday = 4 	 thursday = 3 	 saturday = 5
+saturday = 5 	 friday = 4 	 sunday = 6
+sunday = 6 	 saturday = 5 	 monday = 0
+
+*/
diff --git a/test/aarch64/c/floop.c b/test/aarch64/c/floop.c
new file mode 100644
index 00000000..30270892
--- /dev/null
+++ b/test/aarch64/c/floop.c
@@ -0,0 +1,8 @@
+int main(int x)
+{
+  int y = 4;
+  int s = 23;
+  for(int i = 0; i <= x; i++)
+    y << s;
+  return y;
+}
diff --git a/test/aarch64/c/floor.c b/test/aarch64/c/floor.c
new file mode 100644
index 00000000..33a57af3
--- /dev/null
+++ b/test/aarch64/c/floor.c
@@ -0,0 +1,29 @@
+int main(int n)
+{
+  int x = 1, i;
+
+  /* for positive values */
+  if (n > 0)
+  {
+    for (; x <= n >> 1;)
+    {
+      x = x << 1;
+    }
+    n = x;
+  }
+  /* for negative values */
+  else
+  {
+    n = ~n;
+    n = n + 1;
+    for (; x <= n >> 1;)
+    {
+      x = x << 1;
+    }
+    x = x << 1;
+    x = ~x;
+    x = x + 1;
+    n = x;
+  }
+  return n;
+}
diff --git a/test/aarch64/c/funcs.c b/test/aarch64/c/funcs.c
new file mode 100644
index 00000000..49e610d6
--- /dev/null
+++ b/test/aarch64/c/funcs.c
@@ -0,0 +1,36 @@
+/* funcs.c  -- More examples of functions
+ */
+
+#include <stdio.h>
+
+int getint(void); /*It prompts user to enter an integer, which it returns*/
+
+int getmax(int a, int b, int c); /*It returns value of largest of a, b, c*/
+
+/* Main program: Using the various functions */
+int main (void) {
+  int x, y, z;
+
+  x = getint();
+  y = getint();
+  z = getint();
+  printf("The largest of %d, %d, and %d is %d\n", x, y, z, getmax(x,y,z));
+}
+
+int getint(void) {
+  int a;
+
+  printf("Please enter an integer > ");
+  scanf("%d", &a);
+  return(a);
+}
+
+int getmax(int a, int b, int c){
+  int m = a;
+
+  if (m<b)
+    m = b;
+  if (m<c)
+    m = c;
+  return(m);
+}
diff --git a/test/aarch64/c/hello.c b/test/aarch64/c/hello.c
new file mode 100644
index 00000000..0279269e
--- /dev/null
+++ b/test/aarch64/c/hello.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void) {
+  printf("Hello World!\n");
+  return 0; 
+}
diff --git a/test/aarch64/c/if.c b/test/aarch64/c/if.c
new file mode 100644
index 00000000..7d2e249a
--- /dev/null
+++ b/test/aarch64/c/if.c
@@ -0,0 +1,7 @@
+int main(int x)
+{
+  if (x > 27)
+    return 11;
+  else x--;
+  return x;
+}
diff --git a/test/aarch64/c/msb_pos.c b/test/aarch64/c/msb_pos.c
new file mode 100644
index 00000000..f2e7fe09
--- /dev/null
+++ b/test/aarch64/c/msb_pos.c
@@ -0,0 +1,20 @@
+/* Function to find the MSB bit position */
+int main(int n)
+{
+  int i = 0, bit;
+  while (i < 32)
+  {
+     bit = n & 0x80000000;
+     if (bit == -0x80000000)
+     {
+        bit = 1;
+     }
+
+     if (bit == 1) 
+       break;
+
+      n = n << 1;
+      i++;
+  }
+  return i;
+}
diff --git a/test/aarch64/c/power2.c b/test/aarch64/c/power2.c
new file mode 100644
index 00000000..64707df9
--- /dev/null
+++ b/test/aarch64/c/power2.c
@@ -0,0 +1,42 @@
+/* power2.c -- Print out powers of 2: 1, 2, 4, 8, .. up to 2^N
+ */
+
+#include <stdio.h>
+#define N 16
+
+int main(void) {
+  int n;           /* The current exponent */
+  int val = 1;     /* The current power of 2  */
+
+  printf("\t  n  \t    2^n\n");
+  printf("\t================\n");
+  for (n=0; n<=N; n++) {
+    printf("\t%3d \t %6d\n", n, val); 
+    val = 2*val;
+  }
+  return 0;
+}
+
+/* It prints out :
+
+	  n  	    2^n
+	================
+	  0 	      1
+	  1 	      2
+	  2 	      4
+	  3 	      8
+	  4 	     16
+	  5 	     32
+	  6 	     64
+	  7 	    128
+	  8 	    256
+	  9 	    512
+	 10 	   1024
+	 11 	   2048
+	 12 	   4096
+	 13 	   8192
+	 14 	  16384
+	 15 	  32768
+	 16 	  65536
+
+*/
diff --git a/test/aarch64/c/prime.c b/test/aarch64/c/prime.c
new file mode 100644
index 00000000..6c51db32
--- /dev/null
+++ b/test/aarch64/c/prime.c
@@ -0,0 +1,23 @@
+/* prime1.c  It prompts the user to enter an integer N. It prints out
+ *           if it is a prime or not. If not, it prints out a factor of N.
+ */
+
+#include <stdio.h>
+
+int main(int n) {
+  int i;
+  int flag;
+
+  flag = 1;
+  for (i=2; (i<(n/2)) && flag; ) { /* May be we do not need to test
+			values of i greater than the square root of n? */
+    if ((n % i) == 0) /* If true n is divisible by i */
+      flag = 0;
+    else
+      i++;
+  }
+ 
+  if (flag)
+    return 1;
+  return 0;
+}
diff --git a/test/aarch64/c/random.c b/test/aarch64/c/random.c
new file mode 100644
index 00000000..50aa5737
--- /dev/null
+++ b/test/aarch64/c/random.c
@@ -0,0 +1,50 @@
+/* Generating random number sequences using the formula (linear congruence)
+	x[k+1] = (a*x[k] + c)mod m
+   where a, c, and m are parameters set by the user and passed as command line 
+   parameters together with a seed i.e. x[0]
+   As a simple example try  a=7, c=1, m=13, and seed=5
+   A more sophisticated selection would be a=69069, c=0, 
+   m=2^32=4294967296, and seed=31
+   It will print out, in a sort of random order, up to m-1 distinct values. 
+   Then it loops.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+static long seed = 13;
+static long a;
+static long c;
+static long m;
+
+void random_init(long s) {
+    if (s != 0) seed = s;
+}
+
+long random() {
+	seed = (a*seed + c)%m;
+	return seed;
+    }
+
+int main(int argc, char * argv[]) {
+    if (argc != 5) {
+	printf("usage: %s a, c, m, seed\n", argv[0]);
+	return 1;
+    }
+    a = atoi(argv[1]);
+    c = atoi(argv[2]);
+    m = atoi(argv[3]);
+    long s = atoi(argv[4]);
+    random_init(s);
+    int k;
+    for (k = 0; k < m-1; k++) { 
+	printf("%8ld", random());
+	if (k % 8 == 7) { // after 8 elements go to a new line
+	    printf("\n");
+	    sleep(1); // sleep for a second
+	} 
+    }
+    printf("\n");
+    return 0;
+}
diff --git a/test/aarch64/c/simple_op.c b/test/aarch64/c/simple_op.c
new file mode 100644
index 00000000..7c43b081
--- /dev/null
+++ b/test/aarch64/c/simple_op.c
@@ -0,0 +1,8 @@
+int main(int argc, char ** argv)
+{
+  int n, m;
+  n = n + 1;
+  n = n * 7;
+  n / (8 - 2);
+  return n;
+}
diff --git a/test/aarch64/c/wloop.c b/test/aarch64/c/wloop.c
new file mode 100644
index 00000000..5ba67419
--- /dev/null
+++ b/test/aarch64/c/wloop.c
@@ -0,0 +1,8 @@
+int main(int x)
+{
+  int y = 4;
+  int s = 23;
+  while(s < x)
+    y << s;
+  return y;
+}
diff --git a/test/aarch64/gen_tests/asmb_aarch64_gen_test.sh b/test/aarch64/gen_tests/asmb_aarch64_gen_test.sh
new file mode 100755
index 00000000..38235f14
--- /dev/null
+++ b/test/aarch64/gen_tests/asmb_aarch64_gen_test.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+CLEAN=0
+WOFF=0
+while getopts ':cw' 'OPTKEY'; do
+  case ${OPTKEY} in
+    c) CLEAN=1;;
+    w) WOFF=1;;
+  esac
+done
+
+DIRS=(
+  ../c/*.c       # Special simple tests
+  #../../c/*.c 
+  ../../clightgen/*.c
+  #../../compression/*.c
+  ../../cse2/*.c
+
+  # Monniaux test directory
+  ../../monniaux/binary_search/*.c
+  ../../monniaux/complex/*.c
+  #../../monniaux/crypto-algorithms/*.c                    # Warnings
+  ../../monniaux/cse2/*.c
+  #../../monniaux/des/*.c                                  # Unsupported feature?
+  ../../monniaux/expect/*.c
+  ../../monniaux/fill_buffer/*.c
+  ../../monniaux/genann/*.c
+  #../../monniaux/heptagon_radio_transmitter/*.c           # Warnings
+  ../../monniaux/idea/*.c
+  ../../monniaux/jumptable/*.c
+  ../../monniaux/licm/*.c
+  ../../monniaux/longjmp/*.c
+  ../../monniaux/loop/*.c
+  ../../monniaux/lustrev4_lustrec_heater_control/*.c
+  ../../monniaux/lustrev4_lv4_heater_control/*.c
+  ../../monniaux/lustrev4_lv6-en-2cgc_heater_control/*.c
+  #../../monniaux/lustrev6-carlightV2/*.c                  # Warnings
+  #../../monniaux/lustrev6-convertible-2cgc/*.c            # Unsupported feature?
+  #../../monniaux/lustrev6-convertible-en-2cgc/*.c
+  #../../monniaux/lustrev6-convertible/*.c                 # Warnings
+  ../../monniaux/madd/*.c
+  #../../monniaux/math/*.c                                 # Unsupported feature?
+  ../../monniaux/memcpy/*.c
+  #../../monniaux/micro-bunzip/*.c                         # Warnings
+  ../../monniaux/moves/*.c
+  ../../monniaux/multithreaded_volatile/*.c
+  ../../monniaux/nand/*.c
+  #../../monniaux/ncompress/*.c                            # Warnings
+  ../../monniaux/number_theoretic_transform/*.c
+  ../../monniaux/predicated/*.c
+  ../../monniaux/regalloc/*.c
+  ../../monniaux/rotate/*.c
+  ../../monniaux/scheduling/*.c
+  ../../monniaux/send_through/*.c
+  ../../monniaux/tiny-AES-c/*.c
+  ../../monniaux/varargs/*.c
+  ../../monniaux/xor_and_mat/*.c
+  #../../monniaux/zlib-1.2.11/*.c                          # Warnings
+)
+#FILES=../c/*.c
+CCOMP_BBLOCKS="../../../ccomp -fno-postpass"
+CCOMP_REF="../../../../CompCert_kvx/ccomp"
+COUNT=0
+
+if [ $WOFF -eq 1 ]
+then
+  CCOMP_BBLOCKS="${CCOMP_BBLOCKS} -w"
+  CCOMP_REF="${CCOMP_REF} -w"
+fi
+
+for files in ${DIRS[@]}
+do
+  for f in $files
+  do
+    BNAME=$(basename -s .c $f)
+    SNAME="$BNAME".s
+    SREFNAME="$BNAME"_ref.s
+    ./$CCOMP_BBLOCKS -S $f -o $SNAME
+    ./$CCOMP_REF -dmach -S $f -o $SREFNAME
+    #diff -I '^//*' <(cut -c-5 $SNAME) <(cut -c-5 $SREFNAME) > /dev/null 2>&1 
+    diff -I '^//*' $SNAME $SREFNAME > /dev/null 2>&1 
+
+    error=$?
+    if [ $error -eq 0 ]
+    then
+      echo "[$BNAME] OK"
+      COUNT=$((COUNT + 1))
+    elif [ $error -eq 1 ]
+    then
+      echo "[$BNAME] FAIL"
+      diff -I '^//*' -y $SNAME $SREFNAME
+      exit 1
+    else
+      echo "[$BNAME] FAIL"
+      echo "[WARNING] There was something wrong with the diff command !"
+      exit 1
+    fi
+  done
+done
+
+echo "[TOTAL] $COUNT tests PASSED"
+
+if [ $CLEAN -eq 1 ]
+then
+  rm *.s *.mach
+fi
diff --git a/test/aarch64/postpass_tests/postpass_exec_c_test.sh b/test/aarch64/postpass_tests/postpass_exec_c_test.sh
new file mode 100755
index 00000000..73422990
--- /dev/null
+++ b/test/aarch64/postpass_tests/postpass_exec_c_test.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+CLEAN=0
+WOFF=0
+SRC=""
+while getopts ':cwi:' 'OPTKEY'; do
+  case ${OPTKEY} in
+    c) CLEAN=1;;
+    w) WOFF=1;;
+    i) SRC=${OPTARG};;
+  esac
+done
+
+CCOMP="../../../ccomp -static"
+
+if [ $WOFF -eq 1 ]
+then
+  CCOMP="${CCOMP} -w"
+fi
+
+BNAME=$(basename -s .c $SRC)
+SNAME="$BNAME".s
+SREFNAME="$BNAME"_ref.s
+ENAME="$BNAME"
+EREFNAME="$BNAME"_ref
+./$CCOMP -S $SRC -o $SNAME
+./$CCOMP -fno-postpass -S $SRC -o $SREFNAME
+./$CCOMP $SRC -o $ENAME
+./$CCOMP -fno-postpass $SRC -o $EREFNAME
+
+#diff -I '^//*' -y $SNAME $SREFNAME
+
+if [ $CLEAN -eq 1 ]
+then
+  rm $SNAME $SREFNAME $ENAME $EREFNAME
+fi
diff --git a/test/c/Makefile b/test/c/Makefile
index 726631d2..a728d182 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -1,6 +1,8 @@
 include ../../Makefile.config
 
 CCOMP=../../ccomp
+# TODO - temporary
+# CCOMPOPTS:=$(CCOMPOPTS) -fall-loads-nontrap -fduplicate 2 -fprepass
 CCOMPFLAGS=$(CCOMPOPTS) -stdlib ../../runtime -dc -dclight -dasm
 
 CFLAGS+=-O2 -Wall
diff --git a/test/clightgen/Makefile b/test/clightgen/Makefile
index 0607e2fa..f0e9d961 100644
--- a/test/clightgen/Makefile
+++ b/test/clightgen/Makefile
@@ -5,8 +5,12 @@ ARCHDIRS=$(ARCH)
 else
 ARCHDIRS=$(ARCH)_$(BITSIZE) $(ARCH)
 endif
-RECDIRS=lib common $(ARCHDIRS) cfrontend flocq  exportclight
-COQINCLUDES=$(foreach d, $(RECDIRS), -R ../../$(d) compcert.$(d))
+RECDIRS := lib common $(ARCHDIRS) cfrontend  exportclight
+COQINCLUDES := $(foreach d, $(RECDIRS), -R ../../$(d) compcert.$(d))
+ifeq ($(LIBRARY_FLOCQ),local)
+COQINCLUDES += -R ../../flocq Flocq
+endif
+
 
 CLIGHTGEN=../../clightgen
 COQC=coqc
@@ -22,6 +26,14 @@ SRC+=aes.c almabench.c binarytrees.c bisect.c chomp.c fannkuch.c fft.c \
 SRC+=arrays.c eval.c gmllexer.c gmlparser.c intersect.c light.c main.c \
   matrix.c memory.c object.c render.c simplify.c surface.c vector.c
 
+CFLAGS=-DSYSTEM_$(SYSTEM)
+
+aes.vo almabench.vo binarytrees.vo bisect.vo chomp.vo: CFLAGS += -short-idents
+
+fft.vo fftsp.vo fftw.vo fib.vo integr.vo knucleotide.vo: CFLAGS += -short-idents -normalize
+
+qsort.vo sha1.vo sha3.vo siphash24.vo spectral.vo vmach.vo: CFLAGS += -normalize
+
 all: $(SRC:.c=.vo)
 
 test:
@@ -41,4 +53,4 @@ test:
 .SECONDARY: $(SRC:.c=.v)
 
 clean:
-	rm -f *.v *.vo .*.aux
+	rm -f *.v *.vo* .*.aux
diff --git a/test/clightgen/annotations.c b/test/clightgen/annotations.c
new file mode 100644
index 00000000..e91c7fbc
--- /dev/null
+++ b/test/clightgen/annotations.c
@@ -0,0 +1,8 @@
+int f(int x, long y)
+{
+#if !defined(SYSTEM_macosx) && !defined(SYSTEM_cygwin)
+  __builtin_ais_annot("x is %e1, y is %e2", x, y);
+#endif
+  __builtin_annot("x is %1, y is %2", x, y);
+  return __builtin_annot_intval("x was here: %1", x);
+}
diff --git a/test/gourdinl/clause.h b/test/gourdinl/clause.h
new file mode 100644
index 00000000..3eb44402
--- /dev/null
+++ b/test/gourdinl/clause.h
@@ -0,0 +1,12 @@
+typedef struct {
+  int b;
+  int a;
+} * CLAUSE;
+__inline__ int g(CLAUSE c) { return c->b; }
+__inline__ int d(CLAUSE c) { return c->a; }
+__inline__ void clause_SetNumOfConsLits(CLAUSE c, int e) {
+  c->b = e;
+  c->a = e;
+}
+__inline__ int f(CLAUSE c) { return g(c) + d(c); }
+__inline__ int clause_LastLitIndex(c) { return f(c); }
diff --git a/test/gourdinl/clause2.c b/test/gourdinl/clause2.c
new file mode 100644
index 00000000..42cd0fa6
--- /dev/null
+++ b/test/gourdinl/clause2.c
@@ -0,0 +1,23 @@
+#include "clause.h"
+int a, b;
+void c();
+void h() {
+  int f = clause_LastLitIndex(d);
+  a = clause_LastLitIndex(0);
+  if (f)
+    if (a)
+      1;
+}
+void i() {
+  CLAUSE e = 0;
+  int *g[] = {h, c};
+  for (; b;)
+    l(e);
+}
+void m() {
+  int k, j;
+  for (; k <= 0;)
+    ;
+  clause_SetNumOfConsLits(0, j);
+  n(0 - j);
+}
diff --git a/test/gourdinl/cond_exp_mini_cse.c b/test/gourdinl/cond_exp_mini_cse.c
new file mode 100644
index 00000000..3a2ce9c3
--- /dev/null
+++ b/test/gourdinl/cond_exp_mini_cse.c
@@ -0,0 +1,6 @@
+int main(int x, int y, int* t) {
+  if (x + *t < 7)
+    if (y < 7)
+      return 421;
+  return 0;
+}
diff --git a/test/gourdinl/cscript.sh b/test/gourdinl/cscript.sh
new file mode 100755
index 00000000..8bf3a613
--- /dev/null
+++ b/test/gourdinl/cscript.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+/home/yuki/Work/VERIMAG/Compcert_neutral/ccomp -stdlib ../../runtime -dparse -dclight -S -fstruct-return   -c clause2.c > log 2>&1
+
+b1=$(cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64")
+sb1=$?
+b2=$(cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32")
+sb2=$?
+b3=$(cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32")
+sb3=$?
+b4=$(cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64")
+sb4=$?
+
+#if [ "$sb1" == 0 ] && [ "$sb2" == 0 ] && [ "$sb3" == 0 ] && [ "$sb4" == 0 ]
+if [ "$sb1" == 0 ] && [ "$sb2" == 0 ] && [ "$sb3" == 0 ] && [ "$sb4" == 0 ]
+then
+  exit 0
+else
+  exit 1
+fi
diff --git a/test/gourdinl/fp_init.c b/test/gourdinl/fp_init.c
new file mode 100644
index 00000000..1d835994
--- /dev/null
+++ b/test/gourdinl/fp_init.c
@@ -0,0 +1,7 @@
+int main (float *x) {
+  double a = 1.0;
+  float b = 1.0f;
+  printf("%f", a);
+  *x = b;
+  return b;
+}
diff --git a/test/gourdinl/gen_asm_files.sh b/test/gourdinl/gen_asm_files.sh
new file mode 100755
index 00000000..08cd4b3d
--- /dev/null
+++ b/test/gourdinl/gen_asm_files.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+../../ccomp -S clause2.c -o clause2.nopostpass.noph.s -fno-coalesce-mem -fno-postpass
+../../ccomp -S clause2.c -o clause2.nopostpass.ph.s -fcoalesce-mem -fno-postpass
+../../ccomp -S clause2.c -o clause2.noph.s -fno-coalesce-mem
+../../ccomp -S clause2.c -o clause2.ph.s -fcoalesce-mem
diff --git a/test/kvx/instr/Makefile b/test/kvx/instr/Makefile
index e4f964b3..fce32178 100644
--- a/test/kvx/instr/Makefile
+++ b/test/kvx/instr/Makefile
@@ -1,15 +1,15 @@
 SHELL := /bin/bash
 
-KVXC ?= k1-cos-gcc
+KVXC ?= kvx-elf-gcc
 CC ?= gcc
 CCOMP ?= ccomp
 OPTIM ?= -O2
 CFLAGS ?= $(OPTIM)
 CCOMPFLAGS ?= $(CFLAGS)
-SIMU ?= k1-mppa
+SIMU ?= kvx-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
 DIFF ?= python2.7 floatcmp.py -reltol .00001
-HARDRUN ?= k1-jtag-runner
+HARDRUN ?= kvx-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
@@ -64,7 +64,7 @@ simutest: $(X86_GCC_OUT) $(GCC_SIMUOUT)
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
 		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
 		if grep "__KVX__" -q $$test.c; then\
-			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __KVX__\`\n";\
+			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __KVX__\`\n$(NC)";\
 		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
 			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
 		else\
@@ -92,7 +92,7 @@ hardtest: $(X86_GCC_OUT) $(GCC_HARDOUT)
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
 		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
 		if grep "__KVX__" -q $$test.c; then\
-			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __KVX__\`\n";\
+			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __KVX__\`\n$(NC)";\
 		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
 			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
 		else\
diff --git a/test/kvx/interop/Makefile b/test/kvx/interop/Makefile
index a0d4d7da..aa018aac 100644
--- a/test/kvx/interop/Makefile
+++ b/test/kvx/interop/Makefile
@@ -1,12 +1,12 @@
 SHELL := /bin/bash
 
-KVXC ?= k1-cos-gcc
+KVXC ?= kvx-elf-gcc
 CC ?= gcc
 CCOMP ?= ccomp
 CFLAGS ?= -O2 -Wno-varargs
-SIMU ?= k1-mppa
+SIMU ?= kvx-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
-HARDRUN ?= k1-jtag-runner
+HARDRUN ?= kvx-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
diff --git a/test/kvx/lib/Makefile b/test/kvx/lib/Makefile
index 5a947bb3..7df7dd16 100644
--- a/test/kvx/lib/Makefile
+++ b/test/kvx/lib/Makefile
@@ -1,10 +1,10 @@
-KVXC ?= k1-cos-gcc
-K1AR ?= k1-cos-ar
+KVXC ?= kvx-elf-gcc
+K1AR ?= kvx-elf-ar
 CC ?= gcc
 AR ?= gcc-ar
 CCOMP ?= ccomp
 CFLAGS ?= -O1 -Wl,--wrap=printf
-SIMU ?= k1-mppa
+SIMU ?= kvx-mppa
 TIMEOUT ?= --signal=SIGTERM 60s
 
 DIR=./
diff --git a/test/kvx/mmult/Makefile b/test/kvx/mmult/Makefile
index e7cd890e..252f8911 100644
--- a/test/kvx/mmult/Makefile
+++ b/test/kvx/mmult/Makefile
@@ -1,8 +1,8 @@
-KVXC ?= k1-cos-gcc
+KVXC ?= kvx-elf-gcc
 CC ?= gcc
 CCOMP ?= ccomp
 CFLAGS ?= -O2
-SIMU ?= k1-mppa
+SIMU ?= kvx-mppa
 TIMEOUT ?= 10s
 
 KVXCPATH=$(shell which $(KVXC))
@@ -65,3 +65,7 @@ check: $(CCOMP_OUT) $(STUB_OUT)
 	else\
 		echo "GOOD kvx: $< succeeded";\
 	fi
+
+.PHONY:
+clean:
+	rm -f *.out mmult-test-ccomp-kvx mmult-test-gcc-kvx mmult-test-gcc-x86
diff --git a/test/kvx/prng/Makefile b/test/kvx/prng/Makefile
index 68e5ffc9..b97f4aa4 100644
--- a/test/kvx/prng/Makefile
+++ b/test/kvx/prng/Makefile
@@ -1,8 +1,8 @@
-KVXC ?= k1-cos-gcc
+KVXC ?= kvx-elf-gcc
 CC ?= gcc
 CCOMP ?= ccomp
 CFLAGS ?= -O2
-SIMU ?= k1-mppa
+SIMU ?= kvx-mppa
 TIMEOUT ?= 10s
 
 KVXCPATH=$(shell which $(KVXC))
@@ -67,3 +67,4 @@ check: $(CCOMP_OUT) $(STUB_OUT)
 .PHONY:
 clean:
 	rm -f prng-test-gcc-x86 prng-test-gcc-kvx prng-test-ccomp-kvx
+	rm -f *.out
diff --git a/test/kvx/sort/Makefile b/test/kvx/sort/Makefile
index c4090352..46a8f025 100644
--- a/test/kvx/sort/Makefile
+++ b/test/kvx/sort/Makefile
@@ -1,9 +1,9 @@
-KVXC ?= k1-cos-gcc
+KVXC ?= kvx-elf-gcc
 CC ?= gcc
 CCOMP ?= ccomp
 CFLAGS ?= -O2
-SIMU ?= k1-mppa
-TIMEOUT ?= 10s
+SIMU ?= kvx-mppa
+TIMEOUT ?= 20s
 
 KVXCPATH=$(shell which $(KVXC))
 CCPATH=$(shell which $(CC))
@@ -89,3 +89,10 @@ check: $(STUB_OUT) $(CCOMP_OUT)
 			echo "GOOD kvx: $$test succeeded";\
 		fi;\
 	done
+
+.PHONY:
+clean:
+	for test in insertion main merge selection; do\
+		rm -f $$test-ccomp-kvx $$test-gcc-kvx $$test-gcc-x86;\
+	done
+	rm -f *.out
diff --git a/test/monniaux/cmov/cmov.c b/test/monniaux/cmov/cmov.c
new file mode 100644
index 00000000..2e388834
--- /dev/null
+++ b/test/monniaux/cmov/cmov.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+long cmovl(int x, long y, long z) {
+  return __builtin_sel(x, y, z);
+}
+
+int cmovi(int x, int y, int z) {
+  return __builtin_sel(x, y, z);
+}
+
+double cmovd(int x, double y, double z) {
+  return __builtin_sel(x, y, z);
+}
+
+int main() {
+  printf("%ld\n", cmovl(1, 42, 65));
+  printf("%ld\n", cmovl(0, 42, 65));
+  printf("%d\n", cmovi(1, 42, 65));
+  printf("%d\n", cmovi(0, 42, 65));
+  printf("%f\n", cmovd(1, 42., 65.));
+  printf("%f\n", cmovd(0, 42., 65.));
+}
diff --git a/test/monniaux/cmov/cmov2.c b/test/monniaux/cmov/cmov2.c
new file mode 100644
index 00000000..6ecab61b
--- /dev/null
+++ b/test/monniaux/cmov/cmov2.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+
+long cmovl(int x, long y, long z) {
+  return x ? y : z;
+}
+
+int cmovi(int x, int y, int z) {
+  return x ? y : z;
+}
+
+double cmovd(int x, double y, double z) {
+  return x ? y : z;
+}
+
+float cmovf(int x, float y, float z) {
+  return x ? y : z;
+}
+
+int main() {
+  printf("%ld\n", cmovl(1, 42, 65));
+  printf("%ld\n", cmovl(0, 42, 65));
+  printf("%d\n", cmovi(1, 42, 65));
+  printf("%d\n", cmovi(0, 42, 65));
+  printf("%f\n", cmovd(1, 42., 65.));
+  printf("%f\n", cmovd(0, 42., 65.));
+  printf("%f\n", cmovf(1, 42.f, 65.f));
+  printf("%f\n", cmovf(0, 42.f, 65.f));
+}
diff --git a/test/monniaux/if/if2.c b/test/monniaux/if/if2.c
new file mode 100644
index 00000000..2a6d5507
--- /dev/null
+++ b/test/monniaux/if/if2.c
@@ -0,0 +1,11 @@
+int toto(int x) {
+  if (2*x+1 >= 3) {
+    if (2*x+1 >= 3) {
+      return 3;
+    } else {
+      return 2;
+    }
+  } else {
+    return 1;
+  }
+}
diff --git a/test/monniaux/loop_nest/polybench.h b/test/monniaux/loop_nest/polybench.h
new file mode 100644
index 00000000..7d092e45
--- /dev/null
+++ b/test/monniaux/loop_nest/polybench.h
@@ -0,0 +1,202 @@
+/**
+ * polybench.h: This file is part of the PolyBench/C 3.2 test suite.
+ *
+ *
+ * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
+ * Web address: http://polybench.sourceforge.net
+ */
+/*
+ * Polybench header for instrumentation.
+ *
+ * Programs must be compiled with `-I utilities utilities/polybench.c'
+ *
+ * Optionally, one can define:
+ *
+ * -DPOLYBENCH_TIME, to report the execution time,
+ *   OR (exclusive):
+ * -DPOLYBENCH_PAPI, to use PAPI H/W counters (defined in polybench.c)
+ *
+ *
+ * See README or utilities/polybench.c for additional options.
+ *
+ */
+#ifndef POLYBENCH_H
+# define POLYBENCH_H
+
+# include <stdlib.h>
+
+/* Array padding. By default, none is used. */
+# ifndef POLYBENCH_PADDING_FACTOR
+/* default: */
+#  define POLYBENCH_PADDING_FACTOR 0
+# endif
+
+
+/* C99 arrays in function prototype. By default, do not use. */
+# ifdef POLYBENCH_USE_C99_PROTO
+#  define POLYBENCH_C99_SELECT(x,y) y
+# else
+/* default: */
+#  define POLYBENCH_C99_SELECT(x,y) x
+# endif
+
+
+/* Scalar loop bounds in SCoPs. By default, use parametric loop bounds. */
+# ifdef POLYBENCH_USE_SCALAR_LB
+#  define POLYBENCH_LOOP_BOUND(x,y) x
+# else
+/* default: */
+#  define POLYBENCH_LOOP_BOUND(x,y) y
+# endif
+
+
+/* Macros to reference an array. Generic for heap and stack arrays
+   (C99).  Each array dimensionality has his own macro, to be used at
+   declaration or as a function argument.
+   Example:
+   int b[x] => POLYBENCH_1D_ARRAY(b, x)
+   int A[N][N] => POLYBENCH_2D_ARRAY(A, N, N)
+*/
+# ifndef POLYBENCH_STACK_ARRAYS
+#  define POLYBENCH_ARRAY(x) *x
+#  define POLYBENCH_FREE_ARRAY(x) free((void*)x);
+#  define POLYBENCH_DECL_VAR(x) (*x)
+# else
+#  define POLYBENCH_ARRAY(x) x
+#  define POLYBENCH_FREE_ARRAY(x)
+#  define POLYBENCH_DECL_VAR(x) x
+# endif
+/* Macros for using arrays in the function prototypes. */
+# define POLYBENCH_1D(var, dim1,ddim1) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_2D(var, dim1, dim2, ddim1, ddim2) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_3D(var, dim1, dim2, dim3, ddim1, ddim2, ddim3) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_4D(var, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR]
+# define POLYBENCH_5D(var, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) var[POLYBENCH_C99_SELECT(dim1,ddim1) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim2,ddim2) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim3,ddim3) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim4,ddim4) + POLYBENCH_PADDING_FACTOR][POLYBENCH_C99_SELECT(dim5,ddim5) + POLYBENCH_PADDING_FACTOR]
+
+
+/* Macros to allocate heap arrays.
+   Example:
+   polybench_alloc_2d_array(N, M, double) => allocates N x M x sizeof(double)
+					  and returns a pointer to the 2d array
+ */
+# define POLYBENCH_ALLOC_1D_ARRAY(n1, type)	\
+  (type(*)[n1 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data (n1 + POLYBENCH_PADDING_FACTOR, sizeof(type))
+# define POLYBENCH_ALLOC_2D_ARRAY(n1, n2, type)		\
+  (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_3D_ARRAY(n1, n2, n3, type)		\
+  (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_4D_ARRAY(n1, n2, n3, n4, type)			\
+  (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+# define POLYBENCH_ALLOC_5D_ARRAY(n1, n2, n3, n4, n5, type)		\
+  (type(*)[n1 + POLYBENCH_PADDING_FACTOR][n2 + POLYBENCH_PADDING_FACTOR][n3 + POLYBENCH_PADDING_FACTOR][n4 + POLYBENCH_PADDING_FACTOR][n5 + POLYBENCH_PADDING_FACTOR])polybench_alloc_data ((n1 + POLYBENCH_PADDING_FACTOR) * (n2 + POLYBENCH_PADDING_FACTOR) * (n3 + POLYBENCH_PADDING_FACTOR) * (n4 + POLYBENCH_PADDING_FACTOR) * (n5 + POLYBENCH_PADDING_FACTOR), sizeof(type))
+
+/* Macros for array declaration. */
+# ifndef POLYBENCH_STACK_ARRAYS
+#  define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1)		\
+  type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1); \
+  var = POLYBENCH_ALLOC_1D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), type);
+#  define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2)	\
+  type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2); \
+  var = POLYBENCH_ALLOC_2D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), type);
+#  define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \
+  type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3); \
+  var = POLYBENCH_ALLOC_3D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), type);
+#  define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \
+  type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, ,dim3, dim4, ddim1, ddim2, ddim3, ddim4); \
+  var = POLYBENCH_ALLOC_4D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), type);
+#  define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \
+  type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5); \
+  var = POLYBENCH_ALLOC_5D_ARRAY(POLYBENCH_C99_SELECT(dim1, ddim1), POLYBENCH_C99_SELECT(dim2, ddim2), POLYBENCH_C99_SELECT(dim3, ddim3), POLYBENCH_C99_SELECT(dim4, ddim4), POLYBENCH_C99_SELECT(dim5, ddim5), type);
+# else
+#  define POLYBENCH_1D_ARRAY_DECL(var, type, dim1, ddim1)		\
+  type POLYBENCH_1D(POLYBENCH_DECL_VAR(var), dim1, ddim1);
+#  define POLYBENCH_2D_ARRAY_DECL(var, type, dim1, dim2, ddim1, ddim2)	\
+  type POLYBENCH_2D(POLYBENCH_DECL_VAR(var), dim1, dim2, ddim1, ddim2);
+#  define POLYBENCH_3D_ARRAY_DECL(var, type, dim1, dim2, dim3, ddim1, ddim2, ddim3) \
+  type POLYBENCH_3D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, ddim1, ddim2, ddim3);
+#  define POLYBENCH_4D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4) \
+  type POLYBENCH_4D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, ddim1, ddim2, ddim3, ddim4);
+#  define POLYBENCH_5D_ARRAY_DECL(var, type, dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5) \
+  type POLYBENCH_5D(POLYBENCH_DECL_VAR(var), dim1, dim2, dim3, dim4, dim5, ddim1, ddim2, ddim3, ddim4, ddim5);
+# endif
+
+
+/* Dead-code elimination macros. Use argc/argv for the run-time check. */
+# ifndef POLYBENCH_DUMP_ARRAYS
+#  define POLYBENCH_DCE_ONLY_CODE    if (argc > 42 && ! strcmp(argv[0], ""))
+# else
+#  define POLYBENCH_DCE_ONLY_CODE
+# endif
+
+# define polybench_prevent_dce(func)		\
+  POLYBENCH_DCE_ONLY_CODE			\
+  func
+
+
+/* Performance-related instrumentation. See polybench.c */
+# define polybench_start_instruments
+# define polybench_stop_instruments
+# define polybench_print_instruments
+
+
+/* PAPI support. */
+# ifdef POLYBENCH_PAPI
+extern const unsigned int polybench_papi_eventlist[];
+#  undef polybench_start_instruments
+#  undef polybench_stop_instruments
+#  undef polybench_print_instruments
+#  define polybench_set_papi_thread_report(x)	\
+   polybench_papi_counters_threadid = x;
+#  define polybench_start_instruments				\
+  polybench_prepare_instruments();				\
+  polybench_papi_init();					\
+  int evid;							\
+  for (evid = 0; polybench_papi_eventlist[evid] != 0; evid++)	\
+    {								\
+      if (polybench_papi_start_counter(evid))			\
+	continue;						\
+
+#  define polybench_stop_instruments		\
+      polybench_papi_stop_counter(evid);	\
+    }						\
+  polybench_papi_close();			\
+
+#  define polybench_print_instruments polybench_papi_print();
+# endif
+
+
+/* Timing support. */
+# if defined(POLYBENCH_TIME) || defined(POLYBENCH_GFLOPS)
+#  undef polybench_start_instruments
+#  undef polybench_stop_instruments
+#  undef polybench_print_instruments
+#  define polybench_start_instruments polybench_timer_start();
+#  define polybench_stop_instruments polybench_timer_stop();
+#  define polybench_print_instruments polybench_timer_print();
+extern double polybench_program_total_flops;
+extern void polybench_timer_start();
+extern void polybench_timer_stop();
+extern void polybench_timer_print();
+# endif
+
+/* Function declaration. */
+# ifdef POLYBENCH_TIME
+extern void polybench_timer_start();
+extern void polybench_timer_stop();
+extern void polybench_timer_print();
+# endif
+
+# ifdef POLYBENCH_PAPI
+extern void polybench_prepare_instruments();
+extern int polybench_papi_start_counter(int evid);
+extern void polybench_papi_stop_counter(int evid);
+extern void polybench_papi_init();
+extern void polybench_papi_close();
+extern void polybench_papi_print();
+# endif
+
+/* Function prototypes. */
+extern void* polybench_alloc_data(unsigned long long int n, int elt_size);
+
+
+#endif /* !POLYBENCH_H */
diff --git a/test/monniaux/loop_nest/syrk.c b/test/monniaux/loop_nest/syrk.c
new file mode 100644
index 00000000..490d0a01
--- /dev/null
+++ b/test/monniaux/loop_nest/syrk.c
@@ -0,0 +1,28 @@
+/* Include polybench common header. */
+#include "polybench.h"
+
+/* Include benchmark-specific header. */
+/* Default data type is double, default size is 4000. */
+#include "syrk.h"
+
+/* Main computational kernel. The whole function will be timed,
+   including the call and return. */
+void kernel_syrk(int ni, int nj,
+		 DATA_TYPE alpha,
+		 DATA_TYPE beta,
+		 DATA_TYPE POLYBENCH_2D(C,NI,NI,ni,ni),
+		 DATA_TYPE POLYBENCH_2D(A,NI,NJ,ni,nj))
+{
+  int i, j, k;
+
+  /*  C := alpha*A*A' + beta*C */
+#if 0
+  for (i = 0; i < _PB_NI; i++)
+    for (j = 0; j < _PB_NI; j++)
+      C[i][j] *= beta;
+#endif
+  for (i = 0; i < _PB_NI; i++)
+    for (j = 0; j < _PB_NI; j++)
+      for (k = 0; k < _PB_NJ; k++)
+	  C[i][j] += alpha * A[i][k] * A[j][k];
+}
diff --git a/test/monniaux/loop_nest/syrk.h b/test/monniaux/loop_nest/syrk.h
new file mode 100644
index 00000000..c753ff3b
--- /dev/null
+++ b/test/monniaux/loop_nest/syrk.h
@@ -0,0 +1,54 @@
+/**
+ * syrk.h: This file is part of the PolyBench/C 3.2 test suite.
+ *
+ *
+ * Contact: Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
+ * Web address: http://polybench.sourceforge.net
+ */
+#ifndef SYRK_H
+# define SYRK_H
+
+/* Default to STANDARD_DATASET. */
+# if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET)
+#  define STANDARD_DATASET
+# endif
+
+/* Do not define anything if the user manually defines the size. */
+# if !defined(NI) && !defined(NJ)
+/* Define the possible dataset sizes. */
+#  ifdef MINI_DATASET
+#   define NI 32
+#   define NJ 32
+#  endif
+
+#  ifdef SMALL_DATASET
+#   define NI 128
+#   define NJ 128
+#  endif
+
+#  ifdef STANDARD_DATASET /* Default if unspecified. */
+#   define NI 1024
+#   define NJ 1024
+#  endif
+
+#  ifdef LARGE_DATASET
+#   define NI 2000
+#   define NJ 2000
+#  endif
+
+#  ifdef EXTRALARGE_DATASET
+#   define NI 4000
+#   define NJ 4000
+#  endif
+# endif /* !N */
+
+# define _PB_NI POLYBENCH_LOOP_BOUND(NI,ni)
+# define _PB_NJ POLYBENCH_LOOP_BOUND(NJ,nj)
+
+# ifndef DATA_TYPE
+#  define DATA_TYPE double
+#  define DATA_PRINTF_MODIFIER "%0.2lf "
+# endif
+
+
+#endif /* !SYRK */
diff --git a/test/monniaux/many_temporaries/matrix_product.py b/test/monniaux/many_temporaries/matrix_product.py
new file mode 100644
index 00000000..911ca92c
--- /dev/null
+++ b/test/monniaux/many_temporaries/matrix_product.py
@@ -0,0 +1,20 @@
+m=4
+n=5
+p=6
+number_type='double'
+with open('matrix_product.c', 'w') as cfile:
+    cfile.write(f'void matrix_product({number_type} c[{m}][{p}], const {number_type} a[{m}][{n}], const {number_type} b[{n}][{p}]) {{\n')
+    for i in range(m):
+        for j in range(n):
+            for k in range(p):
+                cfile.write(f'  const {number_type} p_{i}_{j}_{k} = a[{i}][{j}] * b[{j}][{k}];\n')
+    for i in range(m):
+        for k in range(p):
+            cfile.write(f'  c[{i}][{k}] = ')
+            for j in range(n):
+                if j>0:
+                    cfile.write(' + ')
+                cfile.write(f'p_{i}_{j}_{k}')
+            cfile.write(';\n')
+    cfile.write('}\n')
+    
diff --git a/test/monniaux/many_temporaries/matrix_product2.py b/test/monniaux/many_temporaries/matrix_product2.py
new file mode 100644
index 00000000..7baeb09b
--- /dev/null
+++ b/test/monniaux/many_temporaries/matrix_product2.py
@@ -0,0 +1,24 @@
+m=4
+n=5
+p=6
+number_type='double'
+with open('matrix_product2.c', 'w') as cfile:
+    cfile.write(f'void matrix_product({number_type} c[{m}][{p}], const {number_type} a[{m}][{n}], const {number_type} b[{n}][{p}]) {{\n')
+    for i in range(m):
+        for j in range(n):
+            for k in range(p):
+                cfile.write(f'  const {number_type} p_{i}_{j}_{k} = a[{i}][{j}] * b[{j}][{k}];\n')
+    for i in range(m):
+        for k in range(p):
+            cfile.write(f'  const {number_type} r_{i}_{k} = ')
+            for j in range(n):
+                if j>0:
+                    cfile.write(' + ')
+                cfile.write(f'p_{i}_{j}_{k}')
+            cfile.write(';\n')
+    for i in range(m):
+        for k in range(p):
+            cfile.write(f'  c[{i}][{k}] = r_{i}_{k};\n')
+    
+    cfile.write('}\n')
+    
diff --git a/test/monniaux/picosat-965/onefile/picosat.c b/test/monniaux/picosat-965/onefile/picosat.c
new file mode 100644
index 00000000..e1c18438
--- /dev/null
+++ b/test/monniaux/picosat-965/onefile/picosat.c
@@ -0,0 +1,25 @@
+typedef struct b b;
+b *a;
+struct b {
+  int c;
+  int d, **clshead;
+  int **ahead;
+  unsigned h;
+} i;
+b *j();
+int k();
+int main() {
+  a = j();
+  k(a);
+}
+#define e(f) f - g->c
+static void m(b *g, int *l) {
+  if (g)
+    *g->ahead = l;
+}
+b *j() { return &i; }
+int k(b *g) {
+  if (g->d)
+    m(g, e(g->clshead[-1]));
+  return g->h;
+}
diff --git a/test/monniaux/picosat-965/onefile/testcmp.sh b/test/monniaux/picosat-965/onefile/testcmp.sh
new file mode 100755
index 00000000..2228c675
--- /dev/null
+++ b/test/monniaux/picosat-965/onefile/testcmp.sh
@@ -0,0 +1,146 @@
+DEFINES="-DNALARM -DNZIP -DNGETRUSAGE -DNDEBUG"
+COMPCERT=/local/monniaux/Kalray/mppa-RTLpathSE-verif-hash-junk
+DATA=$COMPCERT/test/monniaux/picosat-965/tiny.dat
+CCOMP="$COMPCERT/ccomp -fbitfields -fduplicate 2 -fall-loads-nontrap $DEFINES"
+GCC="kvx-cos-gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC0="gcc -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC1="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC2="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+HOSTCC3="gcc -O3 -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC4="clang -Wimplicit -Wuninitialized -Werror $DEFINES"
+HOSTCC5="clang -Wimplicit -Wuninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+CFILES="picosat.c"
+SIMU="kvx-cluster --timeout=100000 -- "
+
+if ! $HOSTCC0 $CFILES -o picosat.cc0.host ;
+then exit 30 ;
+fi
+
+if ! $HOSTCC1 $CFILES -o picosat.cc1.host ;
+then exit 31 ;
+fi
+
+if ! $HOSTCC2 $CFILES -o picosat.cc2.host ;
+then exit 32 ;
+fi
+
+if ! $HOSTCC3 $CFILES -o picosat.cc3.host ;
+then exit 33 ;
+fi
+
+if ! $HOSTCC4 $CFILES -o picosat.cc4.host ;
+then exit 34 ;
+fi
+
+if ! $HOSTCC5 $CFILES -o picosat.cc5.host ;
+then exit 35 ;
+fi
+
+timeout 1 ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.out
+if [ $? -ge 100 ];
+then exit 40 ;
+fi
+
+timeout 1 ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.out
+if [ $? -ge 100 ];
+then exit 41 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc0.valgrind.log ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.valgrind.out
+if [ $? -ge 100 ];
+then exit 50 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc1.valgrind.log ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.valgrind.out
+if [ $? -ge 100 ];
+then exit 51 ;
+fi
+
+timeout 1 ./picosat.cc2.host $DATA 2>&1 > picosat.cc2.out
+if [ $? -ge 100 ];
+then exit 42 ;
+fi
+
+timeout 1 ./picosat.cc3.host $DATA 2>&1 > picosat.cc3.out
+if [ $? -ge 100 ];
+then exit 43 ;
+fi
+
+timeout 1 ./picosat.cc4.host $DATA 2>&1 > picosat.cc4.out
+if [ $? -ge 100 ];
+then exit 44 ;
+fi
+
+timeout 1 ./picosat.cc5.host $DATA 2>&1 > picosat.cc5.out
+if [ $? -ge 100 ];
+then exit 45 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc1.out ;
+then exit 60 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc0.valgrind.out ;
+then exit 70 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc1.valgrind.out ;
+then exit 61 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc2.out ;
+then exit 62 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc3.out ;
+then exit 63 ;
+fi
+
+if ! $GCC $CFILES -o picosat.gcc.target ;
+then exit 1 ;
+fi
+
+if ! $CCOMP $CFILES -o picosat.ccomp.target ;
+then exit 2 ;
+fi
+
+if ! $CCOMP -fprepass -fprepass= list $CFILES -o picosat.prepass.target ;
+then exit 3 ;
+fi
+
+$SIMU ./picosat.gcc.target $DATA 2>&1 > picosat.gcc.out
+if [ $? -ge 100 ];
+then exit 4 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.cc1.out ;
+then exit 13 ;
+fi
+
+if grep timeout picosat.gcc.out ;
+then exit 8 ;
+fi
+
+$SIMU ./picosat.ccomp.target $DATA 2>&1 > picosat.ccomp.out
+if [ $? -ge 100 ];
+then exit 5 ;
+fi
+
+if grep timeout picosat.ccomp.out ;
+then exit 9 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.ccomp.out ;
+then exit 6 ;
+fi
+
+$SIMU ./picosat.prepass.target $DATA 2>&1 > picosat.prepass.out
+if [ $? -ge 100 ];
+then exit 0 ;
+fi
+
+if cmp picosat.gcc.out picosat.prepass.out ;
+then exit 7 ;
+fi
+
+exit 0
diff --git a/test/monniaux/picosat-965/small.dat b/test/monniaux/picosat-965/small.dat
new file mode 100644
index 00000000..accb9054
--- /dev/null
+++ b/test/monniaux/picosat-965/small.dat
@@ -0,0 +1,2 @@
+p cnf 1 1
+1 0
diff --git a/test/monniaux/picosat-965/tiny.dat b/test/monniaux/picosat-965/tiny.dat
new file mode 100644
index 00000000..1d89b303
--- /dev/null
+++ b/test/monniaux/picosat-965/tiny.dat
@@ -0,0 +1,2 @@
+p cnf 0 1
+0
diff --git a/test/monniaux/profiling/compcert_profiling.dat b/test/monniaux/profiling/compcert_profiling.dat
new file mode 100644
index 00000000..fa57a995
--- /dev/null
+++ b/test/monniaux/profiling/compcert_profiling.dat
diff --git a/test/monniaux/profiling/test_profiling b/test/monniaux/profiling/test_profiling
new file mode 100755
index 00000000..b530aae2
--- /dev/null
+++ b/test/monniaux/profiling/test_profiling
diff --git a/test/monniaux/profiling/test_profiling.c b/test/monniaux/profiling/test_profiling.c
new file mode 100644
index 00000000..013b1d68
--- /dev/null
+++ b/test/monniaux/profiling/test_profiling.c
@@ -0,0 +1,15 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+  if (argc < 2) return 1;
+  int i = atoi(argv[1]);
+  if (i > 0) {
+    printf("positive\n");
+  } else if (i==0) {
+    printf("zero\n");
+  } else {
+    printf("negative\n");
+  }
+  return 0;
+}
diff --git a/test/monniaux/reduced_picosat/reduced_picosat.c b/test/monniaux/reduced_picosat/reduced_picosat.c
new file mode 100644
index 00000000..eb9fdaf8
--- /dev/null
+++ b/test/monniaux/reduced_picosat/reduced_picosat.c
@@ -0,0 +1,23 @@
+typedef struct b b;
+b *a;
+struct b {
+  int c;
+  int d, **clshead;
+  int **ahead;
+  unsigned h;
+} glob;
+int k();
+int main() {
+  a = &glob;
+  k(a);
+}
+#define e(f) f - g->c
+static void m(b *g, int *l) {
+  if (g)
+    *g->ahead = l;
+}
+int k(b *g) {
+  if (g->d)
+    m(g, e(g->clshead[-1]));
+  return g->h;
+}
diff --git a/test/monniaux/reduced_picosat/test_a.s b/test/monniaux/reduced_picosat/test_a.s
new file mode 100644
index 00000000..c14cc8f9
--- /dev/null
+++ b/test/monniaux/reduced_picosat/test_a.s
@@ -0,0 +1,10 @@
+	.text
+	.global	dummyload
+	.type	dummyload, @function
+dummyload:
+	make	$r0 = 0
+	;;
+	ld.s	$r0 = -8[$r0]
+	ret
+	;;
+	.size	dummyload, .-dummyload
diff --git a/test/monniaux/reduced_picosat/test_b.c b/test/monniaux/reduced_picosat/test_b.c
new file mode 100644
index 00000000..a0fe625b
--- /dev/null
+++ b/test/monniaux/reduced_picosat/test_b.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+extern uint64_t dummyload(void);
+
+int main() {
+  printf("%" PRIu64 "\n", dummyload());
+}
diff --git a/test/monniaux/reduced_picosat/testcmp.sh b/test/monniaux/reduced_picosat/testcmp.sh
new file mode 100755
index 00000000..8dc93de9
--- /dev/null
+++ b/test/monniaux/reduced_picosat/testcmp.sh
@@ -0,0 +1,146 @@
+DEFINES="-DNALARM -DNZIP -DNGETRUSAGE -DNDEBUG"
+COMPCERT=/home/monniaux/work/Kalray/mppa-RTLpathSE-verif-hash-junk
+DATA=$COMPCERT/test/monniaux/picosat-965/tiny.dat
+CCOMP="$COMPCERT/ccomp -fbitfields -fduplicate 2 -fall-loads-nontrap $DEFINES"
+GCC="kvx-cos-gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC0="gcc -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC1="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC2="gcc -O -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+HOSTCC3="gcc -O3 -Wimplicit -Wuninitialized -Wmaybe-uninitialized -Werror $DEFINES"
+HOSTCC4="/usr/bin/clang -Wimplicit -Wuninitialized -Werror $DEFINES"
+HOSTCC5="/usr/bin/clang -Wimplicit -Wuninitialized -Werror -fsanitize=undefined -fsanitize=address $DEFINES"
+CFILES="reduced_picosat.c"
+SIMU="kvx-cluster --timeout=10000000 -- "
+
+if ! $HOSTCC0 $CFILES -o picosat.cc0.host ;
+then exit 30 ;
+fi
+
+if ! $HOSTCC1 $CFILES -o picosat.cc1.host ;
+then exit 31 ;
+fi
+
+if ! $HOSTCC2 $CFILES -o picosat.cc2.host ;
+then exit 32 ;
+fi
+
+if ! $HOSTCC3 $CFILES -o picosat.cc3.host ;
+then exit 33 ;
+fi
+
+if ! $HOSTCC4 $CFILES -o picosat.cc4.host ;
+then exit 34 ;
+fi
+
+if ! $HOSTCC5 $CFILES -o picosat.cc5.host ;
+then exit 35 ;
+fi
+
+timeout 1 ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.out
+if [ $? -ge 100 ];
+then exit 40 ;
+fi
+
+timeout 1 ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.out
+if [ $? -ge 100 ];
+then exit 41 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc0.valgrind.log ./picosat.cc0.host $DATA 2>&1 > picosat.cc0.valgrind.out
+if [ $? -ge 100 ];
+then exit 50 ;
+fi
+
+timeout 1 valgrind --log-file=picosat.cc1.valgrind.log ./picosat.cc1.host $DATA 2>&1 > picosat.cc1.valgrind.out
+if [ $? -ge 100 ];
+then exit 51 ;
+fi
+
+timeout 1 ./picosat.cc2.host $DATA 2>&1 > picosat.cc2.out
+if [ $? -ge 100 ];
+then exit 42 ;
+fi
+
+timeout 1 ./picosat.cc3.host $DATA 2>&1 > picosat.cc3.out
+if [ $? -ge 100 ];
+then exit 43 ;
+fi
+
+timeout 1 ./picosat.cc4.host $DATA 2>&1 > picosat.cc4.out
+if [ $? -ge 100 ];
+then exit 44 ;
+fi
+
+timeout 1 ./picosat.cc5.host $DATA 2>&1 > picosat.cc5.out
+if [ $? -ge 100 ];
+then exit 45 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc1.out ;
+then exit 60 ;
+fi
+
+if ! cmp picosat.cc0.out picosat.cc0.valgrind.out ;
+then exit 70 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc1.valgrind.out ;
+then exit 61 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc2.out ;
+then exit 62 ;
+fi
+
+if ! cmp picosat.cc1.out picosat.cc3.out ;
+then exit 63 ;
+fi
+
+if ! $GCC $CFILES -o picosat.gcc.target ;
+then exit 1 ;
+fi
+
+if ! $CCOMP $CFILES -o picosat.ccomp.target ;
+then exit 2 ;
+fi
+
+if ! $CCOMP -fprepass -fprepass= list $CFILES -o picosat.prepass.target ;
+then exit 3 ;
+fi
+
+$SIMU ./picosat.gcc.target $DATA 2>&1 > picosat.gcc.out
+if [ $? -ge 100 ];
+then exit 4 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.cc1.out ;
+then exit 13 ;
+fi
+
+if grep timeout picosat.gcc.out ;
+then exit 8 ;
+fi
+
+$SIMU ./picosat.ccomp.target $DATA 2>&1 > picosat.ccomp.out
+if [ $? -ge 100 ];
+then exit 5 ;
+fi
+
+if grep timeout picosat.ccomp.out ;
+then exit 9 ;
+fi
+
+if ! cmp picosat.gcc.out picosat.ccomp.out ;
+then exit 6 ;
+fi
+
+$SIMU ./picosat.prepass.target $DATA 2>&1 > picosat.prepass.out
+if [ $? -ge 100 ];
+then exit 0 ;
+fi
+
+if cmp picosat.gcc.out picosat.prepass.out ;
+then exit 7 ;
+fi
+
+exit 0
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index c0594ef9..cab957c0 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -21,15 +21,15 @@ MEASURES?=time
 ALL_CFLAGS+=-Wall -D__KVX_COS__ -DMAX_MEASURES=$(MAX_MEASURES)
 #ALL_CFLAGS+=-g
 ALL_GCCFLAGS+=$(ALL_CFLAGS) -std=c99 -Wextra -Werror=implicit
-ALL_CCOMPFLAGS+=$(ALL_CFLAGS)
+ALL_CCOMPFLAGS+=$(ALL_CFLAGS) # -fprofile-use= ../compcert_profiling.dat
 
 # The compilers
-KVX_CC?=kvx-cos-gcc
+KVX_CC?=kvx-elf-gcc
 KVX_CCOMP?=ccomp
 
 # Command to execute
 #EXECUTE_CYCLES?=timeout --signal=SIGTERM 3m kvx-cluster --syscall=libstd_scalls.so --cycle-based --
-EXECUTE_CYCLES?=kvx-cluster --syscall=libstd_scalls.so --cycle-based --
+EXECUTE_CYCLES?=kvx-cluster --enable-cache --syscall=libstd_scalls.so --cycle-based --
 
 # You can define up to GCC4FLAGS and CCOMP4FLAGS
 GCC0FLAGS?=$(ALL_GCCFLAGS) -O0
@@ -37,11 +37,11 @@ GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
 GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
 GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
 GCC4FLAGS?=
-CCOMP0FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-postpass
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fpostpass= greedy
-CCOMP2FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fno-if-conversion
-CCOMP3FLAGS?=$(ALL_CCOMPFLAGS) -O2
-CCOMP4FLAGS?=
+CCOMP0FLAGS?=$(ALL_CCOMPFLAGS) -O2 
+CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= list
+CCOMP2FLAGS?=$(ALL_CCOMPFLAGS) -O2 -funrollsingle 30
+CCOMP3FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= list -funrollsingle 30
+CCOMP4FLAGS?=$(ALL_CCOMPFLAGS) -O2 -fprepass= zigzag
 
 # Prefix names
 GCC0PREFIX?=.gcc.o0
@@ -49,11 +49,11 @@ GCC1PREFIX?=.gcc.o1
 GCC2PREFIX?=.gcc.o2
 GCC3PREFIX?=.gcc.o3
 GCC4PREFIX?=
-CCOMP0PREFIX?=.ccomp.nobundle
-CCOMP1PREFIX?=.ccomp.greedy
-CCOMP2PREFIX?=.ccomp.noif
-CCOMP3PREFIX?=.ccomp
-CCOMP4PREFIX?=
+CCOMP0PREFIX?=.ccomp
+CCOMP1PREFIX?=.ccomp.prepass_list
+CCOMP2PREFIX?=.ccomp.unrollsingle_30
+CCOMP3PREFIX?=.ccomp.prepass_list-unrollsingle_30
+CCOMP4PREFIX?=.ccomp.prepass_zigzag
 
 # List of outfiles, updated by gen_rules
 OUTFILES:=
diff --git a/test/monniaux/yarpgen/Makefile b/test/monniaux/yarpgen/Makefile
index 24dd19c3..f9efd5a0 100644
--- a/test/monniaux/yarpgen/Makefile
+++ b/test/monniaux/yarpgen/Makefile
@@ -39,6 +39,20 @@ TESTS_GCC_TARGET_OUT=$(shell seq --format $(PREFIX)/example.gcc.target.out 1 $(M
 TESTS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 1 $(MAX))
 TESTS_CMP=$(shell seq --format $(PREFIX)/example.target.cmp 1 $(MAX)) # $(shell seq --format $(PREFIX)/example.host_target.cmp 1 $(MAX))
 
+# FIXME - test000089 fails in CI in arm and armhf because of memory consumption during register allocation being too high
+# Removing it from the pool
+BADID:=89
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/func.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/driver.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/init.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/hash.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/check.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/init.h $(BADID) $(BADID)),$(TESTS_C))
+TESTS_CMP:=$(filter-out $(shell seq --format $(PREFIX)/example.target.cmp $(BADID) $(BADID)),$(TESTS_CMP))
+TESTS_GCC_HOST_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.gcc.host.out $(BADID) $(BADID)),$(TESTS_GCC_HOST_OUT))
+TESTS_CCOMP_TARGET_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.ccomp.target.out $(BADID) $(BADID)),$(TESTS_CCOMP_TARGET_OUT))
+TESTS_GCC_TARGET_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.gcc.target.out $(BADID) $(BADID)),$(TESTS_GCC_TARGET_OUT))
+
 all:	$(TESTS_CCOMP_TARGET_OUT) $(TESTS_GCC_TARGET_OUT) $(TESTS_CCOMP_TARGET_S) $(TESTS_GCC_TARGET_S) $(TESTS_CMP) $(TESTS_C)
 
 tests_c: $(TESTS_C)
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 744a2c03..56d90469 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -1,6 +1,8 @@
 include ../../Makefile.config
 
 CCOMP=../../ccomp
+# TODO - temporary
+# CCOMPOPTS:=$(CCOMPOPTS) -fall-loads-nontrap -fduplicate 2 -fprepass
 CCOMPFLAGS=$(CCOMPOPTS) -stdlib ../../runtime \
  -dparse -dc -dclight -dasm -fall \
  -DARCH_$(ARCH) -DMODEL_$(MODEL)
@@ -22,9 +24,8 @@ TESTS?=int32 int64 floats floats-basics floats-lit \
 
 TESTS_COMP?=attribs1 bitfields1 bitfields2 bitfields3 bitfields4 \
   bitfields5 bitfields6 bitfields7 bitfields8 \
-  builtins-$(ARCH) alignas \
-  varargs1 varargs2 varargs3 sections alias aligned\
-  packedstruct1 packedstruct2
+  builtins-common builtins-$(ARCH) packedstruct1 packedstruct2 alignas \
+  varargs1 varargs2 varargs3 sections alias aligned
 
 ifeq ($(ARCH),kvx)
 	TESTS_COMP:=$(filter-out packedstruct1,$(TESTS_COMP))
diff --git a/test/regression/Results/builtins-aarch64 b/test/regression/Results/builtins-aarch64
index c70432d8..7e3575e5 100644
--- a/test/regression/Results/builtins-aarch64
+++ b/test/regression/Results/builtins-aarch64
@@ -1,14 +1,7 @@
-bswap(12345678) = 78563412
-bswap16(1234) = 3412
-bswap64(123456789abcdef0) = f0debc9a78563412
-clz(12345678) = 3
-clzll(12345678) = 35
-clzll(1234567812345678) = 3
 cls(1234567) = 10
 cls(-9999) = 17
 clsll(1234567) = 42
 clsll(-9999) = 49
-fsqrt(3.141590) = 1.772453
 fmadd(3.141590, 2.718000, 1.414000) = 9.952842
 fmsub(3.141590, 2.718000, 1.414000) = -7.124842
 fnmadd(3.141590, 2.718000, 1.414000) = -9.952842
diff --git a/test/regression/Results/builtins-arm b/test/regression/Results/builtins-arm
index f637fb16..bfdad28d 100644
--- a/test/regression/Results/builtins-arm
+++ b/test/regression/Results/builtins-arm
@@ -1,9 +1,3 @@
-bswap(12345678) = 78563412
-bswap16(1234) = 3412
-clz(12345678) = 3
-clzll(12345678) = 35
-clzll(1234567812345678) = 3
-clzll(78563412) = 33
 fsqrt(3.141590) = 1.772453
 read_16_rev = 3412
 read_32_rev = efbeadde
diff --git a/test/regression/Results/builtins-common b/test/regression/Results/builtins-common
new file mode 100644
index 00000000..8adf7bfb
--- /dev/null
+++ b/test/regression/Results/builtins-common
@@ -0,0 +1,393 @@
+bswap(12345678) = 78563412
+bswap16(1234) = 3412
+bswap64(123456789abcdef0) = f0debc9a78563412
+clz(ffffffff) = 0
+clz(80000000) = 0
+clz(7fffffff) = 1
+clz(40000000) = 1
+clz(3fffffff) = 2
+clz(20000000) = 2
+clz(1fffffff) = 3
+clz(10000000) = 3
+clz(0fffffff) = 4
+clz(08000000) = 4
+clz(07ffffff) = 5
+clz(04000000) = 5
+clz(03ffffff) = 6
+clz(02000000) = 6
+clz(01ffffff) = 7
+clz(01000000) = 7
+clz(00ffffff) = 8
+clz(00800000) = 8
+clz(007fffff) = 9
+clz(00400000) = 9
+clz(003fffff) = 10
+clz(00200000) = 10
+clz(001fffff) = 11
+clz(00100000) = 11
+clz(000fffff) = 12
+clz(00080000) = 12
+clz(0007ffff) = 13
+clz(00040000) = 13
+clz(0003ffff) = 14
+clz(00020000) = 14
+clz(0001ffff) = 15
+clz(00010000) = 15
+clz(0000ffff) = 16
+clz(00008000) = 16
+clz(00007fff) = 17
+clz(00004000) = 17
+clz(00003fff) = 18
+clz(00002000) = 18
+clz(00001fff) = 19
+clz(00001000) = 19
+clz(00000fff) = 20
+clz(00000800) = 20
+clz(000007ff) = 21
+clz(00000400) = 21
+clz(000003ff) = 22
+clz(00000200) = 22
+clz(000001ff) = 23
+clz(00000100) = 23
+clz(000000ff) = 24
+clz(00000080) = 24
+clz(0000007f) = 25
+clz(00000040) = 25
+clz(0000003f) = 26
+clz(00000020) = 26
+clz(0000001f) = 27
+clz(00000010) = 27
+clz(0000000f) = 28
+clz(00000008) = 28
+clz(00000007) = 29
+clz(00000004) = 29
+clz(00000003) = 30
+clz(00000002) = 30
+clz(00000001) = 31
+clz(00000001) = 31
+clzll(ffffffffffffffff) = 0
+clzll(8000000000000000) = 0
+clzll(7fffffffffffffff) = 1
+clzll(4000000000000000) = 1
+clzll(3fffffffffffffff) = 2
+clzll(2000000000000000) = 2
+clzll(1fffffffffffffff) = 3
+clzll(1000000000000000) = 3
+clzll(0fffffffffffffff) = 4
+clzll(0800000000000000) = 4
+clzll(07ffffffffffffff) = 5
+clzll(0400000000000000) = 5
+clzll(03ffffffffffffff) = 6
+clzll(0200000000000000) = 6
+clzll(01ffffffffffffff) = 7
+clzll(0100000000000000) = 7
+clzll(00ffffffffffffff) = 8
+clzll(0080000000000000) = 8
+clzll(007fffffffffffff) = 9
+clzll(0040000000000000) = 9
+clzll(003fffffffffffff) = 10
+clzll(0020000000000000) = 10
+clzll(001fffffffffffff) = 11
+clzll(0010000000000000) = 11
+clzll(000fffffffffffff) = 12
+clzll(0008000000000000) = 12
+clzll(0007ffffffffffff) = 13
+clzll(0004000000000000) = 13
+clzll(0003ffffffffffff) = 14
+clzll(0002000000000000) = 14
+clzll(0001ffffffffffff) = 15
+clzll(0001000000000000) = 15
+clzll(0000ffffffffffff) = 16
+clzll(0000800000000000) = 16
+clzll(00007fffffffffff) = 17
+clzll(0000400000000000) = 17
+clzll(00003fffffffffff) = 18
+clzll(0000200000000000) = 18
+clzll(00001fffffffffff) = 19
+clzll(0000100000000000) = 19
+clzll(00000fffffffffff) = 20
+clzll(0000080000000000) = 20
+clzll(000007ffffffffff) = 21
+clzll(0000040000000000) = 21
+clzll(000003ffffffffff) = 22
+clzll(0000020000000000) = 22
+clzll(000001ffffffffff) = 23
+clzll(0000010000000000) = 23
+clzll(000000ffffffffff) = 24
+clzll(0000008000000000) = 24
+clzll(0000007fffffffff) = 25
+clzll(0000004000000000) = 25
+clzll(0000003fffffffff) = 26
+clzll(0000002000000000) = 26
+clzll(0000001fffffffff) = 27
+clzll(0000001000000000) = 27
+clzll(0000000fffffffff) = 28
+clzll(0000000800000000) = 28
+clzll(00000007ffffffff) = 29
+clzll(0000000400000000) = 29
+clzll(00000003ffffffff) = 30
+clzll(0000000200000000) = 30
+clzll(00000001ffffffff) = 31
+clzll(0000000100000000) = 31
+clzll(00000000ffffffff) = 32
+clzll(0000000080000000) = 32
+clzll(000000007fffffff) = 33
+clzll(0000000040000000) = 33
+clzll(000000003fffffff) = 34
+clzll(0000000020000000) = 34
+clzll(000000001fffffff) = 35
+clzll(0000000010000000) = 35
+clzll(000000000fffffff) = 36
+clzll(0000000008000000) = 36
+clzll(0000000007ffffff) = 37
+clzll(0000000004000000) = 37
+clzll(0000000003ffffff) = 38
+clzll(0000000002000000) = 38
+clzll(0000000001ffffff) = 39
+clzll(0000000001000000) = 39
+clzll(0000000000ffffff) = 40
+clzll(0000000000800000) = 40
+clzll(00000000007fffff) = 41
+clzll(0000000000400000) = 41
+clzll(00000000003fffff) = 42
+clzll(0000000000200000) = 42
+clzll(00000000001fffff) = 43
+clzll(0000000000100000) = 43
+clzll(00000000000fffff) = 44
+clzll(0000000000080000) = 44
+clzll(000000000007ffff) = 45
+clzll(0000000000040000) = 45
+clzll(000000000003ffff) = 46
+clzll(0000000000020000) = 46
+clzll(000000000001ffff) = 47
+clzll(0000000000010000) = 47
+clzll(000000000000ffff) = 48
+clzll(0000000000008000) = 48
+clzll(0000000000007fff) = 49
+clzll(0000000000004000) = 49
+clzll(0000000000003fff) = 50
+clzll(0000000000002000) = 50
+clzll(0000000000001fff) = 51
+clzll(0000000000001000) = 51
+clzll(0000000000000fff) = 52
+clzll(0000000000000800) = 52
+clzll(00000000000007ff) = 53
+clzll(0000000000000400) = 53
+clzll(00000000000003ff) = 54
+clzll(0000000000000200) = 54
+clzll(00000000000001ff) = 55
+clzll(0000000000000100) = 55
+clzll(00000000000000ff) = 56
+clzll(0000000000000080) = 56
+clzll(000000000000007f) = 57
+clzll(0000000000000040) = 57
+clzll(000000000000003f) = 58
+clzll(0000000000000020) = 58
+clzll(000000000000001f) = 59
+clzll(0000000000000010) = 59
+clzll(000000000000000f) = 60
+clzll(0000000000000008) = 60
+clzll(0000000000000007) = 61
+clzll(0000000000000004) = 61
+clzll(0000000000000003) = 62
+clzll(0000000000000002) = 62
+clzll(0000000000000001) = 63
+clzll(0000000000000001) = 63
+ctz(00000001) = 0
+ctz(ffffffff) = 0
+ctz(00000002) = 1
+ctz(fffffffe) = 1
+ctz(00000004) = 2
+ctz(fffffffc) = 2
+ctz(00000008) = 3
+ctz(fffffff8) = 3
+ctz(00000010) = 4
+ctz(fffffff0) = 4
+ctz(00000020) = 5
+ctz(ffffffe0) = 5
+ctz(00000040) = 6
+ctz(ffffffc0) = 6
+ctz(00000080) = 7
+ctz(ffffff80) = 7
+ctz(00000100) = 8
+ctz(ffffff00) = 8
+ctz(00000200) = 9
+ctz(fffffe00) = 9
+ctz(00000400) = 10
+ctz(fffffc00) = 10
+ctz(00000800) = 11
+ctz(fffff800) = 11
+ctz(00001000) = 12
+ctz(fffff000) = 12
+ctz(00002000) = 13
+ctz(ffffe000) = 13
+ctz(00004000) = 14
+ctz(ffffc000) = 14
+ctz(00008000) = 15
+ctz(ffff8000) = 15
+ctz(00010000) = 16
+ctz(ffff0000) = 16
+ctz(00020000) = 17
+ctz(fffe0000) = 17
+ctz(00040000) = 18
+ctz(fffc0000) = 18
+ctz(00080000) = 19
+ctz(fff80000) = 19
+ctz(00100000) = 20
+ctz(fff00000) = 20
+ctz(00200000) = 21
+ctz(ffe00000) = 21
+ctz(00400000) = 22
+ctz(ffc00000) = 22
+ctz(00800000) = 23
+ctz(ff800000) = 23
+ctz(01000000) = 24
+ctz(ff000000) = 24
+ctz(02000000) = 25
+ctz(fe000000) = 25
+ctz(04000000) = 26
+ctz(fc000000) = 26
+ctz(08000000) = 27
+ctz(f8000000) = 27
+ctz(10000000) = 28
+ctz(f0000000) = 28
+ctz(20000000) = 29
+ctz(e0000000) = 29
+ctz(40000000) = 30
+ctz(c0000000) = 30
+ctz(80000000) = 31
+ctz(80000000) = 31
+ctzll(0000000000000001) = 0
+ctzll(ffffffffffffffff) = 0
+ctzll(0000000000000002) = 1
+ctzll(fffffffffffffffe) = 1
+ctzll(0000000000000004) = 2
+ctzll(fffffffffffffffc) = 2
+ctzll(0000000000000008) = 3
+ctzll(fffffffffffffff8) = 3
+ctzll(0000000000000010) = 4
+ctzll(fffffffffffffff0) = 4
+ctzll(0000000000000020) = 5
+ctzll(ffffffffffffffe0) = 5
+ctzll(0000000000000040) = 6
+ctzll(ffffffffffffffc0) = 6
+ctzll(0000000000000080) = 7
+ctzll(ffffffffffffff80) = 7
+ctzll(0000000000000100) = 8
+ctzll(ffffffffffffff00) = 8
+ctzll(0000000000000200) = 9
+ctzll(fffffffffffffe00) = 9
+ctzll(0000000000000400) = 10
+ctzll(fffffffffffffc00) = 10
+ctzll(0000000000000800) = 11
+ctzll(fffffffffffff800) = 11
+ctzll(0000000000001000) = 12
+ctzll(fffffffffffff000) = 12
+ctzll(0000000000002000) = 13
+ctzll(ffffffffffffe000) = 13
+ctzll(0000000000004000) = 14
+ctzll(ffffffffffffc000) = 14
+ctzll(0000000000008000) = 15
+ctzll(ffffffffffff8000) = 15
+ctzll(0000000000010000) = 16
+ctzll(ffffffffffff0000) = 16
+ctzll(0000000000020000) = 17
+ctzll(fffffffffffe0000) = 17
+ctzll(0000000000040000) = 18
+ctzll(fffffffffffc0000) = 18
+ctzll(0000000000080000) = 19
+ctzll(fffffffffff80000) = 19
+ctzll(0000000000100000) = 20
+ctzll(fffffffffff00000) = 20
+ctzll(0000000000200000) = 21
+ctzll(ffffffffffe00000) = 21
+ctzll(0000000000400000) = 22
+ctzll(ffffffffffc00000) = 22
+ctzll(0000000000800000) = 23
+ctzll(ffffffffff800000) = 23
+ctzll(0000000001000000) = 24
+ctzll(ffffffffff000000) = 24
+ctzll(0000000002000000) = 25
+ctzll(fffffffffe000000) = 25
+ctzll(0000000004000000) = 26
+ctzll(fffffffffc000000) = 26
+ctzll(0000000008000000) = 27
+ctzll(fffffffff8000000) = 27
+ctzll(0000000010000000) = 28
+ctzll(fffffffff0000000) = 28
+ctzll(0000000020000000) = 29
+ctzll(ffffffffe0000000) = 29
+ctzll(0000000040000000) = 30
+ctzll(ffffffffc0000000) = 30
+ctzll(0000000080000000) = 31
+ctzll(ffffffff80000000) = 31
+ctzll(0000000100000000) = 32
+ctzll(ffffffff00000000) = 32
+ctzll(0000000200000000) = 33
+ctzll(fffffffe00000000) = 33
+ctzll(0000000400000000) = 34
+ctzll(fffffffc00000000) = 34
+ctzll(0000000800000000) = 35
+ctzll(fffffff800000000) = 35
+ctzll(0000001000000000) = 36
+ctzll(fffffff000000000) = 36
+ctzll(0000002000000000) = 37
+ctzll(ffffffe000000000) = 37
+ctzll(0000004000000000) = 38
+ctzll(ffffffc000000000) = 38
+ctzll(0000008000000000) = 39
+ctzll(ffffff8000000000) = 39
+ctzll(0000010000000000) = 40
+ctzll(ffffff0000000000) = 40
+ctzll(0000020000000000) = 41
+ctzll(fffffe0000000000) = 41
+ctzll(0000040000000000) = 42
+ctzll(fffffc0000000000) = 42
+ctzll(0000080000000000) = 43
+ctzll(fffff80000000000) = 43
+ctzll(0000100000000000) = 44
+ctzll(fffff00000000000) = 44
+ctzll(0000200000000000) = 45
+ctzll(ffffe00000000000) = 45
+ctzll(0000400000000000) = 46
+ctzll(ffffc00000000000) = 46
+ctzll(0000800000000000) = 47
+ctzll(ffff800000000000) = 47
+ctzll(0001000000000000) = 48
+ctzll(ffff000000000000) = 48
+ctzll(0002000000000000) = 49
+ctzll(fffe000000000000) = 49
+ctzll(0004000000000000) = 50
+ctzll(fffc000000000000) = 50
+ctzll(0008000000000000) = 51
+ctzll(fff8000000000000) = 51
+ctzll(0010000000000000) = 52
+ctzll(fff0000000000000) = 52
+ctzll(0020000000000000) = 53
+ctzll(ffe0000000000000) = 53
+ctzll(0040000000000000) = 54
+ctzll(ffc0000000000000) = 54
+ctzll(0080000000000000) = 55
+ctzll(ff80000000000000) = 55
+ctzll(0100000000000000) = 56
+ctzll(ff00000000000000) = 56
+ctzll(0200000000000000) = 57
+ctzll(fe00000000000000) = 57
+ctzll(0400000000000000) = 58
+ctzll(fc00000000000000) = 58
+ctzll(0800000000000000) = 59
+ctzll(f800000000000000) = 59
+ctzll(1000000000000000) = 60
+ctzll(f000000000000000) = 60
+ctzll(2000000000000000) = 61
+ctzll(e000000000000000) = 61
+ctzll(4000000000000000) = 62
+ctzll(c000000000000000) = 62
+ctzll(8000000000000000) = 63
+ctzll(8000000000000000) = 63
+fabs(3.141590) = 3.141590
+fabs(-3.141590) = 3.141590
+fabsf(7.250000) = 7.250000
+fabsf(-7.250000) = 7.250000
+fsqrt(3.141590) = 1.772453
+sqrt(3.141590) = 1.772453
diff --git a/test/regression/Results/builtins-common-kvx b/test/regression/Results/builtins-common-kvx
new file mode 100644
index 00000000..fad75e7c
--- /dev/null
+++ b/test/regression/Results/builtins-common-kvx
@@ -0,0 +1,391 @@
+bswap(12345678) = 78563412
+bswap16(1234) = 3412
+bswap64(123456789abcdef0) = f0debc9a78563412
+clz(ffffffff) = 0
+clz(80000000) = 0
+clz(7fffffff) = 1
+clz(40000000) = 1
+clz(3fffffff) = 2
+clz(20000000) = 2
+clz(1fffffff) = 3
+clz(10000000) = 3
+clz(0fffffff) = 4
+clz(08000000) = 4
+clz(07ffffff) = 5
+clz(04000000) = 5
+clz(03ffffff) = 6
+clz(02000000) = 6
+clz(01ffffff) = 7
+clz(01000000) = 7
+clz(00ffffff) = 8
+clz(00800000) = 8
+clz(007fffff) = 9
+clz(00400000) = 9
+clz(003fffff) = 10
+clz(00200000) = 10
+clz(001fffff) = 11
+clz(00100000) = 11
+clz(000fffff) = 12
+clz(00080000) = 12
+clz(0007ffff) = 13
+clz(00040000) = 13
+clz(0003ffff) = 14
+clz(00020000) = 14
+clz(0001ffff) = 15
+clz(00010000) = 15
+clz(0000ffff) = 16
+clz(00008000) = 16
+clz(00007fff) = 17
+clz(00004000) = 17
+clz(00003fff) = 18
+clz(00002000) = 18
+clz(00001fff) = 19
+clz(00001000) = 19
+clz(00000fff) = 20
+clz(00000800) = 20
+clz(000007ff) = 21
+clz(00000400) = 21
+clz(000003ff) = 22
+clz(00000200) = 22
+clz(000001ff) = 23
+clz(00000100) = 23
+clz(000000ff) = 24
+clz(00000080) = 24
+clz(0000007f) = 25
+clz(00000040) = 25
+clz(0000003f) = 26
+clz(00000020) = 26
+clz(0000001f) = 27
+clz(00000010) = 27
+clz(0000000f) = 28
+clz(00000008) = 28
+clz(00000007) = 29
+clz(00000004) = 29
+clz(00000003) = 30
+clz(00000002) = 30
+clz(00000001) = 31
+clz(00000001) = 31
+clzll(ffffffffffffffff) = 0
+clzll(8000000000000000) = 0
+clzll(7fffffffffffffff) = 1
+clzll(4000000000000000) = 1
+clzll(3fffffffffffffff) = 2
+clzll(2000000000000000) = 2
+clzll(1fffffffffffffff) = 3
+clzll(1000000000000000) = 3
+clzll(0fffffffffffffff) = 4
+clzll(0800000000000000) = 4
+clzll(07ffffffffffffff) = 5
+clzll(0400000000000000) = 5
+clzll(03ffffffffffffff) = 6
+clzll(0200000000000000) = 6
+clzll(01ffffffffffffff) = 7
+clzll(0100000000000000) = 7
+clzll(00ffffffffffffff) = 8
+clzll(0080000000000000) = 8
+clzll(007fffffffffffff) = 9
+clzll(0040000000000000) = 9
+clzll(003fffffffffffff) = 10
+clzll(0020000000000000) = 10
+clzll(001fffffffffffff) = 11
+clzll(0010000000000000) = 11
+clzll(000fffffffffffff) = 12
+clzll(0008000000000000) = 12
+clzll(0007ffffffffffff) = 13
+clzll(0004000000000000) = 13
+clzll(0003ffffffffffff) = 14
+clzll(0002000000000000) = 14
+clzll(0001ffffffffffff) = 15
+clzll(0001000000000000) = 15
+clzll(0000ffffffffffff) = 16
+clzll(0000800000000000) = 16
+clzll(00007fffffffffff) = 17
+clzll(0000400000000000) = 17
+clzll(00003fffffffffff) = 18
+clzll(0000200000000000) = 18
+clzll(00001fffffffffff) = 19
+clzll(0000100000000000) = 19
+clzll(00000fffffffffff) = 20
+clzll(0000080000000000) = 20
+clzll(000007ffffffffff) = 21
+clzll(0000040000000000) = 21
+clzll(000003ffffffffff) = 22
+clzll(0000020000000000) = 22
+clzll(000001ffffffffff) = 23
+clzll(0000010000000000) = 23
+clzll(000000ffffffffff) = 24
+clzll(0000008000000000) = 24
+clzll(0000007fffffffff) = 25
+clzll(0000004000000000) = 25
+clzll(0000003fffffffff) = 26
+clzll(0000002000000000) = 26
+clzll(0000001fffffffff) = 27
+clzll(0000001000000000) = 27
+clzll(0000000fffffffff) = 28
+clzll(0000000800000000) = 28
+clzll(00000007ffffffff) = 29
+clzll(0000000400000000) = 29
+clzll(00000003ffffffff) = 30
+clzll(0000000200000000) = 30
+clzll(00000001ffffffff) = 31
+clzll(0000000100000000) = 31
+clzll(00000000ffffffff) = 32
+clzll(0000000080000000) = 32
+clzll(000000007fffffff) = 33
+clzll(0000000040000000) = 33
+clzll(000000003fffffff) = 34
+clzll(0000000020000000) = 34
+clzll(000000001fffffff) = 35
+clzll(0000000010000000) = 35
+clzll(000000000fffffff) = 36
+clzll(0000000008000000) = 36
+clzll(0000000007ffffff) = 37
+clzll(0000000004000000) = 37
+clzll(0000000003ffffff) = 38
+clzll(0000000002000000) = 38
+clzll(0000000001ffffff) = 39
+clzll(0000000001000000) = 39
+clzll(0000000000ffffff) = 40
+clzll(0000000000800000) = 40
+clzll(00000000007fffff) = 41
+clzll(0000000000400000) = 41
+clzll(00000000003fffff) = 42
+clzll(0000000000200000) = 42
+clzll(00000000001fffff) = 43
+clzll(0000000000100000) = 43
+clzll(00000000000fffff) = 44
+clzll(0000000000080000) = 44
+clzll(000000000007ffff) = 45
+clzll(0000000000040000) = 45
+clzll(000000000003ffff) = 46
+clzll(0000000000020000) = 46
+clzll(000000000001ffff) = 47
+clzll(0000000000010000) = 47
+clzll(000000000000ffff) = 48
+clzll(0000000000008000) = 48
+clzll(0000000000007fff) = 49
+clzll(0000000000004000) = 49
+clzll(0000000000003fff) = 50
+clzll(0000000000002000) = 50
+clzll(0000000000001fff) = 51
+clzll(0000000000001000) = 51
+clzll(0000000000000fff) = 52
+clzll(0000000000000800) = 52
+clzll(00000000000007ff) = 53
+clzll(0000000000000400) = 53
+clzll(00000000000003ff) = 54
+clzll(0000000000000200) = 54
+clzll(00000000000001ff) = 55
+clzll(0000000000000100) = 55
+clzll(00000000000000ff) = 56
+clzll(0000000000000080) = 56
+clzll(000000000000007f) = 57
+clzll(0000000000000040) = 57
+clzll(000000000000003f) = 58
+clzll(0000000000000020) = 58
+clzll(000000000000001f) = 59
+clzll(0000000000000010) = 59
+clzll(000000000000000f) = 60
+clzll(0000000000000008) = 60
+clzll(0000000000000007) = 61
+clzll(0000000000000004) = 61
+clzll(0000000000000003) = 62
+clzll(0000000000000002) = 62
+clzll(0000000000000001) = 63
+clzll(0000000000000001) = 63
+ctz(00000001) = 0
+ctz(ffffffff) = 0
+ctz(00000002) = 1
+ctz(fffffffe) = 1
+ctz(00000004) = 2
+ctz(fffffffc) = 2
+ctz(00000008) = 3
+ctz(fffffff8) = 3
+ctz(00000010) = 4
+ctz(fffffff0) = 4
+ctz(00000020) = 5
+ctz(ffffffe0) = 5
+ctz(00000040) = 6
+ctz(ffffffc0) = 6
+ctz(00000080) = 7
+ctz(ffffff80) = 7
+ctz(00000100) = 8
+ctz(ffffff00) = 8
+ctz(00000200) = 9
+ctz(fffffe00) = 9
+ctz(00000400) = 10
+ctz(fffffc00) = 10
+ctz(00000800) = 11
+ctz(fffff800) = 11
+ctz(00001000) = 12
+ctz(fffff000) = 12
+ctz(00002000) = 13
+ctz(ffffe000) = 13
+ctz(00004000) = 14
+ctz(ffffc000) = 14
+ctz(00008000) = 15
+ctz(ffff8000) = 15
+ctz(00010000) = 16
+ctz(ffff0000) = 16
+ctz(00020000) = 17
+ctz(fffe0000) = 17
+ctz(00040000) = 18
+ctz(fffc0000) = 18
+ctz(00080000) = 19
+ctz(fff80000) = 19
+ctz(00100000) = 20
+ctz(fff00000) = 20
+ctz(00200000) = 21
+ctz(ffe00000) = 21
+ctz(00400000) = 22
+ctz(ffc00000) = 22
+ctz(00800000) = 23
+ctz(ff800000) = 23
+ctz(01000000) = 24
+ctz(ff000000) = 24
+ctz(02000000) = 25
+ctz(fe000000) = 25
+ctz(04000000) = 26
+ctz(fc000000) = 26
+ctz(08000000) = 27
+ctz(f8000000) = 27
+ctz(10000000) = 28
+ctz(f0000000) = 28
+ctz(20000000) = 29
+ctz(e0000000) = 29
+ctz(40000000) = 30
+ctz(c0000000) = 30
+ctz(80000000) = 31
+ctz(80000000) = 31
+ctzll(0000000000000001) = 0
+ctzll(ffffffffffffffff) = 0
+ctzll(0000000000000002) = 1
+ctzll(fffffffffffffffe) = 1
+ctzll(0000000000000004) = 2
+ctzll(fffffffffffffffc) = 2
+ctzll(0000000000000008) = 3
+ctzll(fffffffffffffff8) = 3
+ctzll(0000000000000010) = 4
+ctzll(fffffffffffffff0) = 4
+ctzll(0000000000000020) = 5
+ctzll(ffffffffffffffe0) = 5
+ctzll(0000000000000040) = 6
+ctzll(ffffffffffffffc0) = 6
+ctzll(0000000000000080) = 7
+ctzll(ffffffffffffff80) = 7
+ctzll(0000000000000100) = 8
+ctzll(ffffffffffffff00) = 8
+ctzll(0000000000000200) = 9
+ctzll(fffffffffffffe00) = 9
+ctzll(0000000000000400) = 10
+ctzll(fffffffffffffc00) = 10
+ctzll(0000000000000800) = 11
+ctzll(fffffffffffff800) = 11
+ctzll(0000000000001000) = 12
+ctzll(fffffffffffff000) = 12
+ctzll(0000000000002000) = 13
+ctzll(ffffffffffffe000) = 13
+ctzll(0000000000004000) = 14
+ctzll(ffffffffffffc000) = 14
+ctzll(0000000000008000) = 15
+ctzll(ffffffffffff8000) = 15
+ctzll(0000000000010000) = 16
+ctzll(ffffffffffff0000) = 16
+ctzll(0000000000020000) = 17
+ctzll(fffffffffffe0000) = 17
+ctzll(0000000000040000) = 18
+ctzll(fffffffffffc0000) = 18
+ctzll(0000000000080000) = 19
+ctzll(fffffffffff80000) = 19
+ctzll(0000000000100000) = 20
+ctzll(fffffffffff00000) = 20
+ctzll(0000000000200000) = 21
+ctzll(ffffffffffe00000) = 21
+ctzll(0000000000400000) = 22
+ctzll(ffffffffffc00000) = 22
+ctzll(0000000000800000) = 23
+ctzll(ffffffffff800000) = 23
+ctzll(0000000001000000) = 24
+ctzll(ffffffffff000000) = 24
+ctzll(0000000002000000) = 25
+ctzll(fffffffffe000000) = 25
+ctzll(0000000004000000) = 26
+ctzll(fffffffffc000000) = 26
+ctzll(0000000008000000) = 27
+ctzll(fffffffff8000000) = 27
+ctzll(0000000010000000) = 28
+ctzll(fffffffff0000000) = 28
+ctzll(0000000020000000) = 29
+ctzll(ffffffffe0000000) = 29
+ctzll(0000000040000000) = 30
+ctzll(ffffffffc0000000) = 30
+ctzll(0000000080000000) = 31
+ctzll(ffffffff80000000) = 31
+ctzll(0000000100000000) = 32
+ctzll(ffffffff00000000) = 32
+ctzll(0000000200000000) = 33
+ctzll(fffffffe00000000) = 33
+ctzll(0000000400000000) = 34
+ctzll(fffffffc00000000) = 34
+ctzll(0000000800000000) = 35
+ctzll(fffffff800000000) = 35
+ctzll(0000001000000000) = 36
+ctzll(fffffff000000000) = 36
+ctzll(0000002000000000) = 37
+ctzll(ffffffe000000000) = 37
+ctzll(0000004000000000) = 38
+ctzll(ffffffc000000000) = 38
+ctzll(0000008000000000) = 39
+ctzll(ffffff8000000000) = 39
+ctzll(0000010000000000) = 40
+ctzll(ffffff0000000000) = 40
+ctzll(0000020000000000) = 41
+ctzll(fffffe0000000000) = 41
+ctzll(0000040000000000) = 42
+ctzll(fffffc0000000000) = 42
+ctzll(0000080000000000) = 43
+ctzll(fffff80000000000) = 43
+ctzll(0000100000000000) = 44
+ctzll(fffff00000000000) = 44
+ctzll(0000200000000000) = 45
+ctzll(ffffe00000000000) = 45
+ctzll(0000400000000000) = 46
+ctzll(ffffc00000000000) = 46
+ctzll(0000800000000000) = 47
+ctzll(ffff800000000000) = 47
+ctzll(0001000000000000) = 48
+ctzll(ffff000000000000) = 48
+ctzll(0002000000000000) = 49
+ctzll(fffe000000000000) = 49
+ctzll(0004000000000000) = 50
+ctzll(fffc000000000000) = 50
+ctzll(0008000000000000) = 51
+ctzll(fff8000000000000) = 51
+ctzll(0010000000000000) = 52
+ctzll(fff0000000000000) = 52
+ctzll(0020000000000000) = 53
+ctzll(ffe0000000000000) = 53
+ctzll(0040000000000000) = 54
+ctzll(ffc0000000000000) = 54
+ctzll(0080000000000000) = 55
+ctzll(ff80000000000000) = 55
+ctzll(0100000000000000) = 56
+ctzll(ff00000000000000) = 56
+ctzll(0200000000000000) = 57
+ctzll(fe00000000000000) = 57
+ctzll(0400000000000000) = 58
+ctzll(fc00000000000000) = 58
+ctzll(0800000000000000) = 59
+ctzll(f800000000000000) = 59
+ctzll(1000000000000000) = 60
+ctzll(f000000000000000) = 60
+ctzll(2000000000000000) = 61
+ctzll(e000000000000000) = 61
+ctzll(4000000000000000) = 62
+ctzll(c000000000000000) = 62
+ctzll(8000000000000000) = 63
+ctzll(8000000000000000) = 63
+fabs(3.141590) = 3.141590
+fabs(-3.141590) = 3.141590
+fabsf(7.250000) = 7.250000
+fabsf(-7.250000) = 7.250000
diff --git a/test/regression/Results/builtins-powerpc b/test/regression/Results/builtins-powerpc
index a3c63ad6..a37bfc63 100644
--- a/test/regression/Results/builtins-powerpc
+++ b/test/regression/Results/builtins-powerpc
@@ -1,16 +1,7 @@
 mulhw(12345678, deadbeef) = fda16776
 mulhwu(12345678, deadbeef) = fd5bdee
-clz(12345678) = 3
-clzll(12345678) = 35
-clzll(1234567812345678) = 3
-clzll(78563412) = 33
-bswap(12345678) = 78563412
-bswap16(1234) = 3412
 fmadd(3.141590, 2.718000, 1.414000) = 9.952842
 fmsub(3.141590, 2.718000, 1.414000) = 7.124842
-fabs(3.141590) = 3.141590
-fabs(-3.141590) = 3.141590
-fsqrt(3.141590) = 1.772453
 frsqrte(3.141590) = OK
 fres(3.141590) = OK
 fsel(3.141590, 2.718000, 1.414000) = 2.718000
diff --git a/test/regression/Results/builtins-riscV b/test/regression/Results/builtins-riscV
index 1576b252..a14b7677 100644
--- a/test/regression/Results/builtins-riscV
+++ b/test/regression/Results/builtins-riscV
@@ -1,12 +1,6 @@
-bswap16(1234) = 3412
-bswap32(12345678) = 78563412
-bswap64(123456789abcdef0) = f0debc9a78563412
 fmadd(3.141590, 2.718000, 1.414000) = 9.952842
 fmsub(3.141590, 2.718000, 1.414000) = 7.124842
 fnmadd(3.141590, 2.718000, 1.414000) = -9.952842
 fnmsub(3.141590, 2.718000, 1.414000) = -7.124842
-fabs(3.141590) = 3.141590
-fabs(-3.141590) = 3.141590
-fsqrt(3.141590) = 1.772453
 fmax(3.141590, 2.718000) = 3.141590
 fmin(3.141590, 2.718000) = 2.718000
diff --git a/test/regression/Results/builtins-x86 b/test/regression/Results/builtins-x86
index 393ac1fd..7cd8838b 100644
--- a/test/regression/Results/builtins-x86
+++ b/test/regression/Results/builtins-x86
@@ -1,15 +1,3 @@
-bswap(12345678) = 78563412
-bswap16(1234) = 3412
-bswap64(123456789abcdef0) = f0debc9a78563412
-clz(12345678) = 3
-clzll(12345678) = 35
-clzll(1234567812345678) = 3
-clzll(78563412) = 33
-ctz(1234) = 2
-ctzll(1234567812345678) = 3
-ctzll(1234567800000000) = 35
-ctzll(78563412) = 1
-fsqrt(3.141590) = 1.772453
 fmin(3.141590, 2.718000) = 2.718000
 fmax(3.141590, 2.718000) = 3.141590
 read_16_rev = 3412
diff --git a/test/regression/builtins-aarch64.c b/test/regression/builtins-aarch64.c
index 2cfa2d09..671f65e3 100644
--- a/test/regression/builtins-aarch64.c
+++ b/test/regression/builtins-aarch64.c
@@ -1,43 +1,25 @@
-/* Fun with builtin functions */
+/* AArch64-specific builtin functions */
 
 #include <stdio.h>
 
 int main(int argc, char ** argv)
 {
-  unsigned int x = 0x12345678;
-  unsigned int y = 0xDEADBEEF;
-  unsigned long long xx = 0x1234567812345678ULL;
-  unsigned long long yy = 0x1234567800000000ULL;
-  unsigned long long zz = 0x123456789ABCDEF0ULL;
-  unsigned z;
   double a = 3.14159;
   double b = 2.718;
   double c = 1.414;
-  unsigned short s = 0x1234;
   signed int u = 1234567;
   signed int v = -9999;
 
-  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
-  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
-  printf("bswap64(%llx) = %llx\n", zz, __builtin_bswap64(zz));
-  printf("clz(%x) = %d\n", x, __builtin_clz(x));
-  printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
-  printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
   printf("cls(%d) = %d\n", u, __builtin_cls(u));
   printf("cls(%d) = %d\n", v, __builtin_cls(v));
   printf("clsll(%lld) = %d\n", (signed long long) u, __builtin_clsll(u));
   printf("clsll(%lld) = %d\n", (signed long long) v, __builtin_clsll(v));
 
-  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
   printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
   printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
   printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
   printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
 
-  /* Make sure that ignoring the result of a builtin
-     doesn't cause an internal error */
-  (void) __builtin_bswap(x);
-  (void) __builtin_fsqrt(a);
   return 0;
 }
 
diff --git a/test/regression/builtins-arm.c b/test/regression/builtins-arm.c
index d06e8e5e..85b611e8 100644
--- a/test/regression/builtins-arm.c
+++ b/test/regression/builtins-arm.c
@@ -1,10 +1,9 @@
-/* Fun with builtins */
+/* ARM-specific builtins */
 
 #include <stdio.h>
 
 unsigned int x = 0x12345678;
 unsigned int y = 0xDEADBEEF;
-unsigned long long xx = 0x1234567812345678ULL;
 double a = 3.14159;
 unsigned short s = 0x1234;
 
@@ -12,13 +11,6 @@ int main(int argc, char ** argv)
 {
   unsigned z;
 
-  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
-  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
-  printf("clz(%x) = %d\n", x, __builtin_clz(x));
-  printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
-  printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
-  z = __builtin_bswap(x);
-  printf("clzll(%lx) = %d\n", z, __builtin_clzll(z));
   printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
   
   printf ("read_16_rev = %x\n", __builtin_read16_reversed(&s));
diff --git a/test/regression/builtins-common.c b/test/regression/builtins-common.c
new file mode 100644
index 00000000..72839a61
--- /dev/null
+++ b/test/regression/builtins-common.c
@@ -0,0 +1,58 @@
+/* Builtin functions that are implemented on all target processors */
+
+#include <stdio.h>
+
+unsigned int x = 0x12345678;
+unsigned int y = 0xDEADBEEF;
+unsigned long long xx = 0x123456789ABCDEF0ULL;
+double a = 3.14159;
+double b = 2.718;
+double c = 1.414;
+float f = 7.25;
+unsigned short s = 0x1234;
+
+int main(int argc, char ** argv)
+{
+  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
+  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
+  printf("bswap64(%llx) = %llx\n", xx, __builtin_bswap64(xx));
+  for (int i = 0; i < 32; i++) {
+    unsigned z = 0xFFFFFFFFU >> i;
+    printf("clz(%08x) = %d\n", z, __builtin_clz(z));
+    z = 0x80000000U >> i;
+    printf("clz(%08x) = %d\n", z, __builtin_clz(z));
+  }
+  for (int i = 0; i < 64; i++) {
+    unsigned long long z = 0xFFFFFFFFFFFFFFFFULL >> i;
+    printf("clzll(%016llx) = %d\n", z, __builtin_clzll(z));
+    z = 0x8000000000000000ULL >> i;
+    printf("clzll(%016llx) = %d\n", z, __builtin_clzll(z));
+  }
+  for (int i = 0; i < 32; i++) {
+    unsigned z = 1U << i;
+    printf("ctz(%08x) = %d\n", z, __builtin_ctz(z));
+    z = 0xFFFFFFFFU << i;
+    printf("ctz(%08x) = %d\n", z, __builtin_ctz(z));
+  }
+  for (int i = 0; i < 64; i++) {
+    unsigned long long z = 1ULL << i;
+    printf("ctzll(%016llx) = %d\n", z, __builtin_ctzll(z));
+    z = 0xFFFFFFFFFFFFFFFFULL << i;
+    printf("ctzll(%016llx) = %d\n", z, __builtin_ctzll(z));
+  }
+  printf("fabs(%f) = %f\n", a, __builtin_fabs(a));
+  printf("fabs(%f) = %f\n", -a, __builtin_fabs(-a));
+  printf("fabsf(%f) = %f\n", f, __builtin_fabsf(f));
+  printf("fabsf(%f) = %f\n", -f, __builtin_fabsf(-f));
+#ifndef __KVX__ // no builtin FSQRT or SQRT on KVX
+  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
+  printf("sqrt(%f) = %f\n", a, __builtin_sqrt(a));
+#endif
+
+  /* Make sure that ignoring the result of a builtin
+     doesn't cause an internal error */
+  (void) __builtin_bswap(x);
+  (void) __builtin_fsqrt(a);
+  (void) __builtin_sel(a > 0.0, x, y);
+  return 0;
+}
diff --git a/test/regression/builtins-powerpc.c b/test/regression/builtins-powerpc.c
index 8fd5818b..55809106 100644
--- a/test/regression/builtins-powerpc.c
+++ b/test/regression/builtins-powerpc.c
@@ -1,4 +1,4 @@
-/* Fun with builtins */
+/* PowerPC-specific builtins */
 
 #include <stdio.h>
 #include <math.h>
@@ -11,7 +11,6 @@ char * check_relative_error(double exact, double actual, double precision)
 
 unsigned int x = 0x12345678;
 unsigned int y = 0xDEADBEEF;
-unsigned long long xx = 0x1234567812345678ULL;
 double a = 3.14159;
 double b = 2.718;
 double c = 1.414;
@@ -23,19 +22,9 @@ int main(int argc, char ** argv)
 
   printf("mulhw(%x, %x) = %x\n", x, y, __builtin_mulhw(x, y));
   printf("mulhwu(%x, %x) = %x\n", x, y, __builtin_mulhwu(x, y));
-  printf("clz(%x) = %d\n", x, __builtin_clz(x));
-  printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
-  printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
-  z = __builtin_bswap(x);
-  printf("clzll(%lx) = %d\n", z, __builtin_clzll(z));
-  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
-  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
 
   printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
   printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
-  printf("fabs(%f) = %f\n", a, __builtin_fabs(a));
-  printf("fabs(%f) = %f\n", -a, __builtin_fabs(-a));
-  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
   printf("frsqrte(%f) = %s\n",
          a, check_relative_error(1.0 / sqrt(a), __builtin_frsqrte(a), 1./32.));
   printf("fres(%f) = %s\n",
diff --git a/test/regression/builtins-riscV.c b/test/regression/builtins-riscV.c
index c34fdf2c..acd98e49 100644
--- a/test/regression/builtins-riscV.c
+++ b/test/regression/builtins-riscV.c
@@ -1,4 +1,4 @@
-/* Fun with builtins */
+/* RISC-V specific builtin functions */
 
 #include <stdio.h>
 
@@ -11,20 +11,11 @@ double c = 1.414;
 
 int main(int argc, char ** argv)
 {
-  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
-  printf("bswap32(%x) = %x\n", x, __builtin_bswap32(x));
-  printf("bswap64(%llx) = %llx\n", zz, __builtin_bswap64(zz));
   printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
   printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
   printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
   printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
-  printf("fabs(%f) = %f\n", a, __builtin_fabs(a));
-  printf("fabs(%f) = %f\n", -a, __builtin_fabs(-a));
-  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
   printf("fmax(%f, %f) = %f\n", a, b, __builtin_fmax(a, b));
   printf("fmin(%f, %f) = %f\n", a, b, __builtin_fmin(a, b));
-  /* Make sure that ignoring the result of a builtin
-     doesn't cause an internal error */
-  (void) __builtin_fsqrt(a);
   return 0;
 }
diff --git a/test/regression/builtins-x86.c b/test/regression/builtins-x86.c
index 6233f9fd..05d3ab93 100644
--- a/test/regression/builtins-x86.c
+++ b/test/regression/builtins-x86.c
@@ -4,9 +4,6 @@
 
 unsigned int x = 0x12345678;
 unsigned int y = 0xDEADBEEF;
-unsigned long long xx = 0x1234567812345678ULL;
-unsigned long long yy = 0x1234567800000000ULL;
-unsigned long long zz = 0x123456789ABCDEF0ULL;
 double a = 3.14159;
 double b = 2.718;
 double c = 1.414;
@@ -14,22 +11,6 @@ unsigned short s = 0x1234;
 
 int main(int argc, char ** argv)
 {
-  unsigned z;
-
-  printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
-  printf("bswap16(%x) = %x\n", s, __builtin_bswap16(s));
-  printf("bswap64(%llx) = %llx\n", zz, __builtin_bswap64(zz));
-  printf("clz(%x) = %d\n", x, __builtin_clz(x));
-  printf("clzll(%llx) = %d\n", (unsigned long long) x, __builtin_clzll(x));
-  printf("clzll(%llx) = %d\n", xx, __builtin_clzll(xx));
-  z = __builtin_bswap(x);
-  printf("clzll(%lx) = %d\n", z, __builtin_clzll(z));
-  printf("ctz(%x) = %d\n", s, __builtin_ctz(s));
-  printf("ctzll(%llx) = %d\n", xx, __builtin_ctzll(xx));
-  printf("ctzll(%llx) = %d\n", yy, __builtin_ctzll(yy));
-  printf("ctzll(%lx) = %d\n", z, __builtin_ctzll(z));
-
-  printf("fsqrt(%f) = %f\n", a, __builtin_fsqrt(a));
   printf("fmin(%f, %f) = %f\n", a, b, __builtin_fmin(a, b));
   printf("fmax(%f, %f) = %f\n", a, b, __builtin_fmax(a, b));
 
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 1fa5ad28..ddb3c21a 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -11,64 +11,83 @@ David Monniaux, CNRS, VERIMAG
 type is_partial = TOTAL | PARTIAL;;
 type print_result = Noprint | Print of string;;
 type when_triggered = Always | Option of string;;
+type needs_require = Require | NoRequire;;
 
+(* FIXME - The gestion of NoRequire is a bit ugly right now. *)
 let rtl_passes =
 [|
-TOTAL, (Option "optim_tailcalls"), (Some "Tail calls"), "Tailcall";
-PARTIAL, Always, (Some "Inlining"), "Inlining";
-TOTAL, (Option "profile_arcs"), (Some "Profiling insertion"), "Profiling";
-TOTAL, (Option "branch_probabilities"), (Some "Profiling use"), "ProfilingExploit";
-TOTAL, (Option "optim_move_loop_invariants"), (Some "Inserting initial nop"), "FirstNop";
-TOTAL, Always, (Some "Renumbering"), "Renumber";
-PARTIAL, (Option "optim_duplicate"),  (Some "Tail-duplicating"), "Duplicate";
-TOTAL, Always, (Some "Renumbering pre constprop"), "Renumber";
-TOTAL, (Option "optim_constprop"), (Some "Constant propagation"), "Constprop";
-TOTAL, Always, (Some "Renumbering pre CSE"), "Renumber";
-PARTIAL, (Option "optim_CSE"), (Some "CSE"), "CSE";
-TOTAL, (Option "optim_CSE2"), (Some "CSE2"), "CSE2";
-PARTIAL, (Option "optim_CSE3"), (Some "CSE3"), "CSE3";
-TOTAL, (Option "optim_forward_moves"), (Some "Forwarding moves"), "ForwardMoves";
-PARTIAL, (Option "optim_redundancy"), (Some "Redundancy elimination"), "Deadcode";
-PARTIAL, (Option "optim_move_loop_invariants"), (Some "LICM"), "LICM";
-TOTAL, (Option "optim_move_loop_invariants"), (Some "Renumbering for LICM"), "Renumber";
-PARTIAL, (Option "optim_move_loop_invariants"), (Some "CSE3 for LICM"), "CSE3";
-PARTIAL, (Option "optim_move_loop_invariants"), (Some "Redundancy elimination for LICM"), "Deadcode";
-TOTAL, (Option "all_loads_nontrap"), None, "Allnontrap";
-PARTIAL, Always, (Some "Unused globals"), "Unusedglob"
+TOTAL, (Option "optim_tailcalls"), Require, (Some "Tail calls"), "Tailcall";
+PARTIAL, Always, Require, (Some "Inlining"), "Inlining";
+TOTAL, (Option "profile_arcs"), Require, (Some "Profiling insertion"), "Profiling";
+TOTAL, (Option "branch_probabilities"), Require, (Some "Profiling use"), "ProfilingExploit";
+TOTAL, (Option "optim_move_loop_invariants"), Require, (Some "Inserting initial nop"), "FirstNop";
+TOTAL, Always, Require, (Some "Renumbering"), "Renumber";
+PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
+PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
+PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
+TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate";
+TOTAL, Always, NoRequire, (Some "Renumbering pre unrolling"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
+TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
+TOTAL, (Option "optim_constprop"), Require, (Some "Constant propagation"), "Constprop";
+TOTAL, Always, NoRequire, (Some "Renumbering pre CSE"), "Renumber";
+PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
+TOTAL, (Option "optim_CSE2"), Require, (Some "CSE2"), "CSE2";
+PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3";
+TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves";
+TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves";
+PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode";
+TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
+TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
+PARTIAL, (Option "optim_move_loop_invariants"), Require, (Some "LICM"), "LICM";
+TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
+PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3";
+PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode";
+TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap";
+PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"
 |];;
 
 let post_rtl_passes =
 [|
-  PARTIAL, Always, (Some "Register allocation"), "Allocation", (Print "LTL");
-  TOTAL, Always, (Some "Branch tunneling"), "Tunneling", Noprint;
-  PARTIAL, Always, (Some "CFG linearization"), "Linearize", Noprint;
-  TOTAL, Always, (Some "Label cleanup"), "CleanupLabels", Noprint;
-  PARTIAL, (Option "debug"), (Some "Debugging info for local variables"), "Debugvar", Noprint;
-  PARTIAL, Always, (Some "Mach generation"), "Stacking", (Print "Mach")
+  PARTIAL, Always, Require, (Some "RTLpath generation"), "RTLpathLivegen", Noprint;
+  PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint;
+  TOTAL, Always, Require,  (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
+  PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1");
+  PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2");
+  PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;
+  TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint;
+  PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint;
+  PARTIAL, Always, Require, (Some "Mach generation"), "Stacking", (Print "Mach")
 |];;
 
 let all_passes =
   Array.concat
     [Array.mapi
-       (fun i (a,b,c,d) -> (a,b,c,d, Print (Printf.sprintf "RTL %d" (i+1))))
+       (fun i (a,b,r,c,d) -> (a,b,r,c,d, Print (Printf.sprintf "RTL %d" (i+1))))
        rtl_passes;
      post_rtl_passes];;
 
 let totality = function TOTAL -> "total" | PARTIAL -> "partial";;
 
 let print_rtl_require oc =
-  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
-      Printf.fprintf oc "Require %s.\n" pass_name)
-    all_passes;;
+  Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) ->
+      match require with Require -> 
+        Printf.fprintf oc "Require %s.\n" pass_name
+      | _ -> ()
+    ) all_passes;;
 
 let print_rtl_require_proof oc =
-  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
-      Printf.fprintf oc "Require %sproof.\n" pass_name)
-    all_passes;;
+  Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) ->
+      match require with Require -> 
+      Printf.fprintf oc "Require %sproof.\n" pass_name
+      | _ -> ()
+    ) all_passes;;
 
 let print_rtl_transf oc =
   Array.iteri
-    (fun i (partial, trigger, time_label, pass_name, printing) ->
+    (fun i (partial, trigger, require, time_label, pass_name, printing) ->
       output_string oc (match partial with
                         | TOTAL -> "   @@ "
                         | PARTIAL -> "  @@@ ");
@@ -89,7 +108,7 @@ let print_rtl_transf oc =
     ) all_passes;;
 
 let print_rtl_mkpass oc =
-  Array.iter (fun (partial, trigger, time_label, pass_name, printing) ->
+  Array.iter (fun (partial, trigger, require, time_label, pass_name, printing) ->
       output_string oc "  ::: mkpass (";
       (match trigger with
        | Always -> ()
@@ -105,7 +124,7 @@ let print_if kind oc = function
 let numbering_base = 7
                    
 let print_rtl_proof oc =
-  Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) ->
+  Array.iteri (fun i (partial, trigger, require, time_label, pass_name, printing) ->
       let j = i+numbering_base in
       match partial with
       | TOTAL ->
@@ -117,7 +136,7 @@ let print_rtl_proof oc =
     all_passes;;
 
 let print_rtl_proof2 oc =
-  Array.iteri (fun i (partial, trigger, time_label, pass_name, printing) ->
+  Array.iteri (fun i (partial, trigger, require, time_label, pass_name, printing) ->
       let j = i+numbering_base in
       Printf.fprintf oc "  exists p%d; split. " j;
       (match trigger with
@@ -130,7 +149,7 @@ let print_rtl_proof2 oc =
     all_passes;;
 
 let print_rtl_forward_simulations oc =
-  Array.iter (fun (partial, trigger, time_label, pass_name) ->
+  Array.iter (fun (partial, trigger, require, time_label, pass_name) ->
       output_string oc "  eapply compose_forward_simulations.\n    ";
       (match trigger with
        | Always -> ()
diff --git a/tools/modorder.ml b/tools/modorder.ml
index d1203568..7ca6a9e9 100644
--- a/tools/modorder.ml
+++ b/tools/modorder.ml
@@ -84,7 +84,7 @@ let filename_suffix s =
 let emit_dependencies deps targets =
 
   let rec dsort target suff =
-    match (try Some(Hashtbl.find deps target) with Not_found -> None) with
+    match Hashtbl.find_opt deps target with
     | None -> ()
     | Some node ->
         match node.status with
diff --git a/x86/Asm.v b/x86/Asm.v
index 58e28c40..33f1f2ad 100644
--- a/x86/Asm.v
+++ b/x86/Asm.v
@@ -279,6 +279,7 @@ Inductive instruction: Type :=
   | Pmaxsd (rd: freg) (r2: freg)
   | Pminsd (rd: freg) (r2: freg)
   | Pmovb_rm (rd: ireg) (a: addrmode)
+  | Pmovq_rf (rd: ireg) (r1: freg)
   | Pmovsq_mr  (a: addrmode) (rs: freg)
   | Pmovsq_rm (rd: freg) (a: addrmode)
   | Pmovsb
@@ -998,6 +999,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pmaxsd _ _
   | Pminsd _ _
   | Pmovb_rm _ _
+  | Pmovq_rf _ _
   | Pmovsq_rm _ _
   | Pmovsq_mr _ _
   | Pmovsb
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index ad667e3d..20f5d170 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -44,7 +44,7 @@ let stack_alignment () = 16
 let _Plea (r, addr) =
   if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
 
-(* SP adjustment to allocate or free a stack frame *)
+(* SP adjustment to allocate or free a stack frame. *)
 
 let align n a =
   if n >= 0 then (n + a - 1) land (-a) else n land (-a)
@@ -56,7 +56,7 @@ let sp_adjustment_32 sz =
   (* The top 4 bytes have already been allocated by the "call" instruction. *)
   sz - 4
 
-let sp_adjustment_64 sz =
+let sp_adjustment_elf64 sz =
   let sz = Z.to_int sz in
   if is_current_function_variadic() then begin
     (* If variadic, add room for register save area, which must be 16-aligned *)
@@ -73,6 +73,13 @@ let sp_adjustment_64 sz =
     (sz - 8, -1)
   end
 
+let sp_adjustment_win64 sz =
+  let sz = Z.to_int sz in
+  (* Preserve proper alignment of the stack *)
+  let sz = align sz 16 in
+  (* The top 8 bytes have already been allocated by the "call" instruction. *)
+  sz - 8
+
 (* Built-ins.  They come in two flavors:
    - annotation statements: take their arguments in registers or stack
    locations; generate no code;
@@ -256,7 +263,7 @@ let expand_builtin_va_start_32 r =
   emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
   emit (Pmovl_mr (linear_addr r _0z, RAX))
 
-let expand_builtin_va_start_64 r =
+let expand_builtin_va_start_elf64 r =
   if not (is_current_function_variadic ()) then
     invalid_arg "Fatal error: va_start used in non-vararg function";
   let (ir, fr, ofs) =
@@ -287,6 +294,17 @@ let expand_builtin_va_start_64 r =
   emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
   emit (Pmovq_mr (linear_addr r _16z, RAX))
 
+let expand_builtin_va_start_win64 r =
+  if not (is_current_function_variadic ()) then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  let num_args =
+    List.length (get_current_function_args()) in
+  let ofs =
+    Int64.(add !current_function_stacksize
+               (mul 8L (of_int num_args))) in
+  emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 ofs)));
+  emit (Pmovq_mr (linear_addr r _0z, RAX))
+
 (* FMA operations *)
 
 (*   vfmadd<i><j><k> r1, r2, r3   performs r1 := ri * rj + rk
@@ -378,11 +396,7 @@ let expand_builtin_inline name args res =
      emit (Paddl_ri(res, coqint_of_camlint 32l));
      emit (Plabel lbl2)
   (* Float arithmetic *)
-  | "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
-     if a1 <> res then
-       emit (Pmovsd_ff (res,a1));
-     emit (Pabsd res) (* This ensures that need_masks is set to true *)
-  | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
+  | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
      emit (Psqrtsd (res,a1))
   | "__builtin_fmax", [BA(FR a1); BA(FR a2)], BR(FR res) ->
      if res = a1 then
@@ -467,8 +481,8 @@ let expand_builtin_inline name args res =
   (* Vararg stuff *)
   | "__builtin_va_start", [BA(IR a)], _ ->
      assert (a = RDX);
-     if Archi.ptr64
-     then expand_builtin_va_start_64 a
+     if Archi.win64 then expand_builtin_va_start_win64 a
+     else if Archi.ptr64 then expand_builtin_va_start_elf64 a
      else expand_builtin_va_start_32 a
   (* Synchronization *)
   | "__builtin_membar", [], _ ->
@@ -480,24 +494,66 @@ let expand_builtin_inline name args res =
   | _ ->
      raise (Error ("unrecognized builtin " ^ name))
 
-(* Calls to variadic functions for x86-64: register AL must contain
+(* Calls to variadic functions for x86-64 ELF: register AL must contain
    the number of XMM registers used for parameter passing.  To be on
-   the safe side.  do the same if the called function is
+   the safe side, do the same if the called function is
    unprototyped. *)
 
-let set_al sg =
-  if Archi.ptr64 && (sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin
+let fixup_funcall_elf64 sg =
+  if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin
     let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
     emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
   end
 
+(* Calls to variadic functions for x86-64 Windows:
+   FP arguments passed in FP registers must also be passed in integer
+   registers.
+*)
+
+let rec copy_fregs_to_iregs args fr ir =
+  match (ir, fr, args) with
+  | (i1 :: ir, f1 :: fr, (Tfloat | Tsingle) :: args) ->
+      emit (Pmovq_rf (i1, f1));
+      copy_fregs_to_iregs args fr ir
+  | (i1 :: ir, f1 :: fr, _ :: args) ->
+      copy_fregs_to_iregs args fr ir
+  | _ ->
+      ()
+
+let fixup_funcall_win64 sg =
+  if sg.sig_cc.cc_vararg then
+    copy_fregs_to_iregs sg.sig_args [XMM0; XMM1; XMM2; XMM3] [RCX; RDX; R8; R9]
+
+let fixup_funcall sg =
+  if Archi.ptr64
+  then if Archi.win64
+       then fixup_funcall_win64 sg
+       else fixup_funcall_elf64 sg
+  else ()
+
 (* Expansion of instructions *)
 
 let expand_instruction instr =
   match instr with
   | Pallocframe (sz, ofs_ra, ofs_link) ->
-     if Archi.ptr64 then begin
-       let (sz, save_regs) = sp_adjustment_64 sz in
+     if Archi.win64 then begin
+       let sz = sp_adjustment_win64 sz in
+       if is_current_function_variadic() then
+         (* Save parameters passed in registers in reserved stack area *)
+         emit (Pcall_s (intern_string "__compcert_va_saveregs",
+                        {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}));
+       (* Allocate frame *)
+       let sz' = Z.of_uint sz in
+       emit (Psubl_ri (RSP, sz'));
+       emit (Pcfi_adjust sz');
+       (* Stack chaining *)
+       let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in
+       let addr2 = linear_addr RSP ofs_link in
+       emit (Pleaq (RAX,addr1));
+       emit (Pmovq_mr (addr2, RAX));
+       current_function_stacksize := Int64.of_int (sz + 8)
+     end else if Archi.ptr64 then begin
+       let (sz, save_regs) = sp_adjustment_elf64 sz in
        (* Allocate frame *)
        let sz' = Z.of_uint sz in
        emit (Psubq_ri (RSP, sz'));
@@ -529,15 +585,18 @@ let expand_instruction instr =
        PrintAsmaux.current_function_stacksize := Int32.of_int sz
      end
   | Pfreeframe(sz, ofs_ra, ofs_link) ->
-     if Archi.ptr64 then begin
-       let (sz, _) = sp_adjustment_64 sz in
+     if Archi.win64 then begin
+       let sz = sp_adjustment_win64 sz in
+       emit (Paddq_ri (RSP, Z.of_uint sz))
+     end else if Archi.ptr64 then begin
+       let (sz, _) = sp_adjustment_elf64 sz in
        emit (Paddq_ri (RSP, Z.of_uint sz))
      end else begin
        let sz = sp_adjustment_32 sz in
        emit (Paddl_ri (RSP, Z.of_uint sz))
      end
   | Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) ->
-     set_al sg;
+     fixup_funcall sg;
      emit instr
   | Pbuiltin (ef,args, res) ->
      begin
diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml
index e7f714c7..a16f3ef7 100644
--- a/x86/CBuiltins.ml
+++ b/x86/CBuiltins.ml
@@ -19,8 +19,12 @@ open C
 
 let (va_list_type, va_list_scalar, size_va_list) =
   if Archi.ptr64 then
-    (* Actually a struct passed by reference; equivalent to 3 64-bit words *)
-    (TArray(TInt(IULong, []), Some 3L, []), false, 3*8)
+    if Archi.win64 then
+      (* Just a pointer *)
+      (TPtr(TVoid [], []), true, 8)
+    else
+      (* Actually a struct passed by reference; equivalent to 3 64-bit words *)
+      (TArray(TInt(IULong, []), Some 3L, []), false, 3*8)
   else
     (* Just a pointer *)
     (TPtr(TVoid [], []), true, 4)
@@ -30,19 +34,6 @@ let builtins = {
     "__builtin_va_list", va_list_type;
   ];
   builtin_functions = [
-    (* Integer arithmetic *)
-    "__builtin_clz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_clzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_clzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
-    "__builtin_ctz",
-      (TInt(IInt, []), [TInt(IUInt, [])], false);
-    "__builtin_ctzl",
-      (TInt(IInt, []), [TInt(IULong, [])], false);
-    "__builtin_ctzll",
-      (TInt(IInt, []), [TInt(IULongLong, [])], false);
     (* Float arithmetic *)
     "__builtin_fmax",
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
diff --git a/x86/CSE2deps.v b/x86/CSE2deps.v
index a4b47a5c..757966b8 100644
--- a/x86/CSE2deps.v
+++ b/x86/CSE2deps.v
@@ -32,5 +32,7 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
     if peq symb symb'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
     else false
+  | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+    negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
   | _, _, _, _ => true
   end.
diff --git a/x86/CSE2depsproof.v b/x86/CSE2depsproof.v
index fd088962..e181b8f4 100644
--- a/x86/CSE2depsproof.v
+++ b/x86/CSE2depsproof.v
@@ -20,11 +20,79 @@ Require Import Registers Op RTL.
 Require Import CSE2 CSE2deps.
 Require Import Lia.
 
+Lemma ptrofs_modulus :
+  Ptrofs.modulus = if Archi.ptr64
+                   then 18446744073709551616
+                   else 4294967296.
+Proof.
+  reflexivity.
+Qed.
+
 Section SOUNDNESS.
   Variable F V : Type.
   Variable genv: Genv.t F V.
   Variable sp : val.
 
+Section STACK_WRITE.
+  Variable m m2 : mem.
+  Variable chunkw chunkr : memory_chunk.
+  
+  Variable addrw addrr valw : val.
+  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+  Section INDEXED_AWAY.
+  Variable ofsw ofsr : ptrofs.
+  Hypothesis ADDRW : eval_addressing genv sp
+                       (Ainstack ofsw) nil = Some addrw.
+  Hypothesis ADDRR : eval_addressing genv sp
+                       (Ainstack ofsr) nil = Some addrr.
+
+  Lemma stack_load_store_away1 :
+    forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+    forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+    forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intros.
+    
+    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+    unfold largest_size_chunk in *.
+
+    inv ADDRR.
+    inv ADDRW.
+
+    destruct sp; try discriminate.
+    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+    exact STORE.
+    right.
+
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+              rewrite OFSR).
+    all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+              rewrite OFSW).
+    all: try rewrite ptrofs_modulus in *.
+    all: destruct Archi.ptr64.
+
+    all: intuition lia.
+  Qed.
+  
+  Theorem stack_load_store_away :
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+  Proof.
+    intro SWAP.
+    unfold can_swap_accesses_ofs in SWAP.
+    repeat rewrite andb_true_iff in SWAP.
+    repeat rewrite orb_true_iff in SWAP.
+    repeat rewrite Z.leb_le in SWAP.
+    apply stack_load_store_away1.
+    all: tauto.
+  Qed.
+  End INDEXED_AWAY.
+End STACK_WRITE.
+
 Section MEMORY_WRITE.
   Variable m m2 : mem.
   Variable chunkw chunkr : memory_chunk.
@@ -237,7 +305,7 @@ Proof.
   intros until rs.
   intros ADDR ADDR' OVERLAP STORE.
   destruct addr; destruct addr'; try discriminate.
-  { (* Aindexed / Aindexed *)
+- (* Aindexed / Aindexed *)
   destruct args as [ | base [ | ]]. 1,3: discriminate.
   destruct args' as [ | base' [ | ]]. 1,3: discriminate.
   simpl in OVERLAP.
@@ -247,8 +315,7 @@ Proof.
   2: discriminate.
   simpl in *.
   eapply load_store_away; eassumption.
-  }
-  { (* Aglobal / Aglobal *)
+- (* Aglobal / Aglobal *)
     destruct args. 2: discriminate.
     destruct args'. 2: discriminate.
     simpl in *.
@@ -259,7 +326,14 @@ Proof.
       eapply load_store_glob_away; eassumption.
     }
     eapply load_store_diff_globals; eassumption.
-  }
+- (* Ainstack / Ainstack *)
+  destruct args. 2: discriminate.
+  destruct args'. 2: discriminate.
+  cbn in OVERLAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+  2: discriminate.
+  cbn in *.
+  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/x86/ConstpropOp.vp b/x86/ConstpropOp.vp
index ada8d54a..dd4b839a 100644
--- a/x86/ConstpropOp.vp
+++ b/x86/ConstpropOp.vp
@@ -17,11 +17,10 @@ Require Import Coqlib Compopts.
 Require Import AST Integers Floats.
 Require Import Op Registers.
 Require Import ValueDomain ValueAOp.
+Require SelectOp.
 
 (** * Converting known values to constants *)
 
-Parameter symbol_is_external: ident -> bool.  (**r See [SelectOp] *)
-
 Definition Olea_ptr (a: addressing) := if Archi.ptr64 then Oleal a else Olea a.
 
 Definition const_for_result (a: aval) : option operation :=
@@ -31,7 +30,7 @@ Definition const_for_result (a: aval) : option operation :=
   | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
   | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
   | Ptr(Gl id ofs) =>
-      if symbol_is_external id then
+      if SelectOp.symbol_is_external id then
         if Ptrofs.eq ofs Ptrofs.zero then Some (Oindirectsymbol id) else None
       else
         Some (Olea_ptr (Aglobal id ofs))
diff --git a/x86/ConstpropOpproof.v b/x86/ConstpropOpproof.v
index 6d2df9c1..82179fa4 100644
--- a/x86/ConstpropOpproof.v
+++ b/x86/ConstpropOpproof.v
@@ -107,7 +107,7 @@ Proof.
 - (* pointer *)
   destruct p; try discriminate; SimplVM.
   + (* global *)
-    destruct (symbol_is_external id).
+    destruct (SelectOp.symbol_is_external id).
   * revert H2; predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero; intros EQ; inv EQ.
     exists (Genv.symbol_address ge id Ptrofs.zero); auto.
   * inv H2. exists (Genv.symbol_address ge id ofs); split.
diff --git a/x86/Conventions1.v b/x86/Conventions1.v
index d9f5b8fa..b4cb233e 100644
--- a/x86/Conventions1.v
+++ b/x86/Conventions1.v
@@ -32,40 +32,44 @@ Definition is_callee_save (r: mreg) : bool :=
   match r with
   | AX | CX | DX => false
   | BX | BP => true
-  | SI | DI => negb Archi.ptr64 (**r callee-save in 32 bits but not in 64 bits *)
+  | SI | DI => negb Archi.ptr64 || Archi.win64 (**r callee-save in ELF 64 bits *)
   | R8 | R9 | R10 | R11 => false
   | R12 | R13 | R14 | R15 => true
   | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 => false
-  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => false
+  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => Archi.win64
   | FP0 => false
   end.
 
 Definition int_caller_save_regs :=
   if Archi.ptr64
-  then AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil
+  then if Archi.win64
+       then AX :: CX :: DX :: R8 :: R9 :: R10 :: R11 :: nil
+       else AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil
   else AX :: CX :: DX :: nil.
 
 Definition float_caller_save_regs :=
   if Archi.ptr64
-  then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 ::
-       X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
+  then if Archi.win64
+       then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil
+       else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 ::
+            X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
   else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
 
 Definition int_callee_save_regs :=
   if Archi.ptr64
-  then BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil
+  then if Archi.win64
+       then BX :: SI :: DI :: BP :: R12 :: R13 :: R14 :: R15 :: nil
+       else BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil
   else BX :: SI :: DI :: BP :: nil.
 
-Definition float_callee_save_regs : list mreg := nil.
+Definition float_callee_save_regs :=
+  if Archi.ptr64 && Archi.win64
+  then X6 :: X7 :: X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
+  else nil.
 
 Definition destroyed_at_call :=
   List.filter (fun r => negb (is_callee_save r)) all_mregs.
 
-Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
-Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
-
-Definition callee_save_type := mreg_type.
-  
 Definition is_float_reg (r: mreg) :=
   match r with
   | AX | BX | CX | DX | SI | DI | BP
@@ -74,6 +78,11 @@ Definition is_float_reg (r: mreg) :=
   | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 | FP0 => true
   end.
 
+Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
+Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
+
+Definition callee_save_type := mreg_type.
+
 (** * Function calling conventions *)
 
 (** The functions in this section determine the locations (machine registers
@@ -182,7 +191,7 @@ Fixpoint loc_arguments_32
       :: loc_arguments_32 tys (ofs + typesize ty)
   end.
 
-(** In the x86-64 ABI:
+(** In the x86-64 ELF ABI:
 - The first 6 integer arguments are passed in registers [DI], [SI], [DX], [CX], [R8], [R9].
 - The first 8 floating-point arguments are passed in registers [X0] to [X7].
 - Extra arguments are passed on the stack, in [Outgoing] slots.
@@ -190,26 +199,62 @@ Fixpoint loc_arguments_32
   of data is used in a slot.
 *)
 
-Definition int_param_regs := DI :: SI :: DX :: CX :: R8 :: R9 :: nil.
-Definition float_param_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
+Definition int_param_regs_elf64 := DI :: SI :: DX :: CX :: R8 :: R9 :: nil.
+Definition float_param_regs_elf64 := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
 
-Fixpoint loc_arguments_64
+Fixpoint loc_arguments_elf64
     (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
   match tyl with
   | nil => nil
   | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
-      match list_nth_z int_param_regs ir with
+      match list_nth_z int_param_regs_elf64 ir with
       | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2)
+          One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
       | Some ireg =>
-          One (R ireg) :: loc_arguments_64 tys (ir + 1) fr ofs
+          One (R ireg) :: loc_arguments_elf64 tys (ir + 1) fr ofs
       end
   | (Tfloat | Tsingle) as ty :: tys =>
-      match list_nth_z float_param_regs fr with
+      match list_nth_z float_param_regs_elf64 fr with
       | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2)
+          One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
       | Some freg =>
-          One (R freg) :: loc_arguments_64 tys ir (fr + 1) ofs
+          One (R freg) :: loc_arguments_elf64 tys ir (fr + 1) ofs
+      end
+  end.
+
+(** In the x86-64 Win64 ABI:
+- The first 4 arguments are passed in registers [RCX], [RDX], [R8], and [R9]
+  (for integer arguments) and [X0] to [X3] (for floating-point arguments).
+  Each argument "burns" both an integer register and a FP integer.
+- The first 8 floating-point arguments are passed in registers [X0] to [X7].
+- Extra arguments are passed on the stack, in [Outgoing] slots.
+  Consecutive stack slots are separated by 8 bytes, even if only 4 bytes
+  of data is used in a slot.
+- Four 8-byte words are always reserved at the bottom of the outgoing area
+  so that the callee can use them to save the registers containing the
+  first four arguments.  This is handled in the Stacking phase.
+*)
+
+Definition int_param_regs_win64 := CX :: DX :: R8 :: R9 :: nil.
+Definition float_param_regs_win64 := X0 :: X1 :: X2 :: X3 :: nil.
+
+Fixpoint loc_arguments_win64
+    (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+  match tyl with
+  | nil => nil
+  | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
+      match list_nth_z int_param_regs_win64 r with
+      | None =>
+          One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
+      | Some ireg =>
+          One (R ireg) :: loc_arguments_win64 tys (r + 1) ofs
+      end
+  | (Tfloat | Tsingle) as ty :: tys =>
+      match list_nth_z float_param_regs_win64 r with
+      | None =>
+          One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
+      | Some freg =>
+          One (R freg) :: loc_arguments_win64 tys (r + 1) ofs
       end
   end.
 
@@ -218,7 +263,9 @@ Fixpoint loc_arguments_64
 
 Definition loc_arguments (s: signature) : list (rpair loc) :=
   if Archi.ptr64
-  then loc_arguments_64 s.(sig_args) 0 0 0
+  then if Archi.win64
+       then loc_arguments_win64 s.(sig_args) 0 0
+       else loc_arguments_elf64 s.(sig_args) 0 0 0
   else loc_arguments_32 s.(sig_args) 0.
 
 (** Argument locations are either caller-save registers or [Outgoing]
@@ -237,9 +284,16 @@ Definition loc_argument_32_charact (ofs: Z) (l: loc) : Prop :=
   | _ => False
   end.
 
-Definition loc_argument_64_charact (ofs: Z) (l: loc) : Prop :=
+Definition loc_argument_elf64_charact (ofs: Z) (l: loc) : Prop :=
+  match l with
+  | R r => In r int_param_regs_elf64 \/ In r float_param_regs_elf64
+  | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
+  | _ => False
+  end.
+
+Definition loc_argument_win64_charact (ofs: Z) (l: loc) : Prop :=
   match l with
-  | R r => In r int_param_regs \/ In r float_param_regs
+  | R r => In r int_param_regs_win64 \/ In r float_param_regs_win64
   | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
   | _ => False
   end.
@@ -259,37 +313,75 @@ Proof.
 * destruct H; split; eapply X; eauto; omega.
 Qed.
 
-Remark loc_arguments_64_charact:
+Remark loc_arguments_elf64_charact:
   forall tyl ir fr ofs p,
-  In p (loc_arguments_64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_64_charact ofs) p.
+  In p (loc_arguments_elf64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_elf64_charact ofs) p.
 Proof.
-  assert (X: forall ofs1 ofs2 l, loc_argument_64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_64_charact ofs1 l).
+  assert (X: forall ofs1 ofs2 l, loc_argument_elf64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_elf64_charact ofs1 l).
   { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
-  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_64_charact ofs1) p).
+  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_elf64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_elf64_charact ofs1) p).
   { destruct p; simpl; intuition eauto. }
   assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
   { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
 Opaque list_nth_z.
-  induction tyl; simpl loc_arguments_64; intros.
+  induction tyl; simpl loc_arguments_elf64; intros.
   elim H.
   assert (A: forall ty, In p
-      match list_nth_z int_param_regs ir with
-      | Some ireg => One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2)
+      match list_nth_z int_param_regs_elf64 ir with
+      | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl (ir + 1) fr ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
       end ->
-      forall_rpair (loc_argument_64_charact ofs) p).
-  { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1.
+      forall_rpair (loc_argument_elf64_charact ofs) p).
+  { intros. destruct (list_nth_z int_param_regs_elf64 ir) as [r|] eqn:E; destruct H1.
     subst. left. eapply list_nth_z_in; eauto.
     eapply IHtyl; eauto.
     subst. split. omega. assumption.
     eapply Y; eauto. omega. }
   assert (B: forall ty, In p
-      match list_nth_z float_param_regs fr with
-      | Some ireg => One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2)
+      match list_nth_z float_param_regs_elf64 fr with
+      | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl ir (fr + 1) ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
       end ->
-      forall_rpair (loc_argument_64_charact ofs) p).
-  { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1.
+      forall_rpair (loc_argument_elf64_charact ofs) p).
+  { intros. destruct (list_nth_z float_param_regs_elf64 fr) as [r|] eqn:E; destruct H1.
+    subst. right. eapply list_nth_z_in; eauto.
+    eapply IHtyl; eauto.
+    subst. split. omega. assumption.
+    eapply Y; eauto. omega. }
+  destruct a; eauto.
+Qed.
+
+Remark loc_arguments_win64_charact:
+  forall tyl r ofs p,
+  In p (loc_arguments_win64 tyl r ofs) -> (2 | ofs) -> forall_rpair (loc_argument_win64_charact ofs) p.
+Proof.
+  assert (X: forall ofs1 ofs2 l, loc_argument_win64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_win64_charact ofs1 l).
+  { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
+  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_win64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_win64_charact ofs1) p).
+  { destruct p; simpl; intuition eauto. }
+  assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
+  { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
+Opaque list_nth_z.
+  induction tyl; simpl loc_arguments_win64; intros.
+  elim H.
+  assert (A: forall ty, In p
+      match list_nth_z int_param_regs_win64 r with
+      | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
+      end ->
+      forall_rpair (loc_argument_win64_charact ofs) p).
+  { intros. destruct (list_nth_z int_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
+    subst. left. eapply list_nth_z_in; eauto.
+    eapply IHtyl; eauto.
+    subst. split. omega. assumption.
+    eapply Y; eauto. omega. }
+  assert (B: forall ty, In p
+      match list_nth_z float_param_regs_win64 r with
+      | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
+      | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
+      end ->
+      forall_rpair (loc_argument_win64_charact ofs) p).
+  { intros. destruct (list_nth_z float_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
     subst. right. eapply list_nth_z_in; eauto.
     eapply IHtyl; eauto.
     subst. split. omega. assumption.
@@ -301,18 +393,30 @@ Lemma loc_arguments_acceptable:
   forall (s: signature) (p: rpair loc),
   In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
 Proof.
-  unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF.
-- (* 64 bits *)
-  assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal).
-  assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal.
-  assert (X: forall l, loc_argument_64_charact 0 l -> loc_argument_acceptable l).
-  { unfold loc_argument_64_charact, loc_argument_acceptable.
+  unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF; [destruct Archi.win64 eqn:W64|].
+- (* WIN 64 bits *)
+  assert (A: forall r, In r int_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal).
+  assert (B: forall r, In r float_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; decide_goal).
+  assert (X: forall l, loc_argument_win64_charact 0 l -> loc_argument_acceptable l).
+  { unfold loc_argument_win64_charact, loc_argument_acceptable.
     destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
     intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
     exists (2 / typealign ty); destruct ty; reflexivity.
   }
-  exploit loc_arguments_64_charact; eauto using Z.divide_0_r.
+  exploit loc_arguments_win64_charact; eauto using Z.divide_0_r.
   unfold forall_rpair; destruct p; intuition auto.
+- (* ELF 64 bits *)
+  assert (A: forall r, In r int_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF, W64; decide_goal).
+  assert (B: forall r, In r float_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite W64; decide_goal).
+  assert (X: forall l, loc_argument_elf64_charact 0 l -> loc_argument_acceptable l).
+  { unfold loc_argument_elf64_charact, loc_argument_acceptable.
+    destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
+    intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
+    exists (2 / typealign ty); destruct ty; reflexivity.
+  }
+  exploit loc_arguments_elf64_charact; eauto using Z.divide_0_r.
+  unfold forall_rpair; destruct p; intuition auto.
+
 - (* 32 bits *)
   assert (X: forall l, loc_argument_32_charact 0 l -> loc_argument_acceptable l).
   { destruct l as [r | [] ofs ty]; simpl; intuition auto. rewrite H2; apply Z.divide_1_l. }
@@ -325,7 +429,7 @@ Hint Resolve loc_arguments_acceptable: locs.
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
 Proof.
-  unfold loc_arguments; destruct Archi.ptr64; reflexivity.
+  unfold loc_arguments; destruct Archi.ptr64; auto; destruct Archi.win64; auto.
 Qed.
 
 (** ** Normalization of function results *)
diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml
new file mode 120000
index 00000000..ee2674bf
--- /dev/null
+++ b/x86/ExpansionOracle.ml
@@ -0,0 +1 @@
+../aarch64/ExpansionOracle.ml
+\ No newline at end of file
diff --git a/x86/Machregsaux.ml b/x86/Machregsaux.ml
index 80066b00..840943e7 100644
--- a/x86/Machregsaux.ml
+++ b/x86/Machregsaux.ml
@@ -12,25 +12,7 @@
 
 (** Auxiliary functions on machine registers *)
 
-open Camlcoq
-open Machregs
-   
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-                                              
-let _ =
-  List.iter
-    (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
-    Machregs.register_names
-
 let is_scratch_register r = false
-
-let name_of_register r =
-  try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
-  Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
                                               
 let class_of_type = function
   | AST.Tint | AST.Tlong -> 0
diff --git a/x86/Machregsaux.mli b/x86/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/x86/Machregsaux.mli
+++ b/x86/Machregsaux.mli
@@ -12,9 +12,6 @@
 
 (** Auxiliary functions on machine registers *)
 
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
 val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
 
 val class_of_type: AST.typ -> int
diff --git a/x86/Op.v b/x86/Op.v
index 28e6dbd8..caa63235 100644
--- a/x86/Op.v
+++ b/x86/Op.v
@@ -999,7 +999,7 @@ Definition is_trivial_op (op: operation) : bool :=
 
 (** Operations that depend on the memory state. *)
 
-Definition condition_depends_on_memory (c: condition) : bool :=
+Definition cond_depends_on_memory (c: condition) : bool :=
   match c with
   | Ccompu _ => negb Archi.ptr64
   | Ccompuimm _ _ => negb Archi.ptr64
@@ -1010,14 +1010,14 @@ Definition condition_depends_on_memory (c: condition) : bool :=
 
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp c => condition_depends_on_memory c
-  | Osel c ty => condition_depends_on_memory c
+  | Ocmp c => cond_depends_on_memory c
+  | Osel c ty => cond_depends_on_memory c
   | _ => false
   end.
 
-Lemma condition_depends_on_memory_correct:
+Lemma cond_depends_on_memory_correct:
   forall c args m1 m2,
-  condition_depends_on_memory c = false ->
+  cond_depends_on_memory c = false ->
   eval_condition c args m1 = eval_condition c args m2.
 Proof.
   intros until m2. 
@@ -1031,12 +1031,36 @@ Lemma op_depends_on_memory_correct:
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
   intros until m2. destruct op; simpl; try congruence; intros C.
-- f_equal; f_equal; apply condition_depends_on_memory_correct; auto.
+- f_equal; f_equal; apply cond_depends_on_memory_correct; auto.
 - destruct args; auto. destruct args; auto.
-  rewrite (condition_depends_on_memory_correct c args m1 m2 C).
+  rewrite (cond_depends_on_memory_correct c args m1 m2 C).
   auto.
 Qed.
 
+Lemma cond_valid_pointer_eq:
+  forall cond args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+  intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+  all: repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+  forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+  (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+  eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+  intros until m2. destruct op eqn:OP; simpl; try congruence.
+  - intros MEM; destruct cond; simpl; try congruence;
+    repeat (destruct args; simpl; try congruence);
+    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  - intro MEM; destruct c; simpl; try congruence;
+      repeat (destruct args; simpl; try congruence);
+      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..7b6a1b14
--- /dev/null
+++ b/x86/PrepassSchedulingOracle.ml
@@ -0,0 +1,5 @@
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array) = None
diff --git a/x86/RTLpathSE_simplify.v b/x86/RTLpathSE_simplify.v
new file mode 120000
index 00000000..55bf0e52
--- /dev/null
+++ b/x86/RTLpathSE_simplify.v
@@ -0,0 +1 @@
+../aarch64/RTLpathSE_simplify.v
+\ No newline at end of file
diff --git a/x86/Stacklayout.v b/x86/Stacklayout.v
index 96b0c8ef..92244e46 100644
--- a/x86/Stacklayout.v
+++ b/x86/Stacklayout.v
@@ -15,11 +15,13 @@
 Require Import Coqlib.
 Require Import AST Memory Separation.
 Require Import Bounds.
+Require Archi.
 
 Local Open Scope sep_scope.
 
 (** The general shape of activation records is as follows,
   from bottom (lowest offsets) to top:
+- For the Win64 ABI: 32 reserved bytes
 - Space for outgoing arguments to function calls.
 - Back link to parent frame
 - Saved values of integer callee-save registers used by the function.
@@ -29,11 +31,11 @@ Local Open Scope sep_scope.
 - Return address.
 *)
 
-Definition fe_ofs_arg := 0.
+Definition fe_ofs_arg := if Archi.win64 then 32 else 0.
 
 Definition make_env (b: bounds) : frame_env :=
   let w := if Archi.ptr64 then 8 else 4 in
-  let olink := align (4 * b.(bound_outgoing)) w in  (* back link *)
+  let olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w in  (* back link *)
   let ocs := olink + w in                           (* callee-saves *)
   let ol :=  align (size_callee_save_area b ocs) 8 in (* locals *)
   let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
@@ -61,7 +63,7 @@ Proof.
 Local Opaque Z.add Z.mul sepconj range'.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
   set (ocs := olink + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
@@ -69,8 +71,9 @@ Local Opaque Z.add Z.mul sepconj range'.
   replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+  assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega).
   assert (0 <= 4 * b.(bound_outgoing)) by omega.
-  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+  assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
   assert (olink + w <= ocs) by (unfold ocs; omega).
   assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
@@ -87,7 +90,7 @@ Local Opaque Z.add Z.mul sepconj range'.
   rewrite sep_swap45.
   rewrite sep_swap34.
 (* Apply range_split and range_split2 repeatedly *)
-  unfold fe_ofs_arg.
+  apply range_drop_left with 0. omega. 
   apply range_split_2. fold olink. omega. omega.
   apply range_split. omega.
   apply range_split_2. fold ol. omega. omega.
@@ -105,15 +108,16 @@ Lemma frame_env_range:
 Proof.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
   set (ocs := olink + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
   set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+  assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega).
   assert (0 <= 4 * b.(bound_outgoing)) by omega.
-  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+  assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
   assert (olink + w <= ocs) by (unfold ocs; omega).
   assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
@@ -133,14 +137,14 @@ Lemma frame_env_aligned:
 Proof.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
   set (ocs := olink + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
   set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
   replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto).
-  split. apply Z.divide_0_r.
+  split. exists (fe_ofs_arg / 8). unfold fe_ofs_arg; destruct Archi.win64; reflexivity.
   split. apply align_divides; omega.
   split. apply align_divides; omega.
   split. apply align_divides; omega.
diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index 38eff731..52955dcb 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -238,7 +238,7 @@ module MacOS_System : SYSTEM =
           if i || (not !Clflags.option_fcommon) then ".const" else "COMM"
       | Section_string -> ".const"
       | Section_literal -> ".literal8"
-      | Section_jumptable -> ".text"  (* needed in 64 bits, not a problem in 32 bits *)
+      | Section_jumptable -> ".text"
       | Section_user(s, wr, ex) ->
           sprintf ".section	\"%s\", %s, %s"
             (if wr then "__DATA" else "__TEXT") s
@@ -257,32 +257,14 @@ module MacOS_System : SYSTEM =
     let print_align oc n =
       fprintf oc "	.align	%d\n" (log2 n)
 
-    let indirect_symbols : StringSet.t ref = ref StringSet.empty
-
     let print_mov_rs oc rd id =
-      if Archi.ptr64 then begin
-        fprintf oc "	movq    %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
-      end else begin
-        let id = extern_atom id in
-        indirect_symbols := StringSet.add id !indirect_symbols;
-        fprintf oc "	movl	L%a$non_lazy_ptr, %a\n" raw_symbol id ireg rd
-      end
+      fprintf oc "	movq    %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
 
     let print_fun_info _ _ = ()
 
     let print_var_info _ _ = ()
 
-    let print_epilogue oc =
-      if not Archi.ptr64 then begin
-        fprintf oc "	.section __IMPORT,__pointers,non_lazy_symbol_pointers\n";
-        StringSet.iter
-          (fun s ->
-            fprintf oc "L%a$non_lazy_ptr:\n" raw_symbol s;
-            fprintf oc "	.indirect_symbol %a\n" raw_symbol s;
-            fprintf oc "	.long	0\n")
-          !indirect_symbols;
-        indirect_symbols := StringSet.empty
-      end
+    let print_epilogue oc = ()
 
     let print_comm_decl oc name sz al =
       fprintf oc "	.comm	%a, %s, %d\n"
@@ -298,8 +280,11 @@ module MacOS_System : SYSTEM =
 module Cygwin_System : SYSTEM =
   struct
 
+    let symbol_prefix =
+      if Archi.ptr64 then "" else "_"
+
     let raw_symbol oc s =
-       fprintf oc "_%s" s
+       fprintf oc "%s%s" symbol_prefix s
 
     let symbol oc symb =
       raw_symbol oc (extern_atom symb)
@@ -329,19 +314,39 @@ module Cygwin_System : SYSTEM =
       | Section_debug_str-> assert false (* Should not be used *)
       | Section_ais_annotation -> assert false (* Not supported for coff binaries *)
 
-    let stack_alignment = 8 (* minimum is 4, 8 is better for perfs *)
+    let stack_alignment = 8
+      (* minimum is 4 for 32 bits, 8 for 64 bits; 8 is better for perfs *)
 
     let print_align oc n =
       fprintf oc "	.balign	%d\n" n
 
+    let indirect_symbols : StringSet.t ref = ref StringSet.empty
+
     let print_mov_rs oc rd id =
-      fprintf oc "	movl	$%a, %a\n" symbol id ireg rd
+      if Archi.ptr64 then begin
+        let s = extern_atom id in
+        indirect_symbols := StringSet.add s !indirect_symbols;
+        fprintf oc "	movq	.refptr.%s(%%rip), %a\n" s ireg rd
+      end else begin
+        fprintf oc "	movl	$%a, %a\n" symbol id ireg rd
+      end
 
     let print_fun_info _ _  = ()
 
     let print_var_info _ _ = ()
 
-    let print_epilogue _ = ()
+    let declare_indirect_symbol oc s =
+      fprintf oc "	.section	.rdata$.refptr.%s, \"dr\"\n" s;
+      fprintf oc "	.globl	.refptr.%s\n" s;
+      fprintf oc "	.linkonce	discard\n";
+      fprintf oc ".refptr.%s:\n" s;
+      fprintf oc "	.quad	%s\n" s
+
+    let print_epilogue oc =
+      if Archi.ptr64 then begin
+        StringSet.iter (declare_indirect_symbol oc) !indirect_symbols;
+        indirect_symbols := StringSet.empty
+      end
 
     let print_comm_decl oc name sz al =
       fprintf oc "	.comm   %a, %s, %d\n" 
@@ -349,7 +354,8 @@ module Cygwin_System : SYSTEM =
 
     let print_lcomm_decl oc name sz al =
       fprintf oc "	.lcomm   %a, %s, %d\n" 
-                 symbol name (Z.to_string sz) (log2 al)
+                 symbol name (Z.to_string sz)
+                 (if Archi.ptr64 then al else log2 al)
 
   end
 
@@ -850,6 +856,8 @@ module Target(System: SYSTEM):TARGET =
 	 fprintf oc "	minsd	%a, %a\n" freg a1 freg res
       | Pmovb_rm (rd,a) ->
 	 fprintf oc "	movb	%a, %a\n" addressing a ireg8 rd
+      | Pmovq_rf (rd, r1) ->
+         fprintf oc "	movq	%a, %a\n" freg r1 ireg64 rd
       | Pmovsq_mr(a, rs) ->
           fprintf oc "	movq	%a, %a\n" freg rs addressing a
       | Pmovsq_rm(rd, a) ->
diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v
index a29553e8..20c6a521 100644
--- a/x86/extractionMachdep.v
+++ b/x86/extractionMachdep.v
@@ -15,15 +15,19 @@
 
 (* Additional extraction directives specific to the x86-64 port *)
 
-Require SelectOp ConstpropOp.
+Require Archi SelectOp.
 
-(* SelectOp *)
-
-Extract Constant SelectOp.symbol_is_external =>
-  "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+(* Archi *)
 
-(* ConstpropOp *)
+Extract Constant Archi.win64 =>
+  "match Configuration.system with
+    | ""cygwin"" when ptr64 -> true
+    | _ -> false".
 
-Extract Constant ConstpropOp.symbol_is_external =>
-  "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+(* SelectOp *)
 
+Extract Constant SelectOp.symbol_is_external =>
+  "match Configuration.system with
+    | ""macosx"" -> C2C.atom_is_extern
+    | ""cygwin"" when Archi.ptr64 -> C2C.atom_is_extern
+    | _ -> (fun _ -> false)".
diff --git a/x86_32/Archi.v b/x86_32/Archi.v
index 4681784d..facb5879 100644
--- a/x86_32/Archi.v
+++ b/x86_32/Archi.v
@@ -16,9 +16,8 @@
 
 (** Architecture-dependent parameters for x86 in 32-bit mode *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := false.
 
@@ -59,6 +58,9 @@ Definition fma_invalid_mul_is_nan := false.
 
 Definition float_of_single_preserves_sNaN := false.
 
+(** Which ABI to use. *)
+Parameter win64: bool.   (* Always false in 32 bits *)
+
 Global Opaque ptr64 big_endian splitlong
               default_nan_64 choose_nan_64
               default_nan_32 choose_nan_32
diff --git a/x86_64/Archi.v b/x86_64/Archi.v
index 0e3c55f8..87f727bd 100644
--- a/x86_64/Archi.v
+++ b/x86_64/Archi.v
@@ -16,9 +16,8 @@
 
 (** Architecture-dependent parameters for x86 in 64-bit mode *)
 
+From Flocq Require Import Binary Bits.
 Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
 
 Definition ptr64 := true.
 
@@ -59,6 +58,9 @@ Definition fma_invalid_mul_is_nan := false.
 
 Definition float_of_single_preserves_sNaN := false.
 
+(** Which ABI to use. *)
+Parameter win64: bool.
+
 Global Opaque ptr64 big_endian splitlong
               default_nan_64 choose_nan_64
               default_nan_32 choose_nan_32
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2021-04-19 18:17:38 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2021-04-19 18:17:38 +0200
commit	54c05043c20a1d028d905d1a08d15a811542781d (patch)
tree	c7cf94daf31c08e850e2ff620ae7b64753c8d5d6
parent	e6714c8db021117c7bfbaf8fd102a07fc7b42692 (diff)
parent	a05f92785ffa93e4001d2a2e9a630351593fabc2 (diff)
download	compcert-kvx-54c05043c20a1d028d905d1a08d15a811542781d.tar.gz compcert-kvx-54c05043c20a1d028d905d1a08d15a811542781d.zip