Add scheduling oracle to Verilog

author: Yann Herklotz <git@yannherklotz.com> 2022-03-14 00:13:04 +0000
committer: Yann Herklotz <git@yannherklotz.com> 2022-03-14 00:13:04 +0000
commit: 20096af8d044ccea01360822834c748e17acd572 (patch)
tree: 8cd763523cf298bde2ee014d14ec3a7ee2db09e4
parent: 9c72ffe762dc2f90109d5991f74ee0ee4e9a8ec3 (diff)
download: compcert-kvx-20096af8d044ccea01360822834c748e17acd572.tar.gz
compcert-kvx-20096af8d044ccea01360822834c748e17acd572.zip
38 files changed, 12886 insertions, 8163 deletions
diff --git a/configure b/configure
index 2e7b62c4..37f6cb7f 100755
--- a/configure
+++ b/configure
@@ -887,6 +887,13 @@ BACKENDLIB=Asmgenproof0.v Asmgenproof1.v ExtValues.v
 EOF
 fi
 
+if [ "$arch" = "verilog" ] ; then
+cat >> Makefile.config <<EOF
+EXTRA_EXTRACTION=Asm.ireg_eq Asm.ireg0_eq
+BACKENDLIB=Asmgenproof0.v Asmgenproof1.v ExtValues.v
+EOF
+fi
+
 #
 # Generate Merlin and CoqProject files to simplify development
 #
diff --git a/verilog/Archi.v b/verilog/Archi.v
index dc5a078d..236bdb96 100644
--- a/verilog/Archi.v
+++ b/verilog/Archi.v
@@ -15,7 +15,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Architecture-dependent parameters for x86 in 32-bit mode *)
+(** Architecture-dependent parameters for RISC-V *)
 
 From Flocq Require Import Binary Bits.
 Require Import ZArith List.
@@ -24,24 +24,30 @@ Definition ptr64 := false.
 
 Definition big_endian := false.
 
-Definition align_int64 := 4%Z.
-Definition align_float64 := 4%Z.
+Definition align_int64 := 8%Z.
+Definition align_float64 := 8%Z.
 
 Definition splitlong := false.
 
 Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
-Proof. discriminate. Qed.
+Proof.
+  unfold splitlong. destruct ptr64; simpl; congruence. 
+Qed.
 
-Definition default_nan_64 := (true, iter_nat 51 _ xO xH).
-Definition default_nan_32 := (true, iter_nat 22 _ xO xH).
+(** Section 7.3: "Except when otherwise stated, if the result of a
+   floating-point operation is NaN, it is the canonical NaN. The
+   canonical NaN has a positive sign and all significand bits clear
+   except the MSB, a.k.a. the quiet bit."
+   Exceptions are operations manipulating signs. *)
 
-(* Always choose the first NaN argument, if any *)
+Definition default_nan_64 := (false, iter_nat 51 _ xO xH).
+Definition default_nan_32 := (false, iter_nat 22 _ xO xH).
 
 Definition choose_nan_64 (l: list (bool * positive)) : bool * positive :=
-  match l with nil => default_nan_64 | n :: _ => n end.
+  default_nan_64.
 
 Definition choose_nan_32 (l: list (bool * positive)) : bool * positive :=
-  match l with nil => default_nan_32 | n :: _ => n end.
+  default_nan_32.
 
 Lemma choose_nan_64_idem: forall n,
   choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil).
@@ -57,13 +63,14 @@ Definition fma_invalid_mul_is_nan := false.
 
 Definition float_of_single_preserves_sNaN := false.
 
-(** Which ABI to use. *)
-Parameter win64: bool.   (* Always false in 32 bits *)
-
 Global Opaque ptr64 big_endian splitlong
               default_nan_64 choose_nan_64
               default_nan_32 choose_nan_32
               fma_order fma_invalid_mul_is_nan
               float_of_single_preserves_sNaN.
 
+(** Whether to generate position-independent code or not *)
+
+Parameter pic_code: unit -> bool.
+
 Definition has_notrap_loads := false.
diff --git a/verilog/Asm.v b/verilog/Asm.v
index 799b533e..c80c6cc2 100644
--- a/verilog/Asm.v
+++ b/verilog/Asm.v
@@ -2,296 +2,431 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                  Xavier Leroy, INRIA Paris                          *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
-(** Abstract syntax and semantics for IA32 assembly language *)
-
-Require Import Coqlib Maps.
-Require Import AST Integers Floats Values Memory Events Globalenvs Smallstep.
-Require Import Locations Stacklayout Conventions.
+(** Abstract syntax and semantics for RISC-V assembly language. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require ExtValues.
 
 (** * Abstract syntax *)
 
-(** ** Registers. *)
-
-(** Integer registers. *)
+(** Integer registers.  X0 is treated specially because it always reads 
+  as zero and is never used as a destination of an instruction. *)
 
 Inductive ireg: Type :=
-  | RAX | RBX | RCX | RDX | RSI | RDI | RBP | RSP
-  | R8  | R9  | R10 | R11 | R12 | R13 | R14 | R15.
+  | X1:  ireg | X2:  ireg | X3:  ireg | X4:  ireg | X5:  ireg
+  | X6:  ireg | X7:  ireg | X8:  ireg | X9:  ireg | X10: ireg
+  | X11: ireg | X12: ireg | X13: ireg | X14: ireg | X15: ireg
+  | X16: ireg | X17: ireg | X18: ireg | X19: ireg | X20: ireg
+  | X21: ireg | X22: ireg | X23: ireg | X24: ireg | X25: ireg
+  | X26: ireg | X27: ireg | X28: ireg | X29: ireg | X30: ireg
+  | X31: ireg.
 
-(** Floating-point registers, i.e. SSE2 registers *)
+Inductive ireg0: Type :=
+  | X0: ireg0 | X: ireg -> ireg0.
 
-Inductive freg: Type :=
-  | XMM0  | XMM1  | XMM2  | XMM3  | XMM4  | XMM5  | XMM6  | XMM7
-  | XMM8  | XMM9  | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15.
+Coercion X: ireg >-> ireg0.
 
-Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
-Proof. decide equality. Defined.
+(** Floating-point registers *)
 
-Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
+Inductive freg: Type :=
+  | F0: freg  | F1: freg  | F2: freg  | F3: freg
+  | F4: freg  | F5: freg  | F6: freg  | F7: freg
+  | F8: freg  | F9: freg  | F10: freg | F11: freg
+  | F12: freg | F13: freg | F14: freg | F15: freg
+  | F16: freg | F17: freg | F18: freg | F19: freg
+  | F20: freg | F21: freg | F22: freg | F23: freg
+  | F24: freg | F25: freg | F26: freg | F27: freg
+  | F28: freg | F29: freg | F30: freg | F31: freg.
+
+Definition ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
 
-(** Bits of the flags register. *)
+Definition ireg0_eq: forall (x y: ireg0), {x=y} + {x<>y}.
+Proof. decide equality. apply ireg_eq. Defined.
 
-Inductive crbit: Type :=
-  | ZF | CF | PF | SF | OF.
-
-(** All registers modeled here. *)
+Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+  
+(** We model the following registers of the RISC-V architecture. *)
 
 Inductive preg: Type :=
-  | PC: preg                            (**r program counter *)
-  | IR: ireg -> preg                    (**r integer register *)
-  | FR: freg -> preg                    (**r XMM register *)
-  | ST0: preg                           (**r top of FP stack *)
-  | CR: crbit -> preg                   (**r bit of the flags register *)
-  | RA: preg.                   (**r pseudo-reg representing return address *)
+  | IR: ireg -> preg                    (**r integer registers *)
+  | FR: freg -> preg                    (**r double-precision float registers *)
+  | PC: preg.                           (**r program counter *)
 
 Coercion IR: ireg >-> preg.
 Coercion FR: freg >-> preg.
-Coercion CR: crbit >-> preg.
 
-(** Conventional names for stack pointer ([SP]) and return address ([RA]) *)
+Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
+Proof. decide equality. apply ireg_eq. apply freg_eq. Defined.
 
-Notation SP := RSP (only parsing).
+Module PregEq.
+  Definition t  := preg.
+  Definition eq := preg_eq.
+End PregEq.
+
+Module Pregmap := EMap(PregEq).
 
-(** ** Instruction set. *)
+(** Conventional names for stack pointer ([SP]) and return address ([RA]). *)
+
+Notation "'SP'" := X2 (only parsing) : asm.
+Notation "'RA'" := X1 (only parsing) : asm.
+
+(** Offsets for load and store instructions.  An offset is either an
+  immediate integer or the low part of a symbol. *)
+
+Inductive offset : Type :=
+  | Ofsimm (ofs: ptrofs)
+  | Ofslow (id: ident) (ofs: ptrofs).
+
+(** The RISC-V instruction set is composed of several subsets.  We model
+  the "32I" (32-bit integers), "64I" (64-bit integers),
+  "M" (multiplication and division), 
+  "F" (single-precision floating-point)
+  and "D" (double-precision floating-point) subsets.  
+
+  For 32- and 64-bit integer arithmetic, the RISC-V instruction set comprises
+  generic integer operations such as ADD that operate over the full width
+  of an integer register (either 32 or 64 bit), plus specific instructions
+  such as ADDW that normalize their results to signed 32-bit integers.
+  Other instructions such as AND work equally well over 32- and 64-bit
+  integers, with the convention that 32-bit integers are represented
+  sign-extended in 64-bit registers.
+
+  This clever design is challenging to formalize in the CompCert value
+  model.  As a first step, we follow a more traditional approach,
+  also used in the x86 port, whereas we have two sets of (pseudo-)
+  instructions, one for 32-bit integer arithmetic, with suffix W,
+  the other for 64-bit integer arithmetic, with suffix L.  The mapping
+  to actual instructions is done when printing assembly code, as follows:
+  - In 32-bit mode:
+    ADDW becomes ADD, ADDL is an error, ANDW becomes AND, ANDL is an error.
+  - In 64-bit mode:
+    ADDW becomes ADDW, ADDL becomes ADD, ANDW and ANDL both become AND.
+*)
 
 Definition label := positive.
 
-(** General form of an addressing mode. *)
-
-Inductive addrmode: Type :=
-  | Addrmode (base: option ireg)
-             (ofs: option (ireg * Z))
-             (const: Z + ident * ptrofs).
-
-(** Testable conditions (for conditional jumps and more). *)
-
-Inductive testcond: Type :=
-  | Cond_e | Cond_ne
-  | Cond_b | Cond_be | Cond_ae | Cond_a
-  | Cond_l | Cond_le | Cond_ge | Cond_g
-  | Cond_p | Cond_np.
-
-(** Instructions.  IA32 instructions accept many combinations of
-  registers, memory references and immediate constants as arguments.
-  Here, we list only the combinations that we actually use.
-
-  Naming conventions for types:
-- [b]: 8 bits
-- [w]: 16 bits ("word")
-- [l]: 32 bits ("longword")
-- [q]: 64 bits ("quadword")
-- [d] or [sd]: FP double precision (64 bits)
-- [s] or [ss]: FP single precision (32 bits)
-
-  Naming conventions for operands:
-- [r]: integer register operand
-- [f]: XMM register operand
-- [m]: memory operand
-- [i]: immediate integer operand
-- [s]: immediate symbol operand
-- [l]: immediate label operand
-- [cl]: the [CL] register
-
-  For two-operand instructions, the first suffix describes the result
-  (and first argument), the second suffix describes the second argument.
+(** A note on immediates: there are various constraints on immediate
+  operands to RISC-V instructions.  We do not attempt to capture these
+  restrictions in the abstract syntax nor in the semantics.  The
+  assembler will emit an error if immediate operands exceed the
+  representable range.  Of course, our RISC-V generator (file
+  [Asmgen]) is careful to respect this range. *)
+
+Inductive instruction : Type :=
+  | Pmv     (rd: ireg) (rs: ireg)                    (**r integer move *)
+
+(** 32-bit integer register-immediate instructions *)
+  | Paddiw  (rd: ireg) (rs: ireg0) (imm: int)        (**r add immediate *)
+  | Psltiw  (rd: ireg) (rs: ireg0) (imm: int)        (**r set-less-than immediate *)
+  | Psltiuw (rd: ireg) (rs: ireg0) (imm: int)        (**r set-less-than unsigned immediate *)
+  | Pandiw  (rd: ireg) (rs: ireg0) (imm: int)        (**r and immediate *)
+  | Poriw   (rd: ireg) (rs: ireg0) (imm: int)        (**r or immediate *)
+  | Pxoriw  (rd: ireg) (rs: ireg0) (imm: int)        (**r xor immediate *)
+  | Pslliw  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-left-logical immediate *)
+  | Psrliw  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-right-logical immediate *)
+  | Psraiw  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-right-arith immediate *)
+  | Pluiw   (rd: ireg)             (imm: int)        (**r load upper-immediate *)
+(** 32-bit integer register-register instructions *)
+  | Paddw   (rd: ireg) (rs1 rs2: ireg0)              (**r integer addition *)
+  | Psubw   (rd: ireg) (rs1 rs2: ireg0)              (**r integer subtraction *)
+
+  | Pmulw   (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply low *)
+  | Pmulhw  (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply high signed *)
+  | Pmulhuw (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply high unsigned *)
+  | Pdivw   (rd: ireg) (rs1 rs2: ireg0)              (**r integer division *)
+  | Pdivuw  (rd: ireg) (rs1 rs2: ireg0)              (**r unsigned integer division *)
+  | Premw   (rd: ireg) (rs1 rs2: ireg0)              (**r integer remainder *)
+  | Premuw  (rd: ireg) (rs1 rs2: ireg0)              (**r unsigned integer remainder *)
+  | Psltw   (rd: ireg) (rs1 rs2: ireg0)              (**r set-less-than *)
+  | Psltuw  (rd: ireg) (rs1 rs2: ireg0)              (**r set-less-than unsigned *)
+  | Pseqw   (rd: ireg) (rs1 rs2: ireg0)              (**r [rd <- rs1 == rs2] (pseudo) *)
+  | Psnew   (rd: ireg) (rs1 rs2: ireg0)              (**r [rd <- rs1 != rs2] (pseudo) *)
+  | Pandw   (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise and *)
+  | Porw    (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise or *)
+  | Pxorw   (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise xor *)
+  | Psllw   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-left-logical *)
+  | Psrlw   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-right-logical *)
+  | Psraw   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-right-arith *)
+
+(** 64-bit integer register-immediate instructions *)
+  | Paddil  (rd: ireg) (rs: ireg0) (imm: int64)      (**r add immediate *)
+  | Psltil  (rd: ireg) (rs: ireg0) (imm: int64)      (**r set-less-than immediate *)
+  | Psltiul (rd: ireg) (rs: ireg0) (imm: int64)      (**r set-less-than unsigned immediate *)
+  | Pandil  (rd: ireg) (rs: ireg0) (imm: int64)      (**r and immediate *)
+  | Poril   (rd: ireg) (rs: ireg0) (imm: int64)      (**r or immediate *)
+  | Pxoril  (rd: ireg) (rs: ireg0) (imm: int64)      (**r xor immediate *)
+  | Psllil  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-left-logical immediate *)
+  | Psrlil  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-right-logical immediate *)
+  | Psrail  (rd: ireg) (rs: ireg0) (imm: int)        (**r shift-right-arith immediate *)
+  | Pluil   (rd: ireg)             (imm: int64)      (**r load upper-immediate *)
+(** 64-bit integer register-register instructions *)
+  | Paddl   (rd: ireg) (rs1 rs2: ireg0)              (**r integer addition *)
+  | Psubl   (rd: ireg) (rs1 rs2: ireg0)              (**r integer subtraction *)
+
+  | Pmull   (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply low *)
+  | Pmulhl  (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply high signed *)
+  | Pmulhul (rd: ireg) (rs1 rs2: ireg0)              (**r integer multiply high unsigned *)
+  | Pdivl   (rd: ireg) (rs1 rs2: ireg0)              (**r integer division *)
+  | Pdivul  (rd: ireg) (rs1 rs2: ireg0)              (**r unsigned integer division *)
+  | Preml   (rd: ireg) (rs1 rs2: ireg0)              (**r integer remainder *)
+  | Premul  (rd: ireg) (rs1 rs2: ireg0)              (**r unsigned integer remainder *)
+  | Psltl   (rd: ireg) (rs1 rs2: ireg0)              (**r set-less-than *)
+  | Psltul  (rd: ireg) (rs1 rs2: ireg0)              (**r set-less-than unsigned *)
+  | Pseql   (rd: ireg) (rs1 rs2: ireg0)              (**r [rd <- rs1 == rs2] (pseudo) *)
+  | Psnel   (rd: ireg) (rs1 rs2: ireg0)              (**r [rd <- rs1 != rs2] (pseudo) *)
+  | Pandl   (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise and *)
+  | Porl    (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise or *)
+  | Pxorl   (rd: ireg) (rs1 rs2: ireg0)              (**r bitwise xor *)
+  | Pslll   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-left-logical *)
+  | Psrll   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-right-logical *)
+  | Psral   (rd: ireg) (rs1 rs2: ireg0)              (**r shift-right-arith *)
+
+  | Pcvtl2w (rd: ireg) (rs: ireg0)                   (**r int64->int32 (pseudo) *)
+  | Pcvtw2l (r: ireg)                                (**r int32 signed -> int64 (pseudo) *)
+
+  (* Unconditional jumps.  Links are always to X1/RA. *)
+  | Pj_l    (l: label)                              (**r jump to label *)
+  | Pj_s    (symb: ident) (sg: signature)           (**r jump to symbol *)
+  | Pj_r    (r: ireg)     (sg: signature)           (**r jump register *)
+  | Pjal_s  (symb: ident) (sg: signature)           (**r jump-and-link symbol *)
+  | Pjal_r  (r: ireg)     (sg: signature)           (**r jump-and-link register *)
+
+  (* Conditional branches, 32-bit comparisons *)
+  | Pbeqw   (rs1 rs2: ireg0) (l: label)             (**r branch-if-equal *)
+  | Pbnew   (rs1 rs2: ireg0) (l: label)             (**r branch-if-not-equal signed *)
+  | Pbltw   (rs1 rs2: ireg0) (l: label)             (**r branch-if-less signed *)
+  | Pbltuw  (rs1 rs2: ireg0) (l: label)             (**r branch-if-less unsigned *)
+  | Pbgew   (rs1 rs2: ireg0) (l: label)             (**r branch-if-greater-or-equal signed *)
+  | Pbgeuw  (rs1 rs2: ireg0) (l: label)             (**r branch-if-greater-or-equal unsigned *)
+
+  (* Conditional branches, 64-bit comparisons *)
+  | Pbeql   (rs1 rs2: ireg0) (l: label)             (**r branch-if-equal *)
+  | Pbnel   (rs1 rs2: ireg0) (l: label)             (**r branch-if-not-equal signed *)
+  | Pbltl   (rs1 rs2: ireg0) (l: label)             (**r branch-if-less signed *)
+  | Pbltul  (rs1 rs2: ireg0) (l: label)             (**r branch-if-less unsigned *)
+  | Pbgel   (rs1 rs2: ireg0) (l: label)             (**r branch-if-greater-or-equal signed *)
+  | Pbgeul  (rs1 rs2: ireg0) (l: label)             (**r branch-if-greater-or-equal unsigned *)
+
+  (* Loads and stores *)
+  | Plb     (rd: ireg) (ra: ireg) (ofs: offset)     (**r load signed int8 *)
+  | Plbu    (rd: ireg) (ra: ireg) (ofs: offset)     (**r load unsigned int8 *)
+  | Plh     (rd: ireg) (ra: ireg) (ofs: offset)     (**r load signed int16 *)
+  | Plhu    (rd: ireg) (ra: ireg) (ofs: offset)     (**r load unsigned int16 *)
+  | Plw     (rd: ireg) (ra: ireg) (ofs: offset)     (**r load int32 *)
+  | Plw_a   (rd: ireg) (ra: ireg) (ofs: offset)     (**r load any32 *)
+  | Pld     (rd: ireg) (ra: ireg) (ofs: offset)     (**r load int64 *)
+  | Pld_a   (rd: ireg) (ra: ireg) (ofs: offset)     (**r load any64 *)
+
+  | Psb     (rs: ireg) (ra: ireg) (ofs: offset)     (**r store int8 *)
+  | Psh     (rs: ireg) (ra: ireg) (ofs: offset)     (**r store int16 *)
+  | Psw     (rs: ireg) (ra: ireg) (ofs: offset)     (**r store int32 *)
+  | Psw_a   (rs: ireg) (ra: ireg) (ofs: offset)     (**r store any32 *)
+  | Psd     (rs: ireg) (ra: ireg) (ofs: offset)     (**r store int64 *)
+  | Psd_a   (rs: ireg) (ra: ireg) (ofs: offset)     (**r store any64 *)
+
+  (* Synchronization *)
+  | Pfence                                          (**r fence *)
+
+  (* floating point register move *)
+  | Pfmv     (rd: freg) (rs: freg)                  (**r move *)
+  | Pfmvxs   (rd: ireg) (rs: freg)                  (**r bitwise move FP single to integer register *)
+  | Pfmvxd   (rd: ireg) (rs: freg)                  (**r bitwise move FP double to integer register *)
+  | Pfmvsx   (rd: freg) (rs: ireg)                  (**r bitwise move integer register to FP single *)
+  | Pfmvdx   (rd: freg) (rs: ireg)                  (**r bitwise move integer register to FP double*)
+
+  (* 32-bit (single-precision) floating point *)
+  | Pfls     (rd: freg) (ra: ireg) (ofs: offset)    (**r load float *)
+  | Pfss     (rs: freg) (ra: ireg) (ofs: offset)    (**r store float *)
+
+  | Pfnegs   (rd: freg) (rs: freg)                  (**r negation *)
+  | Pfabss   (rd: freg) (rs: freg)                  (**r absolute value *)
+
+  | Pfadds   (rd: freg) (rs1 rs2: freg)             (**r addition *)
+  | Pfsubs   (rd: freg) (rs1 rs2: freg)             (**r subtraction *)
+  | Pfmuls   (rd: freg) (rs1 rs2: freg)             (**r multiplication *)
+  | Pfdivs   (rd: freg) (rs1 rs2: freg)             (**r division *)
+  | Pfmins   (rd: freg) (rs1 rs2: freg)             (**r minimum *)
+  | Pfmaxs   (rd: freg) (rs1 rs2: freg)             (**r maximum *)
+
+  | Pfeqs    (rd: ireg) (rs1 rs2: freg)             (**r compare equal *)
+  | Pflts    (rd: ireg) (rs1 rs2: freg)             (**r compare less-than *)
+  | Pfles    (rd: ireg) (rs1 rs2: freg)             (**r compare less-than/equal *)
+
+  | Pfsqrts  (rd: freg) (rs: freg)                  (**r square-root *)
+
+  | Pfmadds  (rd: freg) (rs1 rs2 rs3: freg)         (**r fused multiply-add *)
+  | Pfmsubs  (rd: freg) (rs1 rs2 rs3: freg)         (**r fused multiply-sub *)
+  | Pfnmadds (rd: freg) (rs1 rs2 rs3: freg)         (**r fused negated multiply-add *)
+  | Pfnmsubs (rd: freg) (rs1 rs2 rs3: freg)         (**r fused negated multiply-sub *)
+
+  | Pfcvtws  (rd: ireg) (rs: freg)                  (**r float32 -> int32 conversion *)
+  | Pfcvtwus (rd: ireg) (rs: freg)                  (**r float32 -> unsigned int32 conversion *)
+  | Pfcvtsw  (rd: freg) (rs: ireg0)                 (**r int32 -> float32 conversion *)
+  | Pfcvtswu (rd: freg) (rs: ireg0)                 (**r unsigned int32 -> float32 conversion *)
+
+  | Pfcvtls  (rd: ireg) (rs: freg)                  (**r float32 -> int64 conversion *)
+  | Pfcvtlus (rd: ireg) (rs: freg)                  (**r float32 -> unsigned int64 conversion *)
+  | Pfcvtsl  (rd: freg) (rs: ireg0)                 (**r int64 -> float32 conversion *)
+  | Pfcvtslu (rd: freg) (rs: ireg0)                 (**r unsigned int 64-> float32 conversion *)
+
+  (* 64-bit (double-precision) floating point *)
+  | Pfld     (rd: freg) (ra: ireg) (ofs: offset)    (**r load 64-bit float *)
+  | Pfld_a   (rd: freg) (ra: ireg) (ofs: offset)    (**r load any64 *)
+  | Pfsd     (rd: freg) (ra: ireg) (ofs: offset)    (**r store 64-bit float *)
+  | Pfsd_a   (rd: freg) (ra: ireg) (ofs: offset)    (**r store any64 *)
+
+  | Pfnegd   (rd: freg) (rs: freg)                  (**r negation *)
+  | Pfabsd   (rd: freg) (rs: freg)                  (**r absolute value *)
+
+  | Pfaddd   (rd: freg) (rs1 rs2: freg)             (**r addition *)
+  | Pfsubd   (rd: freg) (rs1 rs2: freg)             (**r subtraction *)
+  | Pfmuld   (rd: freg) (rs1 rs2: freg)             (**r multiplication *)
+  | Pfdivd   (rd: freg) (rs1 rs2: freg)             (**r division *)
+  | Pfmind   (rd: freg) (rs1 rs2: freg)             (**r minimum *)
+  | Pfmaxd   (rd: freg) (rs1 rs2: freg)             (**r maximum *)
+
+  | Pfeqd    (rd: ireg) (rs1 rs2: freg)             (**r compare equal *)
+  | Pfltd    (rd: ireg) (rs1 rs2: freg)             (**r compare less-than *)
+  | Pfled    (rd: ireg) (rs1 rs2: freg)             (**r compare less-than/equal *)
+
+  | Pfsqrtd  (rd: freg) (rs: freg)                  (**r square-root *)
+
+  | Pfmaddd  (rd: freg) (rs1 rs2 rs3: freg)         (**r fused multiply-add *)
+  | Pfmsubd  (rd: freg) (rs1 rs2 rs3: freg)         (**r fused multiply-sub *)
+  | Pfnmaddd (rd: freg) (rs1 rs2 rs3: freg)         (**r fused negated multiply-add *)
+  | Pfnmsubd (rd: freg) (rs1 rs2 rs3: freg)         (**r fused negated multiply-sub *)
+
+  | Pfcvtwd  (rd: ireg) (rs: freg)                  (**r float -> int32 conversion *)
+  | Pfcvtwud (rd: ireg) (rs: freg)                  (**r float -> unsigned int32 conversion *)
+  | Pfcvtdw  (rd: freg) (rs: ireg0)                 (**r int32 -> float conversion *)
+  | Pfcvtdwu (rd: freg) (rs: ireg0)                 (**r unsigned int32 -> float conversion *)
+
+  | Pfcvtld  (rd: ireg) (rs: freg)                  (**r float -> int64 conversion *)
+  | Pfcvtlud (rd: ireg) (rs: freg)                  (**r float -> unsigned int64 conversion *)
+  | Pfcvtdl  (rd: freg) (rs: ireg0)                 (**r int64 -> float conversion *)
+  | Pfcvtdlu (rd: freg) (rs: ireg0)                 (**r unsigned int64 -> float conversion *)
+
+  | Pfcvtds  (rd: freg) (rs: freg)                  (**r float32 -> float   *)
+  | Pfcvtsd  (rd: freg) (rs: freg)                  (**r float   -> float32 *)
+
+  (* Pseudo-instructions *)
+  | Pallocframe (sz: Z) (pos: ptrofs)               (**r allocate new stack frame *)
+  | Pfreeframe  (sz: Z) (pos: ptrofs)               (**r deallocate stack frame and restore previous frame *)
+  | Plabel  (lbl: label)                            (**r define a code label *)
+  | Ploadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the address of a symbol *)
+  | Ploadsymbol_high (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the high part of the address of a symbol *)
+  | Ploadli (rd: ireg) (i: int64)                   (**r load an immediate int64 *)
+  | Ploadfi (rd: freg) (f: float)                   (**r load an immediate float *)
+  | Ploadsi (rd: freg) (f: float32)                 (**r load an immediate single *)
+  | Pbtbl   (r: ireg)  (tbl: list label)            (**r N-way branch through a jump table *)
+  | Pbuiltin: external_function -> list (builtin_arg preg)
+              -> builtin_res preg -> instruction    (**r built-in function (pseudo) *)
+  | Pselectl (rd: ireg) (rb: ireg0) (rt: ireg0) (rf: ireg0)
+  | Pnop : instruction.                             (**r nop instruction *)
+
+
+(** The pseudo-instructions are the following:
+
+- [Plabel]: define a code label at the current program point.
+
+- [Ploadsymbol]: load the address of a symbol in an integer register.
+  Expands to the [la] assembler pseudo-instruction, which does the right
+  thing even if we are in PIC mode.
+
+- [Ploadli rd ival]: load an immediate 64-bit integer into an integer
+  register rd.  Expands to a load from an address in the constant data section,
+  using the integer register x31 as temporary:
+<<
+        lui x31, %hi(lbl)
+        ld rd, %lo(lbl)(x31)
+lbl:
+        .quad ival
+>>
+
+- [Ploadfi rd fval]: similar to [Ploadli] but loads a double FP constant fval
+  into a float register rd.
+
+- [Ploadsi rd fval]: similar to [Ploadli] but loads a single FP constant fval
+  into a float register rd.
+
+- [Pallocframe sz pos]: in the formal semantics, this
+  pseudo-instruction allocates a memory block with bounds [0] and
+  [sz], stores the value of the stack pointer at offset [pos] in this
+  block, and sets the stack pointer to the address of the bottom of
+  this block.
+  In the printed ASM assembly code, this allocation is:
+<<
+        mv      x30, sp
+        sub     sp,  sp, #sz
+        sw      x30, #pos(sp)
+>>
+  This cannot be expressed in our memory model, which does not reflect
+  the fact that stack frames are adjacent and allocated/freed
+  following a stack discipline.
+
+- [Pfreeframe sz pos]: in the formal semantics, this pseudo-instruction
+  reads the word at [pos] of the block pointed by the stack pointer,
+  frees this block, and sets the stack pointer to the value read.
+  In the printed ASM assembly code, this freeing is just an increment of [sp]:
+<<
+        add     sp,  sp, #sz
+>>
+  Again, our memory model cannot comprehend that this operation
+  frees (logically) the current stack frame.
+
+- [Pbtbl reg table]: this is a N-way branch, implemented via a jump table
+  as follows:
+<<
+        la      x31, table
+        add     x31, x31, reg
+        jr      x31
+table:  .long   table[0], table[1], ...
+>>
+  Note that [reg] contains 4 times the index of the desired table entry.
+
+- [Pseq rd rs1 rs2]: since unsigned comparisons have particular
+  semantics for pointers, we cannot encode equality directly using
+  xor/sub etc, which have only integer semantics.
+<<
+        xor     rd, rs1, rs2
+        sltiu   rd, rd, 1
+>>
+  The [xor] is omitted if one of [rs1] and [rs2] is [x0].
+
+- [Psne rd rs1 rs2]: similarly for unsigned inequality.
+<<
+        xor     rd, rs1, rs2
+        sltu    rd, x0, rd
+>>
 *)
 
-Inductive instruction: Type :=
-  (** Moves *)
-  | Pmov_rr (rd: ireg) (r1: ireg)       (**r [mov] (integer) *)
-  | Pmovl_ri (rd: ireg) (n: int)
-  | Pmovq_ri (rd: ireg) (n: int64)
-  | Pmov_rs (rd: ireg) (id: ident)
-  | Pmovl_rm (rd: ireg) (a: addrmode)
-  | Pmovq_rm (rd: ireg) (a: addrmode)
-  | Pmovl_mr (a: addrmode) (rs: ireg)
-  | Pmovq_mr (a: addrmode) (rs: ireg)
-  | Pmovsd_ff (rd: freg) (r1: freg)     (**r [movsd] (single 64-bit float) *)
-  | Pmovsd_fi (rd: freg) (n: float)     (**r (pseudo-instruction) *)
-  | Pmovsd_fm (rd: freg) (a: addrmode)
-  | Pmovsd_mf (a: addrmode) (r1: freg)
-  | Pmovss_fi (rd: freg) (n: float32)   (**r [movss] (single 32-bit float) *)
-  | Pmovss_fm (rd: freg) (a: addrmode)
-  | Pmovss_mf (a: addrmode) (r1: freg)
-  | Pfldl_m (a: addrmode)               (**r [fld] double precision *)
-  | Pfstpl_m (a: addrmode)              (**r [fstp] double precision *)
-  | Pflds_m (a: addrmode)               (**r [fld] simple precision *)
-  | Pfstps_m (a: addrmode)              (**r [fstp] simple precision *)
-  (** Moves with conversion *)
-  | Pmovb_mr (a: addrmode) (rs: ireg)   (**r [mov] (8-bit int) *)
-  | Pmovw_mr (a: addrmode) (rs: ireg)   (**r [mov] (16-bit int) *)
-  | Pmovzb_rr (rd: ireg) (rs: ireg)     (**r [movzb] (8-bit zero-extension) *)
-  | Pmovzb_rm (rd: ireg) (a: addrmode)
-  | Pmovsb_rr (rd: ireg) (rs: ireg)     (**r [movsb] (8-bit sign-extension) *)
-  | Pmovsb_rm (rd: ireg) (a: addrmode)
-  | Pmovzw_rr (rd: ireg) (rs: ireg)     (**r [movzw] (16-bit zero-extension) *)
-  | Pmovzw_rm (rd: ireg) (a: addrmode)
-  | Pmovsw_rr (rd: ireg) (rs: ireg)     (**r [movsw] (16-bit sign-extension) *)
-  | Pmovsw_rm (rd: ireg) (a: addrmode)
-  | Pmovzl_rr (rd: ireg) (rs: ireg)     (**r [movzl] (32-bit zero-extension) *)
-  | Pmovsl_rr (rd: ireg) (rs: ireg)     (**r [movsl] (32-bit sign-extension) *)
-  | Pmovls_rr (rd: ireg)                (** 64 to 32 bit conversion (pseudo) *)
-  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single float *)
-  | Pcvtss2sd_ff (rd: freg) (r1: freg)  (**r conversion to double float *)
-  | Pcvttsd2si_rf (rd: ireg) (r1: freg) (**r double to signed int *)
-  | Pcvtsi2sd_fr (rd: freg) (r1: ireg)  (**r signed int to double *)
-  | Pcvttss2si_rf (rd: ireg) (r1: freg) (**r single to signed int *)
-  | Pcvtsi2ss_fr (rd: freg) (r1: ireg)  (**r signed int to single *)
-  | Pcvttsd2sl_rf (rd: ireg) (r1: freg) (**r double to signed long *)
-  | Pcvtsl2sd_fr (rd: freg) (r1: ireg)  (**r signed long to double *)
-  | Pcvttss2sl_rf (rd: ireg) (r1: freg) (**r single to signed long *)
-  | Pcvtsl2ss_fr (rd: freg) (r1: ireg)  (**r signed long to single *)
-  (** Integer arithmetic *)
-  | Pleal (rd: ireg) (a: addrmode)
-  | Pleaq (rd: ireg) (a: addrmode)
-  | Pnegl (rd: ireg)
-  | Pnegq (rd: ireg)
-  | Paddl_ri (rd: ireg) (n: int)
-  | Paddq_ri (rd: ireg) (n: int64)
-  | Psubl_rr (rd: ireg) (r1: ireg)
-  | Psubq_rr (rd: ireg) (r1: ireg)
-  | Pimull_rr (rd: ireg) (r1: ireg)
-  | Pimulq_rr (rd: ireg) (r1: ireg)
-  | Pimull_ri (rd: ireg) (n: int)
-  | Pimulq_ri (rd: ireg) (n: int64)
-  | Pimull_r (r1: ireg)
-  | Pimulq_r (r1: ireg)
-  | Pmull_r (r1: ireg)
-  | Pmulq_r (r1: ireg)
-  | Pcltd
-  | Pcqto
-  | Pdivl (r1: ireg)
-  | Pdivq (r1: ireg)
-  | Pidivl (r1: ireg)
-  | Pidivq (r1: ireg)
-  | Pandl_rr (rd: ireg) (r1: ireg)
-  | Pandq_rr (rd: ireg) (r1: ireg)
-  | Pandl_ri (rd: ireg) (n: int)
-  | Pandq_ri (rd: ireg) (n: int64)
-  | Porl_rr (rd: ireg) (r1: ireg)
-  | Porq_rr (rd: ireg) (r1: ireg)
-  | Porl_ri (rd: ireg) (n: int)
-  | Porq_ri (rd: ireg) (n: int64)
-  | Pxorl_r (rd: ireg)                  (**r [xor] with self = set to zero *)
-  | Pxorq_r (rd: ireg)
-  | Pxorl_rr (rd: ireg) (r1: ireg)
-  | Pxorq_rr (rd: ireg) (r1: ireg)
-  | Pxorl_ri (rd: ireg) (n: int)
-  | Pxorq_ri (rd: ireg) (n: int64)
-  | Pnotl (rd: ireg)
-  | Pnotq (rd: ireg)
-  | Psall_rcl (rd: ireg)
-  | Psalq_rcl (rd: ireg)
-  | Psall_ri (rd: ireg) (n: int)
-  | Psalq_ri (rd: ireg) (n: int)
-  | Pshrl_rcl (rd: ireg)
-  | Pshrq_rcl (rd: ireg)
-  | Pshrl_ri (rd: ireg) (n: int)
-  | Pshrq_ri (rd: ireg) (n: int)
-  | Psarl_rcl (rd: ireg)
-  | Psarq_rcl (rd: ireg)
-  | Psarl_ri (rd: ireg) (n: int)
-  | Psarq_ri (rd: ireg) (n: int)
-  | Pshld_ri (rd: ireg) (r1: ireg) (n: int)
-  | Prorl_ri (rd: ireg) (n: int)
-  | Prorq_ri (rd: ireg) (n: int)
-  | Pcmpl_rr (r1 r2: ireg)
-  | Pcmpq_rr (r1 r2: ireg)
-  | Pcmpl_ri (r1: ireg) (n: int)
-  | Pcmpq_ri (r1: ireg) (n: int64)
-  | Ptestl_rr (r1 r2: ireg)
-  | Ptestq_rr (r1 r2: ireg)
-  | Ptestl_ri (r1: ireg) (n: int)
-  | Ptestq_ri (r1: ireg) (n: int64)
-  | Pcmov (c: testcond) (rd: ireg) (r1: ireg)
-  | Psetcc (c: testcond) (rd: ireg)
-  (** Floating-point arithmetic *)
-  | Paddd_ff (rd: freg) (r1: freg)
-  | Psubd_ff (rd: freg) (r1: freg)
-  | Pmuld_ff (rd: freg) (r1: freg)
-  | Pdivd_ff (rd: freg) (r1: freg)
-  | Pnegd (rd: freg)
-  | Pabsd (rd: freg)
-  | Pcomisd_ff (r1 r2: freg)
-  | Pxorpd_f (rd: freg)	              (**r [xor] with self = set to zero *)
-  | Padds_ff (rd: freg) (r1: freg)
-  | Psubs_ff (rd: freg) (r1: freg)
-  | Pmuls_ff (rd: freg) (r1: freg)
-  | Pdivs_ff (rd: freg) (r1: freg)
-  | Pnegs (rd: freg)
-  | Pabss (rd: freg)
-  | Pcomiss_ff (r1 r2: freg)
-  | Pxorps_f (rd: freg)	              (**r [xor] with self = set to zero *)
-  (** Branches and calls *)
-  | Pjmp_l (l: label)
-  | Pjmp_s (symb: ident) (sg: signature)
-  | Pjmp_r (r: ireg) (sg: signature)
-  | Pjcc (c: testcond)(l: label)
-  | Pjcc2 (c1 c2: testcond)(l: label)   (**r pseudo *)
-  | Pjmptbl (r: ireg) (tbl: list label) (**r pseudo *)
-  | Pcall_s (symb: ident) (sg: signature)
-  | Pcall_r (r: ireg) (sg: signature)
-  | Pret
-  (** Saving and restoring registers *)
-  | Pmov_rm_a (rd: ireg) (a: addrmode)  (**r like [Pmov_rm], using [Many64] chunk *)
-  | Pmov_mr_a (a: addrmode) (rs: ireg)  (**r like [Pmov_mr], using [Many64] chunk *)
-  | Pmovsd_fm_a (rd: freg) (a: addrmode) (**r like [Pmovsd_fm], using [Many64] chunk *)
-  | Pmovsd_mf_a (a: addrmode) (r1: freg) (**r like [Pmovsd_mf], using [Many64] chunk *)
-  (** Pseudo-instructions *)
-  | Plabel(l: label)
-  | Pallocframe(sz: Z)(ofs_ra ofs_link: ptrofs)
-  | Pfreeframe(sz: Z)(ofs_ra ofs_link: ptrofs)
-  | Pbuiltin(ef: external_function)(args: list (builtin_arg preg))(res: builtin_res preg)
-  (** Instructions not generated by [Asmgen] -- TO CHECK *)
-  | Padcl_ri (rd: ireg) (n: int)
-  | Padcl_rr (rd: ireg) (r2: ireg)
-  | Paddl_mi (a: addrmode) (n: int)
-  | Paddl_rr (rd: ireg) (r2: ireg)
-  | Pbsfl (rd: ireg) (r1: ireg)
-  | Pbsfq (rd: ireg) (r1: ireg)
-  | Pbsrl (rd: ireg) (r1: ireg)
-  | Pbsrq (rd: ireg) (r1: ireg)
-  | Pbswap64 (rd: ireg)
-  | Pbswap32 (rd: ireg)
-  | Pbswap16 (rd: ireg)
-  | Pcfi_adjust (n: int)
-  | Pfmadd132 (rd: freg) (r2: freg) (r3: freg)
-  | Pfmadd213 (rd: freg) (r2: freg) (r3: freg)
-  | Pfmadd231 (rd: freg) (r2: freg) (r3: freg)
-  | Pfmsub132 (rd: freg) (r2: freg) (r3: freg)
-  | Pfmsub213 (rd: freg) (r2: freg) (r3: freg)
-  | Pfmsub231 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmadd132 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmadd213 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmadd231 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmsub132 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmsub213 (rd: freg) (r2: freg) (r3: freg)
-  | Pfnmsub231 (rd: freg) (r2: freg) (r3: freg)
-  | Pmaxsd (rd: freg) (r2: freg)
-  | Pminsd (rd: freg) (r2: freg)
-  | Pmovb_rm (rd: ireg) (a: addrmode)
-  | Pmovq_rf (rd: ireg) (r1: freg)
-  | Pmovsq_mr  (a: addrmode) (rs: freg)
-  | Pmovsq_rm (rd: freg) (a: addrmode)
-  | Pmovsb
-  | Pmovsw
-  | Pmovw_rm (rd: ireg) (ad: addrmode)
-  | Pnop
-  | Prep_movsl
-  | Psbbl_rr (rd: ireg) (r2: ireg)
-  | Psqrtsd (rd: freg) (r1: freg)
-  | Psubl_ri (rd: ireg) (n: int)
-  | Psubq_ri (rd: ireg) (n: int64).
-
 Definition code := list instruction.
 Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
 Definition fundef := AST.fundef function.
@@ -299,20 +434,30 @@ Definition program := AST.program fundef unit.
 
 (** * Operational semantics *)
 
-Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
-Proof. decide equality. apply ireg_eq. apply freg_eq. decide equality. Defined.
-
-Module PregEq.
-  Definition t := preg.
-  Definition eq := preg_eq.
-End PregEq.
-
-Module Pregmap := EMap(PregEq).
+(** The semantics operates over a single mapping from registers
+  (type [preg]) to values.  We maintain
+  the convention that integer registers are mapped to values of
+  type [Tint] or [Tlong] (in 64 bit mode),
+  and float registers to values of type [Tsingle] or [Tfloat]. *)
 
 Definition regset := Pregmap.t val.
 Definition genv := Genv.t fundef unit.
 
+Definition get0w (rs: regset) (r: ireg0) : val :=
+  match r with
+  | X0 => Vint Int.zero
+  | X r => rs r
+  end.
+
+Definition get0l (rs: regset) (r: ireg0) : val :=
+  match r with
+  | X0 => Vlong Int64.zero
+  | X r => rs r
+  end.
+
 Notation "a # b" := (a b) (at level 1, only parsing) : asm.
+Notation "a ## b" := (get0w a b) (at level 1) : asm.
+Notation "a ### b" := (get0l a b) (at level 1) : asm.
 Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm.
 
 Open Scope asm.
@@ -333,6 +478,14 @@ Definition set_pair (p: rpair preg) (v: val) (rs: regset) : regset :=
   | Twolong rhi rlo => rs#rhi <- (Val.hiword v) #rlo <- (Val.loword v)
   end.
 
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+  | _, _ => rs
+  end.
+
 (** Assigning the result of a builtin *)
 
 Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset :=
@@ -365,7 +518,7 @@ Lemma is_label_correct:
   if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
 Proof.
   intros.  destruct instr; simpl; try discriminate.
-  case (peq lbl l); intro; congruence.
+  case (peq lbl lbl0); intro; congruence.
 Qed.
 
 Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
@@ -377,167 +530,21 @@ Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
 
 Variable ge: genv.
 
-(** Evaluating an addressing mode *)
-
-Definition eval_addrmode32 (a: addrmode) (rs: regset) : val :=
-  let '(Addrmode base ofs const) := a in
-  Val.add  (match base with
-             | None => Vint Int.zero
-             | Some r => rs r
-            end)
-  (Val.add (match ofs with
-             | None => Vint Int.zero
-             | Some(r, sc) =>
-                if zeq sc 1
-                then rs r
-                else Val.mul (rs r) (Vint (Int.repr sc))
-             end)
-           (match const with
-            | inl ofs => Vint (Int.repr ofs)
-            | inr(id, ofs) => Genv.symbol_address ge id ofs
-            end)).
-
-Definition eval_addrmode64 (a: addrmode) (rs: regset) : val :=
-  let '(Addrmode base ofs const) := a in
-  Val.addl (match base with
-             | None => Vlong Int64.zero
-             | Some r => rs r
-            end)
-  (Val.addl (match ofs with
-             | None => Vlong Int64.zero
-             | Some(r, sc) =>
-                if zeq sc 1
-                then rs r
-                else Val.mull (rs r) (Vlong (Int64.repr sc))
-             end)
-           (match const with
-            | inl ofs => Vlong (Int64.repr ofs)
-            | inr(id, ofs) => Genv.symbol_address ge id ofs
-            end)).
-
-Definition eval_addrmode (a: addrmode) (rs: regset) : val :=
-  if Archi.ptr64 then eval_addrmode64 a rs else eval_addrmode32 a rs.
-
-(** Performing a comparison *)
-
-(** Integer comparison between x and y:
--       ZF = 1 if x = y, 0 if x != y
--       CF = 1 if x <u y, 0 if x >=u y
--       SF = 1 if x - y is negative, 0 if x - y is positive
--       OF = 1 if x - y overflows (signed), 0 if not
--       PF is undefined
-*)
+(** The two functions below axiomatize how the linker processes
+  symbolic references [symbol + offset)] and splits their
+  actual values into a 20-bit high part [%hi(symbol + offset)] and 
+  a 12-bit low part [%lo(symbol + offset)]. *)
 
-Definition compare_ints (x y: val) (rs: regset) (m: mem): regset :=
-  rs #ZF  <- (Val.cmpu (Mem.valid_pointer m) Ceq x y)
-     #CF  <- (Val.cmpu (Mem.valid_pointer m) Clt x y)
-     #SF  <- (Val.negative (Val.sub x y))
-     #OF  <- (Val.sub_overflow x y)
-     #PF  <- Vundef.
-
-Definition compare_longs (x y: val) (rs: regset) (m: mem): regset :=
-  rs #ZF  <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq x y))
-     #CF  <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt x y))
-     #SF  <- (Val.negativel (Val.subl x y))
-     #OF  <- (Val.subl_overflow x y)
-     #PF  <- Vundef.
-
-(** Floating-point comparison between x and y:
--       ZF = 1 if x=y or unordered, 0 if x<>y and ordered
--       CF = 1 if x<y or unordered, 0 if x>=y.
--       PF = 1 if unordered, 0 if ordered.
--       SF and 0F are undefined
-*)
+Parameter low_half: genv -> ident -> ptrofs -> ptrofs.
+Parameter high_half: genv -> ident -> ptrofs -> val.
 
-Definition compare_floats (vx vy: val) (rs: regset) : regset :=
-  match vx, vy with
-  | Vfloat x, Vfloat y =>
-      rs #ZF  <- (Val.of_bool (Float.cmp Ceq x y || negb (Float.ordered x y)))
-         #CF  <- (Val.of_bool (negb (Float.cmp Cge x y)))
-         #PF  <- (Val.of_bool (negb (Float.ordered x y)))
-         #SF  <- Vundef
-         #OF  <- Vundef
-  | _, _ =>
-      undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
-  end.
+(** The fundamental property of these operations is that, when applied
+  to the address of a symbol, their results can be recombined by
+  addition, rebuilding the original address. *)
 
-Definition compare_floats32 (vx vy: val) (rs: regset) : regset :=
-  match vx, vy with
-  | Vsingle x, Vsingle y =>
-      rs #ZF  <- (Val.of_bool (Float32.cmp Ceq x y || negb (Float32.ordered x y)))
-         #CF  <- (Val.of_bool (negb (Float32.cmp Cge x y)))
-         #PF  <- (Val.of_bool (negb (Float32.ordered x y)))
-         #SF  <- Vundef
-         #OF  <- Vundef
-  | _, _ =>
-      undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
-  end.
-
-(** Testing a condition *)
-
-Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
-  match c with
-  | Cond_e =>
-      match rs ZF with
-      | Vint n => Some (Int.eq n Int.one)
-      | _ => None
-      end
-  | Cond_ne =>
-      match rs ZF with
-      | Vint n => Some (Int.eq n Int.zero)
-      | _ => None
-      end
-  | Cond_b =>
-      match rs CF with
-      | Vint n => Some (Int.eq n Int.one)
-      | _ => None
-      end
-  | Cond_be =>
-      match rs CF, rs ZF with
-      | Vint c, Vint z => Some (Int.eq c Int.one || Int.eq z Int.one)
-      | _, _ => None
-      end
-  | Cond_ae =>
-      match rs CF with
-      | Vint n => Some (Int.eq n Int.zero)
-      | _ => None
-      end
-  | Cond_a =>
-      match rs CF, rs ZF with
-      | Vint c, Vint z => Some (Int.eq c Int.zero && Int.eq z Int.zero)
-      | _, _ => None
-      end
-  | Cond_l =>
-      match rs OF, rs SF with
-      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.one)
-      | _, _ => None
-      end
-  | Cond_le =>
-      match rs OF, rs SF, rs ZF with
-      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.one || Int.eq z Int.one)
-      | _, _, _ => None
-      end
-  | Cond_ge =>
-      match rs OF, rs SF with
-      | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.zero)
-      | _, _ => None
-      end
-  | Cond_g =>
-      match rs OF, rs SF, rs ZF with
-      | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.zero && Int.eq z Int.zero)
-      | _, _, _ => None
-      end
-  | Cond_p =>
-      match rs PF with
-      | Vint n => Some (Int.eq n Int.one)
-      | _ => None
-      end
-  | Cond_np =>
-      match rs PF with
-      | Vint n => Some (Int.eq n Int.zero)
-      | _ => None
-      end
-  end.
+Axiom low_high_half:
+  forall id ofs,
+  Val.offset_ptr (high_half ge id ofs) (low_half ge id ofs) = Genv.symbol_address ge id ofs.
 
 (** The semantics is purely small-step and defined as a function
   from the current state (a register set + a memory state)
@@ -546,510 +553,482 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
   or [Stuck] if the processor is stuck. *)
 
 Inductive outcome: Type :=
-  | Next: regset -> mem -> outcome
+  | Next:  regset -> mem -> outcome
   | Stuck: outcome.
 
 (** Manipulations over the [PC] register: continuing with the next
-  instruction ([nextinstr]) or branching to a label ([goto_label]).
-  [nextinstr_nf] is a variant of [nextinstr] that sets condition flags
-  to [Vundef] in addition to incrementing the [PC]. *)
+  instruction ([nextinstr]) or branching to a label ([goto_label]). *)
 
 Definition nextinstr (rs: regset) :=
   rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
 
-Definition nextinstr_nf (rs: regset) : regset :=
-  nextinstr (undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs).
-
 Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
   match label_pos lbl 0 (fn_code f) with
   | None => Stuck
   | Some pos =>
       match rs#PC with
       | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
-      | _ => Stuck
-    end
+      | _          => Stuck
+      end
   end.
 
-(** Auxiliaries for memory accesses. *)
+(** Auxiliaries for memory accesses *)
 
-Definition exec_load (chunk: memory_chunk) (m: mem)
-                     (a: addrmode) (rs: regset) (rd: preg) :=
-  match Mem.loadv chunk m (eval_addrmode a rs) with
-  | Some v => Next (nextinstr_nf (rs#rd <- v)) m
+Definition eval_offset (ofs: offset) : ptrofs :=
+  match ofs with
+  | Ofsimm n => n
+  | Ofslow id delta => low_half ge id delta
+  end.
+
+Definition exec_load (chunk: memory_chunk) (rs: regset) (m: mem)
+                     (d: preg) (a: ireg) (ofs: offset) :=
+  match Mem.loadv chunk m (Val.offset_ptr (rs a) (eval_offset ofs)) with
   | None => Stuck
+  | Some v => Next (nextinstr (rs#d <- v)) m
   end.
 
-Definition exec_store (chunk: memory_chunk) (m: mem)
-                      (a: addrmode) (rs: regset) (r1: preg)
-                      (destroyed: list preg) :=
-  match Mem.storev chunk m (eval_addrmode a rs) (rs r1) with
-  | Some m' => Next (nextinstr_nf (undef_regs destroyed rs)) m'
+Definition exec_store (chunk: memory_chunk) (rs: regset) (m: mem)
+                      (s: preg) (a: ireg) (ofs: offset) :=
+  match Mem.storev chunk m (Val.offset_ptr (rs a) (eval_offset ofs)) (rs s) with
   | None => Stuck
+  | Some m' => Next (nextinstr rs) m'
+  end.
+
+(** Evaluating a branch *)
+
+Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome :=
+  match res with
+    | Some true  => goto_label f l rs m
+    | Some false => Next (nextinstr rs) m
+    | None => Stuck
   end.
 
-(** Execution of a single instruction [i] in initial state
-    [rs] and [m].  Return updated state.  For instructions
-    that correspond to actual IA32 instructions, the cases are
-    straightforward transliterations of the informal descriptions
-    given in the IA32 reference manuals.  For pseudo-instructions,
-    refer to the informal descriptions given above.
+(** Execution of a single instruction [i] in initial state [rs] and
+    [m].  Return updated state.  For instructions that correspond to
+    actual RISC-V instructions, the cases are straightforward
+    transliterations of the informal descriptions given in the RISC-V
+    user-mode specification.  For pseudo-instructions, refer to the
+    informal descriptions given above.
 
     Note that we set to [Vundef] the registers used as temporaries by
-    the expansions of the pseudo-instructions, so that the IA32 code
+    the expansions of the pseudo-instructions, so that the RISC-V code
     we generate cannot use those registers to hold values that must
-    survive the execution of the pseudo-instruction.
-
-    Concerning condition flags, the comparison instructions set them
-    accurately; other instructions (moves, [lea]) preserve them;
-    and all other instruction set those flags to [Vundef], to reflect
-    the fact that the processor updates some or all of those flags,
-    but we do not need to model this precisely.
-*)
+    survive the execution of the pseudo-instruction. *)
 
 Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome :=
   match i with
-  (** Moves *)
-  | Pmov_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (rs r1))) m
-  | Pmovl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Vint n))) m
-  | Pmovq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Vlong n))) m
-  | Pmov_rs rd id =>
-      Next (nextinstr_nf (rs#rd <- (Genv.symbol_address ge id Ptrofs.zero))) m
-  | Pmovl_rm rd a =>
-      exec_load Mint32 m a rs rd
-  | Pmovq_rm rd a =>
-      exec_load Mint64 m a rs rd
-  | Pmovl_mr a r1 =>
-      exec_store Mint32 m a rs r1 nil
-  | Pmovq_mr a r1 =>
-      exec_store Mint64 m a rs r1 nil
-  | Pmovsd_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (rs r1))) m
-  | Pmovsd_fi rd n =>
-      Next (nextinstr (rs#rd <- (Vfloat n))) m
-  | Pmovsd_fm rd a =>
-      exec_load Mfloat64 m a rs rd
-  | Pmovsd_mf a r1 =>
-      exec_store Mfloat64 m a rs r1 nil
-  | Pmovss_fi rd n =>
-      Next (nextinstr (rs#rd <- (Vsingle n))) m
-  | Pmovss_fm rd a =>
-      exec_load Mfloat32 m a rs rd
-  | Pmovss_mf a r1 =>
-      exec_store Mfloat32 m a rs r1 nil
-  | Pfldl_m a =>
-      exec_load Mfloat64 m a rs ST0
-  | Pfstpl_m a =>
-      exec_store Mfloat64 m a rs ST0 (ST0 :: nil)
-  | Pflds_m a =>
-      exec_load Mfloat32 m a rs ST0
-  | Pfstps_m a =>
-      exec_store Mfloat32 m a rs ST0 (ST0 :: nil)
-  (** Moves with conversion *)
-  | Pmovb_mr a r1 =>
-      exec_store Mint8unsigned m a rs r1 nil
-  | Pmovw_mr a r1 =>
-      exec_store Mint16unsigned m a rs r1 nil
-  | Pmovzb_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.zero_ext 8 rs#r1))) m
-  | Pmovzb_rm rd a =>
-      exec_load Mint8unsigned m a rs rd
-  | Pmovsb_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.sign_ext 8 rs#r1))) m
-  | Pmovsb_rm rd a =>
-      exec_load Mint8signed m a rs rd
-  | Pmovzw_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.zero_ext 16 rs#r1))) m
-  | Pmovzw_rm rd a =>
-      exec_load Mint16unsigned m a rs rd
-  | Pmovsw_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.sign_ext 16 rs#r1))) m
-  | Pmovsw_rm rd a =>
-      exec_load Mint16signed m a rs rd
-  | Pmovzl_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.longofintu rs#r1))) m
-  | Pmovsl_rr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.longofint rs#r1))) m
-  | Pmovls_rr rd =>
-      Next (nextinstr (rs#rd <- (Val.loword rs#rd))) m
-  | Pcvtsd2ss_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
-  | Pcvtss2sd_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.floatofsingle rs#r1))) m
-  | Pcvttsd2si_rf rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
-  | Pcvtsi2sd_fr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
-  | Pcvttss2si_rf rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intofsingle rs#r1)))) m
-  | Pcvtsi2ss_fr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleofint rs#r1)))) m
-  | Pcvttsd2sl_rf rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longoffloat rs#r1)))) m
-  | Pcvtsl2sd_fr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatoflong rs#r1)))) m
-  | Pcvttss2sl_rf rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.longofsingle rs#r1)))) m
-  | Pcvtsl2ss_fr rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleoflong rs#r1)))) m
-  (** Integer arithmetic *)
-  | Pleal rd a =>
-      Next (nextinstr (rs#rd <- (eval_addrmode32 a rs))) m
-  | Pleaq rd a =>
-      Next (nextinstr (rs#rd <- (eval_addrmode64 a rs))) m
-  | Pnegl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.neg rs#rd))) m
-  | Pnegq rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.negl rs#rd))) m
-  | Paddl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.add rs#rd (Vint n)))) m
-  | Paddq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.addl rs#rd (Vlong n)))) m
-  | Psubl_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.sub rs#rd rs#r1))) m
-  | Psubq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.subl rs#rd rs#r1))) m
-  | Pimull_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.mul rs#rd rs#r1))) m
-  | Pimulq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.mull rs#rd rs#r1))) m
-  | Pimull_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.mul rs#rd (Vint n)))) m
-  | Pimulq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.mull rs#rd (Vlong n)))) m
-  | Pimull_r r1 =>
-      Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1)
-                            #RDX <- (Val.mulhs rs#RAX rs#r1))) m
-  | Pimulq_r r1 =>
-      Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1)
-                            #RDX <- (Val.mullhs rs#RAX rs#r1))) m
-  | Pmull_r r1 =>
-      Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1)
-                            #RDX <- (Val.mulhu rs#RAX rs#r1))) m
-  | Pmulq_r r1 =>
-      Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1)
-                            #RDX <- (Val.mullhu rs#RAX rs#r1))) m
-  | Pcltd =>
-      Next (nextinstr_nf (rs#RDX <- (Val.shr rs#RAX (Vint (Int.repr 31))))) m
-  | Pcqto =>
-      Next (nextinstr_nf (rs#RDX <- (Val.shrl rs#RAX (Vint (Int.repr 63))))) m
-  | Pdivl r1 =>
-      match rs#RDX, rs#RAX, rs#r1 with
-      | Vint nh, Vint nl, Vint d =>
-          match Int.divmodu2 nh nl d with
-          | Some(q, r) => Next (nextinstr_nf (rs#RAX <- (Vint q) #RDX <- (Vint r))) m
-          | None => Stuck
-          end
-      | _, _, _ => Stuck
-      end
-  | Pdivq r1 =>
-      match rs#RDX, rs#RAX, rs#r1 with
-      | Vlong nh, Vlong nl, Vlong d =>
-          match Int64.divmodu2 nh nl d with
-          | Some(q, r) => Next (nextinstr_nf (rs#RAX <- (Vlong q) #RDX <- (Vlong r))) m
-          | None => Stuck
-          end
-      | _, _, _ => Stuck
-      end
-  | Pidivl r1 =>
-      match rs#RDX, rs#RAX, rs#r1 with
-      | Vint nh, Vint nl, Vint d =>
-          match Int.divmods2 nh nl d with
-          | Some(q, r) => Next (nextinstr_nf (rs#RAX <- (Vint q) #RDX <- (Vint r))) m
-          | None => Stuck
-          end
-      | _, _, _ => Stuck
-      end
-  | Pidivq r1 =>
-      match rs#RDX, rs#RAX, rs#r1 with
-      | Vlong nh, Vlong nl, Vlong d =>
-          match Int64.divmods2 nh nl d with
-          | Some(q, r) => Next (nextinstr_nf (rs#RAX <- (Vlong q) #RDX <- (Vlong r))) m
-          | None => Stuck
-          end
-      | _, _, _ => Stuck
-      end
-  | Pandl_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.and rs#rd rs#r1))) m
-  | Pandq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.andl rs#rd rs#r1))) m
-  | Pandl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.and rs#rd (Vint n)))) m
-  | Pandq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.andl rs#rd (Vlong n)))) m
-  | Porl_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.or rs#rd rs#r1))) m
-  | Porq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.orl rs#rd rs#r1))) m
-  | Porl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.or rs#rd (Vint n)))) m
-  | Porq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.orl rs#rd (Vlong n)))) m
-  | Pxorl_r rd =>
-      Next (nextinstr_nf (rs#rd <- Vzero)) m
-  | Pxorq_r rd =>
-      Next (nextinstr_nf (rs#rd <- (Vlong Int64.zero))) m
-  | Pxorl_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.xor rs#rd rs#r1))) m
-  | Pxorq_rr rd r1 =>
-      Next (nextinstr_nf (rs#rd <- (Val.xorl rs#rd rs#r1))) m
-  | Pxorl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.xor rs#rd (Vint n)))) m
-  | Pxorq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.xorl rs#rd (Vlong n)))) m
-  | Pnotl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.notint rs#rd))) m
-  | Pnotq rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.notl rs#rd))) m
-  | Psall_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shl rs#rd rs#RCX))) m
-  | Psalq_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shll rs#rd rs#RCX))) m
-  | Psall_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shl rs#rd (Vint n)))) m
-  | Psalq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shll rs#rd (Vint n)))) m
-  | Pshrl_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shru rs#rd rs#RCX))) m
-  | Pshrq_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shrlu rs#rd rs#RCX))) m
-  | Pshrl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shru rs#rd (Vint n)))) m
-  | Pshrq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shrlu rs#rd (Vint n)))) m
-  | Psarl_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd rs#RCX))) m
-  | Psarq_rcl rd =>
-      Next (nextinstr_nf (rs#rd <- (Val.shrl rs#rd rs#RCX))) m
-  | Psarl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd (Vint n)))) m
-  | Psarq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.shrl rs#rd (Vint n)))) m
-  | Pshld_ri rd r1 n =>
-      Next (nextinstr_nf
-              (rs#rd <- (Val.or (Val.shl rs#rd (Vint n))
-                                (Val.shru rs#r1 (Vint (Int.sub Int.iwordsize n)))))) m
-  | Prorl_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.ror rs#rd (Vint n)))) m
-  | Prorq_ri rd n =>
-      Next (nextinstr_nf (rs#rd <- (Val.rorl rs#rd (Vint n)))) m
-  | Pcmpl_rr r1 r2 =>
-      Next (nextinstr (compare_ints (rs r1) (rs r2) rs m)) m
-  | Pcmpq_rr r1 r2 =>
-      Next (nextinstr (compare_longs (rs r1) (rs r2) rs m)) m
-  | Pcmpl_ri r1 n =>
-      Next (nextinstr (compare_ints (rs r1) (Vint n) rs m)) m
-  | Pcmpq_ri r1 n =>
-      Next (nextinstr (compare_longs (rs r1) (Vlong n) rs m)) m
-  | Ptestl_rr r1 r2 =>
-      Next (nextinstr (compare_ints (Val.and (rs r1) (rs r2)) Vzero rs m)) m
-  | Ptestq_rr r1 r2 =>
-      Next (nextinstr (compare_longs (Val.andl (rs r1) (rs r2)) (Vlong Int64.zero) rs m)) m
-  | Ptestl_ri r1 n =>
-      Next (nextinstr (compare_ints (Val.and (rs r1) (Vint n)) Vzero rs m)) m
-  | Ptestq_ri r1 n =>
-      Next (nextinstr (compare_longs (Val.andl (rs r1) (Vlong n)) (Vlong Int64.zero) rs m)) m
-  | Pcmov c rd r1 =>
-      let v :=
-        match eval_testcond c rs with
-        | Some b => if b then rs#r1 else rs#rd
-        | None   => Vundef
-      end in
-      Next (nextinstr (rs#rd <- v)) m
-  | Psetcc c rd =>
-      Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m
-  (** Arithmetic operations over double-precision floats *)
-  | Paddd_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m
-  | Psubd_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.subf rs#rd rs#r1))) m
-  | Pmuld_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.mulf rs#rd rs#r1))) m
-  | Pdivd_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.divf rs#rd rs#r1))) m
-  | Pnegd rd =>
-      Next (nextinstr (rs#rd <- (Val.negf rs#rd))) m
-  | Pabsd rd =>
-      Next (nextinstr (rs#rd <- (Val.absf rs#rd))) m
-  | Pcomisd_ff r1 r2 =>
-      Next (nextinstr (compare_floats (rs r1) (rs r2) rs)) m
-  | Pxorpd_f rd =>
-      Next (nextinstr_nf (rs#rd <- (Vfloat Float.zero))) m
-  (** Arithmetic operations over single-precision floats *)
-  | Padds_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.addfs rs#rd rs#r1))) m
-  | Psubs_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.subfs rs#rd rs#r1))) m
-  | Pmuls_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.mulfs rs#rd rs#r1))) m
-  | Pdivs_ff rd r1 =>
-      Next (nextinstr (rs#rd <- (Val.divfs rs#rd rs#r1))) m
-  | Pnegs rd =>
-      Next (nextinstr (rs#rd <- (Val.negfs rs#rd))) m
-  | Pabss rd =>
-      Next (nextinstr (rs#rd <- (Val.absfs rs#rd))) m
-  | Pcomiss_ff r1 r2 =>
-      Next (nextinstr (compare_floats32 (rs r1) (rs r2) rs)) m
-  | Pxorps_f rd =>
-      Next (nextinstr_nf (rs#rd <- (Vsingle Float32.zero))) m
-  (** Branches and calls *)
-  | Pjmp_l lbl =>
-      goto_label f lbl rs m
-  | Pjmp_s id sg =>
-      Next (rs#PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
-  | Pjmp_r r sg =>
-      Next (rs#PC <- (rs r)) m
-  | Pjcc cond lbl =>
-      match eval_testcond cond rs with
-      | Some true => goto_label f lbl rs m
-      | Some false => Next (nextinstr rs) m
+  | Pmv d s =>
+      Next (nextinstr (rs#d <- (rs#s))) m
+
+(** 32-bit integer register-immediate instructions *)
+  | Paddiw d s i =>
+      Next (nextinstr (rs#d <- (Val.add rs##s (Vint i)))) m
+  | Psltiw d s i =>
+      Next (nextinstr (rs#d <- (Val.cmp Clt rs##s (Vint i)))) m
+  | Psltiuw d s i =>
+      Next (nextinstr (rs#d <- (Val.cmpu (Mem.valid_pointer m) Clt rs##s (Vint i)))) m
+  | Pandiw d s i =>
+      Next (nextinstr (rs#d <- (Val.and rs##s (Vint i)))) m
+  | Poriw d s i =>
+      Next (nextinstr (rs#d <- (Val.or rs##s (Vint i)))) m
+  | Pxoriw d s i =>
+      Next (nextinstr (rs#d <- (Val.xor rs##s (Vint i)))) m
+  | Pslliw d s i =>
+      Next (nextinstr (rs#d <- (Val.shl rs##s (Vint i)))) m
+  | Psrliw d s i =>
+      Next (nextinstr (rs#d <- (Val.shru rs##s (Vint i)))) m
+  | Psraiw d s i =>
+      Next (nextinstr (rs#d <- (Val.shr rs##s (Vint i)))) m
+  | Pluiw d i =>
+      Next (nextinstr (rs#d <- (Vint (Int.shl i (Int.repr 12))))) m
+
+(** 32-bit integer register-register instructions *)
+  | Paddw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.add rs##s1 rs##s2))) m
+  | Psubw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.sub rs##s1 rs##s2))) m
+  | Pmulw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mul rs##s1 rs##s2))) m
+  | Pmulhw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mulhs rs##s1 rs##s2))) m
+  | Pmulhuw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mulhu rs##s1 rs##s2))) m
+  | Pdivw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.divs rs##s1 rs##s2)))) m
+  | Pdivuw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.divu rs##s1 rs##s2)))) m
+  | Premw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.mods rs##s1 rs##s2)))) m
+  | Premuw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.modu rs##s1 rs##s2)))) m
+  | Psltw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmp Clt rs##s1 rs##s2))) m
+  | Psltuw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpu (Mem.valid_pointer m) Clt rs##s1 rs##s2))) m
+  | Pseqw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpu (Mem.valid_pointer m) Ceq rs##s1 rs##s2))) m
+  | Psnew d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpu (Mem.valid_pointer m) Cne rs##s1 rs##s2))) m
+  | Pandw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.and rs##s1 rs##s2))) m
+  | Porw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.or rs##s1 rs##s2))) m
+  | Pxorw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.xor rs##s1 rs##s2))) m
+  | Psllw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shl rs##s1 rs##s2))) m
+  | Psrlw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shru rs##s1 rs##s2))) m
+  | Psraw d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shr rs##s1 rs##s2))) m
+
+(** 64-bit integer register-immediate instructions *)
+  | Paddil d s i =>
+      Next (nextinstr (rs#d <- (Val.addl rs###s (Vlong i)))) m
+  | Psltil d s i =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmpl Clt rs###s (Vlong i))))) m
+  | Psltiul d s i =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt rs###s (Vlong i))))) m
+  | Pandil d s i =>
+      Next (nextinstr (rs#d <- (Val.andl rs###s (Vlong i)))) m
+  | Poril d s i =>
+      Next (nextinstr (rs#d <- (Val.orl rs###s (Vlong i)))) m
+  | Pxoril d s i =>
+      Next (nextinstr (rs#d <- (Val.xorl rs###s (Vlong i)))) m
+  | Psllil d s i =>
+      Next (nextinstr (rs#d <- (Val.shll rs###s (Vint i)))) m
+  | Psrlil d s i =>
+      Next (nextinstr (rs#d <- (Val.shrlu rs###s (Vint i)))) m
+  | Psrail d s i =>
+      Next (nextinstr (rs#d <- (Val.shrl rs###s (Vint i)))) m
+  | Pluil d i =>
+      Next (nextinstr (rs#d <- (Vlong (Int64.sign_ext 32 (Int64.shl i (Int64.repr 12)))))) m
+
+(** 64-bit integer register-register instructions *)
+  | Paddl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.addl rs###s1 rs###s2))) m
+  | Psubl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.subl rs###s1 rs###s2))) m
+  | Pmull d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mull rs###s1 rs###s2))) m
+  | Pmulhl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mullhs rs###s1 rs###s2))) m
+  | Pmulhul d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mullhu rs###s1 rs###s2))) m
+  | Pdivl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.divls rs###s1 rs###s2)))) m
+  | Pdivul d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.divlu rs###s1 rs###s2)))) m
+  | Preml d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.modls rs###s1 rs###s2)))) m
+  | Premul d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.modlu rs###s1 rs###s2)))) m
+  | Psltl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmpl Clt rs###s1 rs###s2)))) m
+  | Psltul d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt rs###s1 rs###s2)))) m
+  | Pseql d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq rs###s1 rs###s2)))) m
+  | Psnel d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cne rs###s1 rs###s2)))) m
+  | Pandl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.andl rs###s1 rs###s2))) m
+  | Porl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.orl rs###s1 rs###s2))) m
+  | Pxorl d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.xorl rs###s1 rs###s2))) m
+  | Pslll d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shll rs###s1 rs###s2))) m
+  | Psrll d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shrlu rs###s1 rs###s2))) m
+  | Psral d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.shrl rs###s1 rs###s2))) m
+
+  | Pcvtl2w d s =>
+      Next (nextinstr (rs#d <- (Val.loword rs##s))) m
+  | Pcvtw2l r =>
+      Next (nextinstr (rs#r <- (Val.longofint rs#r))) m
+
+(** Unconditional jumps.  Links are always to X1/RA. *)
+  | Pj_l l =>
+      goto_label f l rs m
+  | Pj_s s sg =>
+      Next (rs#PC <- (Genv.symbol_address ge s Ptrofs.zero)) m
+  | Pj_r r sg =>
+      Next (rs#PC <- (rs#r)) m
+  | Pjal_s s sg =>
+      Next (rs#PC <- (Genv.symbol_address ge s Ptrofs.zero)
+              #RA <- (Val.offset_ptr rs#PC Ptrofs.one)
+           ) m
+  | Pjal_r r sg =>
+      Next (rs#PC <- (rs#r)
+              #RA <- (Val.offset_ptr rs#PC Ptrofs.one)
+           ) m
+(** Conditional branches, 32-bit comparisons *)
+  | Pbeqw s1 s2 l =>
+      eval_branch f l rs m (Val.cmpu_bool (Mem.valid_pointer m) Ceq rs##s1 rs##s2)
+  | Pbnew s1 s2 l =>
+      eval_branch f l rs m (Val.cmpu_bool (Mem.valid_pointer m) Cne rs##s1 rs##s2)
+  | Pbltw s1 s2 l =>
+      eval_branch f l rs m (Val.cmp_bool Clt rs##s1 rs##s2)
+  | Pbltuw s1 s2 l =>
+      eval_branch f l rs m (Val.cmpu_bool (Mem.valid_pointer m) Clt rs##s1 rs##s2)
+  | Pbgew s1 s2 l =>
+      eval_branch f l rs m (Val.cmp_bool Cge rs##s1 rs##s2)
+  | Pbgeuw s1 s2 l =>
+      eval_branch f l rs m (Val.cmpu_bool (Mem.valid_pointer m) Cge rs##s1 rs##s2)
+
+(** Conditional branches, 64-bit comparisons *)
+  | Pbeql s1 s2 l =>
+      eval_branch f l rs m (Val.cmplu_bool (Mem.valid_pointer m) Ceq rs###s1 rs###s2)
+  | Pbnel s1 s2 l =>
+      eval_branch f l rs m (Val.cmplu_bool (Mem.valid_pointer m) Cne rs###s1 rs###s2)
+  | Pbltl s1 s2 l =>
+      eval_branch f l rs m (Val.cmpl_bool Clt rs###s1 rs###s2)
+  | Pbltul s1 s2 l =>
+      eval_branch f l rs m (Val.cmplu_bool (Mem.valid_pointer m) Clt rs###s1 rs###s2)
+  | Pbgel s1 s2 l =>
+      eval_branch f l rs m (Val.cmpl_bool Cge rs###s1 rs###s2)
+  | Pbgeul s1 s2 l =>
+      eval_branch f l rs m (Val.cmplu_bool (Mem.valid_pointer m) Cge rs###s1 rs###s2)
+
+(** Loads and stores *)
+  | Plb d a ofs =>
+      exec_load Mint8signed rs m d a ofs
+  | Plbu d a ofs =>
+      exec_load Mint8unsigned rs m d a ofs
+  | Plh d a ofs =>
+      exec_load Mint16signed rs m d a ofs
+  | Plhu d a ofs =>
+      exec_load Mint16unsigned rs m d a ofs
+  | Plw d a ofs =>
+      exec_load Mint32 rs m d a ofs
+  | Plw_a d a ofs =>
+      exec_load Many32 rs m d a ofs
+  | Pld d a ofs =>
+      exec_load Mint64 rs m d a ofs
+  | Pld_a d a ofs =>
+      exec_load Many64 rs m d a ofs
+  | Psb s a ofs =>
+      exec_store Mint8unsigned rs m s a ofs
+  | Psh s a ofs =>
+      exec_store Mint16unsigned rs m s a ofs
+  | Psw s a ofs =>
+      exec_store Mint32 rs m s a ofs
+  | Psw_a s a ofs =>
+      exec_store Many32 rs m s a ofs
+  | Psd s a ofs =>
+      exec_store Mint64 rs m s a ofs
+  | Psd_a s a ofs =>
+      exec_store Many64 rs m s a ofs
+
+(** Floating point register move *)
+  | Pfmv d s =>
+      Next (nextinstr (rs#d <- (rs#s))) m
+
+(** 32-bit (single-precision) floating point *)
+  | Pfls d a ofs =>
+      exec_load Mfloat32 rs m d a ofs
+  | Pfss s a ofs =>
+      exec_store Mfloat32 rs m s a ofs
+
+  | Pfnegs d s =>
+      Next (nextinstr (rs#d <- (Val.negfs rs#s))) m
+  | Pfabss d s =>
+      Next (nextinstr (rs#d <- (Val.absfs rs#s))) m
+
+  | Pfadds d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.addfs rs#s1 rs#s2))) m
+  | Pfsubs d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.subfs rs#s1 rs#s2))) m
+  | Pfmuls d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mulfs rs#s1 rs#s2))) m
+  | Pfdivs d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.divfs rs#s1 rs#s2))) m
+  | Pfeqs d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpfs Ceq rs#s1 rs#s2))) m
+  | Pflts d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpfs Clt rs#s1 rs#s2))) m
+  | Pfles d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpfs Cle rs#s1 rs#s2))) m
+
+  | Pfcvtws d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.intofsingle rs#s)))) m
+  | Pfcvtwus d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.intuofsingle rs#s)))) m
+  | Pfcvtsw d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.singleofint rs##s)))) m
+  | Pfcvtswu d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.singleofintu rs##s)))) m
+
+  | Pfcvtls d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.longofsingle rs#s)))) m
+  | Pfcvtlus d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.longuofsingle rs#s)))) m
+  | Pfcvtsl d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.singleoflong rs###s)))) m
+  | Pfcvtslu d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.singleoflongu rs###s)))) m
+
+(** 64-bit (double-precision) floating point *)
+  | Pfld d a ofs =>
+      exec_load Mfloat64 rs m d a ofs
+  | Pfld_a d a ofs =>
+      exec_load Many64 rs m d a ofs
+  | Pfsd s a ofs =>
+      exec_store Mfloat64 rs m s a ofs
+  | Pfsd_a s a ofs =>
+      exec_store Many64 rs m s a ofs
+
+  | Pfnegd d s =>
+      Next (nextinstr (rs#d <- (Val.negf rs#s))) m
+  | Pfabsd d s =>
+      Next (nextinstr (rs#d <- (Val.absf rs#s))) m
+
+  | Pfaddd d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.addf rs#s1 rs#s2))) m
+  | Pfsubd d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.subf rs#s1 rs#s2))) m
+  | Pfmuld d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.mulf rs#s1 rs#s2))) m
+  | Pfdivd d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.divf rs#s1 rs#s2))) m
+  | Pfeqd d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpf Ceq rs#s1 rs#s2))) m
+  | Pfltd d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpf Clt rs#s1 rs#s2))) m
+  | Pfled d s1 s2 =>
+      Next (nextinstr (rs#d <- (Val.cmpf Cle rs#s1 rs#s2))) m
+
+  | Pfcvtwd d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.intoffloat rs#s)))) m
+  | Pfcvtwud d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.intuoffloat rs#s)))) m
+  | Pfcvtdw d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.floatofint rs##s)))) m
+  | Pfcvtdwu d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.floatofintu rs##s)))) m
+
+  | Pfcvtld d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.longoffloat rs#s)))) m
+  | Pfcvtlud d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.longuoffloat rs#s)))) m
+  | Pfcvtdl d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.floatoflong rs###s)))) m
+  | Pfcvtdlu d s =>
+      Next (nextinstr (rs#d <- (Val.maketotal (Val.floatoflongu rs###s)))) m
+
+  | Pfcvtds d s =>
+      Next (nextinstr (rs#d <- (Val.floatofsingle rs#s))) m
+  | Pfcvtsd d s =>
+      Next (nextinstr (rs#d <- (Val.singleoffloat rs#s))) m
+          
+  | Pfmvxs d s =>
+      Next (nextinstr (rs#d <- (ExtValues.bits_of_single rs#s))) m
+  | Pfmvxd d s =>
+      Next (nextinstr (rs#d <- (ExtValues.bits_of_float rs#s))) m
+          
+  | Pfmvsx d s =>
+      Next (nextinstr (rs#d <- (ExtValues.single_of_bits rs#s))) m
+  | Pfmvdx d s =>
+      Next (nextinstr (rs#d <- (ExtValues.float_of_bits rs#s))) m
+
+
+(** Pseudo-instructions *)
+  | Pallocframe sz pos =>
+      let (m1, stk) := Mem.alloc m 0 sz in
+      let sp := (Vptr stk Ptrofs.zero) in
+      match Mem.storev Mptr m1 (Val.offset_ptr sp pos) rs#SP with
       | None => Stuck
+      | Some m2 => Next (nextinstr (rs #X30 <- (rs SP) #SP <- sp #X31 <- Vundef)) m2
       end
-  | Pjcc2 cond1 cond2 lbl =>
-      match eval_testcond cond1 rs, eval_testcond cond2 rs with
-      | Some true, Some true => goto_label f lbl rs m
-      | Some _, Some _ => Next (nextinstr rs) m
-      | _, _ => Stuck
-      end
-  | Pjmptbl r tbl =>
-      match rs#r with
-      | Vint n =>
-          match list_nth_z tbl (Int.unsigned n) with
-          | None => Stuck
-          | Some lbl => goto_label f lbl (rs #RAX <- Vundef #RDX <- Vundef) m
+  | Pfreeframe sz pos =>
+      match Mem.loadv Mptr m (Val.offset_ptr rs#SP pos) with
+      | None => Stuck
+      | Some v =>
+          match rs SP with
+          | Vptr stk ofs =>
+              match Mem.free m stk 0 sz with
+              | None => Stuck
+              | Some m' => Next (nextinstr (rs#SP <- v #X31 <- Vundef)) m'
+              end
+          | _ => Stuck
           end
-      | _ => Stuck
       end
-  | Pcall_s id sg =>
-      Next (rs#RA <- (Val.offset_ptr rs#PC Ptrofs.one) #PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
-  | Pcall_r r sg =>
-      Next (rs#RA <- (Val.offset_ptr rs#PC Ptrofs.one) #PC <- (rs r)) m
-  | Pret =>
-      Next (rs#PC <- (rs#RA)) m
-  (** Saving and restoring registers *)
-  | Pmov_rm_a rd a =>
-      exec_load (if Archi.ptr64 then Many64 else Many32) m a rs rd
-  | Pmov_mr_a a r1 =>
-      exec_store (if Archi.ptr64 then Many64 else Many32) m a rs r1 nil
-  | Pmovsd_fm_a rd a =>
-      exec_load Many64 m a rs rd
-  | Pmovsd_mf_a a r1 =>
-      exec_store Many64 m a rs r1 nil
-  (** Pseudo-instructions *)
+  | Pselectl rd rb rt rf =>
+    Next (nextinstr (rs#rd <- (ExtValues.select01_long
+                                 (rs###rb) (rs###rt) (rs###rf)))
+                     #X31 <- Vundef) m
   | Plabel lbl =>
       Next (nextinstr rs) m
-  | Pallocframe sz ofs_ra ofs_link =>
-      let (m1, stk) := Mem.alloc m 0 sz in
-      let sp := Vptr stk Ptrofs.zero in
-      match Mem.storev Mptr m1 (Val.offset_ptr sp ofs_link) rs#RSP with
-      | None => Stuck
-      | Some m2 =>
-          match Mem.storev Mptr m2 (Val.offset_ptr sp ofs_ra) rs#RA with
-          | None => Stuck
-          | Some m3 => Next (nextinstr (rs #RAX <- (rs#RSP) #RSP <- sp)) m3
-          end
-      end
-  | Pfreeframe sz ofs_ra ofs_link =>
-      match Mem.loadv Mptr m (Val.offset_ptr rs#RSP ofs_ra) with
-      | None => Stuck
-      | Some ra =>
-          match Mem.loadv Mptr m (Val.offset_ptr rs#RSP ofs_link) with
+  | Ploadsymbol rd s ofs =>
+      Next (nextinstr (rs#rd <- (Genv.symbol_address ge s ofs))) m
+  | Ploadsymbol_high rd s ofs =>
+      Next (nextinstr (rs#rd <- (high_half ge s ofs))) m
+  | Ploadli rd i =>
+      Next (nextinstr (rs#X31 <- Vundef #rd <- (Vlong i))) m
+  | Ploadfi rd f =>
+      Next (nextinstr (rs#X31 <- Vundef #rd <- (Vfloat f))) m
+  | Ploadsi rd f =>
+      Next (nextinstr (rs#X31 <- Vundef #rd <- (Vsingle f))) m
+  | Pbtbl r tbl =>
+      match rs r with
+      | Vint n =>
+          match list_nth_z tbl (Int.unsigned n) with
           | None => Stuck
-          | Some sp =>
-              match rs#RSP with
-              | Vptr stk ofs =>
-                  match Mem.free m stk 0 sz with
-                  | None => Stuck
-                  | Some m' => Next (nextinstr (rs#RSP <- sp #RA <- ra)) m'
-                  end
-              | _ => Stuck
-              end
+          | Some lbl => goto_label f lbl (rs#X5 <- Vundef #X31 <- Vundef) m
           end
+      | _ => Stuck
       end
   | Pbuiltin ef args res =>
-      Stuck                             (**r treated specially below *)
-  (** The following instructions and directives are not generated
-      directly by [Asmgen], so we do not model them. *)
-  | Padcl_ri _ _
-  | Padcl_rr _ _
-  | Paddl_mi _ _
-  | Paddl_rr _ _
-  | Pbsfl _ _
-  | Pbsfq _ _
-  | Pbsrl _ _
-  | Pbsrq _ _
-  | Pbswap64 _
-  | Pbswap32 _
-  | Pbswap16 _
-  | Pcfi_adjust _
-  | Pfmadd132 _ _ _
-  | Pfmadd213 _ _ _
-  | Pfmadd231 _ _ _
-  | Pfmsub132 _ _ _
-  | Pfmsub213 _ _ _
-  | Pfmsub231 _ _ _
-  | Pfnmadd132 _ _ _
-  | Pfnmadd213 _ _ _
-  | Pfnmadd231 _ _ _
-  | Pfnmsub132 _ _ _
-  | Pfnmsub213 _ _ _
-  | Pfnmsub231 _ _ _
-  | Pmaxsd _ _
-  | Pminsd _ _
-  | Pmovb_rm _ _
-  | Pmovq_rf _ _
-  | Pmovsq_rm _ _
-  | Pmovsq_mr _ _
-  | Pmovsb
-  | Pmovsw
-  | Pmovw_rm _ _
-  | Pnop
-  | Prep_movsl
-  | Psbbl_rr _ _
-  | Psqrtsd _ _
-  | Psubl_ri _ _
-  | Psubq_ri _ _ => Stuck
+      Stuck (**r treated specially below *)
+  | Pnop => Next (nextinstr rs) m (**r Pnop is used by an oracle during expansion *)
+
+  (** The following instructions and directives are not generated directly by Asmgen,
+      so we do not model them. *)
+  | Pfence
+
+  | Pfmins _ _ _
+  | Pfmaxs _ _ _
+  | Pfsqrts _ _
+  | Pfmadds _ _ _ _
+  | Pfmsubs _ _ _ _
+  | Pfnmadds _ _ _ _
+  | Pfnmsubs _ _ _ _
+
+  | Pfmind _ _ _
+  | Pfmaxd _ _ _
+  | Pfsqrtd _ _
+  | Pfmaddd _ _ _ _
+  | Pfmsubd _ _ _ _
+  | Pfnmaddd _ _ _ _
+  | Pfnmsubd _ _ _ _
+    => Stuck
   end.
 
-(** Translation of the LTL/Linear/Mach view of machine registers
-  to the Asm view.  *)
+(** Translation of the LTL/Linear/Mach view of machine registers to
+  the RISC-V view.  Note that no LTL register maps to [X31].  This
+  register is reserved as temporary, to be used by the generated RV32G
+  code.  *)
 
 Definition preg_of (r: mreg) : preg :=
   match r with
-  | AX => IR RAX
-  | BX => IR RBX
-  | CX => IR RCX
-  | DX => IR RDX
-  | SI => IR RSI
-  | DI => IR RDI
-  | BP => IR RBP
-  | Machregs.R8 => IR R8
-  | Machregs.R9 => IR R9
-  | Machregs.R10 => IR R10
-  | Machregs.R11 => IR R11
-  | Machregs.R12 => IR R12
-  | Machregs.R13 => IR R13
-  | Machregs.R14 => IR R14
-  | Machregs.R15 => IR R15
-  | X0 => FR XMM0
-  | X1 => FR XMM1
-  | X2 => FR XMM2
-  | X3 => FR XMM3
-  | X4 => FR XMM4
-  | X5 => FR XMM5
-  | X6 => FR XMM6
-  | X7 => FR XMM7
-  | X8 => FR XMM8
-  | X9 => FR XMM9
-  | X10 => FR XMM10
-  | X11 => FR XMM11
-  | X12 => FR XMM12
-  | X13 => FR XMM13
-  | X14 => FR XMM14
-  | X15 => FR XMM15
-  | FP0 => ST0
+               | R5  => X5  | R6  => X6  | R7  => X7
+  | R8  => X8  | R9  => X9  | R10 => X10 | R11 => X11
+  | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
+  | R16 => X16 | R17 => X17 | R18 => X18 | R19 => X19
+  | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
+  | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
+  | R28 => X28 | R29 => X29 | R30 => X30
+
+  | Machregs.F0  => F0  | Machregs.F1  => F1  | Machregs.F2  => F2  | Machregs.F3  => F3
+  | Machregs.F4  => F4  | Machregs.F5  => F5  | Machregs.F6  => F6  | Machregs.F7  => F7
+  | Machregs.F8  => F8  | Machregs.F9  => F9  | Machregs.F10 => F10 | Machregs.F11 => F11
+  | Machregs.F12 => F12 | Machregs.F13 => F13 | Machregs.F14 => F14 | Machregs.F15 => F15
+  | Machregs.F16 => F16 | Machregs.F17 => F17 | Machregs.F18 => F18 | Machregs.F19 => F19
+  | Machregs.F20 => F20 | Machregs.F21 => F21 | Machregs.F22 => F22 | Machregs.F23 => F23
+  | Machregs.F24 => F24 | Machregs.F25 => F25 | Machregs.F26 => F26 | Machregs.F27 => F27
+  | Machregs.F28 => F28 | Machregs.F29 => F29 | Machregs.F30 => F30 | Machregs.F31 => F31
   end.
 
 (** Undefine all registers except SP and callee-save registers *)
@@ -1063,7 +1042,7 @@ Definition undef_caller_save_regs (rs: regset) : regset :=
 
 (** Extract the values of the arguments of an external call.
     We exploit the calling conventions from module [Conventions], except that
-    we use machine registers instead of locations. *)
+    we use RISC-V registers instead of locations. *)
 
 Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_reg: forall r,
@@ -1071,7 +1050,7 @@ Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_stack: forall ofs ty bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
       Mem.loadv (chunk_of_type ty) m
-                (Val.offset_ptr (rs (IR RSP)) (Ptrofs.repr bofs)) = Some v ->
+                (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
       extcall_arg rs m (S Outgoing ofs ty) v.
 
 Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
@@ -1090,7 +1069,7 @@ Definition extcall_arguments
 Definition loc_external_result (sg: signature) : rpair preg :=
   map_rpair preg_of (loc_result sg).
 
-(** Execution of the instruction at [rs#PC]. *)
+(** Execution of the instruction at [rs PC]. *)
 
 Inductive state: Type :=
   | State: regset -> mem -> state.
@@ -1100,7 +1079,7 @@ Inductive step: state -> trace -> state -> Prop :=
       forall b ofs f i rs m rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal f) ->
-      find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some i ->
+      find_instr (Ptrofs.unsigned ofs) (fn_code f) = Some i ->
       exec_instr f i rs m = Next rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
@@ -1108,19 +1087,20 @@ Inductive step: state -> trace -> state -> Prop :=
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal f) ->
       find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some (Pbuiltin ef args res) ->
-      eval_builtin_args ge rs (rs RSP) m args vargs ->
+      eval_builtin_args ge rs (rs SP) m args vargs ->
       external_call ef ge vargs m t vres m' ->
-      rs' = nextinstr_nf
-             (set_res res vres
-               (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      rs' = nextinstr
+              (set_res res vres
+                (undef_regs (map preg_of (destroyed_by_builtin ef))
+                   (rs #X1 <- Vundef #X31 <- Vundef))) ->
       step (State rs m) t (State rs' m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
       rs PC = Vptr b Ptrofs.zero ->
       Genv.find_funct_ptr ge b = Some (External ef) ->
-      extcall_arguments rs m (ef_sig ef) args ->
       external_call ef ge args m t res m' ->
-      rs' = (set_pair (loc_external_result (ef_sig ef)) res (undef_caller_save_regs rs)) #PC <- (rs RA) ->
+      extcall_arguments rs m (ef_sig ef) args ->
+      rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
       step (State rs m) t (State rs' m').
 
 End RELSEM.
@@ -1129,19 +1109,19 @@ End RELSEM.
 
 Inductive initial_state (p: program): state -> Prop :=
   | initial_state_intro: forall m0,
-      Genv.init_mem p = Some m0 ->
       let ge := Genv.globalenv p in
       let rs0 :=
         (Pregmap.init Vundef)
         # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero)
-        # RA <- Vnullptr
-        # RSP <- Vnullptr in
+        # SP <- Vnullptr
+        # RA <- Vnullptr in
+      Genv.init_mem p = Some m0 ->
       initial_state p (State rs0 m0).
 
 Inductive final_state: state -> int -> Prop :=
   | final_state_intro: forall rs m r,
-      rs#PC = Vnullptr ->
-      rs#RAX = Vint r ->
+      rs PC = Vnullptr ->
+      rs X10 = Vint r ->
       final_state (State rs m) r.
 
 Definition semantics (p: program) :=
@@ -1159,7 +1139,7 @@ Proof.
   { intros. inv H; inv H0; congruence. }
   assert (B: forall p v1 v2,
              extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
-  { intros. inv H; inv H0.
+  { intros. inv H; inv H0. 
     eapply A; eauto.
     f_equal; eapply A; eauto. }
   assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
@@ -1182,17 +1162,17 @@ Ltac Equalities :=
   intros; constructor; simpl; intros.
 - (* determ *)
   inv H; inv H0; Equalities.
-+ split. constructor. auto.
-+ discriminate.
-+ discriminate.
-+ assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0.
+  split. constructor. auto.
+  discriminate.
+  discriminate.
+  assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0.
   exploit external_call_determ. eexact H5. eexact H11. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
-+ assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
-  exploit external_call_determ. eexact H4. eexact H9. intros [A B].
+  assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
+  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
 - (* trace length *)
-  red; intros; inv H; simpl.
+  red; intros. inv H; simpl.
   lia.
   eapply external_call_trace_length; eauto.
   eapply external_call_trace_length; eauto.
@@ -1201,7 +1181,8 @@ Ltac Equalities :=
 - (* final no step *)
   assert (NOTNULL: forall b ofs, Vnullptr <> Vptr b ofs).
   { intros; unfold Vnullptr; destruct Archi.ptr64; congruence. }
-  inv H. red; intros; red; intros. inv H; rewrite H0 in *; eelim NOTNULL; eauto.
+  inv H. unfold Vzero in H0. red; intros; red; intros.
+  inv H; rewrite H0 in *; eelim NOTNULL; eauto.
 - (* final states *)
   inv H; inv H0. congruence.
 Qed.
@@ -1210,11 +1191,9 @@ Qed.
 
 Definition data_preg (r: preg) : bool :=
   match r with
-  | PC => false
-  | IR _ => true
-  | FR _ => true
-  | ST0 => true
-  | CR _ => false
-  | RA => false
+  | IR RA  => false
+  | IR X31 => false
+  | IR _   => true
+  | FR _   => true
+  | PC     => false
   end.
-
diff --git a/verilog/AsmToJSON.ml b/verilog/AsmToJSON.ml
index 59cc7d40..8a6a97a7 100644
--- a/verilog/AsmToJSON.ml
+++ b/verilog/AsmToJSON.ml
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(* Simple functions to serialize ia32 Asm to JSON *)
+(* Simple functions to serialize RISC-V Asm to JSON *)
 
 (* Dummy function *)
 let destination: string option ref = ref None
diff --git a/verilog/AsmToJSON.mli b/verilog/AsmToJSON.mli
deleted file mode 100644
index 52c055c4..00000000
--- a/verilog/AsmToJSON.mli
+++ /dev/null
@@ -1,19 +0,0 @@
-(* *********************************************************************)
-(*                                                                     *)
-(*              The Compcert verified compiler                         *)
-(*                                                                     *)
-(*          Bernhard Schommer, AbsInt Angewandte Informatik GmbH       *)
-(*                                                                     *)
-(*  AbsInt Angewandte Informatik GmbH. All rights reserved. This file  *)
-(*  is distributed under the terms of the INRIA Non-Commercial         *)
-(*  License Agreement.                                                 *)
-(*                                                                     *)
-(* *********************************************************************)
-
-val pp_mnemonics: Format.formatter -> unit
-
-val print_if:  (Asm.coq_function AST.fundef, 'a) AST.program  -> string -> unit
-
-val destination: string option ref
-
-val sdump_folder : string ref
diff --git a/verilog/Asmexpand.ml b/verilog/Asmexpand.ml
index e2c556c7..50dc20be 100644
--- a/verilog/Asmexpand.ml
+++ b/verilog/Asmexpand.ml
@@ -4,87 +4,139 @@
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*          Bernhard Schommer, AbsInt Angewandte Informatik GmbH       *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (* Expanding built-ins and some pseudo-instructions by rewriting
-   of the IA32 assembly code.  *)
+   of the RISC-V assembly code. *)
 
 open Asm
 open Asmexpandaux
 open AST
 open Camlcoq
-open Datatypes
+open! Integers
+open Locations
 
 exception Error of string
 
 (* Useful constants and helper functions *)
 
-let _0 = Integers.Int.zero
-let _1 = Integers.Int.one
-let _2 = coqint_of_camlint 2l
-let _4 = coqint_of_camlint 4l
-let _8 = coqint_of_camlint 8l
-
-let _0z = Z.zero
-let _1z = Z.one
-let _2z = Z.of_sint 2
-let _4z = Z.of_sint 4
-let _8z = Z.of_sint 8
-let _16z = Z.of_sint 16
-
-let stack_alignment () = 16
-
-(* Pseudo instructions for 32/64 bit compatibility *)
-
-let _Plea (r, addr) =
-  if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
-
-(* SP adjustment to allocate or free a stack frame. *)
-
-let align n a =
-  if n >= 0 then (n + a - 1) land (-a) else n land (-a)
-
-let sp_adjustment_32 sz =
-  let sz = Z.to_int sz in
-  (* Preserve proper alignment of the stack *)
-  let sz = align sz (stack_alignment ()) in
-  (* The top 4 bytes have already been allocated by the "call" instruction. *)
-  sz - 4
-
-let sp_adjustment_elf64 sz =
-  let sz = Z.to_int sz in
-  if is_current_function_variadic() then begin
-    (* If variadic, add room for register save area, which must be 16-aligned *)
-    let ofs = align (sz - 8) 16 in
-    let sz = ofs + 176 (* save area *) + 8 (* return address *) in
-    (* Preserve proper alignment of the stack *)
-    let sz = align sz 16 in
-    (* The top 8 bytes have already been allocated by the "call" instruction. *)
-    (sz - 8, ofs)
+let _0  = Integers.Int.zero
+let _1  = Integers.Int.one
+let _2  = coqint_of_camlint 2l
+let _4  = coqint_of_camlint 4l
+let _8  = coqint_of_camlint 8l
+let _16  = coqint_of_camlint 16l
+let _m1 = coqint_of_camlint (-1l)
+
+let wordsize = if Archi.ptr64 then 8 else 4
+
+let align n a = (n + a - 1) land (-a)
+
+(* Emit instruction sequences that set or offset a register by a constant. *)
+
+let expand_loadimm32 dst n =
+  List.iter emit (Asmgen.loadimm32 dst n [])
+let expand_addptrofs dst src n =
+  List.iter emit (Asmgen.addptrofs dst src n [])
+let expand_storeind_ptr src base ofs =
+  List.iter emit (Asmgen.storeind_ptr src base ofs [])
+
+(* Fix-up code around function calls and function entry.
+   Some floating-point arguments residing in FP registers need to be
+   moved to integer registers or register pairs.
+   Symmetrically, some floating-point parameter passed in integer
+   registers or register pairs need to be moved to FP registers. *)
+
+let int_param_regs = [| X10; X11; X12; X13; X14; X15; X16; X17 |]
+
+let move_single_arg fr i =
+  emit (Pfmvxs(int_param_regs.(i), fr))
+
+let move_double_arg fr i =
+  if Archi.ptr64 then begin
+    emit (Pfmvxd(int_param_regs.(i), fr))
   end else begin
-    (* Preserve proper alignment of the stack *)
-    let sz = align sz 16 in
-    (* The top 8 bytes have already been allocated by the "call" instruction. *)
-    (sz - 8, -1)
+    emit (Paddiw(X2, X X2, Integers.Int.neg _16));
+    emit (Pfsd(fr, X2, Ofsimm _0));
+    emit (Plw(int_param_regs.(i), X2, Ofsimm _0));
+    if i < 7 then begin
+      emit (Plw(int_param_regs.(i + 1), X2, Ofsimm _4))
+    end else begin
+      emit (Plw(X31, X2, Ofsimm _4));
+      emit (Psw(X31, X2, Ofsimm _16))
+    end;
+    emit (Paddiw(X2, X X2, _16))
   end
 
-let sp_adjustment_win64 sz =
-  let sz = Z.to_int sz in
-  (* Preserve proper alignment of the stack *)
-  let sz = align sz 16 in
-  (* The top 8 bytes have already been allocated by the "call" instruction. *)
-  sz - 8
+let move_single_param fr i =
+  emit (Pfmvsx(fr, int_param_regs.(i)))
+
+let move_double_param fr i =
+  if Archi.ptr64 then begin
+    emit (Pfmvdx(fr, int_param_regs.(i)))
+  end else begin
+    emit (Paddiw(X2, X X2, Integers.Int.neg _16));
+    emit (Psw(int_param_regs.(i), X2, Ofsimm _0));
+    if i < 7 then begin
+      emit (Psw(int_param_regs.(i + 1), X2, Ofsimm _4))
+    end else begin
+      emit (Plw(X31, X2, Ofsimm _16));
+      emit (Psw(X31, X2, Ofsimm _4))
+    end;
+    emit (Pfld(fr, X2, Ofsimm _0));
+    emit (Paddiw(X2, X X2, _16))
+  end
+
+let float_extra_index = function
+  | Machregs.F0 -> Some (F0, 0)
+  | Machregs.F1 -> Some (F1, 1)
+  | Machregs.F2 -> Some (F2, 2)
+  | Machregs.F3 -> Some (F3, 3)
+  | Machregs.F4 -> Some (F4, 4)
+  | Machregs.F5 -> Some (F5, 5)
+  | Machregs.F6 -> Some (F6, 6)
+  | Machregs.F7 -> Some (F7, 7)
+  | _  -> None
+
+let fixup_gen single double sg =
+  let fixup ty loc =
+    match ty, loc with
+    | Tsingle, One (R r) ->
+        begin match float_extra_index r with
+        | Some(r, i) -> single r i
+        | None -> ()
+        end
+    | (Tfloat | Tany64), One (R r) ->
+        begin match float_extra_index r with
+        | Some(r, i) -> double r i
+        | None -> ()
+        end
+    | _, _ -> ()
+  in
+    List.iter2 fixup sg.sig_args (Conventions1.loc_arguments sg)
+
+let fixup_call sg =
+  fixup_gen move_single_arg move_double_arg sg
+
+let fixup_function_entry sg =
+  fixup_gen move_single_param move_double_param sg
 
 (* Built-ins.  They come in two flavors:
    - annotation statements: take their arguments in registers or stack
-   locations; generate no code;
+     locations; generate no code;
    - inlined by the compiler: take their arguments in arbitrary
-   registers; preserve all registers except ECX, EDX, XMM6 and XMM7. *)
+     registers.
+*)
 
 (* Handling of annotations *)
 
@@ -92,401 +144,508 @@ let expand_annot_val kind txt targ args res =
   emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
   match args, res with
   | [BA(IR src)], BR(IR dst) ->
-     if dst <> src then emit (Pmov_rr (dst,src))
+     if dst <> src then emit (Pmv (dst, src))
   | [BA(FR src)], BR(FR dst) ->
-     if dst <> src then emit (Pmovsd_ff (dst,src))
+     if dst <> src then emit (Pfmv (dst, src))
   | _, _ ->
-     raise (Error "ill-formed __builtin_annot_intval")
-
-(* Operations on addressing modes *)
-
-let offset_addressing (Addrmode(base, ofs, cst)) delta =
-  Addrmode(base, ofs,
-           match cst with
-           | Coq_inl n -> Coq_inl(Z.add n delta)
-           | Coq_inr(id, n) -> Coq_inr(id, Integers.Ptrofs.add n delta))
-
-let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs)
-let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs))
-
-(* Translate a builtin argument into an addressing mode *)
-
-let addressing_of_builtin_arg = function
-  | BA (IR r) -> linear_addr r Z.zero
-  | BA_addrstack ofs -> linear_addr RSP (Integers.Ptrofs.unsigned ofs)
-  | BA_addrglobal(id, ofs) -> global_addr id ofs
-  | BA_addptr(BA (IR r), BA_int n) -> linear_addr r (Integers.Int.signed n)
-  | BA_addptr(BA (IR r), BA_long n) -> linear_addr r (Integers.Int64.signed n)
-  | _ -> assert false
+     raise (Error "ill-formed __builtin_annot_val")
 
 (* Handling of memcpy *)
 
-(* Unaligned memory accesses are quite fast on IA32, so use large
-   memory accesses regardless of alignment. *)
+(* Unaligned accesses are slow on RISC-V, so don't use them *)
+
+let offset_in_range ofs =
+  let ofs = Z.to_int64 ofs in -2048L <= ofs && ofs < 2048L
+  
+let memcpy_small_arg sz arg tmp =
+  match arg with
+  | BA (IR r) ->
+      (r, _0)
+  | BA_addrstack ofs ->
+      if offset_in_range ofs
+      && offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
+      then (X2, ofs)
+      else begin expand_addptrofs tmp X2 ofs; (tmp, _0) end
+  | _ ->
+      assert false
 
 let expand_builtin_memcpy_small sz al src dst =
-  let rec copy src dst sz =
-    if sz >= 8 && Archi.ptr64 then begin
-	emit (Pmovq_rm (RCX, src));
-	emit (Pmovq_mr (dst, RCX));
-        copy (offset_addressing src _8z) (offset_addressing dst _8z) (sz - 8)
-    end else if sz >= 8 && !Clflags.option_ffpu then begin
-	emit (Pmovsq_rm (XMM7, src));
-	emit (Pmovsq_mr (dst, XMM7));
-        copy (offset_addressing src _8z) (offset_addressing dst _8z) (sz - 8)
-      end else if sz >= 4 then begin
-	emit (Pmovl_rm (RCX, src));
-	emit (Pmovl_mr (dst, RCX));
-        copy (offset_addressing src _4z) (offset_addressing dst _4z) (sz - 4)
-      end else if sz >= 2 then begin
-	emit (Pmovw_rm (RCX, src));
-	emit (Pmovw_mr (dst, RCX));
-        copy (offset_addressing src _2z) (offset_addressing dst _2z) (sz - 2)
-      end else if sz >= 1 then begin
-	emit (Pmovb_rm (RCX, src));
-	emit (Pmovb_mr (dst, RCX));
-        copy (offset_addressing src _1z) (offset_addressing dst _1z) (sz - 1)
-      end in
-  copy (addressing_of_builtin_arg src) (addressing_of_builtin_arg dst) sz
+  let (tsrc, tdst) =
+    if dst <> BA (IR X5) then (X5, X6) else (X6, X5) in
+  let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
+  let (rdst, odst) = memcpy_small_arg sz dst tdst in
+  let rec copy osrc odst sz =
+    if sz >= 8 && al >= 8 then
+      begin
+        emit (Pfld (F0, rsrc, Ofsimm osrc));
+        emit (Pfsd (F0, rdst, Ofsimm odst));
+        copy (Ptrofs.add osrc _8) (Ptrofs.add odst _8) (sz - 8)
+      end
+    else if sz >= 4 && al >= 4 then
+      begin
+        emit (Plw (X31, rsrc, Ofsimm osrc));
+        emit (Psw (X31, rdst, Ofsimm odst));
+        copy (Ptrofs.add osrc _4) (Ptrofs.add odst _4) (sz - 4)
+      end
+    else if sz >= 2 && al >= 2 then
+      begin
+        emit (Plh (X31, rsrc, Ofsimm osrc));
+        emit (Psh (X31, rdst, Ofsimm odst));
+        copy (Ptrofs.add osrc _2) (Ptrofs.add odst _2) (sz - 2)
+      end
+    else if sz >= 1 then
+      begin
+        emit (Plb (X31, rsrc, Ofsimm osrc));
+        emit (Psb (X31, rdst, Ofsimm odst));
+        copy (Ptrofs.add osrc _1) (Ptrofs.add odst _1) (sz - 1)
+      end
+  in copy osrc odst sz
+
+let memcpy_big_arg sz arg tmp =
+  match arg with
+  | BA (IR r) -> if r <> tmp then emit (Pmv(tmp, r))
+  | BA_addrstack ofs ->
+      expand_addptrofs tmp X2 ofs
+  | _ ->
+      assert false
 
 let expand_builtin_memcpy_big sz al src dst =
-  if src <> BA (IR RSI) then emit (_Plea (RSI, addressing_of_builtin_arg src));
-  if dst <> BA (IR RDI) then emit (_Plea (RDI, addressing_of_builtin_arg dst));
-  (* TODO: movsq? *)
-  emit (Pmovl_ri (RCX,coqint_of_camlint (Int32.of_int (sz / 4))));
-  emit Prep_movsl;
-  if sz mod 4 >= 2 then emit Pmovsw;
-  if sz mod 2 >= 1 then emit Pmovsb
-
-let expand_builtin_memcpy sz al args =
-  let (dst, src) = match args with [d; s] -> (d, s) | _ -> assert false in
+  assert (sz >= al);
+  assert (sz mod al = 0);
+  let (s, d) =
+    if dst <> BA (IR X5) then (X5, X6) else (X6, X5) in
+  memcpy_big_arg sz src s;
+  memcpy_big_arg sz dst d;
+  (* Use X7 as loop count, X1 and F0 as ld/st temporaries. *)
+  let (load, store, chunksize) =
+    if al >= 8 then
+      (Pfld (F0, s, Ofsimm _0), Pfsd (F0, d, Ofsimm _0), 8)
+    else if al >= 4 then
+      (Plw (X31, s, Ofsimm _0), Psw (X31, d, Ofsimm _0), 4)
+    else if al = 2 then
+      (Plh (X31, s, Ofsimm _0), Psh (X31, d, Ofsimm _0), 2)
+    else
+      (Plb (X31, s, Ofsimm _0), Psb (X31, d, Ofsimm _0), 1) in
+  expand_loadimm32 X7 (Z.of_uint (sz / chunksize));
+  let delta = Z.of_uint chunksize in
+  let lbl = new_label () in
+  emit (Plabel lbl);
+  emit load;
+  expand_addptrofs s s delta;
+  emit (Paddiw(X7, X X7, _m1));
+  emit store;
+  expand_addptrofs d d delta;
+  emit (Pbnew (X X7, X0, lbl))
+
+let expand_builtin_memcpy  sz al args =
+  let (dst, src) =
+    match args with [d; s] -> (d, s) | _ -> assert false in
   if sz <= 32
   then expand_builtin_memcpy_small sz al src dst
   else expand_builtin_memcpy_big sz al src dst
 
 (* Handling of volatile reads and writes *)
 
-let expand_builtin_vload_common chunk addr res =
+let expand_builtin_vload_common chunk base ofs res =
   match chunk, res with
   | Mint8unsigned, BR(IR res) ->
-     emit (Pmovzb_rm (res,addr))
+     emit (Plbu (res, base, Ofsimm ofs))
   | Mint8signed, BR(IR res) ->
-     emit (Pmovsb_rm (res,addr))
+     emit (Plb  (res, base, Ofsimm ofs))
   | Mint16unsigned, BR(IR res) ->
-     emit (Pmovzw_rm (res,addr))
+     emit (Plhu (res, base, Ofsimm ofs))
   | Mint16signed, BR(IR res) ->
-     emit (Pmovsw_rm (res,addr))
+     emit (Plh  (res, base, Ofsimm ofs))
   | Mint32, BR(IR res) ->
-     emit (Pmovl_rm (res,addr))
+     emit (Plw  (res, base, Ofsimm ofs))
   | Mint64, BR(IR res) ->
-     emit (Pmovq_rm (res,addr))
+     emit (Pld  (res, base, Ofsimm ofs))
   | Mint64, BR_splitlong(BR(IR res1), BR(IR res2)) ->
-     let addr' = offset_addressing addr _4z in
-     if not (Asmgen.addressing_mentions addr res2) then begin
-	 emit (Pmovl_rm (res2,addr));
-	 emit (Pmovl_rm (res1,addr'))
+     let ofs' = Ptrofs.add ofs _4 in
+     if base <> res2 then begin
+         emit (Plw (res2, base, Ofsimm ofs));
+         emit (Plw (res1, base, Ofsimm ofs'))
        end else begin
-	 emit (Pmovl_rm (res1,addr'));
-	 emit (Pmovl_rm (res2,addr))
+         emit (Plw (res1, base, Ofsimm ofs'));
+         emit (Plw (res2, base, Ofsimm ofs))
        end
   | Mfloat32, BR(FR res) ->
-     emit (Pmovss_fm (res,addr))
+     emit (Pfls (res, base, Ofsimm ofs))
   | Mfloat64, BR(FR res) ->
-     emit (Pmovsd_fm (res,addr))
+     emit (Pfld (res, base, Ofsimm ofs))
   | _ ->
      assert false
 
 let expand_builtin_vload chunk args res =
   match args with
-  | [addr] ->
-     expand_builtin_vload_common chunk (addressing_of_builtin_arg addr) res
+  | [BA(IR addr)] ->
+      expand_builtin_vload_common chunk addr _0 res
+  | [BA_addrstack ofs] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vload_common chunk X2 ofs res
+      else begin
+        expand_addptrofs X31 X2 ofs; (* X31 <- sp + ofs *)
+        expand_builtin_vload_common chunk X31 _0 res
+      end
+  | [BA_addptr(BA(IR addr), (BA_int ofs | BA_long ofs))] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vload_common chunk addr ofs res
+      else begin
+        expand_addptrofs X31 addr ofs; (* X31 <- addr + ofs *)
+        expand_builtin_vload_common chunk X31 _0 res
+      end
   | _ ->
-     assert false
+      assert false
 
-let expand_builtin_vstore_common chunk addr src tmp =
+let expand_builtin_vstore_common chunk base ofs src =
   match chunk, src with
   | (Mint8signed | Mint8unsigned), BA(IR src) ->
-     if Archi.ptr64 || Asmgen.low_ireg src then
-       emit (Pmovb_mr (addr,src))
-     else begin
-       emit (Pmov_rr (tmp,src));
-       emit (Pmovb_mr (addr,tmp))
-     end
+     emit (Psb (src, base, Ofsimm ofs))
   | (Mint16signed | Mint16unsigned), BA(IR src) ->
-     emit (Pmovw_mr (addr,src))
+     emit (Psh (src, base, Ofsimm ofs))
   | Mint32, BA(IR src) ->
-     emit (Pmovl_mr (addr,src))
+     emit (Psw (src, base, Ofsimm ofs))
   | Mint64, BA(IR src) ->
-     emit (Pmovq_mr (addr,src))
+     emit (Psd (src, base, Ofsimm ofs))
   | Mint64, BA_splitlong(BA(IR src1), BA(IR src2)) ->
-     let addr' = offset_addressing addr _4z in
-     emit (Pmovl_mr (addr,src2));
-     emit (Pmovl_mr (addr',src1))
+     let ofs' = Ptrofs.add ofs _4 in
+     emit (Psw (src2, base, Ofsimm ofs));
+     emit (Psw (src1, base, Ofsimm ofs'))
   | Mfloat32, BA(FR src) ->
-     emit (Pmovss_mf (addr,src))
+     emit (Pfss (src, base, Ofsimm ofs))
   | Mfloat64, BA(FR src) ->
-     emit (Pmovsd_mf (addr,src))
+     emit (Pfsd (src, base, Ofsimm ofs))
   | _ ->
      assert false
 
 let expand_builtin_vstore chunk args =
   match args with
-  | [addr; src] ->
-     let addr = addressing_of_builtin_arg addr in
-     expand_builtin_vstore_common chunk addr src
-       (if Asmgen.addressing_mentions addr RAX then RCX else RAX)
-  | _ -> assert false
+  | [BA(IR addr); src] ->
+      expand_builtin_vstore_common chunk addr _0 src
+  | [BA_addrstack ofs; src] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vstore_common chunk X2 ofs src
+      else begin
+        expand_addptrofs X31 X2 ofs; (* X31 <- sp + ofs *)
+        expand_builtin_vstore_common chunk X31 _0 src
+      end
+  | [BA_addptr(BA(IR addr), (BA_int ofs | BA_long ofs)); src] ->
+      if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
+        expand_builtin_vstore_common chunk addr ofs src
+      else begin
+        expand_addptrofs X31 addr ofs; (* X31 <- addr + ofs *)
+        expand_builtin_vstore_common chunk X31 _0 src
+      end
+  | _ ->
+      assert false
 
 (* Handling of varargs *)
 
-let rec next_arg_locations ir fr ofs = function
-  | [] ->
-      (ir, fr, ofs)
-  | (Tint | Tlong | Tany32 | Tany64) :: l ->
-      if ir < 6
-      then next_arg_locations (ir + 1) fr ofs l
-      else next_arg_locations ir fr (ofs + 8) l
-  | (Tfloat | Tsingle) :: l ->
-      if fr < 8
-      then next_arg_locations ir (fr + 1) ofs l
-      else next_arg_locations ir fr (ofs + 8) l
-
-let current_function_stacksize = ref 0L
-
-let expand_builtin_va_start_32 r =
-  if not (is_current_function_variadic ()) then
-    invalid_arg "Fatal error: va_start used in non-vararg function";
-  let ofs =
-    Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
-               (mul 4l (Z.to_int32 (Conventions.size_arguments
-                                      (get_current_function_sig ()))))) in
-  emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
-  emit (Pmovl_mr (linear_addr r _0z, RAX))
-
-let expand_builtin_va_start_elf64 r =
-  if not (is_current_function_variadic ()) then
-    invalid_arg "Fatal error: va_start used in non-vararg function";
-  let (ir, fr, ofs) =
-    next_arg_locations 0 0 0 (get_current_function_args ()) in
-  (* [r] points to the following struct:
-       struct {
-         unsigned int gp_offset;
-         unsigned int fp_offset;
-         void *overflow_arg_area;
-         void *reg_save_area;
-       }
-     gp_offset is initialized to ir * 8
-     fp_offset is initialized to  6 * 8 + fr * 16
-     overflow_arg_area is initialized to sp + current stacksize + ofs
-     reg_save_area is initialized to
-         sp + current stacksize - 16 - save area size (6 * 8 + 8 * 16) *)
-  let gp_offset = Int32.of_int (ir * 8)
-  and fp_offset = Int32.of_int (6 * 8 + fr * 16)
-  and overflow_arg_area = Int64.(add !current_function_stacksize (of_int ofs))
-  and reg_save_area = Int64.(sub !current_function_stacksize 192L) in
-  assert (r <> RAX);
-  emit (Pmovl_ri (RAX, coqint_of_camlint gp_offset));
-  emit (Pmovl_mr (linear_addr r _0z, RAX));
-  emit (Pmovl_ri (RAX, coqint_of_camlint fp_offset));
-  emit (Pmovl_mr (linear_addr r _4z, RAX));
-  emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 overflow_arg_area)));
-  emit (Pmovq_mr (linear_addr r _8z, RAX));
-  emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
-  emit (Pmovq_mr (linear_addr r _16z, RAX))
-
-let expand_builtin_va_start_win64 r =
-  if not (is_current_function_variadic ()) then
-    invalid_arg "Fatal error: va_start used in non-vararg function";
-  let num_args =
-    List.length (get_current_function_args()) in
-  let ofs =
-    Int64.(add !current_function_stacksize
-               (mul 8L (of_int num_args))) in
-  emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 ofs)));
-  emit (Pmovq_mr (linear_addr r _0z, RAX))
-
-(* FMA operations *)
-
-(*   vfmadd<i><j><k> r1, r2, r3   performs r1 := ri * rj + rk
-   hence
-     vfmadd132 r1, r2, r3    performs  r1 := r1 * r3 + r2
-     vfmadd213 r1, r2, r3    performs  r1 := r2 * r1 + r3
-     vfmadd231 r1, r2, r3    performs  r1 := r2 * r3 + r1
+(* Number of integer registers, FP registers, and stack words
+   used to pass the (fixed) arguments to a function. *)
+
+let arg_int_size ri rf ofs k =
+  if ri < 8
+  then k (ri + 1) rf ofs
+  else k ri rf (ofs + 1)
+
+let arg_single_size ri rf ofs k =
+  if rf < 8
+  then k ri (rf + 1) ofs
+  else arg_int_size ri rf ofs k
+
+let arg_long_size ri rf ofs k =
+  if Archi.ptr64 then
+    if ri < 8
+    then k (ri + 1) rf ofs
+    else k ri rf (ofs + 1)
+  else
+    if ri < 7 then k (ri + 2) rf ofs
+    else if ri = 7 then k (ri + 1) rf (ofs + 1)
+    else k ri rf (align ofs 2 + 2)
+
+let arg_double_size ri rf ofs k =
+  if rf < 8
+  then k ri (rf + 1) ofs
+  else arg_long_size ri rf ofs k
+
+let rec args_size l ri rf ofs =
+  match l with
+  | [] -> (ri, rf, ofs)
+  | (Tint | Tany32) :: l ->
+      arg_int_size ri rf ofs (args_size l)
+  | Tsingle :: l ->
+      arg_single_size ri rf ofs (args_size l)
+  | Tlong :: l ->
+      arg_long_size ri rf ofs (args_size l)
+  | (Tfloat | Tany64) :: l ->
+      arg_double_size ri rf ofs (args_size l)
+
+(* Size in words of the arguments to a function.  This includes both
+   arguments passed in integer registers and arguments passed on stack,
+   but not arguments passed in FP registers. *)
+
+let arguments_size sg =
+  let (ri, _, ofs) = args_size sg.sig_args 0 0 0 in
+  ri + ofs
+
+let save_arguments first_reg base_ofs =
+  for i = first_reg to 7 do
+    expand_storeind_ptr
+      int_param_regs.(i)
+      X2
+      (Ptrofs.repr (Z.add base_ofs (Z.of_uint ((i - first_reg) * wordsize))))
+  done
+
+let vararg_start_ofs : Z.t option ref = ref None
+
+let expand_builtin_va_start r =
+  match !vararg_start_ofs with
+  | None ->
+      invalid_arg "Fatal error: va_start used in non-vararg function"
+  | Some ofs ->
+      expand_addptrofs X31 X2 (Ptrofs.repr ofs);
+      expand_storeind_ptr X31 r Ptrofs.zero
+
+(* Auxiliary for 64-bit integer arithmetic built-ins.  They expand to
+   two instructions, one computing the low 32 bits of the result,
+   followed by another computing the high 32 bits.  In cases where
+   the first instruction would overwrite arguments to the second
+   instruction, we must go through X31 to hold the low 32 bits of the result.
 *)
 
-let expand_fma args res i132 i213 i231 =
-  match args, res with
-  | [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
-      if res = a1 then emit (i132 a1 a3 a2)       (* a1 * a2 + a3 *)
-      else if res = a2 then emit (i213 a2 a1 a3)  (* a1 * a2 + a3 *)
-      else if res = a3 then emit (i231 a3 a1 a2)  (* a1 * a2 + a3 *)
-      else begin
-        emit (Pmovsd_ff(res, a3));
-        emit (i231 res a1 a2)                     (* a1 * a2 + res *)
-      end
-  | _ ->
-     invalid_arg ("ill-formed fma builtin")
+let expand_int64_arith conflict rl fn =
+  if conflict then (fn X31; emit (Pmv(rl, X31))) else fn rl
+
+(* Byte swaps.  There are no specific instructions, so we use standard,
+   not-very-efficient formulas. *)
+
+let expand_bswap16 d s =
+  (* d = (s & 0xFF) << 8 | (s >> 8) & 0xFF *)
+  emit (Pandiw(X31, X s, coqint_of_camlint 0xFFl));
+  emit (Pslliw(X31, X X31, _8));
+  emit (Psrliw(d, X s, _8));
+  emit (Pandiw(d, X d, coqint_of_camlint 0xFFl));
+  emit (Porw(d, X X31, X d))
+
+let expand_bswap32 d s =
+  (* d = (s << 24)
+       | (((s >> 8) & 0xFF) << 16)
+       | (((s >> 16) & 0xFF) << 8)
+       | (s >> 24)  *)
+  emit (Pslliw(X1, X s, coqint_of_camlint 24l));
+  emit (Psrliw(X31, X s, _8));
+  emit (Pandiw(X31, X X31, coqint_of_camlint 0xFFl));
+  emit (Pslliw(X31, X X31, _16));
+  emit (Porw(X1, X X1, X X31));
+  emit (Psrliw(X31, X s, _16));
+  emit (Pandiw(X31, X X31, coqint_of_camlint 0xFFl));
+  emit (Pslliw(X31, X X31, _8));
+  emit (Porw(X1, X X1, X X31));
+  emit (Psrliw(X31, X s, coqint_of_camlint 24l));
+  emit (Porw(d, X X1, X X31))
+
+let expand_bswap64 d s =
+  (* d = s << 56
+         | (((s >> 8) & 0xFF) << 48)
+         | (((s >> 16) & 0xFF) << 40)
+         | (((s >> 24) & 0xFF) << 32)
+         | (((s >> 32) & 0xFF) << 24)
+         | (((s >> 40) & 0xFF) << 16)
+         | (((s >> 48) & 0xFF) << 8)
+         | s >> 56 *)
+  emit (Psllil(X1, X s, coqint_of_camlint 56l));
+  List.iter
+    (fun (n1, n2) ->
+      emit (Psrlil(X31, X s, coqint_of_camlint n1));
+      emit (Pandil(X31, X X31, coqint_of_camlint 0xFFl));
+      emit (Psllil(X31, X X31, coqint_of_camlint n2));
+      emit (Porl(X1, X X1, X X31)))
+    [(8l,48l); (16l,40l); (24l,32l); (32l,24l); (40l,16l); (48l,8l)];
+  emit (Psrlil(X31, X s, coqint_of_camlint 56l));
+  emit (Porl(d, X X1, X X31))
+
+(* Count leading zeros.  Algorithm 5-7 from Hacker's Delight,
+   re-rolled as a loop to produce more compact code. *)
+
+let expand_clz ~sixtyfour ~splitlong =
+  (* Input:  X in X5 or (X5, X6) if splitlong
+     Result: N in X7
+     Temporaries: S in X8, Y in X9 *)
+  let lbl1 = new_label() in
+  let lbl2 = new_label() in
+  (* N := bitsize of X's type (32 or 64) *)
+  expand_loadimm32 X7 (coqint_of_camlint
+                         (if sixtyfour || splitlong then 64l else 32l));
+  (* S := initial shift amount (16 or 32) *)                         
+  expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
+  if splitlong then begin
+    (* if (Xhigh == 0) goto lbl1 *)
+    emit (Pbeqw(X X6, X0, lbl1));
+    (* N := 32 *)
+    expand_loadimm32 X7 (coqint_of_camlint 32l);
+    (* X := Xhigh *)
+    emit (Pmv(X5, X6))
+  end;
+  (* lbl1: *)
+  emit (Plabel lbl1);
+  (* Y := X >> S *)
+  emit (if sixtyfour then Psrll(X9, X X5, X X8) else Psrlw(X9, X X5, X X8));
+  (* if (Y == 0) goto lbl2 *)
+  emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
+  (* N := N - S *)
+  emit (Psubw(X7, X X7, X X8));
+  (* X := Y *)
+  emit (Pmv(X5, X9));
+  (* lbl2: *)
+  emit (Plabel lbl2);
+  (* S := S / 2 *)
+  emit (Psrliw(X8, X X8, _1));
+  (* if (S != 0) goto lbl1; *)
+  emit (Pbnew(X X8, X0, lbl1));
+  (* N := N - X *)
+  emit (Psubw(X7, X X7, X X5))
+
+(* Count trailing zeros.  Algorithm 5-14 from Hacker's Delight,
+   re-rolled as a loop to produce more compact code. *)
+
+let expand_ctz ~sixtyfour ~splitlong =
+  (* Input:  X in X6 or (X5, X6) if splitlong
+     Result: N in X7
+     Temporaries: S in X8, Y in X9 *)
+  let lbl1 = new_label() in
+  let lbl2 = new_label() in
+  (* N := bitsize of X's type (32 or 64) *)
+  expand_loadimm32 X7 (coqint_of_camlint
+                         (if sixtyfour || splitlong then 64l else 32l));
+  (* S := initial shift amount (16 or 32) *)                         
+  expand_loadimm32 X8 (coqint_of_camlint (if sixtyfour then 32l else 16l));
+  if splitlong then begin
+    (* if (Xlow == 0) goto lbl1 *)
+    emit (Pbeqw(X X5, X0, lbl1));
+    (* N := 32 *)
+    expand_loadimm32 X7 (coqint_of_camlint 32l);
+    (* X := Xlow *)
+    emit (Pmv(X6, X5))
+  end;
+  (* lbl1: *)
+  emit (Plabel lbl1);
+  (* Y := X >> S *)
+  emit (if sixtyfour then Pslll(X9, X X6, X X8) else Psllw(X9, X X6, X X8));
+  (* if (Y == 0) goto lbl2 *)
+  emit (if sixtyfour then Pbeql(X X9, X0, lbl2) else Pbeqw(X X9, X0, lbl2));
+  (* N := N - S *)
+  emit (Psubw(X7, X X7, X X8));
+  (* X := Y *)
+  emit (Pmv(X6, X9));
+  (* lbl2: *)
+  emit (Plabel lbl2);
+  (* S := S / 2 *)
+  emit (Psrliw(X8, X X8, _1));
+  (* if (S != 0) goto lbl1; *)
+  emit (Pbnew(X X8, X0, lbl1));
+  (* N := N - most significant bit of X *)
+  emit (if sixtyfour then Psrlil(X6, X X6, coqint_of_camlint 63l)
+                     else Psrliw(X6, X X6, coqint_of_camlint 31l));
+  emit (Psubw(X7, X X7, X X6))
 
 (* Handling of compiler-inlined builtins *)
 
 let expand_builtin_inline name args res =
   match name, args, res with
-  (* Integer arithmetic *)
+  (* Synchronization *)
+  | "__builtin_membar", [], _ ->
+     ()
+  | "__builtin_fence", [], _ ->
+     emit Pfence
+  (* Vararg stuff *)
+  | "__builtin_va_start", [BA(IR a)], _ ->
+     expand_builtin_va_start a
+  (* Byte swaps *)
+  | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+     expand_bswap16 res a1
   | ("__builtin_bswap"| "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
-     if a1 <> res then
-       emit (Pmov_rr (res,a1));
-     emit (Pbswap32 res)
+     expand_bswap32 res a1
   | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
-     if a1 <> res then
-       emit (Pmov_rr (res,a1));
-     emit (Pbswap64 res)
+     expand_bswap64 res a1
   | "__builtin_bswap64", [BA_splitlong(BA(IR ah), BA(IR al))],
                          BR_splitlong(BR(IR rh), BR(IR rl)) ->
-     assert (ah = RAX && al = RDX && rh = RDX && rl = RAX);
-     emit (Pbswap32 RAX);
-     emit (Pbswap32 RDX)
-  | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
-     if a1 <> res then
-       emit (Pmov_rr (res,a1));
-     emit (Pbswap16 res)
-  | "__builtin_clz", [BA(IR a1)], BR(IR res) ->
-     emit (Pbsrl (res,a1));
-     emit (Pxorl_ri(res,coqint_of_camlint 31l))
-  | "__builtin_clzl", [BA(IR a1)], BR(IR res) ->
-     if not(Archi.ptr64) then begin
-       emit (Pbsrl (res,a1));
-       emit (Pxorl_ri(res,coqint_of_camlint 31l))
-     end else begin
-       emit (Pbsrq (res,a1));
-       emit (Pxorl_ri(res,coqint_of_camlint 63l))
-     end
-  | "__builtin_clzll", [BA(IR a1)], BR(IR res) ->
-     emit (Pbsrq (res,a1));
-     emit (Pxorl_ri(res,coqint_of_camlint 63l))
-  | "__builtin_clzll", [BA_splitlong(BA (IR ah), BA (IR al))], BR(IR res) ->
-     let lbl1 = new_label() in
-     let lbl2 = new_label() in
-     emit (Ptestl_rr(ah, ah));
-     emit (Pjcc(Cond_e, lbl1));
-     emit (Pbsrl(res, ah));
-     emit (Pxorl_ri(res, coqint_of_camlint 31l));
-     emit (Pjmp_l lbl2);
-     emit (Plabel lbl1);
-     emit (Pbsrl(res, al));
-     emit (Pxorl_ri(res, coqint_of_camlint 63l));
-     emit (Plabel lbl2)
-  | "__builtin_ctz", [BA(IR a1)], BR(IR res) ->
-     emit (Pbsfl (res,a1))
-  | "__builtin_ctzl", [BA(IR a1)], BR(IR res) ->
-     if not(Archi.ptr64) then
-       emit (Pbsfl (res,a1))
-     else
-       emit (Pbsfq (res,a1))
-  | "__builtin_ctzll", [BA(IR a1)], BR(IR res) ->
-     emit (Pbsfq (res,a1))
-  | "__builtin_ctzll", [BA_splitlong(BA (IR ah), BA (IR al))], BR(IR res) ->
-     let lbl1 = new_label() in
-     let lbl2 = new_label() in
-     emit (Ptestl_rr(al, al));
-     emit (Pjcc(Cond_e, lbl1));
-     emit (Pbsfl(res, al));
-     emit (Pjmp_l lbl2);
-     emit (Plabel lbl1);
-     emit (Pbsfl(res, ah));
-     emit (Paddl_ri(res, coqint_of_camlint 32l));
-     emit (Plabel lbl2)
+     assert (ah = X6 && al = X5 && rh = X5 && rl = X6);
+     expand_bswap32 X5 X5;
+     expand_bswap32 X6 X6
+  (* Count zeros *)
+  | "__builtin_clz", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:false ~splitlong:false
+  | "__builtin_clzl", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:Archi.ptr64 ~splitlong:false
+  | "__builtin_clzll", [BA(IR a)], BR(IR res) ->
+     assert (a = X5 && res = X7);
+     expand_clz ~sixtyfour:true ~splitlong:false
+  | "__builtin_clzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
+     assert (al = X5 && ah = X6 && res = X7);
+     expand_clz ~sixtyfour:false ~splitlong:true
+  | "__builtin_ctz", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:false ~splitlong:false
+  | "__builtin_ctzl", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:Archi.ptr64 ~splitlong:false
+  | "__builtin_ctzll", [BA(IR a)], BR(IR res) ->
+     assert (a = X6 && res = X7);
+     expand_ctz ~sixtyfour:true ~splitlong:false
+  | "__builtin_ctzll", [BA_splitlong(BA(IR ah), BA(IR al))], BR(IR res) ->
+     assert (al = X5 && ah = X6 && res = X7);
+     expand_ctz ~sixtyfour:false ~splitlong:true
   (* Float arithmetic *)
   | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
-     emit (Psqrtsd (res,a1))
-  | "__builtin_fmax", [BA(FR a1); BA(FR a2)], BR(FR res) ->
-     if res = a1 then
-       emit (Pmaxsd (res,a2))
-     else if res = a2 then
-       emit (Pmaxsd (res,a1))
-     else begin
-	 emit (Pmovsd_ff (res,a1));
-	 emit (Pmaxsd (res,a2))
-       end
+     emit (Pfsqrtd(res, a1))
+  | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfmaddd(res, a1, a2, a3))
+  | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfmsubd(res, a1, a2, a3))
+  | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfnmaddd(res, a1, a2, a3))
+  | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+      emit (Pfnmsubd(res, a1, a2, a3))
   | "__builtin_fmin", [BA(FR a1); BA(FR a2)], BR(FR res) ->
-     if res = a1 then
-       emit (Pminsd (res,a2))
-     else if res = a2 then
-       emit (Pminsd (res,a1))
-     else begin
-	 emit (Pmovsd_ff (res,a1));
-	 emit (Pminsd (res,a2))
-       end
-  | "__builtin_fmadd",  _, _ ->
-      expand_fma args res
-        (fun r1 r2 r3 -> Pfmadd132(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfmadd213(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfmadd231(r1, r2, r3))
-  | "__builtin_fmsub",  _, _ ->
-      expand_fma args res
-        (fun r1 r2 r3 -> Pfmsub132(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfmsub213(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfmsub231(r1, r2, r3))
-  | "__builtin_fnmadd",  _, _ ->
-      expand_fma args res
-        (fun r1 r2 r3 -> Pfnmadd132(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfnmadd213(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfnmadd231(r1, r2, r3))
-  | "__builtin_fnmsub",  _, _ ->
-      expand_fma args res
-        (fun r1 r2 r3 -> Pfnmsub132(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfnmsub213(r1, r2, r3))
-        (fun r1 r2 r3 -> Pfnmsub231(r1, r2, r3))
-  (* 64-bit integer arithmetic *)
+      emit (Pfmind(res, a1, a2))
+  | "__builtin_fmax", [BA(FR a1); BA(FR a2)], BR(FR res) ->
+      emit (Pfmaxd(res, a1, a2))
+  (* 64-bit integer arithmetic for a 32-bit platform *)
   | "__builtin_negl", [BA_splitlong(BA(IR ah), BA(IR al))],
                       BR_splitlong(BR(IR rh), BR(IR rl)) ->
-     assert (ah = RDX && al = RAX && rh = RDX && rl = RAX);
-     emit (Pnegl RAX);
-     emit (Padcl_ri (RDX,_0));
-     emit (Pnegl RDX)
+     expand_int64_arith (rl = ah) rl
+			(fun rl ->
+                         emit (Psltuw (X1, X0, X al));
+			 emit (Psubw (rl, X0, X al));
+			 emit (Psubw (rh, X0, X ah));
+			 emit (Psubw (rh, X rh, X X1)))
   | "__builtin_addl", [BA_splitlong(BA(IR ah), BA(IR al));
                        BA_splitlong(BA(IR bh), BA(IR bl))],
-                       BR_splitlong(BR(IR rh), BR(IR rl)) ->
-     assert (ah = RDX && al = RAX && bh = RCX && bl = RBX && rh = RDX && rl = RAX);
-     emit (Paddl_rr (RAX,RBX));
-     emit (Padcl_rr (RDX,RCX))
+                      BR_splitlong(BR(IR rh), BR(IR rl)) ->
+     expand_int64_arith (rl = bl || rl = ah || rl = bh) rl
+			(fun rl ->
+			 emit (Paddw (rl, X al, X bl));
+                         emit (Psltuw (X1, X rl, X bl));
+			 emit (Paddw (rh, X ah, X bh));
+			 emit (Paddw (rh, X rh, X X1)))
   | "__builtin_subl", [BA_splitlong(BA(IR ah), BA(IR al));
                        BA_splitlong(BA(IR bh), BA(IR bl))],
-                       BR_splitlong(BR(IR rh), BR(IR rl)) ->
-     assert (ah = RDX && al = RAX && bh = RCX && bl = RBX && rh = RDX && rl = RAX);
-     emit (Psubl_rr (RAX,RBX));
-     emit (Psbbl_rr (RDX,RCX))
+                      BR_splitlong(BR(IR rh), BR(IR rl)) ->
+     expand_int64_arith (rl = ah || rl = bh) rl
+			(fun rl ->
+                         emit (Psltuw (X1, X al, X bl));
+			 emit (Psubw (rl, X al, X bl));
+			 emit (Psubw (rh, X ah, X bh));
+			 emit (Psubw (rh, X rh, X X1)))
   | "__builtin_mull", [BA(IR a); BA(IR b)],
                       BR_splitlong(BR(IR rh), BR(IR rl)) ->
-     assert (a = RAX && b = RDX && rh = RDX && rl = RAX);
-     emit (Pmull_r RDX)
-  (* Memory accesses *)
-  | "__builtin_read16_reversed", [BA(IR a1)], BR(IR res) ->
-     emit (Pmovzw_rm (res, linear_addr a1 _0));
-     emit (Pbswap16 res)
-  | "__builtin_read32_reversed", [BA(IR a1)], BR(IR res) ->
-     emit (Pmovl_rm (res, linear_addr a1 _0));
-     emit (Pbswap32 res)
-  | "__builtin_write16_reversed", [BA(IR a1); BA(IR a2)], _ ->
-     let tmp = if a1 = RCX then RDX else RCX in
-     if a2 <> tmp then
-       emit (Pmov_rr (tmp,a2));
-     emit (Pbswap16 tmp);
-     emit (Pmovw_mr (linear_addr a1 _0z, tmp))
-  | "__builtin_write32_reversed", [BA(IR a1); BA(IR a2)], _ ->
-     let tmp = if a1 = RCX then RDX else RCX in
-     if a2 <> tmp then
-       emit (Pmov_rr (tmp,a2));
-     emit (Pbswap32 tmp);
-     emit (Pmovl_mr (linear_addr a1 _0z, tmp))
-  (* Vararg stuff *)
-  | "__builtin_va_start", [BA(IR a)], _ ->
-     assert (a = RDX);
-     if Archi.win64 then expand_builtin_va_start_win64 a
-     else if Archi.ptr64 then expand_builtin_va_start_elf64 a
-     else expand_builtin_va_start_32 a
-  (* Synchronization *)
-  | "__builtin_membar", [], _ ->
-     ()
+     expand_int64_arith (rl = a || rl = b) rl
+                        (fun rl ->
+                          emit (Pmulw (rl, X a, X b));
+                          emit (Pmulhuw (rh, X a, X b)))
   (* No operation *)
   | "__builtin_nop", [], _ ->
      emit Pnop
@@ -497,204 +656,173 @@ let expand_builtin_inline name args res =
   | _ ->
      raise (Error ("unrecognized builtin " ^ name))
 
-(* Calls to variadic functions for x86-64 ELF: register AL must contain
-   the number of XMM registers used for parameter passing.  To be on
-   the safe side, do the same if the called function is
-   unprototyped. *)
-
-let fixup_funcall_elf64 sg =
-  if sg.sig_cc.cc_vararg <> None || sg.sig_cc.cc_unproto then begin
-    let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
-    emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
-  end
-
-(* Calls to variadic functions for x86-64 Windows:
-   FP arguments passed in FP registers must also be passed in integer
-   registers.
-*)
-
-let rec copy_fregs_to_iregs args fr ir =
-  match (ir, fr, args) with
-  | (i1 :: ir, f1 :: fr, (Tfloat | Tsingle) :: args) ->
-      emit (Pmovq_rf (i1, f1));
-      copy_fregs_to_iregs args fr ir
-  | (i1 :: ir, f1 :: fr, _ :: args) ->
-      copy_fregs_to_iregs args fr ir
-  | _ ->
-      ()
-
-let fixup_funcall_win64 sg =
-  if sg.sig_cc.cc_vararg <> None then
-    copy_fregs_to_iregs sg.sig_args [XMM0; XMM1; XMM2; XMM3] [RCX; RDX; R8; R9]
-
-let fixup_funcall sg =
-  if Archi.ptr64
-  then if Archi.win64
-       then fixup_funcall_win64 sg
-       else fixup_funcall_elf64 sg
-  else ()
-
 (* Expansion of instructions *)
-
+                                 
 let expand_instruction instr =
   match instr with
-  | Pallocframe (sz, ofs_ra, ofs_link) ->
-     if Archi.win64 then begin
-       let sz = sp_adjustment_win64 sz in
-       if is_current_function_variadic() then
-         (* Save parameters passed in registers in reserved stack area *)
-         emit (Pcall_s (intern_string "__compcert_va_saveregs",
-                        {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}));
-       (* Allocate frame *)
-       let sz' = Z.of_uint sz in
-       emit (Psubl_ri (RSP, sz'));
-       emit (Pcfi_adjust sz');
-       (* Stack chaining *)
-       let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in
-       let addr2 = linear_addr RSP ofs_link in
-       emit (Pleaq (RAX,addr1));
-       emit (Pmovq_mr (addr2, RAX));
-       current_function_stacksize := Int64.of_int (sz + 8)
-     end else if Archi.ptr64 then begin
-       let (sz, save_regs) = sp_adjustment_elf64 sz in
-       (* Allocate frame *)
-       let sz' = Z.of_uint sz in
-       emit (Psubq_ri (RSP, sz'));
-       emit (Pcfi_adjust sz');
-       if save_regs >= 0 then begin
-         (* Save the registers *)
-         emit (Pleaq (R10, linear_addr RSP (Z.of_uint save_regs)));
-         emit (Pcall_s (intern_string "__compcert_va_saveregs",
-                        {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}))
-       end;
-       (* Stack chaining *)
-       let fullsz = sz + 8 in
-       let addr1 = linear_addr RSP (Z.of_uint fullsz) in
-       let addr2 = linear_addr RSP ofs_link in
-       emit (Pleaq (RAX, addr1));
-       emit (Pmovq_mr (addr2, RAX));
-       current_function_stacksize := Int64.of_int fullsz
-     end else begin
-       let sz = sp_adjustment_32 sz in
-       (* Allocate frame *)
-       let sz' = Z.of_uint sz in
-       emit (Psubl_ri (RSP, sz'));
-       emit (Pcfi_adjust sz');
-       (* Stack chaining *)
-       let addr1 = linear_addr RSP (Z.of_uint (sz + 4)) in
-       let addr2 = linear_addr RSP ofs_link in
-       emit (Pleal (RAX,addr1));
-       emit (Pmovl_mr (addr2,RAX));
-       PrintAsmaux.current_function_stacksize := Int32.of_int sz
-     end
-  | Pfreeframe(sz, ofs_ra, ofs_link) ->
-     if Archi.win64 then begin
-       let sz = sp_adjustment_win64 sz in
-       emit (Paddq_ri (RSP, Z.of_uint sz))
-     end else if Archi.ptr64 then begin
-       let (sz, _) = sp_adjustment_elf64 sz in
-       emit (Paddq_ri (RSP, Z.of_uint sz))
-     end else begin
-       let sz = sp_adjustment_32 sz in
-       emit (Paddl_ri (RSP, Z.of_uint sz))
+  | Pselectl(rd, rb, rt, rf) ->
+     if not Archi.ptr64
+     then failwith "Pselectl not available on RV32, only on RV64"
+     else
+       if ireg0_eq rt rf then
+         begin
+           if ireg0_eq (X rd) rt then
+             begin
+             end
+           else
+             begin
+               emit (Paddl(rd, X0, rt))
+             end
+         end
+       else
+         if (ireg0_eq (X rd) rt) then
+           begin
+             emit (Psubl(X31, X0, rb));
+             emit (Pandl(X31, X X31, rt));
+             emit (Paddil(rd, rb, Int64.mone));
+             emit (Pandl(rd, X rd, rf));
+             emit (Porl(rd, X rd, X X31))
+           end
+         else
+           if (ireg0_eq (X rd) rf) then
+             begin
+               emit (Paddil(X31, rb, Int64.mone));
+               emit (Pandl(X31, X X31, rf));
+               emit (Psubl(rd, X0, rb));
+               emit (Pandl(rd, X rd, rt));
+               emit (Porl(rd, X rd, X X31))
+             end  
+           else
+             begin
+               emit (Psubl(X31, X0, rb));
+               emit (Paddil(rd, rb, Int64.mone));
+               emit (Pandl(X31, X X31, rt));
+               emit (Pandl(rd, X rd, rf));
+               emit (Porl(rd, X rd, X X31))
+             end
+  | Pallocframe (sz, ofs) ->
+      let sg = get_current_function_sig() in
+      emit (Pmv (X30, X2));
+      if (sg.sig_cc.cc_vararg <> None) then begin
+        let n = arguments_size sg in
+        let extra_sz = if n >= 8 then 0 else align ((8 - n) * wordsize) 16 in
+        let full_sz = Z.add sz (Z.of_uint extra_sz) in
+        expand_addptrofs X2 X2 (Ptrofs.repr (Z.neg full_sz));
+        expand_storeind_ptr X30 X2 ofs;
+        let va_ofs =
+          Z.add full_sz (Z.of_sint ((n - 8) * wordsize)) in
+        vararg_start_ofs := Some va_ofs;
+        save_arguments n va_ofs
+      end else begin
+        expand_addptrofs X2 X2 (Ptrofs.repr (Z.neg sz));
+        expand_storeind_ptr X30 X2 ofs;
+        vararg_start_ofs := None
+      end
+  | Pfreeframe (sz, ofs) ->
+     let sg = get_current_function_sig() in
+     let extra_sz =
+      if (sg.sig_cc.cc_vararg <> None) then begin
+        let n = arguments_size sg in
+        if n >= 8 then 0 else align ((8 - n) * wordsize) 16
+      end else 0 in
+     expand_addptrofs X2 X2 (Ptrofs.repr (Z.add sz (Z.of_uint extra_sz)))
+
+  | Pseqw(rd, rs1, rs2) ->
+      (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *)
+      if rs2 = X0 then begin
+        emit (Psltiuw(rd, rs1, Int.one))
+      end else begin
+        emit (Pxorw(rd, rs1, rs2)); emit (Psltiuw(rd, X rd, Int.one))
+      end
+  | Psnew(rd, rs1, rs2) ->
+      (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *)
+      if rs2 = X0 then begin
+        emit (Psltuw(rd, X0, rs1))
+      end else begin
+        emit (Pxorw(rd, rs1, rs2)); emit (Psltuw(rd, X0, X rd))
+      end
+  | Pseql(rd, rs1, rs2) ->
+      (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *)
+      if rs2 = X0 then begin
+        emit (Psltiul(rd, rs1, Int64.one))
+      end else begin
+        emit (Pxorl(rd, rs1, rs2)); emit (Psltiul(rd, X rd, Int64.one))
+      end
+  | Psnel(rd, rs1, rs2) ->
+      (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *)
+      if rs2 = X0 then begin
+        emit (Psltul(rd, X0, rs1))
+      end else begin
+        emit (Pxorl(rd, rs1, rs2)); emit (Psltul(rd, X0, X rd))
+      end
+  | Pcvtl2w(rd, rs) ->
+      assert Archi.ptr64;
+      emit (Paddiw(rd, rs, Int.zero))  (* 32-bit sign extension *)
+  | Pcvtw2l(r) ->
+      assert Archi.ptr64
+      (* no-operation because the 32-bit integer was kept sign extended already *)
+
+  | Pjal_r(r, sg) ->
+      fixup_call sg; emit instr
+  | Pjal_s(symb, sg) ->
+      fixup_call sg; emit instr
+  | Pj_r(r, sg) when r <> X1 ->
+      fixup_call sg; emit instr
+  | Pj_s(symb, sg) ->
+      fixup_call sg; emit instr
+
+  | Pbuiltin (ef,args,res) ->
+     begin match ef with
+     | EF_builtin (name,sg) ->
+        expand_builtin_inline (camlstring_of_coqstring name) args res
+     | EF_vload chunk ->
+        expand_builtin_vload chunk args res
+     | EF_vstore chunk ->
+        expand_builtin_vstore chunk args
+     | EF_annot_val (kind,txt,targ) ->
+        expand_annot_val kind txt targ args res
+     | EF_memcpy(sz, al) ->
+        expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
+     | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+        emit instr
+     | _ ->
+        assert false
      end
-  | Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) ->
-     fixup_funcall sg;
+  | _ ->
      emit instr
-  | Pbuiltin (ef,args, res) ->
-     begin
-       match ef with
-       | EF_builtin(name, sg) ->
-	  expand_builtin_inline (camlstring_of_coqstring name) args res
-       | EF_vload chunk ->
-          expand_builtin_vload chunk args res
-       | EF_vstore chunk ->
-          expand_builtin_vstore chunk args
-       | EF_memcpy(sz, al) ->
-          expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
-       | EF_annot_val(kind,txt, targ) ->
-          expand_annot_val kind txt targ args res
-       | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
-          emit instr
-       | _ ->
-          assert false
-     end
-  | _ -> emit instr
-
-let int_reg_to_dwarf_32 = function
-  | RAX -> 0
-  | RBX -> 3
-  | RCX -> 1
-  | RDX -> 2
-  | RSI -> 6
-  | RDI -> 7
-  | RBP -> 5
-  | RSP -> 4
-  | _ -> assert false
-
-let int_reg_to_dwarf_64 = function
-  | RAX -> 0
-  | RDX -> 1
-  | RCX -> 2
-  | RBX -> 3
-  | RSI -> 4
-  | RDI -> 5
-  | RBP -> 6
-  | RSP -> 7
-  | R8 -> 8
-  | R9 -> 9
-  | R10 -> 10
-  | R11 -> 11
-  | R12 -> 12
-  | R13 -> 13
-  | R14 -> 14
-  | R15 -> 15
-
-let int_reg_to_dwarf =
-  if Archi.ptr64 then int_reg_to_dwarf_64 else int_reg_to_dwarf_32
-
-let float_reg_to_dwarf_32 = function
-  | XMM0 -> 21
-  | XMM1 -> 22
-  | XMM2 -> 23
-  | XMM3 -> 24
-  | XMM4 -> 25
-  | XMM5 -> 26
-  | XMM6 -> 27
-  | XMM7 -> 28
-  | _ -> assert false
-
-let float_reg_to_dwarf_64 = function
-  | XMM0 -> 17
-  | XMM1 -> 18
-  | XMM2 -> 19
-  | XMM3 -> 20
-  | XMM4 -> 21
-  | XMM5 -> 22
-  | XMM6 -> 23
-  | XMM7 -> 24
-  | XMM8 -> 25
-  | XMM9 -> 26
-  | XMM10 -> 27
-  | XMM11 -> 28
-  | XMM12 -> 29
-  | XMM13 -> 30
-  | XMM14 -> 31
-  | XMM15 -> 32
-
-let float_reg_to_dwarf =
-  if Archi.ptr64 then float_reg_to_dwarf_64 else float_reg_to_dwarf_32
+
+(* NOTE: Dwarf register maps for RV32G are not yet specified
+   officially.  This is just a placeholder.  *)
+let int_reg_to_dwarf = function
+               | X1  -> 1  | X2  -> 2  | X3  -> 3
+   | X4  -> 4  | X5  -> 5  | X6  -> 6  | X7  -> 7
+   | X8  -> 8  | X9  -> 9  | X10 -> 10 | X11 -> 11
+   | X12 -> 12 | X13 -> 13 | X14 -> 14 | X15 -> 15
+   | X16 -> 16 | X17 -> 17 | X18 -> 18 | X19 -> 19
+   | X20 -> 20 | X21 -> 21 | X22 -> 22 | X23 -> 23
+   | X24 -> 24 | X25 -> 25 | X26 -> 26 | X27 -> 27
+   | X28 -> 28 | X29 -> 29 | X30 -> 30 | X31 -> 31
+
+let float_reg_to_dwarf = function
+   | F0  -> 32 | F1  -> 33 | F2  -> 34 | F3  -> 35
+   | F4  -> 36 | F5  -> 37 | F6  -> 38 | F7  -> 39
+   | F8  -> 40 | F9  -> 41 | F10 -> 42 | F11 -> 43
+   | F12 -> 44 | F13 -> 45 | F14 -> 46 | F15 -> 47
+   | F16 -> 48 | F17 -> 49 | F18 -> 50 | F19 -> 51
+   | F20 -> 52 | F21 -> 53 | F22 -> 54 | F23 -> 55
+   | F24 -> 56 | F25 -> 57 | F26 -> 58 | F27 -> 59
+   | F28 -> 60 | F29 -> 61 | F30 -> 62 | F31 -> 63
 
 let preg_to_dwarf = function
    | IR r -> int_reg_to_dwarf r
    | FR r -> float_reg_to_dwarf r
    | _ -> assert false
 
-
 let expand_function id fn =
   try
     set_current_function fn;
-    expand id (int_reg_to_dwarf RSP) preg_to_dwarf expand_instruction fn.fn_code;
+    fixup_function_entry fn.fn_sig;
+    expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code;
     Errors.OK (get_current_function ())
   with Error s ->
     Errors.Error (Errors.msg (coqstring_of_camlstring s))
diff --git a/verilog/Asmgen.v b/verilog/Asmgen.v
index 99e9fc2b..3e84e950 100644
--- a/verilog/Asmgen.v
+++ b/verilog/Asmgen.v
@@ -2,16 +2,22 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                  Xavier Leroy, INRIA Paris                          *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
-(** Translation from Mach to IA32 assembly language *)
+(** Translation from Mach to RISC-V assembly language *)
 
+Require Archi.
 Require Import Coqlib Errors.
 Require Import AST Integers Floats Memdata.
 Require Import Op Locations Mach Asm.
@@ -19,15 +25,13 @@ Require Import Op Locations Mach Asm.
 Local Open Scope string_scope.
 Local Open Scope error_monad_scope.
 
-(** The code generation functions take advantage of several characteristics of the [Mach] code generated by earlier passes of the compiler:
-- Argument and result registers are of the correct type.
-- For two-address instructions, the result and the first argument
-  are in the same register.  (True by construction in [RTLgen], and preserved by [Reload].)
-- The top of the floating-point stack ([ST0], a.k.a. [FP0]) can only
-  appear in [mov] instructions, but never in arithmetic instructions.
+Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := f.
 
-All these properties are true by construction, but it is painful to track them statically.  Instead, we recheck them during code generation and fail if they do not hold.
-*)
+(** The code generation functions take advantage of several
+  characteristics of the [Mach] code generated by earlier passes of the
+  compiler, mostly that argument and result registers are of the correct
+  types.  These properties are true by construction, but it's easier to
+  recheck them during code generation and fail if they do not hold. *)
 
 (** Extracting integer or float registers. *)
 
@@ -37,702 +41,1131 @@ Definition ireg_of (r: mreg) : res ireg :=
 Definition freg_of (r: mreg) : res freg :=
   match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
 
-(** Smart constructors for some operations. *)
+(** Decomposition of 32-bit integer constants.  They are split into either
+  small signed immediates that fit in 12-bits, or, if they do not fit,
+  into a (20-bit hi, 12-bit lo) pair where lo is sign-extended. *)
 
-Definition mk_mov (rd rs: preg) (k: code) : res code :=
-  match rd, rs with
-  | IR rd, IR rs => OK (Pmov_rr rd rs :: k)
-  | FR rd, FR rs => OK (Pmovsd_ff rd rs :: k)
-  | _, _ => Error(msg "Asmgen.mk_mov")
-  end.
+Inductive immed32 : Type :=
+  | Imm32_single (imm: int)
+  | Imm32_pair   (hi: int) (lo: int).
 
-Definition mk_shrximm (n: int) (k: code) : res code :=
-  let p := Int.sub (Int.shl Int.one n) Int.one in
-  OK (Ptestl_rr RAX RAX ::
-      Pleal RCX (Addrmode (Some RAX) None (inl _ (Int.unsigned p))) ::
-      Pcmov Cond_l RAX RCX ::
-      Psarl_ri RAX n :: k).
-
-Definition mk_shrxlimm (n: int) (k: code) : res code :=
-  OK (if Int.eq n Int.zero then Pmov_rr RAX RAX :: k else
-      Pcqto ::
-      Pshrq_ri RDX (Int.sub (Int.repr 64) n) ::
-      Pleaq RAX (Addrmode (Some RAX) (Some(RDX, 1)) (inl _ 0)) ::
-      Psarq_ri RAX n :: k).
-
-Definition low_ireg (r: ireg) : bool :=
-  match r with RAX | RBX | RCX | RDX => true | _ => false end.
-
-Definition mk_intconv (mk: ireg -> ireg -> instruction) (rd rs: ireg) (k: code) :=
-  if Archi.ptr64 || low_ireg rs then
-    OK (mk rd rs :: k)
-  else
-    OK (Pmov_rr RAX rs :: mk rd RAX :: k).
+Definition make_immed32 (val: int) :=
+  let lo := Int.sign_ext 12 val in
+  if Int.eq val lo
+  then Imm32_single val
+  else Imm32_pair (Int.shru (Int.sub val lo) (Int.repr 12)) lo.
+(*
+  let discr := Int.shr val (Int.repr 11) in
+  let hi    := Int.shru val (Int.repr 12) in
+  if Int.eq discr Int.zero || Int.eq discr Int.mone
+  then Imm32_single val
+  else Imm32_pair (Int.add hi (Int.and discr Int.one)) (Int.sign_ext 12 val).
+*)
 
-Definition addressing_mentions (addr: addrmode) (r: ireg) : bool :=
-  match addr with Addrmode base displ const =>
-      match base with Some r' => ireg_eq r r' | None => false end
-   || match displ with Some(r', sc) => ireg_eq r r' | None => false end
-  end.
+(** Likewise, for 64-bit integer constants. *)
 
-Definition mk_storebyte (addr: addrmode) (rs: ireg) (k: code) :=
-  if Archi.ptr64 || low_ireg rs then
-    OK (Pmovb_mr addr rs :: k)
-  else if addressing_mentions addr RAX then
-    OK (Pleal RCX addr :: Pmov_rr RAX rs ::
-        Pmovb_mr (Addrmode (Some RCX) None (inl _ 0)) RAX :: k)
-  else
-    OK (Pmov_rr RAX rs :: Pmovb_mr addr RAX :: k).
+Inductive immed64 : Type :=
+  | Imm64_single (imm: int64)
+  | Imm64_pair   (hi: int64) (lo: int64)
+  | Imm64_large  (imm: int64).
 
-(** Accessing slots in the stack frame. *)
+Definition make_immed64 (val: int64) :=
+  let lo := Int64.sign_ext 12 val in
+  if Int64.eq val lo then Imm64_single lo else
+  let hi := Int64.zero_ext 20 (Int64.shru (Int64.sub val lo) (Int64.repr 12)) in
+  if Int64.eq val (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)
+  then Imm64_pair hi lo
+  else Imm64_large val.
 
-Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
-  let a := Addrmode (Some base) None (inl _ (Ptrofs.unsigned ofs)) in
-  match ty, preg_of dst with
-  | Tint, IR r => OK (Pmovl_rm r a :: k)
-  | Tlong, IR r => OK (Pmovq_rm r a :: k)
-  | Tsingle, FR r => OK (Pmovss_fm r a :: k)
-  | Tsingle, ST0  => OK (Pflds_m a :: k)
-  | Tfloat, FR r => OK (Pmovsd_fm r a :: k)
-  | Tfloat, ST0  => OK (Pfldl_m a :: k)
-  | Tany32, IR r => if Archi.ptr64 then Error (msg "Asmgen.loadind1") else OK (Pmov_rm_a r a :: k)
-  | Tany64, IR r => if Archi.ptr64 then OK (Pmov_rm_a r a :: k) else Error (msg "Asmgen.loadind2")
-  | Tany64, FR r => OK (Pmovsd_fm_a r a :: k)
-  | _, _ => Error (msg "Asmgen.loadind")
+(** Smart constructors for arithmetic operations involving
+  a 32-bit or 64-bit integer constant.  Depending on whether the
+  constant fits in 12 bits or not, one or several instructions
+  are generated as required to perform the operation
+  and prepended to the given instruction sequence [k]. *)
+
+Definition load_hilo32 (r: ireg) (hi lo: int) k :=
+  if Int.eq lo Int.zero then Pluiw r hi :: k
+  else Pluiw r hi :: Paddiw r r lo :: k.
+  
+Definition loadimm32 (r: ireg) (n: int) (k: code) :=
+  match make_immed32 n with
+  | Imm32_single imm => Paddiw r X0 imm :: k
+  | Imm32_pair hi lo => load_hilo32 r hi lo k
   end.
 
-Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: code) :=
-  let a := Addrmode (Some base) None (inl _ (Ptrofs.unsigned ofs)) in
-  match ty, preg_of src with
-  | Tint, IR r => OK (Pmovl_mr a r :: k)
-  | Tlong, IR r => OK (Pmovq_mr a r :: k)
-  | Tsingle, FR r => OK (Pmovss_mf a r :: k)
-  | Tsingle, ST0 => OK (Pfstps_m a :: k)
-  | Tfloat, FR r => OK (Pmovsd_mf a r :: k)
-  | Tfloat, ST0 => OK (Pfstpl_m a :: k)
-  | Tany32, IR r => if Archi.ptr64 then Error (msg "Asmgen.storeind1") else OK (Pmov_mr_a a r :: k)
-  | Tany64, IR r => if Archi.ptr64 then OK (Pmov_mr_a a r :: k) else Error (msg "Asmgen.storeind2")
-  | Tany64, FR r => OK (Pmovsd_mf_a a r :: k)
-  | _, _ => Error (msg "Asmgen.storeind")
+Definition opimm32 (op: ireg -> ireg0 -> ireg0 -> instruction)
+                   (opimm: ireg -> ireg0 -> int -> instruction)
+                   (rd rs: ireg) (n: int) (k: code) :=
+  match make_immed32 n with
+  | Imm32_single imm => opimm rd rs imm :: k
+  | Imm32_pair hi lo => load_hilo32 X31 hi lo (op rd rs X31 :: k)
   end.
 
-(** Translation of addressing modes *)
+Definition addimm32 := opimm32 Paddw Paddiw.
+Definition andimm32 := opimm32 Pandw Pandiw.
+Definition orimm32  := opimm32 Porw  Poriw.
+Definition xorimm32 := opimm32 Pxorw Pxoriw.
+Definition sltimm32 := opimm32 Psltw Psltiw.
+Definition sltuimm32 := opimm32 Psltuw Psltiuw.
+  
+Definition load_hilo64 (r: ireg) (hi lo: int64) k :=
+  if Int64.eq lo Int64.zero then Pluil r hi :: k
+  else Pluil r hi :: Paddil r r lo :: k.
 
-Definition transl_addressing (a: addressing) (args: list mreg): res addrmode :=
-  match a, args with
-  | Aindexed n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK(Addrmode (Some r1) None (inl _ n))
-  | Aindexed2 n, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK(Addrmode (Some r1) (Some(r2, 1)) (inl _ n))
-  | Ascaled sc n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK(Addrmode None (Some(r1, sc)) (inl _ n))
-  | Aindexed2scaled sc n, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK(Addrmode (Some r1) (Some(r2, sc)) (inl _ n))
-  | Aglobal id ofs, nil =>
-      OK(Addrmode None None (inr _ (id, ofs)))
-  | Abased id ofs, a1 :: nil =>
-      do r1 <- ireg_of a1; OK(Addrmode (Some r1) None (inr _ (id, ofs)))
-  | Abasedscaled sc id ofs, a1 :: nil =>
-      do r1 <- ireg_of a1; OK(Addrmode None (Some(r1, sc)) (inr _ (id, ofs)))
-  | Ainstack n, nil =>
-      OK(Addrmode (Some RSP) None (inl _ (Ptrofs.signed n)))
-  | _, _ =>
-      Error(msg "Asmgen.transl_addressing")
+Definition loadimm64 (r: ireg) (n: int64) (k: code) :=
+  match make_immed64 n with
+  | Imm64_single imm => Paddil r X0 imm :: k
+  | Imm64_pair hi lo => load_hilo64 r hi lo k
+  | Imm64_large imm  => Ploadli r imm :: k
   end.
 
-Definition normalize_addrmode_32 (a: addrmode) :=
-  match a with
-  | Addrmode base ofs (inl n) =>
-      Addrmode base ofs (inl _ (Int.signed (Int.repr n)))
-  | Addrmode base ofs (inr _) =>
-      a
+Definition opimm64 (op: ireg -> ireg0 -> ireg0 -> instruction)
+                   (opimm: ireg -> ireg0 -> int64 -> instruction)
+                   (rd rs: ireg) (n: int64) (k: code) :=
+  match make_immed64 n with
+  | Imm64_single imm => opimm rd rs imm :: k
+  | Imm64_pair hi lo => load_hilo64 X31 hi lo (op rd rs X31 :: k)
+  | Imm64_large imm  => Ploadli X31 imm :: op rd rs X31 :: k
   end.
 
-Definition normalize_addrmode_64 (a: addrmode) :=
-  match a with
-  | Addrmode base ofs (inl n) =>
-      if Op.offset_in_range n
-      then (a, None)
-      else (Addrmode base ofs (inl _ 0), Some (Int64.repr n))
-  | Addrmode base ofs (inr (id, delta)) =>
-      if Op.ptroffset_in_range delta || negb Archi.ptr64
-      then (a, None)
-      else (Addrmode base ofs (inr _ (id, Ptrofs.zero)), Some (Ptrofs.to_int64 delta))
+Definition addimm64 := opimm64 Paddl Paddil.
+Definition andimm64 := opimm64 Pandl Pandil.
+Definition orimm64  := opimm64 Porl  Poril.
+Definition xorimm64 := opimm64 Pxorl  Pxoril.
+Definition sltimm64 := opimm64 Psltl Psltil.
+Definition sltuimm64 := opimm64 Psltul Psltiul.
+
+Definition addptrofs (rd rs: ireg) (n: ptrofs) (k: code) :=
+  if Ptrofs.eq_dec n Ptrofs.zero then
+    Pmv rd rs :: k
+  else
+    if Archi.ptr64
+    then addimm64 rd rs (Ptrofs.to_int64 n) k
+    else addimm32 rd rs (Ptrofs.to_int n) k.
+  
+(** Translation of conditional branches. *)
+
+Definition transl_cbranch_int32s (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
+  match cmp with
+  | Ceq => Pbeqw r1 r2 lbl
+  | Cne => Pbnew r1 r2 lbl
+  | Clt => Pbltw r1 r2 lbl
+  | Cle => Pbgew r2 r1 lbl
+  | Cgt => Pbltw r2 r1 lbl
+  | Cge => Pbgew r1 r2 lbl
   end.
 
-(** Floating-point comparison.  We swap the operands in some cases
-   to simplify the handling of the unordered case. *)
+Definition transl_cbranch_int32u (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
+  match cmp with
+  | Ceq => Pbeqw  r1 r2 lbl
+  | Cne => Pbnew  r1 r2 lbl
+  | Clt => Pbltuw r1 r2 lbl
+  | Cle => Pbgeuw r2 r1 lbl
+  | Cgt => Pbltuw r2 r1 lbl
+  | Cge => Pbgeuw r1 r2 lbl
+  end.
+
+Definition transl_cbranch_int64s (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
+  match cmp with
+  | Ceq => Pbeql r1 r2 lbl
+  | Cne => Pbnel r1 r2 lbl
+  | Clt => Pbltl r1 r2 lbl
+  | Cle => Pbgel r2 r1 lbl
+  | Cgt => Pbltl r2 r1 lbl
+  | Cge => Pbgel r1 r2 lbl
+  end.
 
-Definition floatcomp (cmp: comparison) (r1 r2: freg) : instruction :=
+Definition transl_cbranch_int64u (cmp: comparison) (r1 r2: ireg0) (lbl: label) :=
   match cmp with
-  | Clt | Cle => Pcomisd_ff r2 r1
-  | Ceq | Cne | Cgt | Cge => Pcomisd_ff r1 r2
+  | Ceq => Pbeql  r1 r2 lbl
+  | Cne => Pbnel  r1 r2 lbl
+  | Clt => Pbltul r1 r2 lbl
+  | Cle => Pbgeul r2 r1 lbl
+  | Cgt => Pbltul r2 r1 lbl
+  | Cge => Pbgeul r1 r2 lbl
   end.
 
-Definition floatcomp32 (cmp: comparison) (r1 r2: freg) : instruction :=
+Definition transl_cond_float (cmp: comparison) (rd: ireg) (fs1 fs2: freg) :=
   match cmp with
-  | Clt | Cle => Pcomiss_ff r2 r1
-  | Ceq | Cne | Cgt | Cge => Pcomiss_ff r1 r2
+  | Ceq => (Pfeqd rd fs1 fs2, true)
+  | Cne => (Pfeqd rd fs1 fs2, false)
+  | Clt => (Pfltd rd fs1 fs2, true)
+  | Cle => (Pfled rd fs1 fs2, true)
+  | Cgt => (Pfltd rd fs2 fs1, true)
+  | Cge => (Pfled rd fs2 fs1, true)
   end.
+  
+Definition transl_cond_single (cmp: comparison) (rd: ireg) (fs1 fs2: freg) :=
+  match cmp with
+  | Ceq => (Pfeqs rd fs1 fs2, true)
+  | Cne => (Pfeqs rd fs1 fs2, false)
+  | Clt => (Pflts rd fs1 fs2, true)
+  | Cle => (Pfles rd fs1 fs2, true)
+  | Cgt => (Pflts rd fs2 fs1, true)
+  | Cge => (Pfles rd fs2 fs1, true)
+   end.
+
+(** Functions to select a special register according to the op "oreg" argument from RTL *)
+
+Definition apply_bin_oreg_ireg0 (optR: option oreg) (r1 r2: ireg0): (ireg0 * ireg0) :=
+  match optR with
+  | None => (r1, r2)
+  | Some X0_L => (X0, r1)
+  | Some X0_R => (r1, X0)
+  end. 
 
-(** Translation of a condition.  Prepends to [k] the instructions
-  that evaluate the condition and leave its boolean result in bits
-  of the condition register. *)
+Definition get_oreg (optR: option oreg) (r: ireg0) :=
+  match optR with
+  | Some X0_L | Some X0_R => X0
+  | _ => r
+  end. 
 
-Definition transl_cond
-              (cond: condition) (args: list mreg) (k: code) : res code :=
+Definition transl_cbranch
+           (cond: condition) (args: list mreg) (lbl: label) (k: code) :=
   match cond, args with
   | Ccomp c, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpl_rr r1 r2 :: k)
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cbranch_int32s c r1 r2 lbl :: k)
   | Ccompu c, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpl_rr r1 r2 :: k)
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cbranch_int32u c r1 r2 lbl :: k)
   | Ccompimm c n, a1 :: nil =>
       do r1 <- ireg_of a1;
-      OK (if Int.eq_dec n Int.zero then Ptestl_rr r1 r1 :: k else Pcmpl_ri r1 n :: k)
+      OK (if Int.eq n Int.zero then
+            transl_cbranch_int32s c r1 X0 lbl :: k
+          else
+            loadimm32 X31 n (transl_cbranch_int32s c r1 X31 lbl :: k))
   | Ccompuimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK (Pcmpl_ri r1 n :: k)
+      do r1 <- ireg_of a1;
+      OK (if Int.eq n Int.zero then
+            transl_cbranch_int32u c r1 X0 lbl :: k
+          else
+            loadimm32 X31 n (transl_cbranch_int32u c r1 X31 lbl :: k))
   | Ccompl c, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpq_rr r1 r2 :: k)
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cbranch_int64s c r1 r2 lbl :: k)
   | Ccomplu c, a1 :: a2 :: nil =>
-      do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pcmpq_rr r1 r2 :: k)
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cbranch_int64u c r1 r2 lbl :: k)
   | Ccomplimm c n, a1 :: nil =>
       do r1 <- ireg_of a1;
-      OK (if Int64.eq_dec n Int64.zero then Ptestq_rr r1 r1 :: k else Pcmpq_ri r1 n :: k)
+      OK (if Int64.eq n Int64.zero then
+            transl_cbranch_int64s c r1 X0 lbl :: k
+          else
+            loadimm64 X31 n (transl_cbranch_int64s c r1 X31 lbl :: k))
   | Ccompluimm c n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK (Pcmpq_ri r1 n :: k)
-  | Ccompf cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
-  | Cnotcompf cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
-  | Ccompfs cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
-  | Cnotcompfs cmp, a1 :: a2 :: nil =>
-      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
-  | Cmaskzero n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK (Ptestl_ri r1 n :: k)
-  | Cmasknotzero n, a1 :: nil =>
-      do r1 <- ireg_of a1; OK (Ptestl_ri r1 n :: k)
+      do r1 <- ireg_of a1;
+      OK (if Int64.eq n Int64.zero then
+            transl_cbranch_int64u c r1 X0 lbl :: k
+          else
+            loadimm64 X31 n (transl_cbranch_int64u c r1 X31 lbl :: k))
+  | Ccompf c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_float c X31 r1 r2 in
+      OK (insn :: (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) :: k)
+  | Cnotcompf c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_float c X31 r1 r2 in
+      OK (insn :: (if normal then Pbeqw X31 X0 lbl else Pbnew X31 X0 lbl) :: k)
+  | Ccompfs c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_single c X31 r1 r2 in
+      OK (insn :: (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) :: k)
+  | Cnotcompfs c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_single c X31 r1 r2 in
+   OK (insn :: (if normal then Pbeqw X31 X0 lbl else Pbnew X31 X0 lbl) :: k)
+
+  | CEbeqw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeqw r1' r2' lbl :: k)
+  | CEbnew optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnew r1' r2' lbl :: k)
+  | CEbequw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeqw r1' r2' lbl :: k)
+  | CEbneuw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnew r1' r2' lbl :: k)
+  | CEbltw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltw r1' r2' lbl :: k)
+  | CEbltuw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltuw r1' r2' lbl :: k)
+  | CEbgew optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgew r1' r2' lbl :: k)
+  | CEbgeuw optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgeuw r1' r2' lbl :: k)
+  | CEbeql optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeql r1' r2' lbl :: k)
+  | CEbnel optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnel r1' r2' lbl :: k)
+  | CEbequl optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbeql r1' r2' lbl :: k)
+  | CEbneul optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbnel r1' r2' lbl :: k)
+  | CEbltl optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltl r1' r2' lbl :: k)
+  | CEbltul optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbltul r1' r2' lbl :: k)
+  | CEbgel optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgel r1' r2' lbl :: k)
+  | CEbgeul optR, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      let (r1', r2') := apply_bin_oreg_ireg0 optR r1 r2 in
+      OK (Pbgeul r1' r2' lbl :: k)
   | _, _ =>
-     Error(msg "Asmgen.transl_cond")
+      Error(msg "Asmgen.transl_cond_branch")
   end.
 
-(** What processor condition to test for a given Mach condition. *)
+(** Translation of a condition operator.  The generated code sets the
+  [rd] target register to 0 or 1 depending on the truth value of the
+  condition. *)
 
-Definition testcond_for_signed_comparison (cmp: comparison) :=
+Definition transl_cond_int32s (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
   match cmp with
-  | Ceq => Cond_e
-  | Cne => Cond_ne
-  | Clt => Cond_l
-  | Cle => Cond_le
-  | Cgt => Cond_g
-  | Cge => Cond_ge
+  | Ceq => Pseqw rd r1 r2 :: k
+  | Cne => Psnew rd r1 r2 :: k
+  | Clt => Psltw rd r1 r2 :: k
+  | Cle => Psltw rd r2 r1 :: Pxoriw rd rd Int.one :: k
+  | Cgt => Psltw rd r2 r1 :: k
+  | Cge => Psltw rd r1 r2 :: Pxoriw rd rd Int.one :: k
   end.
 
-Definition testcond_for_unsigned_comparison (cmp: comparison) :=
+Definition transl_cond_int32u (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
   match cmp with
-  | Ceq => Cond_e
-  | Cne => Cond_ne
-  | Clt => Cond_b
-  | Cle => Cond_be
-  | Cgt => Cond_a
-  | Cge => Cond_ae
+  | Ceq => Pseqw rd r1 r2 :: k
+  | Cne => Psnew rd r1 r2 :: k
+  | Clt => Psltuw rd r1 r2 :: k
+  | Cle => Psltuw rd r2 r1 :: Pxoriw rd rd Int.one :: k
+  | Cgt => Psltuw rd r2 r1 :: k
+  | Cge => Psltuw rd r1 r2 :: Pxoriw rd rd Int.one :: k
   end.
 
-Inductive extcond: Type :=
-  | Cond_base (c: testcond)
-  | Cond_or (c1 c2: testcond)
-  | Cond_and (c1 c2: testcond).
-
-Definition testcond_for_condition (cond: condition) : extcond :=
-  match cond with
-  | Ccomp c => Cond_base(testcond_for_signed_comparison c)
-  | Ccompu c => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccompimm c n => Cond_base(testcond_for_signed_comparison c)
-  | Ccompuimm c n => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccompl c => Cond_base(testcond_for_signed_comparison c)
-  | Ccomplu c => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccomplimm c n => Cond_base(testcond_for_signed_comparison c)
-  | Ccompluimm c n => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccompf c | Ccompfs c =>
-      match c with
-      | Ceq => Cond_and Cond_np Cond_e
-      | Cne => Cond_or Cond_p Cond_ne
-      | Clt => Cond_base Cond_a
-      | Cle => Cond_base Cond_ae
-      | Cgt => Cond_base Cond_a
-      | Cge => Cond_base Cond_ae
-      end
-  | Cnotcompf c | Cnotcompfs c =>
-      match c with
-      | Ceq => Cond_or Cond_p Cond_ne
-      | Cne => Cond_and Cond_np Cond_e
-      | Clt => Cond_base Cond_be
-      | Cle => Cond_base Cond_b
-      | Cgt => Cond_base Cond_be
-      | Cge => Cond_base Cond_b
-      end
-  | Cmaskzero n => Cond_base Cond_e
-  | Cmasknotzero n => Cond_base Cond_ne
+Definition transl_cond_int64s (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
+  match cmp with
+  | Ceq => Pseql rd r1 r2 :: k
+  | Cne => Psnel rd r1 r2 :: k
+  | Clt => Psltl rd r1 r2 :: k
+  | Cle => Psltl rd r2 r1 :: Pxoriw rd rd Int.one :: k
+  | Cgt => Psltl rd r2 r1 :: k
+  | Cge => Psltl rd r1 r2 :: Pxoriw rd rd Int.one :: k
   end.
 
-(** Acting upon extended conditions. *)
-
-Definition mk_setcc_base (cond: extcond) (rd: ireg) (k: code) :=
-  match cond with
-  | Cond_base c =>
-      Psetcc c rd :: k
-  | Cond_and c1 c2 =>
-      if ireg_eq rd RAX
-      then Psetcc c1 RAX :: Psetcc c2 RCX :: Pandl_rr RAX RCX :: k
-      else Psetcc c1 RAX :: Psetcc c2 rd  :: Pandl_rr rd RAX :: k
-  | Cond_or c1 c2 =>
-      if ireg_eq rd RAX
-      then Psetcc c1 RAX :: Psetcc c2 RCX :: Porl_rr RAX RCX :: k
-      else Psetcc c1 RAX :: Psetcc c2 rd  :: Porl_rr rd RAX :: k
+Definition transl_cond_int64u (cmp: comparison) (rd: ireg) (r1 r2: ireg0) (k: code) :=
+  match cmp with
+  | Ceq => Pseql rd r1 r2 :: k
+  | Cne => Psnel rd r1 r2 :: k
+  | Clt => Psltul rd r1 r2 :: k
+  | Cle => Psltul rd r2 r1 :: Pxoriw rd rd Int.one :: k
+  | Cgt => Psltul rd r2 r1 :: k
+  | Cge => Psltul rd r1 r2 :: Pxoriw rd rd Int.one :: k
   end.
 
-Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) :=
-  if Archi.ptr64 || low_ireg rd
-  then mk_setcc_base cond rd k
-  else mk_setcc_base cond RAX (Pmov_rr rd RAX :: k).
-
-Definition mk_jcc (cond: extcond) (lbl: label) (k: code) :=
-  match cond with
-  | Cond_base c => Pjcc c lbl :: k
-  | Cond_and c1 c2 => Pjcc2 c1 c2 lbl :: k
-  | Cond_or c1 c2 => Pjcc c1 lbl :: Pjcc c2 lbl :: k
+Definition transl_condimm_int32s (cmp: comparison) (rd: ireg) (r1: ireg) (n: int) (k: code) :=
+  if Int.eq n Int.zero then transl_cond_int32s cmp rd r1 X0 k else
+  match cmp with
+  | Ceq | Cne => xorimm32 rd r1 n (transl_cond_int32s cmp rd rd X0 k)
+  | Clt => sltimm32 rd r1 n k
+  | Cle => if Int.eq n (Int.repr Int.max_signed)
+           then loadimm32 rd Int.one k
+           else sltimm32 rd r1 (Int.add n Int.one) k
+  | _   => loadimm32 X31 n (transl_cond_int32s cmp rd r1 X31 k)
   end.
 
-Definition negate_testcond (c: testcond) : testcond :=
-  match c with
-  | Cond_e => Cond_ne  | Cond_ne => Cond_e
-  | Cond_b => Cond_ae  | Cond_be => Cond_a
-  | Cond_ae => Cond_b  | Cond_a => Cond_be
-  | Cond_l => Cond_ge  | Cond_le => Cond_g
-  | Cond_ge => Cond_l  | Cond_g => Cond_le
-  | Cond_p => Cond_np  | Cond_np => Cond_p
+Definition transl_condimm_int32u (cmp: comparison) (rd: ireg) (r1: ireg) (n: int) (k: code) :=
+  if Int.eq n Int.zero then transl_cond_int32u cmp rd r1 X0 k else
+  match cmp with
+  | Clt => sltuimm32 rd r1 n k
+  | _   => loadimm32 X31 n (transl_cond_int32u cmp rd r1 X31 k)
   end.
 
-Definition mk_sel (cond: extcond) (rd r2: ireg) (k: code) :=
-  match cond with
-  | Cond_base c =>
-      OK (Pcmov (negate_testcond c) rd r2 :: k)
-  | Cond_and c1 c2 =>
-      OK (Pcmov (negate_testcond c1) rd r2 ::
-          Pcmov (negate_testcond c2) rd r2 :: k)
-  | Cond_or c1 c2 =>
-      Error (msg "Asmgen.mk_sel")    (**r should never happen, see [SelectOp.select] *)
+Definition transl_condimm_int64s (cmp: comparison) (rd: ireg) (r1: ireg) (n: int64) (k: code) :=
+  if Int64.eq n Int64.zero then transl_cond_int64s cmp rd r1 X0 k else
+  match cmp with
+  | Ceq | Cne => xorimm64 rd r1 n (transl_cond_int64s cmp rd rd X0 k)
+  | Clt => sltimm64 rd r1 n k
+  | Cle => if Int64.eq n (Int64.repr Int64.max_signed)
+           then loadimm32 rd Int.one k
+           else sltimm64 rd r1 (Int64.add n Int64.one) k
+  | _   => loadimm64 X31 n (transl_cond_int64s cmp rd r1 X31 k)
   end.
 
-Definition transl_sel
-      (cond: condition) (args: list mreg) (rd r2: ireg) (k: code) : res code :=
-  if ireg_eq rd r2 then
-    OK (Pmov_rr rd r2 :: k)    (* must generate one instruction... *)
-  else
-    do k1 <- mk_sel (testcond_for_condition cond) rd r2 k;
-    transl_cond cond args k1.
+Definition transl_condimm_int64u (cmp: comparison) (rd: ireg) (r1: ireg) (n: int64) (k: code) :=
+  if Int64.eq n Int64.zero then transl_cond_int64u cmp rd r1 X0 k else
+  match cmp with
+  | Clt => sltuimm64 rd r1 n k
+  | _   => loadimm64 X31 n (transl_cond_int64u cmp rd r1 X31 k)
+   end.
+
+Definition transl_cond_op
+           (cond: condition) (rd: ireg) (args: list mreg) (k: code) :=
+  match cond, args with
+  | Ccomp c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cond_int32s c rd r1 r2 k)
+  | Ccompu c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cond_int32u c rd r1 r2 k)
+  | Ccompimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (transl_condimm_int32s c rd r1 n k)
+  | Ccompuimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (transl_condimm_int32u c rd r1 n k)
+  | Ccompl c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+      OK (transl_cond_int64s c rd r1 r2 k)
+  | Ccomplu c, a1 :: a2 :: nil =>
+      do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+     OK (transl_cond_int64u c rd r1 r2 k)
+  | Ccomplimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+      OK (transl_condimm_int64s c rd r1 n k)
+  | Ccompluimm c n, a1 :: nil =>
+      do r1 <- ireg_of a1;
+     OK (transl_condimm_int64u c rd r1 n k)
+  | Ccompf c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_float c rd r1 r2 in
+      OK (insn :: if normal then k else Pxoriw rd rd Int.one :: k)
+  | Cnotcompf c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_float c rd r1 r2 in
+      OK (insn :: if normal then Pxoriw rd rd Int.one :: k else k)
+  | Ccompfs c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_single c rd r1 r2 in
+      OK (insn :: if normal then k else Pxoriw rd rd Int.one :: k)
+  | Cnotcompfs c, f1 :: f2 :: nil =>
+      do r1 <- freg_of f1; do r2 <- freg_of f2;
+      let (insn, normal) := transl_cond_single c rd r1 r2 in
+     OK (insn :: if normal then Pxoriw rd rd Int.one :: k else k)
+  | _, _ =>
+      Error(msg "Asmgen.transl_cond_op")
+   end.
 
 (** Translation of the arithmetic operation [r <- op(args)].
   The corresponding instructions are prepended to [k]. *)
-
+  
 Definition transl_op
-             (op: operation) (args: list mreg) (res: mreg) (k: code) : Errors.res code :=
+              (op: operation) (args: list mreg) (res: mreg) (k: code) :=
   match op, args with
   | Omove, a1 :: nil =>
-      mk_mov (preg_of res) (preg_of a1) k
+      match preg_of res, preg_of a1 with
+      | IR r, IR a => OK (Pmv r a :: k)
+      | FR r, FR a => OK (Pfmv r a :: k)
+      |  _  ,  _   => Error(msg "Asmgen.Omove")
+      end
   | Ointconst n, nil =>
-      do r <- ireg_of res;
-      OK ((if Int.eq_dec n Int.zero then Pxorl_r r else Pmovl_ri r n) :: k)
+      do rd <- ireg_of res;
+      OK (loadimm32 rd n k)
   | Olongconst n, nil =>
-      do r <- ireg_of res;
-      OK ((if Int64.eq_dec n Int64.zero then Pxorq_r r else Pmovq_ri r n) :: k)
+      do rd <- ireg_of res;
+      OK (loadimm64 rd n k)
   | Ofloatconst f, nil =>
-      do r <- freg_of res;
-      OK ((if Float.eq_dec f Float.zero then Pxorpd_f r else Pmovsd_fi r f) :: k)
+      do rd <- freg_of res;
+      OK (if Float.eq_dec f Float.zero
+          then Pfcvtdw rd X0 :: k
+          else Ploadfi rd f :: k)
   | Osingleconst f, nil =>
-      do r <- freg_of res;
-      OK ((if Float32.eq_dec f Float32.zero then Pxorps_f r else Pmovss_fi r f) :: k)
-  | Oindirectsymbol id, nil =>
-      do r <- ireg_of res;
-      OK (Pmov_rs r id :: k)
+      do rd <- freg_of res;
+      OK (if Float32.eq_dec f Float32.zero
+          then Pfcvtsw rd X0 :: k
+          else Ploadsi rd f :: k)
+  | Oaddrsymbol s ofs, nil =>
+      do rd <- ireg_of res;
+      OK (if Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)
+          then Ploadsymbol rd s Ptrofs.zero :: addptrofs rd rd ofs k
+          else Ploadsymbol rd s ofs :: k)
+  | Oaddrstack n, nil =>
+      do rd <- ireg_of res;
+      OK (addptrofs rd SP n k)
+
   | Ocast8signed, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; mk_intconv Pmovsb_rr r r1 k
-  | Ocast8unsigned, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; mk_intconv Pmovzb_rr r r1 k
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Pslliw rd rs (Int.repr 24) :: Psraiw rd rd (Int.repr 24) :: k)
   | Ocast16signed, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; OK (Pmovsw_rr r r1 :: k)
-  | Ocast16unsigned, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; OK (Pmovzw_rr r r1 :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Pslliw rd rs (Int.repr 16) :: Psraiw rd rd (Int.repr 16) :: k)
+  | Oadd, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Paddw rd rs1 rs2 :: k)
+  | Oaddimm n, a1 :: nil =>
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (addimm32 rd rs n k)
   | Oneg, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pnegl r :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psubw rd X0 rs :: k)
   | Osub, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Psubl_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psubw rd rs1 rs2 :: k)
   | Omul, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pimull_rr r r2 :: k)
-  | Omulimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pimull_ri r n :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmulw rd rs1 rs2 :: k)
   | Omulhs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res DX);
-      do r2 <- ireg_of a2; OK (Pimull_r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmulhw rd rs1 rs2 :: k)
   | Omulhu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res DX);
-      do r2 <- ireg_of a2; OK (Pmull_r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmulhuw rd rs1 rs2 :: k)
   | Odiv, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res AX);
-      OK(Pcltd :: Pidivl RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pdivw rd rs1 rs2 :: k)
   | Odivu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res AX);
-      OK(Pxorl_r RDX :: Pdivl RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pdivuw rd rs1 rs2 :: k)
   | Omod, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res DX);
-      OK(Pcltd :: Pidivl RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Premw rd rs1 rs2 :: k)
   | Omodu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res DX);
-      OK(Pxorl_r RDX :: Pdivl RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Premuw rd rs1 rs2 :: k)
   | Oand, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pandl_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pandw rd rs1 rs2 :: k)
   | Oandimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pandl_ri r n :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (andimm32 rd rs n k)
   | Oor, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Porl_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Porw rd rs1 rs2 :: k)
   | Oorimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Porl_ri r n :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (orimm32 rd rs n k)
   | Oxor, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pxorl_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pxorw rd rs1 rs2 :: k)
   | Oxorimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pxorl_ri r n :: k)
-  | Onot, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pnotl r :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (xorimm32 rd rs n k)
   | Oshl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Psall_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psllw rd rs1 rs2 :: k)
   | Oshlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Psall_ri r n :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Pslliw rd rs n :: k)
   | Oshr, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Psarl_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psraw rd rs1 rs2 :: k)
   | Oshrimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Psarl_ri r n :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psraiw rd rs n :: k)
   | Oshru, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Pshrl_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psrlw rd rs1 rs2 :: k)
   | Oshruimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pshrl_ri r n :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psrliw rd rs n :: k)
   | Oshrximm n, a1 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res AX);
-      mk_shrximm n k
-  | Ororimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Prorl_ri r n :: k)
-  | Oshldimm n, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pshld_ri r r2 n :: k)
-  | Olea addr, _ =>
-      do am <- transl_addressing addr args; do r <- ireg_of res;
-      OK (Pleal r (normalize_addrmode_32 am) :: k)
-(* 64-bit integer operations *)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrliw X31 rs (Int.repr 31) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 Int.one :: k
+                 else Psraiw X31 rs (Int.repr 31) ::
+                      Psrliw X31 X31 (Int.sub Int.iwordsize n) ::
+                      Paddw X31 rs X31 ::
+                      Psraiw rd X31 n :: k)  
+
+  (* [Omakelong], [Ohighlong]  should not occur *)
   | Olowlong, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pmovls_rr r :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Pcvtl2w rd rs :: k)  
   | Ocast32signed, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; OK (Pmovsl_rr r r1 :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      assertion (ireg_eq rd rs);
+      OK (Pcvtw2l rd :: k)
   | Ocast32unsigned, a1 :: nil =>
-      do r1 <- ireg_of a1; do r <- ireg_of res; OK (Pmovzl_rr r r1 :: k)
-  | Onegl, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pnegq r :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      assertion (ireg_eq rd rs);
+      OK (Pcvtw2l rd :: Psllil rd rd (Int.repr 32) :: Psrlil rd rd (Int.repr 32) :: k)
+  | Oaddl, a1 :: a2 :: nil =>
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Paddl rd rs1 rs2 :: k)
   | Oaddlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Paddq_ri r n :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (addimm64 rd rs n k)
+  | Onegl, a1 :: nil =>
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psubl rd X0 rs :: k)
   | Osubl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Psubq_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psubl rd rs1 rs2 :: k)
   | Omull, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pimulq_rr r r2 :: k)
-  | Omullimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pimulq_ri r n :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmull rd rs1 rs2 :: k)
   | Omullhs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res DX);
-      do r2 <- ireg_of a2; OK (Pimulq_r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmulhl rd rs1 rs2 :: k)
   | Omullhu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res DX);
-      do r2 <- ireg_of a2; OK (Pmulq_r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pmulhul rd rs1 rs2 :: k)
   | Odivl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res AX);
-      OK(Pcqto :: Pidivq RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pdivl rd rs1 rs2 :: k)
   | Odivlu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res AX);
-      OK(Pxorq_r RDX :: Pdivq RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pdivul rd rs1 rs2 :: k)
   | Omodl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res DX);
-      OK(Pcqto :: Pidivq RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Preml rd rs1 rs2 :: k)
   | Omodlu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq a2 CX);
-      assertion (mreg_eq res DX);
-      OK(Pxorq_r RDX :: Pdivq RCX :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Premul rd rs1 rs2 :: k)
   | Oandl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pandq_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pandl rd rs1 rs2 :: k)
   | Oandlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pandq_ri r n :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (andimm64 rd rs n k)
   | Oorl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Porq_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Porl rd rs1 rs2 :: k)
   | Oorlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Porq_ri r n :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (orimm64 rd rs n k)
   | Oxorl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pxorq_rr r r2 :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pxorl rd rs1 rs2 :: k)
   | Oxorlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pxorq_ri r n :: k)
-  | Onotl, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pnotq r :: k)
+      do rd  <- ireg_of res; do rs <- ireg_of a1;
+      OK (xorimm64 rd rs n k)
   | Oshll, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Psalq_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Pslll rd rs1 rs2 :: k)
   | Oshllimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Psalq_ri r n :: k)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psllil rd rs n :: k)
   | Oshrl, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Psarq_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psral rd rs1 rs2 :: k)
   | Oshrlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Psarq_ri r n :: k)
-  | Oshrxlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 AX);
-      assertion (mreg_eq res AX);
-      mk_shrxlimm n k
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psrail rd rs n :: k)
   | Oshrlu, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      assertion (mreg_eq a2 CX);
-      do r <- ireg_of res; OK (Pshrq_rcl r :: k)
+      do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+      OK (Psrll rd rs1 rs2 :: k)
   | Oshrluimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Pshrq_ri r n :: k)
-  | Ororlimm n, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; OK (Prorq_ri r n :: k)
-  | Oleal addr, _ =>
-      do am <- transl_addressing addr args; do r <- ireg_of res;
-      OK (match normalize_addrmode_64 am with
-          | (am', None)       => Pleaq r am' :: k
-          | (am', Some delta) => Pleaq r am' :: Paddq_ri r delta :: k
-          end)
-(**)
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+      OK (Psrlil rd rs n :: k)
+  | Oshrxlimm n, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- ireg_of a1;
+        OK (if Int.eq n Int.zero
+            then Pmv rd rs :: k
+            else if Int.eq n Int.one
+                 then Psrlil X31 rs (Int.repr 63) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 Int.one :: k
+                 else Psrail X31 rs (Int.repr 63) ::
+                      Psrlil X31 X31 (Int.sub Int64.iwordsize' n) ::
+                      Paddl X31 rs X31 ::
+                      Psrail rd X31 n :: k)  
+
   | Onegf, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; OK (Pnegd r :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfnegd rd rs :: k)
   | Oabsf, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; OK (Pabsd r :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabsd rd rs :: k)
   | Oaddf, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Paddd_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfaddd rd rs1 rs2 :: k)
   | Osubf, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Psubd_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsubd rd rs1 rs2 :: k)
   | Omulf, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuld_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmuld rd rs1 rs2 :: k)
   | Odivf, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdivd rd rs1 rs2 :: k)
+
   | Onegfs, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; OK (Pnegs r :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfnegs rd rs :: k)
   | Oabsfs, a1 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; OK (Pabss r :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfabss rd rs :: k)
   | Oaddfs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Padds_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfadds rd rs1 rs2 :: k)
   | Osubfs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Psubs_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfsubs rd rs1 rs2 :: k)
   | Omulfs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuls_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfmuls rd rs1 rs2 :: k)
   | Odivfs, a1 :: a2 :: nil =>
-      assertion (mreg_eq a1 res);
-      do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivs_ff r r2 :: k)
+      do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+      OK (Pfdivs rd rs1 rs2 :: k)
+
   | Osingleoffloat, a1 :: nil =>
-      do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtsd rd rs :: k)
   | Ofloatofsingle, a1 :: nil =>
-      do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtss2sd_ff r r1 :: k)
+      do rd <- freg_of res; do rs <- freg_of a1;
+      OK (Pfcvtds rd rs :: k)
+
   | Ointoffloat, a1 :: nil =>
-      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttsd2si_rf r r1 :: k)
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtwd rd rs :: k)
+  | Ointuoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtwud rd rs :: k)
   | Ofloatofint, a1 :: nil =>
-      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2sd_fr r r1 :: k)
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtdw rd rs :: k)
+  | Ofloatofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtdwu rd rs :: k)
   | Ointofsingle, a1 :: nil =>
-      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttss2si_rf r r1 :: k)
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtws rd rs :: k)
+  | Ointuofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtwus rd rs :: k)
   | Osingleofint, a1 :: nil =>
-      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2ss_fr r r1 :: k)
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtsw rd rs :: k)
+  | Osingleofintu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtswu rd rs :: k)
+
   | Olongoffloat, a1 :: nil =>
-      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttsd2sl_rf r r1 :: k)
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtld rd rs :: k)
+  | Olonguoffloat, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtlud rd rs :: k)
   | Ofloatoflong, a1 :: nil =>
-      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsl2sd_fr r r1 :: k)
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtdl rd rs :: k)
+  | Ofloatoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtdlu rd rs :: k)
   | Olongofsingle, a1 :: nil =>
-      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttss2sl_rf r r1 :: k)
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtls rd rs :: k)
+  | Olonguofsingle, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfcvtlus rd rs :: k)
   | Osingleoflong, a1 :: nil =>
-      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsl2ss_fr r r1 :: k)
-  | Ocmp c, args =>
-      do r <- ireg_of res;
-      transl_cond c args (mk_setcc (testcond_for_condition c) r k)
-  | Osel c ty, a1 :: a2 :: args =>
-      assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2;
-      transl_sel c args r r2 k
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtsl rd rs :: k)
+  | Osingleoflongu, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfcvtslu rd rs :: k)
+  | Ocmp cmp, _ =>
+      do rd <- ireg_of res;
+     transl_cond_op cmp rd args k
+
+  (* Instructions expanded in RTL *)
+  | OEseqw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseqw rd rs1' rs2' :: k)
+  | OEsnew optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnew rd rs1' rs2' :: k)
+  | OEsequw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseqw rd rs1' rs2' :: k)
+  | OEsneuw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnew rd rs1' rs2' :: k)
+  | OEsltw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltw rd rs1' rs2' :: k)
+  | OEsltuw optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltuw rd rs1' rs2' :: k)
+  | OEsltiw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiw rd rs n :: k)
+  | OEsltiuw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiuw rd rs n :: k)
+  | OExoriw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pxoriw rd rs n :: k)
+  | OEluiw n, nil =>
+      do rd <- ireg_of res;
+      OK (Pluiw rd n :: k)
+  | OEaddiw optR n, nil =>
+      do rd <- ireg_of res;
+      let rs := get_oreg optR X0 in
+      OK (Paddiw rd rs n :: k)
+  | OEaddiw optR n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      let rs' := get_oreg optR rs in
+      OK (Paddiw rd rs' n :: k)
+  | OEandiw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pandiw rd rs n :: k)
+  | OEoriw n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Poriw rd rs n :: k)
+  | OEseql optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseql rd rs1' rs2' :: k)
+  | OEsnel optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnel rd rs1' rs2' :: k)
+  | OEsequl optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Pseql rd rs1' rs2' :: k)
+  | OEsneul optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psnel rd rs1' rs2' :: k)
+  | OEsltl optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltl rd rs1' rs2' :: k)
+  | OEsltul optR, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do rs1 <- ireg_of a1;
+      do rs2 <- ireg_of a2;
+      let (rs1', rs2') := apply_bin_oreg_ireg0 optR rs1 rs2 in
+      OK (Psltul rd rs1' rs2' :: k)
+  | OEsltil n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltil rd rs n :: k)
+  | OEsltiul n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Psltiul rd rs n :: k)
+  | OExoril n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pxoril rd rs n :: k)
+  | OEluil n, nil =>
+      do rd <- ireg_of res;
+      OK (Pluil rd n :: k)
+  | OEaddil optR n, nil =>
+      do rd <- ireg_of res;
+      let rs := get_oreg optR X0 in
+      OK (Paddil rd rs n :: k)
+  | OEaddil optR n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      let rs' := get_oreg optR rs in
+      OK (Paddil rd rs' n :: k)
+  | OEandil n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Pandil rd rs n :: k)
+  | OEoril n, a1 :: nil =>
+      do rd <- ireg_of res;
+      do rs <- ireg_of a1;
+      OK (Poril rd rs n :: k)
+  | OEloadli n, nil =>
+      do rd <- ireg_of res;
+      OK (Ploadli rd n :: k)
+  | OEfeqd, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfeqd rd r1 r2 :: k)
+  | OEfltd, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfltd rd r1 r2 :: k)
+  | OEfled, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfled rd r1 r2 :: k)
+  | OEfeqs, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfeqs rd r1 r2 :: k)
+  | OEflts, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pflts rd r1 r2 :: k)
+  | OEfles, f1 :: f2 :: nil =>
+      do rd <- ireg_of res;
+      do r1 <- freg_of f1;
+      do r2 <- freg_of f2;
+      OK (Pfles rd r1 r2 :: k)
+  | OEmayundef _, a1 :: a2 :: nil =>
+      do rd <- ireg_of res;
+      do r2 <- ireg_of a2;
+      if ireg_eq rd r2 then
+        OK (Pnop :: k)
+      else
+        OK (Pmv rd r2 :: k)
+
+  | Obits_of_single, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfmvxs rd rs :: k)
+  | Obits_of_float, a1 :: nil =>
+      do rd <- ireg_of res; do rs <- freg_of a1;
+      OK (Pfmvxd rd rs :: k)
+  | Osingle_of_bits, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfmvsx rd rs :: k)
+  | Ofloat_of_bits, a1 :: nil =>
+      do rd <- freg_of res; do rs <- ireg_of a1;
+      OK (Pfmvdx rd rs :: k)
+  | Oselectl, b::t::f::nil =>
+      do rd <- ireg_of res;
+      do rb <- ireg_of b;
+      do rt <- ireg_of t;
+      do rf <- ireg_of f;
+      OK (Pselectl rd rb rt rf :: k)
   | _, _ =>
       Error(msg "Asmgen.transl_op")
   end.
 
-(** Translation of memory loads and stores *)
+(** Accessing data in the stack frame. *)
+
+Definition indexed_memory_access
+        (mk_instr: ireg -> offset -> instruction)
+        (base: ireg) (ofs: ptrofs) (k: code) :=
+  if Archi.ptr64 then
+    match make_immed64 (Ptrofs.to_int64 ofs) with
+    | Imm64_single imm =>
+        mk_instr base (Ofsimm (Ptrofs.of_int64 imm)) :: k
+    | Imm64_pair hi lo =>
+        Pluil X31 hi :: Paddl X31 base X31 :: mk_instr X31 (Ofsimm (Ptrofs.of_int64 lo)) :: k
+    | Imm64_large imm =>
+        Ploadli X31 imm :: Paddl X31 base X31 :: mk_instr X31 (Ofsimm Ptrofs.zero) :: k
+    end
+  else
+    match make_immed32 (Ptrofs.to_int ofs) with
+    | Imm32_single imm =>
+        mk_instr base (Ofsimm (Ptrofs.of_int imm)) :: k
+    | Imm32_pair hi lo =>
+        Pluiw X31 hi :: Paddw X31 base X31 :: mk_instr X31 (Ofsimm (Ptrofs.of_int lo)) :: k
+    end.
+
+Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
+  match ty, preg_of dst with
+  | Tint,    IR rd => OK (indexed_memory_access (Plw rd) base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access (Pld rd) base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access (Pfls rd) base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access (Pfld rd) base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access (Plw_a rd) base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access (Pld_a rd) base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access (Pfld_a rd) base ofs k)
+  | _, _           => Error (msg "Asmgen.loadind")
+  end.
+
+Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: code) :=
+  match ty, preg_of src with
+  | Tint,    IR rd => OK (indexed_memory_access (Psw rd) base ofs k)
+  | Tlong,   IR rd => OK (indexed_memory_access (Psd rd) base ofs k)
+  | Tsingle, FR rd => OK (indexed_memory_access (Pfss rd) base ofs k)
+  | Tfloat,  FR rd => OK (indexed_memory_access (Pfsd rd) base ofs k)
+  | Tany32,  IR rd => OK (indexed_memory_access (Psw_a rd) base ofs k)
+  | Tany64,  IR rd => OK (indexed_memory_access (Psd_a rd) base ofs k)
+  | Tany64,  FR rd => OK (indexed_memory_access (Pfsd_a rd) base ofs k)
+  | _, _           => Error (msg "Asmgen.storeind")
+  end.
+
+Definition loadind_ptr (base: ireg) (ofs: ptrofs) (dst: ireg) (k: code) :=
+  indexed_memory_access (if Archi.ptr64 then Pld dst else Plw dst) base ofs k.
 
-Definition transl_load
-           (trap : trapping_mode)
-           (chunk: memory_chunk)
-                       (addr: addressing) (args: list mreg) (dest: mreg)
-                       (k: code) : res code :=
+Definition storeind_ptr (src: ireg) (base: ireg) (ofs: ptrofs) (k: code) :=
+  indexed_memory_access (if Archi.ptr64 then Psd src else Psw src) base ofs k.
+
+(** Translation of memory accesses: loads, and stores. *)
+
+Definition transl_memory_access
+     (mk_instr: ireg -> offset -> instruction)
+     (addr: addressing) (args: list mreg) (k: code) :=
+  match addr, args with
+  | Aindexed ofs, a1 :: nil =>
+      do rs <- ireg_of a1;
+      OK (indexed_memory_access mk_instr rs ofs k)
+  | Aglobal id ofs, nil =>
+      OK (Ploadsymbol_high X31 id ofs :: mk_instr X31 (Ofslow id ofs) :: k)
+  | Ainstack ofs, nil =>
+      OK (indexed_memory_access mk_instr SP ofs k)
+  | _, _ =>
+      Error(msg "Asmgen.transl_memory_access")
+  end.
+
+Definition transl_load (trap : trapping_mode)
+           (chunk: memory_chunk) (addr: addressing)
+           (args: list mreg) (dst: mreg) (k: code) :=
   match trap with
-  | NOTRAP => Error (msg "Asmgen.transl_load x86 does not support non trapping loads")
+  | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on Arm")
   | TRAP =>
-  do am <- transl_addressing addr args;
-  match chunk with
-  | Mint8unsigned =>
-      do r <- ireg_of dest; OK(Pmovzb_rm r am :: k)
+    match chunk with
   | Mint8signed =>
-      do r <- ireg_of dest; OK(Pmovsb_rm r am :: k)
-  | Mint16unsigned =>
-      do r <- ireg_of dest; OK(Pmovzw_rm r am :: k)
+      do r <- ireg_of dst;
+      transl_memory_access (Plb r)  addr args k
+  | Mint8unsigned =>
+      do r <- ireg_of dst;
+      transl_memory_access (Plbu r) addr args k
   | Mint16signed =>
-      do r <- ireg_of dest; OK(Pmovsw_rm r am :: k)
+      do r <- ireg_of dst;
+      transl_memory_access (Plh r)  addr args k
+  | Mint16unsigned =>
+      do r <- ireg_of dst;
+      transl_memory_access (Plhu r) addr args k
   | Mint32 =>
-      do r <- ireg_of dest; OK(Pmovl_rm r am :: k)
+      do r <- ireg_of dst;
+      transl_memory_access (Plw r)  addr args k
   | Mint64 =>
-      do r <- ireg_of dest; OK(Pmovq_rm r am :: k)
+      do r <- ireg_of dst;
+      transl_memory_access (Pld r)  addr args k
   | Mfloat32 =>
-      do r <- freg_of dest; OK(Pmovss_fm r am :: k)
+      do r <- freg_of dst;
+      transl_memory_access (Pfls r) addr args k
   | Mfloat64 =>
-      do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
+      do r <- freg_of dst;
+      transl_memory_access (Pfld r) addr args k
   | _ =>
       Error (msg "Asmgen.transl_load")
-  end
+    end
   end.
 
-Definition transl_store (chunk: memory_chunk)
-                        (addr: addressing) (args: list mreg) (src: mreg)
-                        (k: code) : res code :=
-  do am <- transl_addressing addr args;
+Definition transl_store (chunk: memory_chunk) (addr: addressing)
+           (args: list mreg) (src: mreg) (k: code) :=
   match chunk with
-  | Mint8unsigned | Mint8signed =>
-      do r <- ireg_of src; mk_storebyte am r k
-  | Mint16unsigned | Mint16signed =>
-      do r <- ireg_of src; OK(Pmovw_mr am r :: k)
+  | Mint8signed | Mint8unsigned =>
+      do r <- ireg_of src;
+      transl_memory_access (Psb r)  addr args k
+  | Mint16signed | Mint16unsigned =>
+      do r <- ireg_of src;
+      transl_memory_access (Psh r)  addr args k
   | Mint32 =>
-      do r <- ireg_of src; OK(Pmovl_mr am r :: k)
+      do r <- ireg_of src;
+      transl_memory_access (Psw r)  addr args k
   | Mint64 =>
-      do r <- ireg_of src; OK(Pmovq_mr am r :: k)
+      do r <- ireg_of src;
+      transl_memory_access (Psd r)  addr args k
   | Mfloat32 =>
-      do r <- freg_of src; OK(Pmovss_mf am r :: k)
+      do r <- freg_of src;
+      transl_memory_access (Pfss r) addr args k
   | Mfloat64 =>
-      do r <- freg_of src; OK(Pmovsd_mf am r :: k)
+      do r <- freg_of src;
+      transl_memory_access (Pfsd r) addr args k
   | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
+(** Function epilogue *)
+
+Definition make_epilogue (f: Mach.function) (k: code) :=
+  loadind_ptr SP f.(fn_retaddr_ofs) RA
+    (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
+
 (** Translation of a Mach instruction. *)
 
 Definition transl_instr (f: Mach.function) (i: Mach.instruction)
-                        (ax_is_parent: bool) (k: code) :=
+                        (ep: bool) (k: code) :=
   match i with
   | Mgetstack ofs ty dst =>
-      loadind RSP ofs ty dst k
+      loadind SP ofs ty dst k
   | Msetstack src ofs ty =>
-      storeind src RSP ofs ty k
+      storeind src SP ofs ty k
   | Mgetparam ofs ty dst =>
-      if ax_is_parent then
-        loadind RAX ofs ty dst k
-      else
-        (do k1 <- loadind RAX ofs ty dst k;
-         loadind RSP f.(fn_link_ofs) Tptr AX k1)
+      (* load via the frame pointer if it is valid *)
+      do c <- loadind X30 ofs ty dst k;
+      OK (if ep then c
+                else loadind_ptr SP f.(fn_link_ofs) X30 c)
   | Mop op args res =>
       transl_op op args res k
   | Mload trap chunk addr args dst =>
       transl_load trap chunk addr args dst k
   | Mstore chunk addr args src =>
       transl_store chunk addr args src k
-  | Mcall sig (inl reg) =>
-      do r <- ireg_of reg; OK (Pcall_r r sig :: k)
+  | Mcall sig (inl r) =>
+      do r1 <- ireg_of r; OK (Pjal_r r1 sig :: k)
   | Mcall sig (inr symb) =>
-      OK (Pcall_s symb sig :: k)
-  | Mtailcall sig (inl reg) =>
-      do r <- ireg_of reg;
-      OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) ::
-          Pjmp_r r sig :: k)
+      OK (Pjal_s symb sig :: k)
+  | Mtailcall sig (inl r) =>
+      do r1 <- ireg_of r;
+      OK (make_epilogue f (Pj_r r1 sig :: k))
   | Mtailcall sig (inr symb) =>
-      OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) ::
-          Pjmp_s symb sig :: k)
+      OK (make_epilogue f (Pj_s symb sig :: k))
+  | Mbuiltin ef args res =>
+      OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k)
   | Mlabel lbl =>
-      OK(Plabel lbl :: k)
+      OK (Plabel lbl :: k)
   | Mgoto lbl =>
-      OK(Pjmp_l lbl :: k)
+      OK (Pj_l lbl :: k)
   | Mcond cond args lbl =>
-      transl_cond cond args (mk_jcc (testcond_for_condition cond) lbl k)
+      transl_cbranch cond args lbl k
   | Mjumptable arg tbl =>
-      do r <- ireg_of arg; OK (Pjmptbl r tbl :: k)
+      do r <- ireg_of arg;
+      OK (Pbtbl r tbl :: k)
   | Mreturn =>
-      OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) ::
-          Pret :: k)
-  | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k)
+      OK (make_epilogue f (Pj_r RA f.(Mach.fn_sig) :: k))
   end.
 
 (** Translation of a code sequence *)
@@ -740,35 +1173,36 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
 Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
   match i with
   | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst AX)
+  | Mgetparam ofs ty dst => negb (mreg_eq dst R30)
+  | Mop op args res => before && negb (mreg_eq res R30)
   | _ => false
   end.
 
 (** This is the naive definition that we no longer use because it
   is not tail-recursive.  It is kept as specification. *)
 
-Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (axp: bool) :=
+Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
   match il with
   | nil => OK nil
   | i1 :: il' =>
-      do k <- transl_code f il' (it1_is_parent axp i1);
-      transl_instr f i1 axp k
+      do k <- transl_code f il' (it1_is_parent it1p i1);
+      transl_instr f i1 it1p k
   end.
 
 (** This is an equivalent definition in continuation-passing style
   that runs in constant stack space. *)
 
 Fixpoint transl_code_rec (f: Mach.function) (il: list Mach.instruction)
-                         (axp: bool) (k: code -> res code) :=
+                         (it1p: bool) (k: code -> res code) :=
   match il with
   | nil => k nil
   | i1 :: il' =>
-      transl_code_rec f il' (it1_is_parent axp i1)
-        (fun c1 => do c2 <- transl_instr f i1 axp c1; k c2)
+      transl_code_rec f il' (it1_is_parent it1p i1)
+        (fun c1 => do c2 <- transl_instr f i1 it1p c1; k c2)
   end.
 
-Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (axp: bool) :=
-  transl_code_rec f il axp (fun c => OK c).
+Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
+  transl_code_rec f il it1p (fun c => OK c).
 
 (** Translation of a whole function.  Note that we must check
   that the generated code contains less than [2^32] instructions,
@@ -778,7 +1212,8 @@ Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (axp: boo
 Definition transl_function (f: Mach.function) :=
   do c <- transl_code' f f.(Mach.fn_code) true;
   OK (mkfunction f.(Mach.fn_sig)
-        (Pallocframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) :: c)).
+        (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
+         storeind_ptr RA SP f.(fn_retaddr_ofs) c)).
 
 Definition transf_function (f: Mach.function) : res Asm.function :=
   do tf <- transl_function f;
@@ -791,4 +1226,3 @@ Definition transf_fundef (f: Mach.fundef) : res Asm.fundef :=
 
 Definition transf_program (p: Mach.program) : res Asm.program :=
   transform_partial_program transf_fundef p.
-
diff --git a/verilog/Asmgenproof.v b/verilog/Asmgenproof.v
index 8c28fb1b..e59c4535 100644
--- a/verilog/Asmgenproof.v
+++ b/verilog/Asmgenproof.v
@@ -2,7 +2,7 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                  Xavier Leroy, INRIA Paris                          *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Correctness proof for x86-64 generation: main proof. *)
+(** Correctness proof for RISC-V generation: main proof. *)
 
 Require Import Coqlib Errors.
 Require Import Integers Floats AST Linking.
@@ -64,9 +64,9 @@ Qed.
 
 Lemma transf_function_no_overflow:
   forall f tf,
-  transf_function f = OK tf -> list_length_z (fn_code tf) <= Ptrofs.max_unsigned.
+  transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned.
 Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); monadInv EQ0.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
   lia.
 Qed.
 
@@ -111,143 +111,269 @@ Qed.
 >>
   The proof demands many boring lemmas showing that Asm constructor
   functions do not introduce new labels.
-
-  In passing, we also prove a "is tail" property of the generated Asm code.
 *)
 
 Section TRANSL_LABEL.
 
-Remark mk_mov_label:
-  forall rd rs k c, mk_mov rd rs k = OK c -> tail_nolabel k c.
+Remark loadimm32_label:
+  forall r n k, tail_nolabel k (loadimm32 r n k).
 Proof.
-  unfold mk_mov; intros.
-  destruct rd; try discriminate; destruct rs; TailNoLabel.
+  intros; unfold loadimm32. destruct (make_immed32 n); TailNoLabel.
+  unfold load_hilo32. destruct (Int.eq lo Int.zero); TailNoLabel.
 Qed.
-Hint Resolve mk_mov_label: labels.
+Hint Resolve loadimm32_label: labels.
 
-Remark mk_shrximm_label:
-  forall n k c, mk_shrximm n k = OK c -> tail_nolabel k c.
+Remark opimm32_label:
+  forall op opimm r1 r2 n k,
+  (forall r1 r2 r3, nolabel (op r1 r2 r3)) ->
+  (forall r1 r2 n, nolabel (opimm r1 r2 n)) ->
+  tail_nolabel k (opimm32 op opimm r1 r2 n k).
 Proof.
-  intros. monadInv H; TailNoLabel.
+  intros; unfold opimm32. destruct (make_immed32 n); TailNoLabel.
+  unfold load_hilo32. destruct (Int.eq lo Int.zero); TailNoLabel.
 Qed.
-Hint Resolve mk_shrximm_label: labels.
+Hint Resolve opimm32_label: labels.
 
-Remark mk_shrxlimm_label:
-  forall n k c, mk_shrxlimm n k = OK c -> tail_nolabel k c.
+Remark loadimm64_label:
+  forall r n k, tail_nolabel k (loadimm64 r n k).
 Proof.
-  intros. monadInv H. destruct (Int.eq n Int.zero); TailNoLabel.
+  intros; unfold loadimm64. destruct (make_immed64 n); TailNoLabel.
+  unfold load_hilo64. destruct (Int64.eq lo Int64.zero); TailNoLabel.
 Qed.
-Hint Resolve mk_shrxlimm_label: labels.
+Hint Resolve loadimm64_label: labels.
 
-Remark mk_intconv_label:
-  forall f r1 r2 k c, mk_intconv f r1 r2 k = OK c ->
-  (forall r r', nolabel (f r r')) ->
-  tail_nolabel k c.
+Remark opimm64_label:
+  forall op opimm r1 r2 n k,
+  (forall r1 r2 r3, nolabel (op r1 r2 r3)) ->
+  (forall r1 r2 n, nolabel (opimm r1 r2 n)) ->
+  tail_nolabel k (opimm64 op opimm r1 r2 n k).
 Proof.
-  unfold mk_intconv; intros. TailNoLabel.
+  intros; unfold opimm64. destruct (make_immed64 n); TailNoLabel.
+  unfold load_hilo64. destruct (Int64.eq lo Int64.zero); TailNoLabel.
 Qed.
-Hint Resolve mk_intconv_label: labels.
+Hint Resolve opimm64_label: labels.
 
-Remark mk_storebyte_label:
-  forall addr r k c, mk_storebyte addr r k = OK c -> tail_nolabel k c.
+Remark addptrofs_label:
+  forall r1 r2 n k, tail_nolabel k (addptrofs r1 r2 n k).
 Proof.
-  unfold mk_storebyte; intros. TailNoLabel.
+  unfold addptrofs; intros. destruct (Ptrofs.eq_dec n Ptrofs.zero). TailNoLabel.
+  destruct Archi.ptr64. apply opimm64_label; TailNoLabel. apply opimm32_label; TailNoLabel.
 Qed.
-Hint Resolve mk_storebyte_label: labels.
+Hint Resolve addptrofs_label: labels.
 
-Remark loadind_label:
-  forall base ofs ty dst k c,
-  loadind base ofs ty dst k = OK c ->
-  tail_nolabel k c.
+Remark transl_cond_float_nolabel:
+  forall c r1 r2 r3 insn normal,
+  transl_cond_float c r1 r2 r3 = (insn, normal) -> nolabel insn.
 Proof.
-  unfold loadind; intros. destruct ty; try discriminate; destruct (preg_of dst); TailNoLabel.
+  unfold transl_cond_float; intros. destruct c; inv H; exact I.
 Qed.
 
-Remark storeind_label:
-  forall base ofs ty src k c,
-  storeind src base ofs ty k = OK c ->
-  tail_nolabel k c.
+Remark transl_cond_single_nolabel:
+  forall c r1 r2 r3 insn normal,
+  transl_cond_single c r1 r2 r3 = (insn, normal) -> nolabel insn.
 Proof.
-  unfold storeind; intros. destruct ty; try discriminate; destruct (preg_of src); TailNoLabel.
+  unfold transl_cond_single; intros. destruct c; inv H; exact I.
+   Qed.
+
+Remark transl_cbranch_label:
+  forall cond args lbl k c,
+  transl_cbranch cond args lbl k = OK c -> tail_nolabel k c.
+Proof.
+  intros. unfold transl_cbranch in H; destruct cond; TailNoLabel.
+- destruct c0; simpl; TailNoLabel.
+- destruct c0; simpl; TailNoLabel.
+- destruct (Int.eq n Int.zero).
+  destruct c0; simpl; TailNoLabel.
+  apply tail_nolabel_trans with (transl_cbranch_int32s c0 x X31 lbl :: k).
+  auto with labels. destruct c0; simpl; TailNoLabel.
+- destruct (Int.eq n Int.zero).
+  destruct c0; simpl; TailNoLabel.
+  apply tail_nolabel_trans with (transl_cbranch_int32u c0 x X31 lbl :: k).
+  auto with labels. destruct c0; simpl; TailNoLabel.
+- destruct c0; simpl; TailNoLabel.
+- destruct c0; simpl; TailNoLabel.
+- destruct (Int64.eq n Int64.zero).
+  destruct c0; simpl; TailNoLabel.
+  apply tail_nolabel_trans with (transl_cbranch_int64s c0 x X31 lbl :: k).
+  auto with labels. destruct c0; simpl; TailNoLabel.
+- destruct (Int64.eq n Int64.zero).
+  destruct c0; simpl; TailNoLabel.
+  apply tail_nolabel_trans with (transl_cbranch_int64u c0 x X31 lbl :: k).
+  auto with labels. destruct c0; simpl; TailNoLabel.
+- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
+     destruct normal; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
 Qed.
 
-Remark mk_setcc_base_label:
-  forall xc rd k,
-  tail_nolabel k (mk_setcc_base xc rd k).
+Remark transl_cond_op_label:
+  forall cond args r k c,
+  transl_cond_op cond r args k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct xc; simpl; destruct (ireg_eq rd RAX); TailNoLabel.
+  intros. unfold transl_cond_op in H; destruct cond; TailNoLabel.
+- destruct c0; simpl; TailNoLabel.
+- destruct c0; simpl; TailNoLabel. 
+- unfold transl_condimm_int32s.
+  destruct (Int.eq n Int.zero).
++ destruct c0; simpl; TailNoLabel.
++ destruct c0; simpl.
+* eapply tail_nolabel_trans; [apply opimm32_label; intros; exact I | TailNoLabel].
+* eapply tail_nolabel_trans; [apply opimm32_label; intros; exact I | TailNoLabel].
+* apply opimm32_label; intros; exact I.
+* destruct (Int.eq n (Int.repr Int.max_signed)). apply loadimm32_label. apply opimm32_label; intros; exact I.
+* eapply tail_nolabel_trans. apply loadimm32_label. TailNoLabel.
+* eapply tail_nolabel_trans. apply loadimm32_label. TailNoLabel.
+- unfold transl_condimm_int32u.
+  destruct (Int.eq n Int.zero).
++ destruct c0; simpl; TailNoLabel.
++ destruct c0; simpl; 
+  try (eapply tail_nolabel_trans; [apply loadimm32_label | TailNoLabel]).
+  apply opimm32_label; intros; exact I.
+- destruct c0; simpl; TailNoLabel.
+     - destruct c0; simpl; TailNoLabel.
+- unfold transl_condimm_int64s.
+  destruct (Int64.eq n Int64.zero).
++ destruct c0; simpl; TailNoLabel.
++ destruct c0; simpl.
+* eapply tail_nolabel_trans; [apply opimm64_label; intros; exact I | TailNoLabel].
+* eapply tail_nolabel_trans; [apply opimm64_label; intros; exact I | TailNoLabel].
+* apply opimm64_label; intros; exact I.
+* destruct (Int64.eq n (Int64.repr Int64.max_signed)). apply loadimm32_label. apply opimm64_label; intros; exact I.
+* eapply tail_nolabel_trans. apply loadimm64_label. TailNoLabel.
+* eapply tail_nolabel_trans. apply loadimm64_label. TailNoLabel.
+- unfold transl_condimm_int64u.
+  destruct (Int64.eq n Int64.zero).
++ destruct c0; simpl; TailNoLabel.
++ destruct c0; simpl; 
+  try (eapply tail_nolabel_trans; [apply loadimm64_label | TailNoLabel]).
+     apply opimm64_label; intros; exact I.
+- destruct (transl_cond_float c0 r x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_float c0 r x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_float_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_single c0 r x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+- destruct (transl_cond_single c0 r x x0) as [insn normal] eqn:F; inv EQ2.
+  apply tail_nolabel_cons. eapply transl_cond_single_nolabel; eauto. 
+  destruct normal; TailNoLabel.
+   Qed.
+
+Remark transl_op_label:
+  forall op args r k c,
+  transl_op op args r k = OK c -> tail_nolabel k c.
+Proof.
+Opaque Int.eq.
+  unfold transl_op; intros; destruct op; TailNoLabel.
+- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
+- destruct (Float.eq_dec n Float.zero); TailNoLabel.
+- destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
+- destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
++ eapply tail_nolabel_trans; [|apply addptrofs_label]. TailNoLabel.
++ TailNoLabel. 
+- apply opimm32_label; intros; exact I.
+- apply opimm32_label; intros; exact I.
+- apply opimm32_label; intros; exact I.
+- apply opimm32_label; intros; exact I.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
+- apply opimm64_label; intros; exact I.
+- apply opimm64_label; intros; exact I.
+- apply opimm64_label; intros; exact I.
+- apply opimm64_label; intros; exact I.
+- destruct (Int.eq n Int.zero); try destruct (Int.eq n Int.one); TailNoLabel.
+- eapply transl_cond_op_label; eauto.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
+- destruct optR as [[]|]; simpl in *; TailNoLabel.
 Qed.
 
-Remark mk_setcc_label:
-  forall xc rd k,
-  tail_nolabel k (mk_setcc xc rd k).
+Remark indexed_memory_access_label:
+  forall (mk_instr: ireg -> offset -> instruction) base ofs k,
+  (forall r o, nolabel (mk_instr r o)) ->
+  tail_nolabel k (indexed_memory_access mk_instr base ofs k).
 Proof.
-  intros. unfold mk_setcc. destruct (Archi.ptr64 || low_ireg rd).
-  apply mk_setcc_base_label.
-  eapply tail_nolabel_trans. apply mk_setcc_base_label. TailNoLabel.
+  unfold indexed_memory_access; intros. 
+  destruct Archi.ptr64.
+  destruct (make_immed64 (Ptrofs.to_int64 ofs)); TailNoLabel.
+  destruct (make_immed32 (Ptrofs.to_int ofs)); TailNoLabel.
 Qed.
 
-Remark mk_jcc_label:
-  forall xc lbl' k,
-  tail_nolabel k (mk_jcc xc lbl' k).
+Remark loadind_label:
+  forall base ofs ty dst k c,
+  loadind base ofs ty dst k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct xc; simpl; TailNoLabel.
+  unfold loadind; intros.
+  destruct ty, (preg_of dst); inv H; apply indexed_memory_access_label; intros; exact I.
 Qed.
 
-Remark mk_sel_label:
-  forall xc rd r2 k c,
-  mk_sel xc rd r2 k = OK c ->
-  tail_nolabel k c.
+Remark storeind_label:
+  forall src base ofs ty k c,
+  storeind src base ofs ty k = OK c -> tail_nolabel k c.
 Proof.
-  unfold mk_sel; intros; destruct xc; inv H; TailNoLabel.
+  unfold storeind; intros.
+  destruct ty, (preg_of src); inv H; apply indexed_memory_access_label; intros; exact I.
 Qed.
 
-Remark transl_cond_label:
-  forall cond args k c,
-  transl_cond cond args k = OK c ->
-  tail_nolabel k c.
+Remark loadind_ptr_label:
+  forall base ofs dst k, tail_nolabel k (loadind_ptr base ofs dst k).
 Proof.
-  unfold transl_cond; intros.
-  destruct cond; TailNoLabel.
-  destruct (Int.eq_dec n Int.zero); TailNoLabel.
-  destruct (Int64.eq_dec n Int64.zero); TailNoLabel.
-  destruct c0; simpl; TailNoLabel.
-  destruct c0; simpl; TailNoLabel.
-  destruct c0; simpl; TailNoLabel.
-  destruct c0; simpl; TailNoLabel.
+  intros. apply indexed_memory_access_label. intros; destruct Archi.ptr64; exact I.
 Qed.
 
-Remark transl_op_label:
-  forall op args r k c,
-  transl_op op args r k = OK c ->
-  tail_nolabel k c.
+Remark storeind_ptr_label:
+  forall src base ofs k, tail_nolabel k (storeind_ptr src base ofs k).
 Proof.
-  unfold transl_op; intros. destruct op; TailNoLabel.
-  destruct (Int.eq_dec n Int.zero); TailNoLabel.
-  destruct (Int64.eq_dec n Int64.zero); TailNoLabel.
-  destruct (Float.eq_dec n Float.zero); TailNoLabel.
-  destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
-  destruct (normalize_addrmode_64 x) as [am' [delta|]]; TailNoLabel.
-  eapply tail_nolabel_trans. eapply transl_cond_label; eauto. eapply mk_setcc_label.
-  unfold transl_sel in EQ2. destruct (ireg_eq x x0); monadInv EQ2.
-  TailNoLabel.
-  eapply tail_nolabel_trans. eapply transl_cond_label; eauto. eapply mk_sel_label; eauto.
-Qed.
-
-Remark transl_load_label:
-  forall trap chunk addr args dest k c,
-  transl_load trap chunk addr args dest k = OK c ->
+  intros. apply indexed_memory_access_label. intros; destruct Archi.ptr64; exact I.
+Qed.
+
+Remark transl_memory_access_label:
+  forall (mk_instr: ireg -> offset -> instruction) addr args k c,
+  (forall r o, nolabel (mk_instr r o)) ->
+  transl_memory_access mk_instr addr args k = OK c ->
   tail_nolabel k c.
 Proof.
-  intros. destruct trap; try discriminate. monadInv H. destruct chunk; TailNoLabel.
+  unfold transl_memory_access; intros; destruct addr; TailNoLabel; apply indexed_memory_access_label; auto. 
 Qed.
 
-Remark transl_store_label:
-  forall chunk addr args src k c,
-  transl_store chunk addr args src k = OK c ->
-  tail_nolabel k c.
+Remark make_epilogue_label:
+  forall f k, tail_nolabel k (make_epilogue f k).
 Proof.
-  intros. monadInv H. destruct chunk; TailNoLabel.
+  unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadind_ptr_label. TailNoLabel.
 Qed.
 
 Lemma transl_instr_label:
@@ -255,18 +381,18 @@ Lemma transl_instr_label:
   transl_instr f i ep k = OK c ->
   match i with Mlabel lbl => c = Plabel lbl :: k | _ => tail_nolabel k c end.
 Proof.
-Opaque loadind.
   unfold transl_instr; intros; destruct i; TailNoLabel.
-  eapply loadind_label; eauto.
-  eapply storeind_label; eauto.
-  eapply loadind_label; eauto.
-  eapply tail_nolabel_trans; eapply loadind_label; eauto.
-  eapply transl_op_label; eauto.
-  eapply transl_load_label; eauto.
-  eapply transl_store_label; eauto.
-  destruct s0; TailNoLabel.
-  destruct s0; TailNoLabel.
-  eapply tail_nolabel_trans. eapply transl_cond_label; eauto. eapply mk_jcc_label.
+- eapply loadind_label; eauto.
+- eapply storeind_label; eauto.
+- destruct ep. eapply loadind_label; eauto.
+  eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto. 
+- eapply transl_op_label; eauto.
+- destruct t; (try discriminate); destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
+- destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I.
+- destruct s0; monadInv H; TailNoLabel.
+- destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]).
+- eapply transl_cbranch_label; eauto.
+- eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel].
 Qed.
 
 Lemma transl_instr_label':
@@ -304,14 +430,16 @@ Lemma transl_find_label:
   | Some c => exists tc, find_label lbl tf.(fn_code) = Some tc /\ transl_code f c false = OK tc
   end.
 Proof.
-  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0.
-  monadInv EQ. simpl. eapply transl_code_label; eauto. rewrite transl_code'_transl_code in EQ0; eauto.
+  intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
+  monadInv EQ. rewrite transl_code'_transl_code in EQ0. unfold fn_code. 
+  simpl. destruct (storeind_ptr_label X1 X2 (fn_retaddr_ofs f) x) as [A B]; rewrite B. 
+  eapply transl_code_label; eauto.
 Qed.
 
 End TRANSL_LABEL.
 
 (** A valid branch in a piece of Mach code translates to a valid ``go to''
-  transition in the generated PPC code. *)
+  transition in the generated Asm code. *)
 
 Lemma find_label_goto_label:
   forall f tf lbl rs m c' b ofs,
@@ -347,9 +475,10 @@ Proof.
 - intros. exploit transl_instr_label; eauto.
   destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor.
 - intros. monadInv H0.
-  destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0.
-  monadInv EQ. rewrite transl_code'_transl_code in EQ0.
-  exists x; exists true; split; auto. unfold fn_code. repeat constructor.
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. monadInv EQ.
+  rewrite transl_code'_transl_code in EQ0.
+  exists x; exists true; split; auto. unfold fn_code.
+  constructor. apply (storeind_ptr_label X1 X2 (fn_retaddr_ofs f0) x).
 - exact transf_function_no_overflow.
 Qed.
 
@@ -366,9 +495,9 @@ Qed.
            st1'--------------- st2'
 >>
   The invariant is the [match_states] predicate below, which includes:
-- The PPC code pointed by the PC register is the translation of
+- The Asm code pointed by the PC register is the translation of
   the current Mach code sequence.
-- Mach register values and PPC register values agree.
+- Mach register values and Asm register values agree.
 *)
 
 Inductive match_states: Mach.state -> Asm.state -> Prop :=
@@ -379,7 +508,7 @@ Inductive match_states: Mach.state -> Asm.state -> Prop :=
         (MEXT: Mem.extends m m')
         (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
         (AG: agree ms sp rs)
-        (AXP: ep = true -> rs#RAX = parent_sp s),
+        (DXP: ep = true -> rs#X30 = parent_sp s),
       match_states (Mach.State s fb sp c ms m)
                    (Asm.State rs m')
   | match_states_call:
@@ -410,7 +539,7 @@ Lemma exec_straight_steps:
    exists rs2,
        exec_straight tge tf c rs1 m1' k rs2 m2'
     /\ agree ms2 sp rs2
-    /\ (it1_is_parent ep i = true -> rs2#RAX = parent_sp s)) ->
+    /\ (it1_is_parent ep i = true -> rs2#X30 = parent_sp s)) ->
   exists st',
   plus step tge (State rs1 m1') E0 st' /\
   match_states (Mach.State s fb sp c ms2 m2) st'.
@@ -459,11 +588,58 @@ Proof.
   congruence.
 Qed.
 
+Lemma exec_straight_opt_steps_goto:
+  forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
+  match_stack ge s ->
+  Mem.extends m2 m2' ->
+  Genv.find_funct_ptr ge fb = Some (Internal f) ->
+  Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
+  transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
+  it1_is_parent ep i = false ->
+  (forall k c (TR: transl_instr f i ep k = OK c),
+   exists jmp, exists k', exists rs2,
+       exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2'
+    /\ agree ms2 sp rs2
+    /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') ->
+  exists st',
+  plus step tge (State rs1 m1') E0 st' /\
+  match_states (Mach.State s fb sp c' ms2 m2) st'.
+Proof.
+  intros. inversion H3. subst. monadInv H9.
+  exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]].
+  generalize (functions_transl _ _ _ H7 H8); intro FN.
+  generalize (transf_function_no_overflow _ _ H8); intro NOOV.
+  inv A.
+- exploit find_label_goto_label; eauto.
+  intros [tc' [rs3 [GOTO [AT' OTH]]]].
+  exists (State rs3 m2'); split.
+  apply plus_one. econstructor; eauto.
+  eapply find_instr_tail. eauto.
+  rewrite C. eexact GOTO.
+  econstructor; eauto.
+  apply agree_exten with rs2; auto with asmgen.
+  congruence.
+- exploit exec_straight_steps_2; eauto.
+  intros [ofs' [PC2 CT2]].
+  exploit find_label_goto_label; eauto.
+  intros [tc' [rs3 [GOTO [AT' OTH]]]].
+  exists (State rs3 m2'); split.
+  eapply plus_right'.
+  eapply exec_straight_steps_1; eauto.
+  econstructor; eauto.
+  eapply find_instr_tail. eauto.
+  rewrite C. eexact GOTO.
+  traceEq.
+  econstructor; eauto.
+  apply agree_exten with rs2; auto with asmgen.
+  congruence.
+Qed.
+
 (** We need to show that, in the simulation diagram, we cannot
   take infinitely many Mach transitions that correspond to zero
-  transitions on the PPC side.  Actually, all Mach transitions
+  transitions on the Asm side.  Actually, all Mach transitions
   correspond to at least one Asm transition, except the
-  transition from [Mach.Returnstate] to [Mach.State].
+  transition from [Machsem.Returnstate] to [Machsem.State].
   So, the following integer measure will suffice to rule out
   the unwanted behaviour. *)
 
@@ -474,6 +650,12 @@ Definition measure (s: Mach.state) : nat :=
   | Mach.Returnstate _ _ _ => 1%nat
   end.
 
+Remark preg_of_not_X30: forall r, negb (mreg_eq r R30) = true -> IR X30 <> preg_of r.
+Proof.
+  intros. change (IR X30) with (preg_of R30). red; intros.
+  exploit preg_of_injective; eauto. intros; subst r; discriminate.
+Qed.
+
 (** This is the simulation diagram.  We prove it by case analysis on the Mach transition. *)
 
 Theorem step_simulation:
@@ -494,9 +676,9 @@ Proof.
   exploit Mem.loadv_extends; eauto. intros [v' [A B]].
   rewrite (sp_val _ _ _ AG) in A.
   left; eapply exec_straight_steps; eauto. intros. simpl in TR.
-  exploit loadind_correct; eauto. intros [rs' [P [Q R]]].
+  exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
   exists rs'; split. eauto.
-  split. eapply agree_set_mreg; eauto. congruence.
+  split. eapply agree_set_mreg; eauto with asmgen. congruence.
   simpl; congruence.
 
 - (* Msetstack *)
@@ -505,12 +687,10 @@ Proof.
   exploit Mem.storev_extends; eauto. intros [m2' [A B]].
   left; eapply exec_straight_steps; eauto.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
-  exploit storeind_correct; eauto. intros [rs' [P Q]].
+  exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. eapply agree_undef_regs; eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
   simpl; intros. rewrite Q; auto with asmgen.
-Local Transparent destroyed_by_setstack.
-  destruct ty; simpl; intuition congruence.
 
 - (* Mgetparam *)
   assert (f0 = f) by congruence; subst f0.
@@ -521,42 +701,47 @@ Local Transparent destroyed_by_setstack.
   exploit Mem.loadv_extends. eauto. eexact H1. auto.
   intros [v' [C D]].
 Opaque loadind.
-  left; eapply exec_straight_steps; eauto; intros.
-  assert (DIFF: negb (mreg_eq dst AX) = true -> IR RAX <> preg_of dst).
-    intros. change (IR RAX) with (preg_of AX). red; intros.
-    unfold proj_sumbool in H1. destruct (mreg_eq dst AX); try discriminate.
-    elim n. eapply preg_of_injective; eauto.
-  destruct ep; simpl in TR.
-(* RAX contains parent *)
-  exploit loadind_correct. eexact TR.
-  instantiate (2 := rs0). rewrite AXP; eauto.
+  left; eapply exec_straight_steps; eauto; intros. monadInv TR. 
+  destruct ep.
+(* X30 contains parent *)
+  exploit loadind_correct. eexact EQ.
+  instantiate (2 := rs0). rewrite DXP; eauto. congruence.
   intros [rs1 [P [Q R]]].
   exists rs1; split. eauto.
-  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto.
-  simpl; intros. rewrite R; auto.
-(* RAX does not contain parent *)
-  monadInv TR.
-  exploit loadind_correct. eexact EQ0. eauto. intros [rs1 [P [Q R]]]. simpl in Q.
-  exploit loadind_correct. eexact EQ. instantiate (2 := rs1). rewrite Q. eauto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+  simpl; intros. rewrite R; auto with asmgen.
+  apply preg_of_not_X30; auto.
+(* GPR11 does not contain parent *)
+  rewrite chunk_of_Tptr in A. 
+  exploit loadind_ptr_correct. eexact A. congruence. intros [rs1 [P [Q R]]].
+  exploit loadind_correct. eexact EQ. instantiate (2 := rs1). rewrite Q. eauto. congruence.
   intros [rs2 [S [T U]]].
   exists rs2; split. eapply exec_straight_trans; eauto.
-  split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto.
-  simpl; intros. rewrite U; auto.
+  split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+  instantiate (1 := rs1#X30 <- (rs2#X30)). intros.
+  rewrite Pregmap.gso; auto with asmgen.
+  congruence.
+  intros. unfold Pregmap.set. destruct (PregEq.eq r' X30). congruence. auto with asmgen.
+  simpl; intros. rewrite U; auto with asmgen.
+  apply preg_of_not_X30; auto.
 
 - (* Mop *)
-  assert (eval_operation tge sp op rs##args m = Some v).
+  assert (eval_operation tge sp op (map rs args) m = Some v).
     rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
   exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
   intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
   left; eapply exec_straight_steps; eauto; intros. simpl in TR.
   exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
-  assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto).
-  exists rs2; split. eauto.
-  split. eapply agree_set_undef_mreg; eauto.
-  simpl; congruence.
+  exists rs2; split. eauto. split. auto.
+  apply agree_set_undef_mreg with rs0; auto. 
+  apply Val.lessdef_trans with v'; auto.
+  simpl; intros. destruct (andb_prop _ _ H1); clear H1.
+  rewrite R; auto. apply preg_of_not_X30; auto.
+Local Transparent destroyed_by_op.
+  destruct op; simpl; auto; congruence.
 
 - (* Mload *)
-  assert (eval_addressing tge sp addr rs##args = Some a).
+  assert (eval_addressing tge sp addr (map rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
   exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
   intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
@@ -565,6 +750,7 @@ Opaque loadind.
   exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]].
   exists rs2; split. eauto.
   split. eapply agree_set_undef_mreg; eauto. congruence.
+  intros; auto with asmgen.
   simpl; congruence.
 
 - (* Mload notrap *) (* isn't there a nicer way? *)
@@ -572,19 +758,18 @@ Opaque loadind.
 
 - (* Mload notrap *)
   inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate.
-  
+
 - (* Mstore *)
-  assert (eval_addressing tge sp addr rs##args = Some a).
+  assert (eval_addressing tge sp addr (map rs args) = Some a).
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
   exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
   intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
   assert (Val.lessdef (rs src) (rs0 (preg_of src))). eapply preg_val; eauto.
   exploit Mem.storev_extends; eauto. intros [m2' [C D]].
   left; eapply exec_straight_steps; eauto.
-  intros. simpl in TR.
-  exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+  intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]].
   exists rs2; split. eauto.
-  split. eapply agree_undef_regs; eauto.
+  split. eapply agree_undef_regs; eauto with asmgen.
   simpl; congruence.
 
 - (* Mcall *)
@@ -604,14 +789,14 @@ Opaque loadind.
     econstructor; eauto.
   exploit return_address_offset_correct; eauto. intros; subst ra.
   left; econstructor; split.
-  apply plus_one. eapply exec_step_internal. eauto.
+  apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto.
   eapply functions_transl; eauto. eapply find_instr_tail; eauto.
   simpl. eauto.
   econstructor; eauto.
   econstructor; eauto.
   eapply agree_sp_def; eauto.
-  simpl. eapply agree_exten; eauto. intros. Simplifs.
-  Simplifs. rewrite <- H2. auto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
 + (* Direct call *)
   generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
   assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
@@ -624,20 +809,14 @@ Opaque loadind.
   econstructor; eauto.
   econstructor; eauto.
   eapply agree_sp_def; eauto.
-  simpl. eapply agree_exten; eauto. intros. Simplifs.
-  Simplifs. rewrite <- H2. auto.
+  simpl. eapply agree_exten; eauto. intros. Simpl.
+  Simpl. rewrite <- H2. auto.
 
 - (* Mtailcall *)
   assert (f0 = f) by congruence.  subst f0.
-  inv AT.
+  inversion AT; subst.
   assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
-    eapply transf_function_no_overflow; eauto.
-  rewrite (sp_val _ _ _ AG) in *. unfold load_stack in *.
-  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
-  exploit Mem.loadv_extends. eauto. eexact H2. auto. simpl. intros [ra' [C D]].
-  exploit lessdef_parent_sp; eauto. intros. subst parent'. clear B.
-  exploit lessdef_parent_ra; eauto. intros. subst ra'. clear D.
-  exploit Mem.free_parallel_extends; eauto. intros [m2' [E F]].
+    eapply transf_function_no_overflow; eauto.  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
   destruct ros as [rf|fid]; simpl in H; monadInv H7.
 + (* Indirect call *)
   assert (rs rf = Vptr f' Ptrofs.zero).
@@ -645,36 +824,33 @@ Opaque loadind.
     revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence.
   assert (rs0 x0 = Vptr f' Ptrofs.zero).
     exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto.
-  generalize (code_tail_next_int _ _ _ _ NOOV H8). intro CT1.
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
   left; econstructor; split.
-  eapply plus_left. eapply exec_step_internal. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. replace (chunk_of_type Tptr) with Mptr in * by (unfold Tptr, Mptr; destruct Archi.ptr64; auto).
-  rewrite C. rewrite A. rewrite <- (sp_val _ _ _ AG). rewrite E. eauto.
-  apply star_one. eapply exec_step_internal.
-  transitivity (Val.offset_ptr rs0#PC Ptrofs.one). auto. rewrite <- H4. simpl. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. eauto. traceEq.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
   econstructor; eauto.
-  apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto.
-  eapply agree_change_sp; eauto. eapply parent_sp_def; eauto.
-  Simplifs. rewrite Pregmap.gso; auto.
-  generalize (preg_of_not_SP rf). rewrite (ireg_of_eq _ _ EQ1). congruence.
+  apply agree_set_other; auto with asmgen.
+  Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption. 
 + (* Direct call *)
-  generalize (code_tail_next_int _ _ _ _ NOOV H8). intro CT1.
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). 
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
   left; econstructor; split.
-  eapply plus_left. eapply exec_step_internal. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. replace (chunk_of_type Tptr) with Mptr in * by (unfold Tptr, Mptr; destruct Archi.ptr64; auto).
-  rewrite C. rewrite A. rewrite <- (sp_val _ _ _ AG). rewrite E. eauto.
-  apply star_one. eapply exec_step_internal.
-  transitivity (Val.offset_ptr rs0#PC Ptrofs.one). auto. rewrite <- H4. simpl. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. eauto. traceEq.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
   econstructor; eauto.
-  apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto.
-  eapply agree_change_sp; eauto. eapply parent_sp_def; eauto.
-  rewrite Pregmap.gss. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
+  apply agree_set_other; auto with asmgen.
+  Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
 
 - (* Mbuiltin *)
   inv AT. monadInv H4.
@@ -692,15 +868,16 @@ Opaque loadind.
   eauto.
   econstructor; eauto.
   instantiate (2 := tf); instantiate (1 := x).
-  unfold nextinstr_nf, nextinstr. rewrite Pregmap.gss.
-  rewrite undef_regs_other. rewrite set_res_other. rewrite undef_regs_other_2.
+  unfold nextinstr. rewrite Pregmap.gss.
+  rewrite set_res_other. rewrite undef_regs_other_2.
+  rewrite ! Pregmap.gso by congruence. 
   rewrite <- H1. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
   rewrite preg_notin_charact. intros. auto with asmgen.
   auto with asmgen.
-  simpl; intros. intuition congruence.
-  apply agree_nextinstr_nf. eapply agree_set_res; auto.
-  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto.
+  apply agree_nextinstr. eapply agree_set_res; auto.
+  eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
+  rewrite ! Pregmap.gso; auto with asmgen.
   congruence.
 
 - (* Mgoto *)
@@ -719,153 +896,109 @@ Opaque loadind.
 - (* Mcond true *)
   assert (f0 = f) by congruence. subst f0.
   exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
-  left; eapply exec_straight_steps_goto; eauto.
+  left; eapply exec_straight_opt_steps_goto; eauto.
   intros. simpl in TR.
-  destruct (transl_cond_correct tge tf cond args _ _ rs0 m' TR)
-  as [rs' [A [B C]]].
-  rewrite EC in B. destruct B as [B _].
-  destruct (testcond_for_condition cond); simpl in *.
-(* simple jcc *)
-  exists (Pjcc c1 lbl); exists k; exists rs'.
-  split. eexact A.
-  split. eapply agree_exten; eauto.
-  simpl. rewrite B. auto.
-(* jcc; jcc *)
-  destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
-  destruct (eval_testcond c2 rs') as [b2|] eqn:TC2; inv B.
-  destruct b1.
-  (* first jcc jumps *)
-  exists (Pjcc c1 lbl); exists (Pjcc c2 lbl :: k); exists rs'.
-  split. eexact A.
-  split. eapply agree_exten; eauto.
-  simpl. rewrite TC1. auto.
-  (* second jcc jumps *)
-  exists (Pjcc c2 lbl); exists k; exists (nextinstr rs').
-  split. eapply exec_straight_trans. eexact A.
-  eapply exec_straight_one. simpl. rewrite TC1. auto. auto.
-  split. eapply agree_exten; eauto.
-  intros; Simplifs.
-  simpl. rewrite eval_testcond_nextinstr. rewrite TC2.
-  destruct b2; auto || discriminate.
-(* jcc2 *)
-  destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
-  destruct (eval_testcond c2 rs') as [b2|] eqn:TC2; inv B.
-  destruct (andb_prop _ _ H3). subst.
-  exists (Pjcc2 c1 c2 lbl); exists k; exists rs'.
-  split. eexact A.
-  split. eapply agree_exten; eauto.
-  simpl. rewrite TC1; rewrite TC2; auto.
+  exploit transl_cbranch_correct_true; eauto. intros (rs' & jmp & A & B & C).
+  exists jmp; exists k; exists rs'.
+  split. eexact A. 
+  split. apply agree_exten with rs0; auto with asmgen.
+  exact B. 
 
 - (* Mcond false *)
   exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
   left; eapply exec_straight_steps; eauto. intros. simpl in TR.
-  destruct (transl_cond_correct tge tf cond args _ _ rs0 m' TR)
-  as [rs' [A [B C]]].
-  rewrite EC in B. destruct B as [B _].
-  destruct (testcond_for_condition cond); simpl in *.
-(* simple jcc *)
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl. rewrite B. eauto. auto.
-  split. apply agree_nextinstr. eapply agree_exten; eauto.
-  simpl; congruence.
-(* jcc ; jcc *)
-  destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
-  destruct (eval_testcond c2 rs') as [b2|] eqn:TC2; inv B.
-  destruct (orb_false_elim _ _ H1); subst.
-  econstructor; split.
-  eapply exec_straight_trans. eexact A.
-  eapply exec_straight_two. simpl. rewrite TC1. eauto. auto.
-  simpl. rewrite eval_testcond_nextinstr. rewrite TC2. eauto. auto. auto.
-  split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten; eauto.
-  simpl; congruence.
-(* jcc2 *)
-  destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
-  destruct (eval_testcond c2 rs') as [b2|] eqn:TC2; inv B.
-  exists (nextinstr rs'); split.
-  eapply exec_straight_trans. eexact A.
-  apply exec_straight_one. simpl.
-  rewrite TC1; rewrite TC2.
-  destruct b1. simpl in *. subst b2. auto. auto.
-  auto.
-  split. apply agree_nextinstr. eapply agree_exten; eauto.
-  rewrite H1; congruence.
+  exploit transl_cbranch_correct_false; eauto. intros (rs' & A & B).
+  exists rs'.
+  split. eexact A.
+  split. apply agree_exten with rs0; auto with asmgen.
+  simpl. congruence.
 
 - (* Mjumptable *)
   assert (f0 = f) by congruence. subst f0.
   inv AT. monadInv H6.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H5); intro NOOV.
-  set (rs1 := rs0 #RAX <- Vundef #RDX <- Vundef).
-  exploit (find_label_goto_label f tf lbl rs1); eauto.
+  exploit find_label_goto_label. eauto. eauto.
+  instantiate (2 := rs0#X5 <- Vundef #X31 <- Vundef).
+  Simpl. eauto.
+  eauto.
   intros [tc' [rs' [A [B C]]]].
   exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
   left; econstructor; split.
   apply plus_one. econstructor; eauto.
   eapply find_instr_tail; eauto.
-  simpl. rewrite <- H9.  unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
+  simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
   econstructor; eauto.
-Transparent destroyed_by_jumptable.
-  apply agree_undef_regs with rs0; auto.
-  simpl; intros. destruct H8. rewrite C by auto with asmgen. unfold rs1; Simplifs.
+  eapply agree_undef_regs; eauto.
+  simpl. intros. rewrite C; auto with asmgen. Simpl.
   congruence.
 
 - (* Mreturn *)
   assert (f0 = f) by congruence. subst f0.
-  inv AT.
+  inversion AT; subst. simpl in H6; monadInv H6.
   assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
     eapply transf_function_no_overflow; eauto.
-  rewrite (sp_val _ _ _ AG) in *. unfold load_stack in *.
-  replace (chunk_of_type Tptr) with Mptr in * by (unfold Tptr, Mptr; destruct Archi.ptr64; auto).
-  exploit Mem.loadv_extends. eauto. eexact H0. auto. simpl. intros [parent' [A B]].
-  exploit lessdef_parent_sp; eauto. intros. subst parent'. clear B.
-  exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [ra' [C D]].
-  exploit lessdef_parent_ra; eauto. intros. subst ra'. clear D.
-  exploit Mem.free_parallel_extends; eauto. intros [m2' [E F]].
-  monadInv H6.
-  exploit code_tail_next_int; eauto. intro CT1.
+  exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+  exploit exec_straight_steps_2; eauto using functions_transl.                      
+  intros (ofs' & P & Q).
   left; econstructor; split.
-  eapply plus_left. eapply exec_step_internal. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. rewrite C. rewrite A. rewrite <- (sp_val _ _ _ AG). rewrite E. eauto.
-  apply star_one. eapply exec_step_internal.
-  transitivity (Val.offset_ptr rs0#PC Ptrofs.one). auto. rewrite <- H3. simpl. eauto.
-  eapply functions_transl; eauto. eapply find_instr_tail; eauto.
-  simpl. eauto. traceEq.
-  constructor; auto.
-  apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto.
-  eapply agree_change_sp; eauto. eapply parent_sp_def; eauto.
+  (* execution *)
+  eapply plus_right'. eapply exec_straight_exec; eauto.
+  econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
+  simpl. reflexivity.
+  traceEq.
+  (* match states *)
+  econstructor; eauto.
+  apply agree_set_other; auto with asmgen.
 
 - (* internal function *)
   exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
   generalize EQ; intros EQ'. monadInv EQ'.
-  destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inv EQ1.
-  monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
+  destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0.
   unfold store_stack in *.
   exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
   intros [m1' [C D]].
   exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
   intros [m2' [F G]].
+  simpl chunk_of_type in F.
   exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
   intros [m3' [P Q]].
-  left; econstructor; split.
-  apply plus_one. econstructor; eauto.
-  simpl. rewrite Ptrofs.unsigned_zero. simpl. eauto.
-  simpl. rewrite C. simpl in F, P.
-  replace (chunk_of_type Tptr) with Mptr in F, P by (unfold Tptr, Mptr; destruct Archi.ptr64; auto).
-  rewrite (sp_val _ _ _ AG) in F. rewrite F.
-  rewrite ATLR. rewrite P. eauto.
+  (* Execution of function prologue *)
+  monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
+  set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
+                 storeind_ptr RA SP (fn_retaddr_ofs f) x0) in *.
+  set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
+  set (rs2 := nextinstr (rs0#X30 <- (parent_sp s) #SP <- sp #X31 <- Vundef)).
+  exploit (storeind_ptr_correct tge tf SP (fn_retaddr_ofs f) RA x0 rs2 m2').
+    rewrite chunk_of_Tptr in P. change (rs2 X1) with (rs0 X1). rewrite ATLR. 
+    change (rs2 X2) with sp. eexact P. 
+    congruence. congruence.
+  intros (rs3 & U & V).
+  assert (EXEC_PROLOGUE:
+            exec_straight tge tf
+              tf.(fn_code) rs0 m'
+              x0 rs3 m3').
+  { change (fn_code tf) with tfbody; unfold tfbody.
+    apply exec_straight_step with rs2 m2'.
+    unfold exec_instr. rewrite C. fold sp.
+    rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. rewrite F. reflexivity.
+    reflexivity. 
+    eexact U. }
+  exploit exec_straight_steps_2; eauto using functions_transl. lia. constructor.
+  intros (ofs' & X & Y).                    
+  left; exists (State rs3 m3'); split.
+  eapply exec_straight_steps_1; eauto. lia. constructor.
   econstructor; eauto.
-  unfold nextinstr. rewrite Pregmap.gss. repeat rewrite Pregmap.gso; auto with asmgen.
-  rewrite ATPC. simpl. constructor; eauto.
-  unfold fn_code. eapply code_tail_next_int. simpl in g. lia.
-  constructor.
-  apply agree_nextinstr. eapply agree_change_sp; eauto.
-Transparent destroyed_at_function_entry.
-  apply agree_undef_regs with rs0; eauto.
-  simpl; intros. apply Pregmap.gso; auto with asmgen. tauto.
-  congruence.
-  intros. Simplifs. eapply agree_sp; eauto.
+  rewrite X; econstructor; eauto. 
+  apply agree_exten with rs2; eauto with asmgen.
+  unfold rs2. 
+  apply agree_nextinstr. apply agree_set_other; auto with asmgen.
+  apply agree_change_sp with (parent_sp s). 
+  apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry.
+  simpl; intros; Simpl.
+  unfold sp; congruence.
+  intros. rewrite V by auto with asmgen. reflexivity.
 
 - (* external function *)
   exploit functions_translated; eauto.
@@ -884,7 +1017,8 @@ Transparent destroyed_at_function_entry.
 - (* return *)
   inv STACKS. simpl in *.
   right. split. lia. split. auto.
-  econstructor; eauto. rewrite ATPC; eauto. congruence.
+  rewrite <- ATPC in H5.
+  econstructor; eauto. congruence.
 Qed.
 
 Lemma transf_initial_states:
@@ -900,8 +1034,7 @@ Proof.
   econstructor; eauto.
   constructor.
   apply Mem.extends_refl.
-  split. reflexivity. simpl.
-  unfold Vnullptr; destruct Archi.ptr64; congruence.
+  split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
   intros. rewrite Regmap.gi. auto.
   unfold Genv.symbol_address.
   rewrite (match_program_main TRANSF).
@@ -913,11 +1046,9 @@ Lemma transf_final_states:
   forall st1 st2 r,
   match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. constructor. auto.
-  assert (r0 = AX).
-  { unfold loc_result in H1; destruct Archi.ptr64; compute in H1; congruence. }
-  subst r0.
-  generalize (preg_val _ _ _ AX AG). rewrite H2. intros LD; inv LD. auto.
+  intros. inv H0. inv H. constructor. assumption.
+  compute in H1. inv H1.
+  generalize (preg_val _ _ _ R10 AG). rewrite H2. intros LD; inv LD. auto.
 Qed.
 
 Theorem transf_program_correct:
diff --git a/verilog/Asmgenproof1.v b/verilog/Asmgenproof1.v
index 7cff1047..42ab8375 100644
--- a/verilog/Asmgenproof1.v
+++ b/verilog/Asmgenproof1.v
@@ -2,1541 +2,1629 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                  Xavier Leroy, INRIA Paris                          *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
-(** Correctness proof for x86-64 generation: auxiliary results. *)
-
-Require Import Coqlib.
-Require Import AST Errors Integers Floats Values Memory Globalenvs.
-Require Import Op Locations Conventions Mach Asm.
-Require Import Asmgen Asmgenproof0.
+Require Import Coqlib Errors Maps.
+Require Import AST Zbits Integers Floats Values Memory Globalenvs.
+Require Import Op Locations Mach Conventions.
+Require Import Asm Asmgen Asmgenproof0.
 
-Local Open Scope error_monad_scope.
+(** Decomposition of integer constants. *)
 
-(** * Correspondence between Mach registers and x86 registers *)
-
-Lemma agree_nextinstr_nf:
-  forall ms sp rs,
-  agree ms sp rs -> agree ms sp (nextinstr_nf rs).
+Lemma make_immed32_sound:
+  forall n,
+  match make_immed32 n with
+  | Imm32_single imm => n = imm
+  | Imm32_pair hi lo => n = Int.add (Int.shl hi (Int.repr 12)) lo
+  end.
 Proof.
-  intros. unfold nextinstr_nf. apply agree_nextinstr.
-  apply agree_undef_nondata_regs. auto.
-  simpl; intros. intuition (subst r; auto).
+  intros; unfold make_immed32. set (lo := Int.sign_ext 12 n).
+  predSpec Int.eq Int.eq_spec n lo.
+- auto.
+- set (m := Int.sub n lo).
+  assert (A: eqmod (two_p 12) (Int.unsigned lo) (Int.unsigned n)) by (apply Int.eqmod_sign_ext'; compute; auto).
+  assert (B: eqmod (two_p 12) (Int.unsigned n - Int.unsigned  lo) 0).
+  { replace 0 with (Int.unsigned n - Int.unsigned n) by lia.
+    auto using eqmod_sub, eqmod_refl. }
+  assert (C: eqmod (two_p 12) (Int.unsigned m) 0).
+  { apply eqmod_trans with (Int.unsigned n - Int.unsigned lo); auto.
+    apply eqmod_divides with Int.modulus. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
+    exists (two_p (32-12)); auto. }
+  assert (D: Int.modu m (Int.repr 4096) = Int.zero).
+  { apply eqmod_mod_eq in C. unfold Int.modu. 
+    change (Int.unsigned (Int.repr 4096)) with (two_p 12). rewrite C. 
+    reflexivity.
+    apply two_p_gt_ZERO; lia. }
+  rewrite <- (Int.divu_pow2 m (Int.repr 4096) (Int.repr 12)) by auto.
+  rewrite Int.shl_mul_two_p. 
+  change (two_p (Int.unsigned (Int.repr 12))) with 4096.
+  replace (Int.mul (Int.divu m (Int.repr 4096)) (Int.repr 4096)) with m.
+  unfold m. rewrite Int.sub_add_opp. rewrite Int.add_assoc. rewrite <- (Int.add_commut lo).
+  rewrite Int.add_neg_zero. rewrite Int.add_zero. auto.
+  rewrite (Int.modu_divu_Euclid m (Int.repr 4096)) at 1 by (vm_compute; congruence).
+  rewrite D. apply Int.add_zero.
 Qed.
 
-(** Useful properties of the PC register. *)
-
-Lemma nextinstr_nf_inv:
-  forall r rs,
-  match r with PC => False | CR _ => False | _ => True end ->
-  (nextinstr_nf rs)#r = rs#r.
+Lemma make_immed64_sound:
+  forall n,
+  match make_immed64 n with
+  | Imm64_single imm => n = imm
+  | Imm64_pair hi lo => n = Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo
+  | Imm64_large imm => n = imm
+  end.
 Proof.
-  intros. unfold nextinstr_nf. rewrite nextinstr_inv.
-  simpl. repeat rewrite Pregmap.gso; auto;
-  red; intro; subst; contradiction.
-  red; intro; subst; contradiction.
+  intros; unfold make_immed64. set (lo := Int64.sign_ext 12 n).
+  predSpec Int64.eq Int64.eq_spec n lo.
+- auto.
+- set (m := Int64.sub n lo).
+  set (p := Int64.zero_ext 20 (Int64.shru m (Int64.repr 12))).
+  predSpec Int64.eq Int64.eq_spec n (Int64.add (Int64.sign_ext 32 (Int64.shl p (Int64.repr 12))) lo).
+  auto.
+  auto.
 Qed.
 
-Lemma nextinstr_nf_inv1:
-  forall r rs,
-  data_preg r = true -> (nextinstr_nf rs)#r = rs#r.
+(** Properties of registers *)
+
+Lemma ireg_of_not_X31:
+  forall m r, ireg_of m = OK r -> IR r <> IR X31.
 Proof.
-  intros. apply nextinstr_nf_inv. destruct r; auto || discriminate.
+  intros. erewrite <- ireg_of_eq; eauto with asmgen.
 Qed.
 
-Lemma nextinstr_nf_set_preg:
-  forall rs m v,
-  (nextinstr_nf (rs#(preg_of m) <- v))#PC = Val.offset_ptr rs#PC Ptrofs.one.
+Lemma ireg_of_not_X31':
+  forall m r, ireg_of m = OK r -> r <> X31.
 Proof.
-  intros. unfold nextinstr_nf.
-  transitivity (nextinstr (rs#(preg_of m) <- v) PC). auto.
-  apply nextinstr_set_preg.
+  intros. apply ireg_of_not_X31 in H. congruence.
 Qed.
 
+Global Hint Resolve ireg_of_not_X31 ireg_of_not_X31': asmgen.
+
 (** Useful simplification tactic *)
 
 Ltac Simplif :=
-  match goal with
-  | [ |- nextinstr_nf _ _ = _ ] =>
-      ((rewrite nextinstr_nf_inv by auto with asmgen)
-       || (rewrite nextinstr_nf_inv1 by auto with asmgen)); auto
-  | [ |- nextinstr _ _ = _ ] =>
-      ((rewrite nextinstr_inv by auto with asmgen)
-       || (rewrite nextinstr_inv1 by auto with asmgen)); auto
-  | [ |- Pregmap.get ?x (Pregmap.set ?x _ _) = _ ] =>
-      rewrite Pregmap.gss; auto
-  | [ |- Pregmap.set ?x _ _ ?x = _ ] =>
-      rewrite Pregmap.gss; auto
-  | [ |- Pregmap.get _ (Pregmap.set _ _ _) = _ ] =>
-      rewrite Pregmap.gso by (auto with asmgen); auto
-  | [ |- Pregmap.set _ _ _ _ = _ ] =>
-      rewrite Pregmap.gso by (auto with asmgen); auto
-  end.
+  ((rewrite nextinstr_inv by eauto with asmgen)
+  || (rewrite nextinstr_inv1 by eauto with asmgen)
+  || (rewrite Pregmap.gss)
+  || (rewrite nextinstr_pc)
+  || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
 
-Ltac Simplifs := repeat Simplif.
+Ltac Simpl := repeat Simplif.
 
-(** * Correctness of x86-64 constructor functions *)
+(** * Correctness of RISC-V constructor functions *)
 
 Section CONSTRUCTORS.
 
 Variable ge: genv.
 Variable fn: function.
 
-(** Smart constructor for moves. *)
+(** 32-bit integer constants and arithmetic *)
 
-Lemma mk_mov_correct:
-  forall rd rs k c rs1 m,
-  mk_mov rd rs k = OK c ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#rd = rs1#rs
-  /\ forall r, data_preg r = true -> r <> rd -> rs2#r = rs1#r.
+Lemma load_hilo32_correct:
+  forall rd hi lo k rs m,
+  exists rs',
+     exec_straight ge fn (load_hilo32 rd hi lo k) rs m k rs' m
+  /\ rs'#rd = Vint (Int.add (Int.shl hi (Int.repr 12)) lo)
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
 Proof.
-  unfold mk_mov; intros.
-  destruct rd; try (monadInv H); destruct rs; monadInv H.
-(* mov *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. Simplifs. intros; Simplifs.
-(* movsd *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. Simplifs. intros; Simplifs.
+  unfold load_hilo32; intros. 
+  predSpec Int.eq Int.eq_spec lo Int.zero.
+- subst lo. econstructor; split. 
+  apply exec_straight_one. simpl; eauto. auto.
+  split. rewrite Int.add_zero. Simpl.
+  intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split. Simpl. 
+  intros; Simpl.
 Qed.
 
-(** Properties of division *)
-
-Lemma divu_modu_exists:
-  forall v1 v2,
-  Val.divu v1 v2 <> None \/ Val.modu v1 v2 <> None ->
-  exists n d q r,
-     v1 = Vint n /\ v2 = Vint d
-  /\ Int.divmodu2 Int.zero n d = Some(q, r)
-  /\ Val.divu v1 v2 = Some (Vint q) /\ Val.modu v1 v2 = Some (Vint r).
+Lemma loadimm32_correct:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
+  /\ rs'#rd = Vint n
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
 Proof.
-  intros v1 v2; unfold Val.divu, Val.modu.
-  destruct v1; try (intuition discriminate).
-  destruct v2; try (intuition discriminate).
-  predSpec Int.eq Int.eq_spec i0 Int.zero ; try (intuition discriminate).
-  intros _. exists i, i0, (Int.divu i i0), (Int.modu i i0); intuition auto.
-  apply Int.divmodu2_divu_modu; auto.
+  unfold loadimm32; intros. generalize (make_immed32_sound n); intros E.
+  destruct (make_immed32 n). 
+- subst imm. econstructor; split. 
+  apply exec_straight_one. simpl; eauto. auto.
+  split. rewrite Int.add_zero_l; Simpl. 
+  intros; Simpl.
+- rewrite E. apply load_hilo32_correct.
 Qed.
 
-Lemma divs_mods_exists:
-  forall v1 v2,
-  Val.divs v1 v2 <> None \/ Val.mods v1 v2 <> None ->
-  exists nh nl d q r,
-     Val.shr v1 (Vint (Int.repr 31)) = Vint nh /\ v1 = Vint nl /\ v2 = Vint d
-  /\ Int.divmods2 nh nl d = Some(q, r)
-  /\ Val.divs v1 v2 = Some (Vint q) /\ Val.mods v1 v2 = Some (Vint r).
+Lemma opimm32_correct:
+  forall (op: ireg -> ireg0 -> ireg0 -> instruction)
+         (opi: ireg -> ireg0 -> int -> instruction)
+         (sem: val -> val -> val) m,
+  (forall d s1 s2 rs,
+   exec_instr ge fn (op d s1 s2) rs m = Next (nextinstr (rs#d <- (sem rs##s1 rs##s2))) m) ->
+  (forall d s n rs,
+   exec_instr ge fn (opi d s n) rs m = Next (nextinstr (rs#d <- (sem rs##s (Vint n)))) m) ->
+  forall rd r1 n k rs,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (opimm32 op opi rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs##r1 (Vint n)
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros v1 v2; unfold Val.divs, Val.mods.
-  destruct v1; try (intuition discriminate).
-  destruct v2; try (intuition discriminate).
-  destruct (Int.eq i0 Int.zero
-            || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone) eqn:OK;
-  try (intuition discriminate).
-  intros _.
-  InvBooleans.
-  exists (Int.shr i (Int.repr 31)), i, i0, (Int.divs i i0), (Int.mods i i0); intuition auto.
-  rewrite Int.shr_lt_zero. apply Int.divmods2_divs_mods.
-  red; intros; subst i0; rewrite Int.eq_true in H; discriminate.
-  revert H0. predSpec Int.eq Int.eq_spec i (Int.repr Int.min_signed); auto.
-  predSpec Int.eq Int.eq_spec i0 Int.mone; auto.
-  discriminate.
+  intros. unfold opimm32. generalize (make_immed32_sound n); intros E.
+  destruct (make_immed32 n). 
+- subst imm. econstructor; split. 
+  apply exec_straight_one. rewrite H0. simpl; eauto. auto.
+  split. Simpl. intros; Simpl.
+- destruct (load_hilo32_correct X31 hi lo (op rd r1 X31 :: k) rs m)
+  as (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+  rewrite H; eauto. auto.
+  split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
+  intros; Simpl. 
 Qed.
 
-Lemma divlu_modlu_exists:
-  forall v1 v2,
-  Val.divlu v1 v2 <> None \/ Val.modlu v1 v2 <> None ->
-  exists n d q r,
-     v1 = Vlong n /\ v2 = Vlong d
-  /\ Int64.divmodu2 Int64.zero n d = Some(q, r)
-  /\ Val.divlu v1 v2 = Some (Vlong q) /\ Val.modlu v1 v2 = Some (Vlong r).
+(** 64-bit integer constants and arithmetic *)
+
+Lemma load_hilo64_correct:
+  forall rd hi lo k rs m,
+  exists rs',
+     exec_straight ge fn (load_hilo64 rd hi lo k) rs m k rs' m
+  /\ rs'#rd = Vlong (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
 Proof.
-  intros v1 v2; unfold Val.divlu, Val.modlu.
-  destruct v1; try (intuition discriminate).
-  destruct v2; try (intuition discriminate).
-  predSpec Int64.eq Int64.eq_spec i0 Int64.zero ; try (intuition discriminate).
-  intros _. exists i, i0, (Int64.divu i i0), (Int64.modu i i0); intuition auto.
-  apply Int64.divmodu2_divu_modu; auto.
+  unfold load_hilo64; intros. 
+  predSpec Int64.eq Int64.eq_spec lo Int64.zero.
+- subst lo. econstructor; split. 
+  apply exec_straight_one. simpl; eauto. auto.
+  split. rewrite Int64.add_zero. Simpl.
+  intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split. Simpl. 
+  intros; Simpl.
 Qed.
 
-Lemma divls_modls_exists:
-  forall v1 v2,
-  Val.divls v1 v2 <> None \/ Val.modls v1 v2 <> None ->
-  exists nh nl d q r,
-     Val.shrl v1 (Vint (Int.repr 63)) = Vlong nh /\ v1 = Vlong nl /\ v2 = Vlong d
-  /\ Int64.divmods2 nh nl d = Some(q, r)
-  /\ Val.divls v1 v2 = Some (Vlong q) /\ Val.modls v1 v2 = Some (Vlong r).
+Lemma loadimm64_correct:
+  forall rd n k rs m,
+  exists rs',
+     exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
+  /\ rs'#rd = Vlong n
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros v1 v2; unfold Val.divls, Val.modls.
-  destruct v1; try (intuition discriminate).
-  destruct v2; try (intuition discriminate).
-  destruct (Int64.eq i0 Int64.zero
-            || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone) eqn:OK;
-  try (intuition discriminate).
-  intros _.
-  InvBooleans.
-  exists (Int64.shr i (Int64.repr 63)), i, i0, (Int64.divs i i0), (Int64.mods i i0); intuition auto.
-  rewrite Int64.shr_lt_zero. apply Int64.divmods2_divs_mods.
-  red; intros; subst i0; rewrite Int64.eq_true in H; discriminate.
-  revert H0. predSpec Int64.eq Int64.eq_spec i (Int64.repr Int64.min_signed); auto.
-  predSpec Int64.eq Int64.eq_spec i0 Int64.mone; auto.
-  discriminate.
+  unfold loadimm64; intros. generalize (make_immed64_sound n); intros E.
+  destruct (make_immed64 n). 
+- subst imm. econstructor; split. 
+  apply exec_straight_one. simpl; eauto. auto.
+  split. rewrite Int64.add_zero_l; Simpl. 
+  intros; Simpl.
+- exploit load_hilo64_correct; eauto. intros (rs' & A & B & C).
+  rewrite E. exists rs'; eauto.
+- subst imm. econstructor; split. 
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. 
+  intros; Simpl.
 Qed.
 
-(** Smart constructor for [shrx] *)
-
-Lemma mk_shrximm_correct:
-  forall n k c (rs1: regset) v m,
-  mk_shrximm n k = OK c ->
-  Val.shrx (rs1#RAX) (Vint n) = Some v ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#RAX = v
-  /\ forall r, data_preg r = true -> r <> RAX -> r <> RCX -> rs2#r = rs1#r.
+Lemma opimm64_correct:
+  forall (op: ireg -> ireg0 -> ireg0 -> instruction)
+         (opi: ireg -> ireg0 -> int64 -> instruction)
+         (sem: val -> val -> val) m,
+  (forall d s1 s2 rs,
+   exec_instr ge fn (op d s1 s2) rs m = Next (nextinstr (rs#d <- (sem rs###s1 rs###s2))) m) ->
+  (forall d s n rs,
+   exec_instr ge fn (opi d s n) rs m = Next (nextinstr (rs#d <- (sem rs###s (Vlong n)))) m) ->
+  forall rd r1 n k rs,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (opimm64 op opi rd r1 n k) rs m k rs' m
+  /\ rs'#rd = sem rs##r1 (Vlong n)
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  unfold mk_shrximm; intros. inv H.
-  exploit Val.shrx_shr; eauto. intros [x [y [A [B C]]]].
-  inversion B; clear B; subst y; subst v; clear H0.
-  set (tnm1 := Int.sub (Int.shl Int.one n) Int.one).
-  set (x' := Int.add x tnm1).
-  set (rs2 := nextinstr (compare_ints (Vint x) (Vint Int.zero) rs1 m)).
-  set (rs3 := nextinstr (rs2#RCX <- (Vint x'))).
-  set (v' := if Int.lt x Int.zero then Vint x' else Vint x).
-  set (rs4 := nextinstr (rs3#RAX <- v')).
-  set (rs5 := nextinstr_nf (rs4#RAX <- (Val.shr rs4#RAX (Vint n)))).
-  assert (rs3#RAX = Vint x). unfold rs3. Simplifs.
-  assert (rs3#RCX = Vint x'). unfold rs3. Simplifs.
-  exists rs5. split.
-  apply exec_straight_step with rs2 m. simpl. rewrite A. simpl. rewrite Int.and_idem. auto. auto.
-  apply exec_straight_step with rs3 m. simpl.
-  change (rs2 RAX) with (rs1 RAX). rewrite A. simpl.
-  rewrite Int.repr_unsigned. rewrite Int.add_zero_l. auto. auto.
-  apply exec_straight_step with rs4 m. simpl.
-  rewrite Int.lt_sub_overflow. unfold rs4, v'. rewrite H, H0. destruct (Int.lt x Int.zero); simpl; auto.
-  auto.
-  apply exec_straight_one. auto. auto.
-  split. unfold rs5. Simplifs. unfold rs4. rewrite nextinstr_inv; auto with asmgen.
-  rewrite Pregmap.gss. unfold v'. rewrite A. reflexivity.
-  intros. unfold rs5. Simplifs. unfold rs4. Simplifs.
-  unfold rs3. Simplifs. unfold rs2. Simplifs.
-  unfold compare_ints. Simplifs.
+  intros. unfold opimm64. generalize (make_immed64_sound n); intros E.
+  destruct (make_immed64 n). 
+- subst imm. econstructor; split. 
+  apply exec_straight_one. rewrite H0. simpl; eauto. auto.
+  split. Simpl. intros; Simpl.
+- destruct (load_hilo64_correct X31 hi lo (op rd r1 X31 :: k) rs m)
+  as (rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+  rewrite H; eauto. auto.
+  split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
+  intros; Simpl. 
+- subst imm. econstructor; split. 
+  eapply exec_straight_two. simpl; eauto. rewrite H. simpl; eauto. auto. auto.
+  split. Simpl. intros; Simpl.
 Qed.
 
-(** Smart constructor for [shrxl] *)
+(** Add offset to pointer *)
 
-Lemma mk_shrxlimm_correct:
-  forall n k c (rs1: regset) v m,
-  mk_shrxlimm n k = OK c ->
-  Val.shrxl (rs1#RAX) (Vint n) = Some v ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#RAX = v
-  /\ forall r, data_preg r = true -> r <> RAX -> r <> RDX -> rs2#r = rs1#r.
+Lemma addptrofs_correct:
+  forall rd r1 n k rs m,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (addptrofs rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.offset_ptr rs#r1 n) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  unfold mk_shrxlimm; intros. exploit Val.shrxl_shrl_2; eauto. intros EQ.
-  destruct (Int.eq n Int.zero); inv H.
-- econstructor; split. apply exec_straight_one. simpl; reflexivity. auto.
-  split. Simplifs. intros; Simplifs.
-- set (v1 := Val.shrl (rs1 RAX) (Vint (Int.repr 63))) in *.
-  set (v2 := Val.shrlu v1 (Vint (Int.sub (Int.repr 64) n))) in *.
-  set (v3 := Val.addl (rs1 RAX) v2) in *.
-  set (v4 := Val.shrl v3 (Vint n)) in *.
-  set (rs2 := nextinstr_nf (rs1#RDX <- v1)).
-  set (rs3 := nextinstr_nf (rs2#RDX <- v2)).
-  set (rs4 := nextinstr (rs3#RAX <- v3)).
-  set (rs5 := nextinstr_nf (rs4#RAX <- v4)).
-  assert (X: forall v1 v2,
-             Val.addl v1 (Val.addl v2 (Vlong Int64.zero)) = Val.addl v1 v2).
-  { intros. unfold Val.addl; destruct Archi.ptr64 eqn:SF, v0; auto; destruct v5; auto.
-    rewrite Int64.add_zero; auto.
-    rewrite Ptrofs.add_zero; auto.
-    rewrite Int64.add_zero; auto.
-    rewrite Int64.add_zero; auto. }
-  exists rs5; split.
-  eapply exec_straight_trans with (rs2 := rs3).
-  eapply exec_straight_two with (rs2 := rs2); reflexivity.
-  eapply exec_straight_two with (rs2 := rs4).
-  simpl. rewrite X. reflexivity. reflexivity. reflexivity. reflexivity.
-  split. unfold rs5; Simplifs.
-  intros. unfold rs5; Simplifs. unfold rs4; Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs.
+  unfold addptrofs; intros.
+  destruct (Ptrofs.eq_dec n Ptrofs.zero).
+- subst n. econstructor; split.
+  apply exec_straight_one. simpl; eauto. auto.
+  split. Simpl. destruct (rs r1); simpl; auto. rewrite Ptrofs.add_zero; auto.
+  intros; Simpl.
+- destruct Archi.ptr64 eqn:SF.
++ unfold addimm64.
+  exploit (opimm64_correct Paddl Paddil Val.addl); eauto. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split; auto.
+  rewrite B. simpl. destruct (rs r1); simpl; auto. rewrite SF.
+  rewrite Ptrofs.of_int64_to_int64 by auto. auto.
++ unfold addimm32.
+  exploit (opimm32_correct Paddw Paddiw Val.add); eauto. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split; auto.
+  rewrite B. simpl. destruct (rs r1); simpl; auto. rewrite SF.
+  rewrite Ptrofs.of_int_to_int by auto. auto.
 Qed.
 
-(** Smart constructor for integer conversions *)
-
-Lemma mk_intconv_correct:
-  forall mk sem rd rs k c rs1 m,
-  mk_intconv mk rd rs k = OK c ->
-  (forall c rd rs r m,
-   exec_instr ge c (mk rd rs) r m = Next (nextinstr (r#rd <- (sem r#rs))) m) ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#rd = sem rs1#rs
-  /\ forall r, data_preg r = true -> r <> rd -> r <> RAX -> rs2#r = rs1#r.
+Lemma addptrofs_correct_2:
+  forall rd r1 n k (rs: regset) m b ofs,
+  r1 <> X31 -> rs#r1 = Vptr b ofs ->
+  exists rs',
+     exec_straight ge fn (addptrofs rd r1 n k) rs m k rs' m
+  /\ rs'#rd = Vptr b (Ptrofs.add ofs n)
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  unfold mk_intconv; intros. destruct (Archi.ptr64 || low_ireg rs); monadInv H.
-  econstructor. split. apply exec_straight_one. rewrite H0. eauto. auto.
-  split. Simplifs. intros. Simplifs.
-  econstructor. split. eapply exec_straight_two.
-  simpl. eauto. apply H0. auto. auto.
-  split. Simplifs. intros. Simplifs.
+  intros. exploit (addptrofs_correct rd r1 n); eauto. intros (rs' & A & B & C).
+  exists rs'; intuition eauto. 
+  rewrite H0 in B. inv B. auto.
 Qed.
 
-(** Smart constructor for small stores *)
+(** Translation of conditional branches *)
 
-Lemma addressing_mentions_correct:
-  forall a r (rs1 rs2: regset),
-  (forall (r': ireg), r' <> r -> rs1 r' = rs2 r') ->
-  addressing_mentions a r = false ->
-  eval_addrmode32 ge a rs1 = eval_addrmode32 ge a rs2.
+Lemma transl_cbranch_int32s_correct:
+  forall cmp r1 r2 lbl (rs: regset) m b,
+  Val.cmp_bool cmp rs##r1 rs##r2 = Some b ->
+  exec_instr ge fn (transl_cbranch_int32s cmp r1 r2 lbl) rs m =
+  eval_branch fn lbl rs m (Some b).
 Proof.
-  intros until rs2; intro AG. unfold addressing_mentions, eval_addrmode32.
-  destruct a. intros. destruct (orb_false_elim _ _ H). unfold proj_sumbool in *.
-  decEq. destruct base; auto. apply AG. destruct (ireg_eq r i); congruence.
-  decEq. destruct ofs as [[r' sc] | ]; auto. rewrite AG; auto. destruct (ireg_eq r r'); congruence.
+  intros. destruct cmp; simpl; rewrite ? H.
+- destruct rs##r1; simpl in H; try discriminate. destruct rs##r2; inv H.
+  simpl; auto.
+- destruct rs##r1; simpl in H; try discriminate. destruct rs##r2; inv H.
+  simpl; auto.
+- auto.
+- rewrite <- Val.swap_cmp_bool. simpl. rewrite H; auto.
+- rewrite <- Val.swap_cmp_bool. simpl. rewrite H; auto.
+- auto.
 Qed.
 
-Lemma mk_storebyte_correct:
-  forall addr r k c rs1 m1 m2,
-  mk_storebyte addr r k = OK c ->
-  Mem.storev Mint8unsigned m1 (eval_addrmode ge addr rs1) (rs1 r) = Some m2 ->
-  exists rs2,
-     exec_straight ge fn c rs1 m1 k rs2 m2
-  /\ forall r, data_preg r = true -> preg_notin r (if Archi.ptr64 then nil else AX :: CX :: nil) -> rs2#r = rs1#r.
+Lemma transl_cbranch_int32u_correct:
+  forall cmp r1 r2 lbl (rs: regset) m b,
+  Val.cmpu_bool (Mem.valid_pointer m) cmp rs##r1 rs##r2 = Some b ->
+  exec_instr ge fn (transl_cbranch_int32u cmp r1 r2 lbl) rs m =
+  eval_branch fn lbl rs m (Some b).
 Proof.
-  unfold mk_storebyte; intros.
-  destruct (Archi.ptr64 || low_ireg r) eqn:E.
-(* low reg *)
-  monadInv H. econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H0. eauto. auto.
-  intros; Simplifs.
-(* high reg *)
-  InvBooleans. rewrite H1; simpl. destruct (addressing_mentions addr RAX) eqn:E; monadInv H.
-(* RAX is mentioned. *)
-  assert (r <> RCX). { red; intros; subst r; discriminate H2. }
-  set (rs2 := nextinstr (rs1#RCX <- (eval_addrmode32 ge addr rs1))).
-  set (rs3 := nextinstr (rs2#RAX <- (rs1 r))).
-  econstructor; split.
-  apply exec_straight_three with rs2 m1 rs3 m1.
-  simpl. auto.
-  simpl. replace (rs2 r) with (rs1 r). auto. symmetry. unfold rs2; Simplifs.
-  simpl. unfold exec_store. unfold eval_addrmode; rewrite H1; simpl. rewrite Int.add_zero.
-  change (rs3 RAX) with (rs1 r).
-  change (rs3 RCX) with (eval_addrmode32 ge addr rs1).
-  replace (Val.add (eval_addrmode32 ge addr rs1) (Vint Int.zero))
-     with (eval_addrmode ge addr rs1).
-  rewrite H0. eauto.
-  unfold eval_addrmode in *; rewrite H1 in *.
-  destruct (eval_addrmode32 ge addr rs1); simpl in H0; try discriminate H0.
-  simpl. rewrite H1. rewrite Ptrofs.add_zero; auto.
-  auto. auto. auto.
-  intros. destruct H4. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs.
-(* RAX is not mentioned *)
-  set (rs2 := nextinstr (rs1#RAX <- (rs1 r))).
-  econstructor; split.
-  apply exec_straight_two with rs2 m1.
-  simpl. auto.
-  simpl. unfold exec_store. unfold eval_addrmode in *; rewrite H1 in *.
-  rewrite (addressing_mentions_correct addr RAX rs2 rs1); auto.
-  change (rs2 RAX) with (rs1 r). rewrite H0. eauto.
-  intros. unfold rs2; Simplifs.
-  auto. auto.
-  intros. destruct H3. simpl. Simplifs. unfold rs2; Simplifs.
+  intros. destruct cmp; simpl; rewrite ? H; auto.
+- rewrite <- Val.swap_cmpu_bool. simpl. rewrite H; auto.
+- rewrite <- Val.swap_cmpu_bool. simpl. rewrite H; auto.
 Qed.
 
-(** Accessing slots in the stack frame *)
-
-Remark eval_addrmode_indexed:
-  forall (base: ireg) ofs (rs: regset),
-  match rs#base with Vptr _ _ => True | _ => False end ->
-  eval_addrmode ge (Addrmode (Some base) None (inl _ (Ptrofs.unsigned ofs))) rs = Val.offset_ptr rs#base ofs.
+Lemma transl_cbranch_int64s_correct:
+  forall cmp r1 r2 lbl (rs: regset) m b,
+  Val.cmpl_bool cmp rs###r1 rs###r2 = Some b ->
+  exec_instr ge fn (transl_cbranch_int64s cmp r1 r2 lbl) rs m =
+  eval_branch fn lbl rs m (Some b).
 Proof.
-  intros. destruct (rs#base) eqn:BASE; try contradiction.
-  intros; unfold eval_addrmode; destruct Archi.ptr64 eqn:SF; simpl; rewrite BASE; simpl; rewrite SF; simpl.
-- apply f_equal. apply f_equal. rewrite Int64.add_zero_l. rewrite <- (Ptrofs.repr_unsigned ofs) at 2. auto with ptrofs.
--  apply f_equal. apply f_equal. rewrite Int.add_zero_l. rewrite <- (Ptrofs.repr_unsigned ofs) at 2. auto with ptrofs.
+  intros. destruct cmp; simpl; rewrite ? H.
+- destruct rs###r1; simpl in H; try discriminate. destruct rs###r2; inv H.
+  simpl; auto.
+- destruct rs###r1; simpl in H; try discriminate. destruct rs###r2; inv H.
+  simpl; auto.
+- auto.
+- rewrite <- Val.swap_cmpl_bool. simpl. rewrite H; auto.
+- rewrite <- Val.swap_cmpl_bool. simpl. rewrite H; auto.
+- auto.
 Qed.
 
-Ltac loadind_correct_solve :=
-  match goal with
-  | H: Error _ = OK _ |- _ => discriminate H
-  | H: OK _ = OK _ |- _ => inv H
-  | H: match ?x with _ => _ end = OK _ |- _ => destruct x eqn:?; loadind_correct_solve
-  | _ => idtac
-  end.
-
-Lemma loadind_correct:
-  forall (base: ireg) ofs ty dst k (rs: regset) c m v,
-  loadind base ofs ty dst k = OK c ->
-  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ rs'#(preg_of dst) = v
-  /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+Lemma transl_cbranch_int64u_correct:
+  forall cmp r1 r2 lbl (rs: regset) m b,
+  Val.cmplu_bool (Mem.valid_pointer m) cmp rs###r1 rs###r2 = Some b ->
+  exec_instr ge fn (transl_cbranch_int64u cmp r1 r2 lbl) rs m =
+  eval_branch fn lbl rs m (Some b).
 Proof.
-  unfold loadind; intros.
-  set (addr := Addrmode (Some base) None (inl (ident * ptrofs) (Ptrofs.unsigned ofs))) in *.
-  assert (eval_addrmode ge addr rs = Val.offset_ptr rs#base ofs).
-  { apply eval_addrmode_indexed. destruct (rs base); auto || discriminate. }
-  rewrite <- H1 in H0.
-  exists (nextinstr_nf (rs#(preg_of dst) <- v)); split.
-- loadind_correct_solve; apply exec_straight_one; auto; simpl in *; unfold exec_load; rewrite ?Heqb, ?H0; auto.
-- intuition Simplifs.
+  intros. destruct cmp; simpl; rewrite ? H; auto.
+- rewrite <- Val.swap_cmplu_bool. simpl. rewrite H; auto.
+- rewrite <- Val.swap_cmplu_bool. simpl. rewrite H; auto.
 Qed.
 
-Lemma storeind_correct:
-  forall (base: ireg) ofs ty src k (rs: regset) c m m',
-  storeind src base ofs ty k = OK c ->
-  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) (rs#(preg_of src)) = Some m' ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m'
-  /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
+Lemma transl_cond_float_correct:
+  forall (rs: regset) m cmp rd r1 r2 insn normal v,
+  transl_cond_float cmp rd r1 r2 = (insn, normal) ->
+  v = (if normal then Val.cmpf cmp rs#r1 rs#r2 else Val.notbool (Val.cmpf cmp rs#r1 rs#r2)) ->
+  exec_instr ge fn insn rs m = Next (nextinstr (rs#rd <- v)) m.
 Proof.
-  unfold storeind; intros.
-  set (addr := Addrmode (Some base) None (inl (ident * ptrofs) (Ptrofs.unsigned ofs))) in *.
-  assert (eval_addrmode ge addr rs = Val.offset_ptr rs#base ofs).
-  { apply eval_addrmode_indexed. destruct (rs base); auto || discriminate. }
-  rewrite <- H1 in H0.
-  loadind_correct_solve; simpl in H0;
-  (econstructor; split;
-  [apply exec_straight_one; [simpl; unfold exec_store; rewrite ?Heqb, H0;eauto|auto]
-  |simpl; intros; unfold undef_regs; repeat Simplifs]).
+  intros. destruct cmp; simpl in H; inv H; auto. 
+- rewrite Val.negate_cmpf_eq. auto.
+- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpf, Val.cmpf_bool.
+  rewrite <- Float.cmp_swap. auto.
+- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpf, Val.cmpf_bool.
+  rewrite <- Float.cmp_swap. auto.
 Qed.
 
-(** Translation of addressing modes *)
-
-Lemma transl_addressing_mode_32_correct:
-  forall addr args am (rs: regset) v,
-  transl_addressing addr args = OK am ->
-  eval_addressing32 ge (rs RSP) addr (List.map rs (List.map preg_of args)) = Some v ->
-  Val.lessdef v (eval_addrmode32 ge am rs).
+Lemma transl_cond_single_correct:
+  forall (rs: regset) m cmp rd r1 r2 insn normal v,
+  transl_cond_single cmp rd r1 r2 = (insn, normal) ->
+  v = (if normal then Val.cmpfs cmp rs#r1 rs#r2 else Val.notbool (Val.cmpfs cmp rs#r1 rs#r2)) ->
+  exec_instr ge fn insn rs m = Next (nextinstr (rs#rd <- v)) m.
 Proof.
-  assert (A: forall id ofs, Archi.ptr64 = false ->
-          Val.add (Vint Int.zero) (Genv.symbol_address ge id ofs) = Genv.symbol_address ge id ofs).
-  { intros. unfold Val.add; rewrite H. unfold Genv.symbol_address.
-    destruct (Genv.find_symbol ge id); auto. rewrite Ptrofs.add_zero; auto. }
-  assert (C: forall v i,
-    Val.lessdef (Val.mul v (Vint (Int.repr i)))
-               (if zeq i 1 then v else Val.mul v (Vint (Int.repr i)))).
-  { intros. destruct (zeq i 1); subst; auto.
-    destruct v; simpl; auto. rewrite Int.mul_one; auto. }
-  unfold transl_addressing; intros.
-  destruct addr; repeat (destruct args; try discriminate H); simpl in H0; FuncInv;
-  monadInv H; try (erewrite ! ireg_of_eq by eauto); unfold eval_addrmode32.
-- simpl; rewrite Int.add_zero_l; auto.
-- rewrite Val.add_assoc. apply Val.add_lessdef; auto.
-- rewrite Val.add_permut. apply Val.add_lessdef; auto. simpl; rewrite Int.add_zero_l; auto.
-- apply Val.add_lessdef; auto. apply Val.add_lessdef; auto.
-- rewrite ! A by auto. auto.
-- rewrite Val.add_commut. rewrite A by auto. auto.
-- rewrite Val.add_permut. rewrite Val.add_commut. apply Val.add_lessdef; auto. rewrite A; auto.
-- simpl. unfold Val.add; rewrite Heqb.
-  destruct (rs RSP); simpl; auto.
-  rewrite Int.add_zero_l. apply Val.lessdef_same; f_equal; f_equal.
-  symmetry. transitivity (Ptrofs.repr (Ptrofs.signed i)). auto with ptrofs. auto with ints.
-Qed.
-
-Lemma transl_addressing_mode_64_correct:
-  forall addr args am (rs: regset) v,
-  transl_addressing addr args = OK am ->
-  eval_addressing64 ge (rs RSP) addr (List.map rs (List.map preg_of args)) = Some v ->
-  Val.lessdef v (eval_addrmode64 ge am rs).
+  intros. destruct cmp; simpl in H; inv H; auto. 
+- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
+  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq f0 f); auto.
+- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
+  rewrite <- Float32.cmp_swap. auto.
+- simpl. f_equal. f_equal. f_equal. destruct (rs r2), (rs r1); auto. unfold Val.cmpfs, Val.cmpfs_bool.
+  rewrite <- Float32.cmp_swap. auto.
+   Qed.
+
+(* TODO gourdinl UNUSUED ? Remark branch_on_X31:
+  forall normal lbl (rs: regset) m b,
+  rs#X31 = Val.of_bool (eqb normal b) -> 
+  exec_instr ge fn (if normal then Pbnew X31 X0 lbl else Pbeqw X31 X0 lbl) rs m =
+  eval_branch fn lbl rs m (Some b).
 Proof.
-  assert (A: forall id ofs, Archi.ptr64 = true ->
-          Val.addl (Vlong Int64.zero) (Genv.symbol_address ge id ofs) = Genv.symbol_address ge id ofs).
-  { intros. unfold Val.addl; rewrite H. unfold Genv.symbol_address.
-    destruct (Genv.find_symbol ge id); auto. rewrite Ptrofs.add_zero; auto. }
-  assert (C: forall v i,
-    Val.lessdef (Val.mull v (Vlong (Int64.repr i)))
-               (if zeq i 1 then v else Val.mull v (Vlong (Int64.repr i)))).
-  { intros. destruct (zeq i 1); subst; auto.
-    destruct v; simpl; auto. rewrite Int64.mul_one; auto. }
-  unfold transl_addressing; intros.
-  destruct addr; repeat (destruct args; try discriminate H); simpl in H0; FuncInv;
-  monadInv H; try (erewrite ! ireg_of_eq by eauto); unfold eval_addrmode64.
-- simpl; rewrite Int64.add_zero_l; auto.
-- rewrite Val.addl_assoc. apply Val.addl_lessdef; auto.
-- rewrite Val.addl_permut. apply Val.addl_lessdef; auto. simpl; rewrite Int64.add_zero_l; auto.
-- apply Val.addl_lessdef; auto. apply Val.addl_lessdef; auto.
-- rewrite ! A by auto. auto.
-- unfold Val.addl; rewrite Heqb. destruct (rs RSP); auto. simpl.
-  rewrite Int64.add_zero_l. apply Val.lessdef_same; f_equal; f_equal.
-  symmetry. transitivity (Ptrofs.repr (Ptrofs.signed i)). auto with ptrofs. auto with ints.
-Qed.
+  intros. destruct normal; simpl; rewrite H; simpl; destruct b; reflexivity. 
+   Qed.*)
 
-Lemma transl_addressing_mode_correct:
-  forall addr args am (rs: regset) v,
-  transl_addressing addr args = OK am ->
-  eval_addressing ge (rs RSP) addr (List.map rs (List.map preg_of args)) = Some v ->
-  Val.lessdef v (eval_addrmode ge am rs).
+Ltac ArgsInv :=
+  repeat (match goal with
+  | [ H: Error _ = OK _ |- _ ] => discriminate
+  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+  | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  end);
+  subst;
+  repeat (match goal with
+  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+  end).
+
+Lemma transl_cbranch_correct_1:
+  forall cond args lbl k c m ms b sp rs m',
+  transl_cbranch cond args lbl k = OK c ->
+  eval_condition cond (List.map ms args) m = Some b ->
+  agree ms sp rs ->
+  Mem.extends m m' ->
+  exists rs', exists insn,
+     exec_straight_opt ge fn c rs m' (insn :: k) rs' m'
+  /\ exec_instr ge fn insn rs' m' = eval_branch fn lbl rs' m' (Some b)
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  unfold eval_addressing, eval_addrmode; intros. destruct Archi.ptr64.
-  eapply transl_addressing_mode_64_correct; eauto.
-  eapply transl_addressing_mode_32_correct; eauto.
+  intros until m'; intros TRANSL EVAL AG MEXT.
+  set (vl' := map rs (map preg_of args)). 
+  assert (EVAL': eval_condition cond vl' m' = Some b).
+  { apply eval_condition_lessdef with (map ms args) m; auto. eapply preg_vals; eauto. }
+  clear EVAL MEXT AG.
+  destruct cond; simpl in TRANSL; ArgsInv.
+  - exists rs, (transl_cbranch_int32s c0 x x0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int32s_correct; auto.
+- exists rs, (transl_cbranch_int32u c0 x x0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int32u_correct; auto.
+- predSpec Int.eq Int.eq_spec n Int.zero.
++ subst n. exists rs, (transl_cbranch_int32s c0 x X0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int32s_correct; auto.
++ exploit (loadimm32_correct X31 n); eauto. intros (rs' & A & B & C).
+  exists rs', (transl_cbranch_int32s c0 x X31 lbl).
+  split. constructor; eexact A. split; auto.
+  apply transl_cbranch_int32s_correct; auto.
+  simpl; rewrite B, C; eauto with asmgen.
+- predSpec Int.eq Int.eq_spec n Int.zero.
++ subst n. exists rs, (transl_cbranch_int32u c0 x X0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int32u_correct; auto.
++ exploit (loadimm32_correct X31 n); eauto. intros (rs' & A & B & C).
+  exists rs', (transl_cbranch_int32u c0 x X31 lbl).
+  split. constructor; eexact A. split; auto.
+  apply transl_cbranch_int32u_correct; auto.
+  simpl; rewrite B, C; eauto with asmgen.
+- exists rs, (transl_cbranch_int64s c0 x x0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int64s_correct; auto.
+- exists rs, (transl_cbranch_int64u c0 x x0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int64u_correct; auto.
+- predSpec Int64.eq Int64.eq_spec n Int64.zero.
++ subst n. exists rs, (transl_cbranch_int64s c0 x X0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int64s_correct; auto.
++ exploit (loadimm64_correct X31 n); eauto. intros (rs' & A & B & C).
+  exists rs', (transl_cbranch_int64s c0 x X31 lbl).
+  split. constructor; eexact A. split; auto.
+  apply transl_cbranch_int64s_correct; auto.
+  simpl; rewrite B, C; eauto with asmgen.
+- predSpec Int64.eq Int64.eq_spec n Int64.zero.
++ subst n. exists rs, (transl_cbranch_int64u c0 x X0 lbl).
+  intuition auto. constructor. apply transl_cbranch_int64u_correct; auto.
++ exploit (loadimm64_correct X31 n); eauto. intros (rs' & A & B & C).
+  exists rs', (transl_cbranch_int64u c0 x X31 lbl).
+  split. constructor; eexact A. split; auto.
+  apply transl_cbranch_int64u_correct; auto.
+  simpl; rewrite B, C; eauto with asmgen.
+- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
+  set (v := if normal then Val.cmpf c0 rs#x rs#x0 else Val.notbool (Val.cmpf c0 rs#x rs#x0)).
+  assert (V: v = Val.of_bool (eqb normal b)).
+  { unfold v, Val.cmpf. rewrite EVAL'. destruct normal, b; reflexivity. }
+  econstructor; econstructor.
+  split. constructor. apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
+  split. rewrite V; destruct normal, b; reflexivity.
+  intros; Simpl.
+- destruct (transl_cond_float c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
+  assert (EVAL'': Val.cmpf_bool c0 (rs x) (rs x0) = Some (negb b)).
+  { destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; inv EVAL'; auto. }
+  set (v := if normal then Val.cmpf c0 rs#x rs#x0 else Val.notbool (Val.cmpf c0 rs#x rs#x0)).
+  assert (V: v = Val.of_bool (xorb normal b)).
+  { unfold v, Val.cmpf. rewrite EVAL''. destruct normal, b; reflexivity. }
+  econstructor; econstructor.
+  split. constructor. apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
+  split. rewrite V; destruct normal, b; reflexivity.
+  intros; Simpl.
+- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
+  set (v := if normal then Val.cmpfs c0 rs#x rs#x0 else Val.notbool (Val.cmpfs c0 rs#x rs#x0)).
+  assert (V: v = Val.of_bool (eqb normal b)).
+  { unfold v, Val.cmpfs. rewrite EVAL'. destruct normal, b; reflexivity. }
+  econstructor; econstructor.
+  split. constructor. apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
+  split. rewrite V; destruct normal, b; reflexivity.
+  intros; Simpl.
+- destruct (transl_cond_single c0 X31 x x0) as [insn normal] eqn:TC; inv EQ2.
+  assert (EVAL'': Val.cmpfs_bool c0 (rs x) (rs x0) = Some (negb b)).
+  { destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; inv EVAL'; auto. }
+  set (v := if normal then Val.cmpfs c0 rs#x rs#x0 else Val.notbool (Val.cmpfs c0 rs#x rs#x0)).
+  assert (V: v = Val.of_bool (xorb normal b)).
+  { unfold v, Val.cmpfs. rewrite EVAL''. destruct normal, b; reflexivity. }
+  econstructor; econstructor.
+  split. constructor. apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
+  split. rewrite V; destruct normal, b; reflexivity.
+     intros; Simpl.
+
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *;
+  destruct (rs x) eqn:EQRS; simpl in *; try congruence;
+  inv EQ2; eexists; eexists; eauto; split; constructor; auto;
+  simpl in *.
+  + rewrite EQRS;
+    assert (HB: (Int.eq Int.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    assert (HB: (Int.eq i Int.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    destruct (rs x0); try congruence.
+    assert (HB: (Int.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *;
+  destruct (rs x) eqn:EQRS; simpl in *; try congruence;
+  inv EQ2; eexists; eexists; eauto; split; constructor; auto;
+  simpl in *.
+  + rewrite EQRS;
+    assert (HB: negb (Int.eq Int.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    assert (HB: negb (Int.eq i Int.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    destruct (rs x0); try congruence.
+    assert (HB: negb (Int.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  destruct (rs x) eqn:EQRS; simpl in *; try congruence;
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; auto.
+  + rewrite EQRS;
+    assert (HB: (Int64.eq Int64.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    assert (HB: (Int64.eq i Int64.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    destruct (rs x0); try congruence.
+    assert (HB: (Int64.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32 in *; inv EQ2;
+  destruct (rs x) eqn:EQRS; simpl in *; try congruence;
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; auto.
+  + rewrite EQRS;
+    assert (HB: negb (Int64.eq Int64.zero i) = b) by congruence.
+    rewrite HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    assert (HB: negb (Int64.eq i Int64.zero) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+  + rewrite EQRS;
+    destruct (rs x0); try congruence.
+    assert (HB: negb (Int64.eq i i0) = b) by congruence.
+    rewrite <- HB; destruct b; simpl; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
+- destruct optR as [[]|];
+  unfold apply_bin_oreg_ireg0, apply_bin_oreg in *;
+  unfold zero32, Op.zero32, zero64, Op.zero64 in *; inv EQ2;
+  try (destruct (rs x); simpl in EVAL'; discriminate; fail);
+  eexists; eexists; eauto; split; constructor;
+  simpl in *; try rewrite EVAL'; auto.
 Qed.
 
-Lemma normalize_addrmode_32_correct:
-  forall am rs, eval_addrmode32 ge (normalize_addrmode_32 am) rs = eval_addrmode32 ge am rs.
+Lemma transl_cbranch_correct_true:
+  forall cond args lbl k c m ms sp rs m',
+  transl_cbranch cond args lbl k = OK c ->
+  eval_condition cond (List.map ms args) m = Some true ->
+  agree ms sp rs ->
+  Mem.extends m m' ->
+  exists rs', exists insn,
+     exec_straight_opt ge fn c rs m' (insn :: k) rs' m'
+  /\ exec_instr ge fn insn rs' m' = goto_label fn lbl rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros; destruct am as [base ofs [n|r]]; simpl; auto. rewrite Int.repr_signed. auto.
-Qed.
-
-Lemma normalize_addrmode_64_correct:
-  forall am rs,
-  eval_addrmode64 ge am rs =
-  match normalize_addrmode_64 am with
-  | (am', None) => eval_addrmode64 ge am' rs
-  | (am', Some delta) => Val.addl (eval_addrmode64 ge am' rs) (Vlong delta)
-  end.
+  intros. eapply transl_cbranch_correct_1 with (b := true); eauto.
+Qed. 
+
+Lemma transl_cbranch_correct_false:
+  forall cond args lbl k c m ms sp rs m',
+  transl_cbranch cond args lbl k = OK c ->
+  eval_condition cond (List.map ms args) m = Some false ->
+  agree ms sp rs ->
+  Mem.extends m m' ->
+  exists rs',
+     exec_straight ge fn c rs m' k rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros; destruct am as [base ofs [n|[id delta]]]; simpl.
-- destruct (offset_in_range n); auto; simpl.
-  rewrite ! Val.addl_assoc. apply f_equal. apply f_equal. simpl. rewrite Int64.add_zero_l; auto.
-- destruct Archi.ptr64 eqn:SF; auto; simpl;
-  destruct (ptroffset_in_range delta); auto. simpl.
-  rewrite ! Val.addl_assoc. apply f_equal. apply f_equal.
-  rewrite <- Genv.shift_symbol_address_64 by auto.
-  f_equal. rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
+  intros. exploit transl_cbranch_correct_1; eauto. simpl. 
+  intros (rs' & insn & A & B & C).
+  exists (nextinstr rs').
+  split. eapply exec_straight_opt_right; eauto. apply exec_straight_one; auto.
+  intros; Simpl. 
 Qed.
 
-(** Processor conditions and comparisons *)
+(** Translation of condition operators *)
 
-Lemma compare_ints_spec:
-  forall rs v1 v2 m,
-  let rs' := nextinstr (compare_ints v1 v2 rs m) in
-     rs'#ZF = Val.cmpu (Mem.valid_pointer m) Ceq v1 v2
-  /\ rs'#CF = Val.cmpu (Mem.valid_pointer m) Clt v1 v2
-  /\ rs'#SF = Val.negative (Val.sub v1 v2)
-  /\ rs'#OF = Val.sub_overflow v1 v2
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+Lemma transl_cond_int32s_correct:
+  forall cmp rd r1 r2 k rs m,
+  exists rs',
+     exec_straight ge fn (transl_cond_int32s cmp rd r1 r2 k) rs m k rs' m
+  /\ Val.lessdef (Val.cmp cmp rs##r1 rs##r2) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
-  intros. unfold rs'; unfold compare_ints.
-  split. auto.
-  split. auto.
-  split. auto.
-  split. auto.
-  intros. Simplifs.
+  intros. destruct cmp; simpl. 
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. destruct (rs##r1); auto. destruct (rs##r2); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. destruct (rs##r1); auto. destruct (rs##r2); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmp. rewrite <- Val.swap_cmp_bool.
+  simpl. rewrite (Val.negate_cmp_bool Clt). 
+  destruct (Val.cmp_bool Clt rs##r2 rs##r1) as [[]|]; auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. unfold Val.cmp. rewrite <- Val.swap_cmp_bool. auto.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmp. rewrite (Val.negate_cmp_bool Clt). 
+  destruct (Val.cmp_bool Clt rs##r1 rs##r2) as [[]|]; auto.
 Qed.
 
-Lemma testcond_for_signed_comparison_32_correct:
-  forall c v1 v2 rs m b,
-  Val.cmp_bool c v1 v2 = Some b ->
-  eval_testcond (testcond_for_signed_comparison c)
-                (nextinstr (compare_ints v1 v2 rs m)) = Some b.
+Lemma transl_cond_int32u_correct:
+  forall cmp rd r1 r2 k rs m,
+  exists rs',
+     exec_straight ge fn (transl_cond_int32u cmp rd r1 r2 k) rs m k rs' m
+  /\ rs'#rd = Val.cmpu (Mem.valid_pointer m) cmp rs##r1 rs##r2
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
-  intros. generalize (compare_ints_spec rs v1 v2 m).
-  set (rs' := nextinstr (compare_ints v1 v2 rs m)).
-  intros [A [B [C [D E]]]].
-  destruct v1; destruct v2; simpl in H; inv H.
-  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
-  simpl. unfold Val.cmp, Val.cmpu.
-  rewrite Int.lt_sub_overflow.
-  destruct c; simpl.
-  destruct (Int.eq i i0); auto.
-  destruct (Int.eq i i0); auto.
-  destruct (Int.lt i i0); auto.
-  rewrite Int.not_lt. destruct (Int.lt i i0); simpl; destruct (Int.eq i i0); auto.
-  rewrite (Int.lt_not i i0). destruct (Int.lt i i0); destruct (Int.eq i i0); reflexivity.
-  destruct (Int.lt i i0); reflexivity.
+  intros. destruct cmp; simpl. 
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmpu. rewrite <- Val.swap_cmpu_bool.
+  simpl. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cle). 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m) Cle rs##r1 rs##r2) as [[]|]; auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. unfold Val.cmpu. rewrite <- Val.swap_cmpu_bool. auto.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmpu. rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Clt). 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m) Clt rs##r1 rs##r2) as [[]|]; auto.
 Qed.
 
-Lemma testcond_for_unsigned_comparison_32_correct:
-  forall c v1 v2 rs m b,
-  Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
-  eval_testcond (testcond_for_unsigned_comparison c)
-                (nextinstr (compare_ints v1 v2 rs m)) = Some b.
+Lemma transl_cond_int64s_correct:
+  forall cmp rd r1 r2 k rs m,
+  exists rs',
+     exec_straight ge fn (transl_cond_int64s cmp rd r1 r2 k) rs m k rs' m
+  /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs###r1 rs###r2)) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
-  intros. generalize (compare_ints_spec rs v1 v2 m).
-  set (rs' := nextinstr (compare_ints v1 v2 rs m)).
-  intros [A [B [C [D E]]]].
-  unfold eval_testcond. rewrite A; rewrite B. unfold Val.cmpu, Val.cmp.
-  destruct v1; destruct v2; simpl in H; FuncInv; subst.
-- (* int int *)
-  destruct c; simpl; auto.
-  destruct (Int.eq i i0); reflexivity.
-  destruct (Int.eq i i0); auto.
-  destruct (Int.ltu i i0); auto.
-  rewrite Int.not_ltu. destruct (Int.ltu i i0); simpl; destruct (Int.eq i i0); auto.
-  rewrite (Int.ltu_not i i0). destruct (Int.ltu i i0); destruct (Int.eq i i0); reflexivity.
-  destruct (Int.ltu i i0); reflexivity.
-- (* int ptr *)
-  unfold Val.cmpu_bool; rewrite Heqb1.
-  destruct (Int.eq i Int.zero &&
-    (Mem.valid_pointer m b0 (Ptrofs.unsigned i0) || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))); try discriminate H.
-  destruct c; simpl in *; inv H; reflexivity.
-- (* ptr int *)
-  unfold Val.cmpu_bool; rewrite Heqb1.
-  destruct (Int.eq i0 Int.zero &&
-    (Mem.valid_pointer m b0 (Ptrofs.unsigned i) || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); try discriminate H.
-  destruct c; simpl in *; inv H; reflexivity.
-- (* ptr ptr *)
-  unfold Val.cmpu_bool; rewrite Heqb2.
-  fold (Mem.weak_valid_pointer m b0 (Ptrofs.unsigned i)) in *.
-  fold (Mem.weak_valid_pointer m b1 (Ptrofs.unsigned i0)) in *.
-  destruct (eq_block b0 b1).
-  destruct (Mem.weak_valid_pointer m b0 (Ptrofs.unsigned i) &&
-            Mem.weak_valid_pointer m b1 (Ptrofs.unsigned i0)); inv H.
-  destruct c; simpl; auto.
-  destruct (Ptrofs.eq i i0); auto.
-  destruct (Ptrofs.eq i i0); auto.
-  destruct (Ptrofs.ltu i i0); auto.
-  rewrite Ptrofs.not_ltu. destruct (Ptrofs.ltu i i0); simpl; destruct (Ptrofs.eq i i0); auto.
-  rewrite (Ptrofs.ltu_not i i0). destruct (Ptrofs.ltu i i0); destruct (Ptrofs.eq i i0); reflexivity.
-  destruct (Ptrofs.ltu i i0); reflexivity.
-  destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) &&
-            Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate H.
-  destruct c; simpl in *; inv H; reflexivity.
+  intros. destruct cmp; simpl. 
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. destruct (rs###r1); auto. destruct (rs###r2); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. destruct (rs###r1); auto. destruct (rs###r2); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmpl. rewrite <- Val.swap_cmpl_bool.
+  simpl. rewrite (Val.negate_cmpl_bool Clt). 
+  destruct (Val.cmpl_bool Clt rs###r2 rs###r1) as [[]|]; auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. unfold Val.cmpl. rewrite <- Val.swap_cmpl_bool. auto.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmpl. rewrite (Val.negate_cmpl_bool Clt). 
+  destruct (Val.cmpl_bool Clt rs###r1 rs###r2) as [[]|]; auto.
 Qed.
 
-Lemma compare_longs_spec:
-  forall rs v1 v2 m,
-  let rs' := nextinstr (compare_longs v1 v2 rs m) in
-     rs'#ZF = Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2)
-  /\ rs'#CF = Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt v1 v2)
-  /\ rs'#SF = Val.negativel (Val.subl v1 v2)
-  /\ rs'#OF = Val.subl_overflow v1 v2
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+Lemma transl_cond_int64u_correct:
+  forall cmp rd r1 r2 k rs m,
+  exists rs',
+     exec_straight ge fn (transl_cond_int64u cmp rd r1 r2 k) rs m k rs' m
+  /\ rs'#rd = Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs###r1 rs###r2)
+  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. 
 Proof.
-  intros. unfold rs'; unfold compare_longs.
-  split. auto.
-  split. auto.
-  split. auto.
-  split. auto.
-  intros. Simplifs.
+  intros. destruct cmp; simpl. 
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmplu. rewrite <- Val.swap_cmplu_bool.
+  simpl. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Cle). 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m) Cle rs###r1 rs###r2) as [[]|]; auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto|auto].
+  split; intros; Simpl. unfold Val.cmplu. rewrite <- Val.swap_cmplu_bool. auto.
+- econstructor; split.
+  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold Val.cmplu. rewrite (Val.negate_cmplu_bool (Mem.valid_pointer m) Clt). 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m) Clt rs###r1 rs###r2) as [[]|]; auto.
 Qed.
 
-Lemma int64_sub_overflow:
-  forall x y,
-  Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
-          (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
-  (if Int64.lt x y then Int.one else Int.zero).
+Lemma transl_condimm_int32s_correct:
+  forall cmp rd r1 n k rs m,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (transl_condimm_int32s cmp rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros.
-  transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
-  rewrite <- (Int64.lt_sub_overflow x y).
-  unfold Int64.sub_overflow, Int64.negative.
-  set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
-  destruct (zle Int64.min_signed s && zle s Int64.max_signed);
-  destruct (Int64.lt (Int64.sub x y) Int64.zero);
-  auto.
-  destruct (Int64.lt x y); auto.
+  intros. unfold transl_condimm_int32s.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. exploit transl_cond_int32s_correct. intros (rs' & A & B & C).
+  exists rs'; eauto.
+- assert (DFL:
+    exists rs',
+      exec_straight ge fn (loadimm32 X31 n (transl_cond_int32s cmp rd r1 X31 k)) rs m k rs' m
+   /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd
+   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
+  { exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
+    exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+    exists rs2; split. 
+    eapply exec_straight_trans. eexact A1. eexact A2. 
+    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. auto.
+    intros; transitivity (rs1 r); auto. }
+  destruct cmp.
++ unfold xorimm32. 
+  exploit (opimm32_correct Pxorw Pxoriw Val.xor); eauto. intros (rs1 & A1 & B1 & C1).
+  exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+  exists rs2; split. 
+  eapply exec_straight_trans. eexact A1. eexact A2. 
+  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
+  unfold Val.cmp in B2; simpl in B2; rewrite Int.xor_is_zero in B2. exact B2.
+  intros; transitivity (rs1 r); auto.
++ unfold xorimm32. 
+  exploit (opimm32_correct Pxorw Pxoriw Val.xor); eauto. intros (rs1 & A1 & B1 & C1).
+  exploit transl_cond_int32s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+  exists rs2; split. 
+  eapply exec_straight_trans. eexact A1. eexact A2. 
+  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
+  unfold Val.cmp in B2; simpl in B2; rewrite Int.xor_is_zero in B2. exact B2.
+  intros; transitivity (rs1 r); auto.
++ exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
++ predSpec Int.eq Int.eq_spec n (Int.repr Int.max_signed).
+* subst n. exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. 
+  unfold Val.cmp; destruct (rs#r1); simpl; auto. rewrite B1. 
+  unfold Int.lt. rewrite zlt_false. auto. 
+  change (Int.signed (Int.repr Int.max_signed)) with Int.max_signed.
+  generalize (Int.signed_range i); lia.
+* exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. 
+  rewrite B1. unfold Val.cmp; simpl; destruct (rs#r1); simpl; auto.
+  unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1).
+  destruct (zlt (Int.signed n) (Int.signed i)).
+  rewrite zlt_false by lia. auto.
+  rewrite zlt_true by lia. auto.
+  rewrite Int.add_signed. symmetry; apply Int.signed_repr. 
+  assert (Int.signed n <> Int.max_signed).
+  { red; intros E. elim H1. rewrite <- (Int.repr_signed n). rewrite E. auto. }
+  generalize (Int.signed_range n); lia.
++ apply DFL.
++ apply DFL.
 Qed.
 
-Lemma testcond_for_signed_comparison_64_correct:
-  forall c v1 v2 rs m b,
-  Val.cmpl_bool c v1 v2 = Some b ->
-  eval_testcond (testcond_for_signed_comparison c)
-                (nextinstr (compare_longs v1 v2 rs m)) = Some b.
+Lemma transl_condimm_int32u_correct:
+  forall cmp rd r1 n k rs m,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (transl_condimm_int32u cmp rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. generalize (compare_longs_spec rs v1 v2 m).
-  set (rs' := nextinstr (compare_longs v1 v2 rs m)).
-  intros [A [B [C [D E]]]].
-  destruct v1; destruct v2; simpl in H; inv H.
-  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
-  simpl; rewrite int64_sub_overflow.
-  destruct c; simpl.
-  destruct (Int64.eq i i0); auto.
-  destruct (Int64.eq i i0); auto.
-  destruct (Int64.lt i i0); auto.
-  rewrite Int64.not_lt. destruct (Int64.lt i i0); simpl; destruct (Int64.eq i i0); auto.
-  rewrite (Int64.lt_not i i0). destruct (Int64.lt i i0); destruct (Int64.eq i i0); reflexivity.
-  destruct (Int64.lt i i0); reflexivity.
+  intros. unfold transl_condimm_int32u.
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. exploit transl_cond_int32u_correct. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split; auto. rewrite B; auto.
+- assert (DFL:
+    exists rs',
+      exec_straight ge fn (loadimm32 X31 n (transl_cond_int32u cmp rd r1 X31 k)) rs m k rs' m
+   /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd
+   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
+  { exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
+    exploit transl_cond_int32u_correct; eauto. intros (rs2 & A2 & B2 & C2).
+    exists rs2; split. 
+    eapply exec_straight_trans. eexact A1. eexact A2. 
+    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. rewrite B2; auto.
+    intros; transitivity (rs1 r); auto. }
+  destruct cmp.
++ apply DFL.
++ apply DFL.
++ exploit (opimm32_correct Psltuw Psltiuw (Val.cmpu (Mem.valid_pointer m) Clt) m); eauto.
+  intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
++ apply DFL.
++ apply DFL.
++ apply DFL.
 Qed.
 
-Lemma testcond_for_unsigned_comparison_64_correct:
-  forall c v1 v2 rs m b,
-  Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 = Some b ->
-  eval_testcond (testcond_for_unsigned_comparison c)
-                (nextinstr (compare_longs v1 v2 rs m)) = Some b.
+Lemma transl_condimm_int64s_correct:
+  forall cmp rd r1 n k rs m,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (transl_condimm_int64s cmp rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. generalize (compare_longs_spec rs v1 v2 m).
-  set (rs' := nextinstr (compare_longs v1 v2 rs m)).
-  intros [A [B [C [D E]]]].
-  unfold eval_testcond. rewrite A; rewrite B.
-  destruct v1; destruct v2; simpl in H; FuncInv; subst.
-- (* int int *)
-  destruct c; simpl; auto.
-  destruct (Int64.eq i i0); reflexivity.
-  destruct (Int64.eq i i0); auto.
-  destruct (Int64.ltu i i0); auto.
-  rewrite Int64.not_ltu. destruct (Int64.ltu i i0); simpl; destruct (Int64.eq i i0); auto.
-  rewrite (Int64.ltu_not i i0). destruct (Int64.ltu i i0); destruct (Int64.eq i i0); reflexivity.
-  destruct (Int64.ltu i i0); reflexivity.
-- (* int ptr *)
-  unfold Val.cmplu; simpl; destruct Archi.ptr64; try discriminate.
-  destruct (Int64.eq i Int64.zero &&
-    (Mem.valid_pointer m b0 (Ptrofs.unsigned i0) || Mem.valid_pointer m b0 (Ptrofs.unsigned i0 - 1))) eqn:?; try discriminate H.
-  destruct c; simpl in *; inv H; auto.
-- (* ptr int *)
-  unfold Val.cmplu; simpl; destruct Archi.ptr64; try discriminate.
-  destruct (Int64.eq i0 Int64.zero &&
-    (Mem.valid_pointer m b0 (Ptrofs.unsigned i) || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))) eqn:?; try discriminate H.
-  destruct c; simpl in *; inv H; auto.
-- (* ptr ptr *)
-  unfold Val.cmplu; simpl; destruct Archi.ptr64; try discriminate H.
-  fold (Mem.weak_valid_pointer m b0 (Ptrofs.unsigned i)) in *.
-  fold (Mem.weak_valid_pointer m b1 (Ptrofs.unsigned i0)) in *.
-  destruct (eq_block b0 b1).
-  destruct (Mem.weak_valid_pointer m b0 (Ptrofs.unsigned i) &&
-            Mem.weak_valid_pointer m b1 (Ptrofs.unsigned i0)); inv H.
-  destruct c; simpl; auto.
-  destruct (Ptrofs.eq i i0); auto.
-  destruct (Ptrofs.eq i i0); auto.
-  destruct (Ptrofs.ltu i i0); auto.
-  rewrite Ptrofs.not_ltu. destruct (Ptrofs.ltu i i0); simpl; destruct (Ptrofs.eq i i0); auto.
-  rewrite (Ptrofs.ltu_not i i0). destruct (Ptrofs.ltu i i0); destruct (Ptrofs.eq i i0); reflexivity.
-  destruct (Ptrofs.ltu i i0); reflexivity.
-  destruct (Mem.valid_pointer m b0 (Ptrofs.unsigned i) &&
-            Mem.valid_pointer m b1 (Ptrofs.unsigned i0)); try discriminate H.
-  destruct c; simpl in *; inv H; reflexivity.
+  intros. unfold transl_condimm_int64s.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+- subst n. exploit transl_cond_int64s_correct. intros (rs' & A & B & C).
+  exists rs'; eauto.
+- assert (DFL:
+    exists rs',
+      exec_straight ge fn (loadimm64 X31 n (transl_cond_int64s cmp rd r1 X31 k)) rs m k rs' m
+   /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd
+   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
+  { exploit loadimm64_correct; eauto. intros (rs1 & A1 & B1 & C1).
+    exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+    exists rs2; split. 
+    eapply exec_straight_trans. eexact A1. eexact A2. 
+    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. auto.
+    intros; transitivity (rs1 r); auto. }
+  destruct cmp.
++ unfold xorimm64. 
+  exploit (opimm64_correct Pxorl Pxoril Val.xorl); eauto. intros (rs1 & A1 & B1 & C1).
+  exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+  exists rs2; split. 
+  eapply exec_straight_trans. eexact A1. eexact A2. 
+  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
+  unfold Val.cmpl in B2; simpl in B2; rewrite Int64.xor_is_zero in B2. exact B2.
+  intros; transitivity (rs1 r); auto.
++ unfold xorimm64. 
+  exploit (opimm64_correct Pxorl Pxoril Val.xorl); eauto. intros (rs1 & A1 & B1 & C1).
+  exploit transl_cond_int64s_correct; eauto. intros (rs2 & A2 & B2 & C2).
+  exists rs2; split. 
+  eapply exec_straight_trans. eexact A1. eexact A2. 
+  split. simpl in B2; rewrite B1 in B2; simpl in B2. destruct (rs#r1); auto.
+  unfold Val.cmpl in B2; simpl in B2; rewrite Int64.xor_is_zero in B2. exact B2.
+  intros; transitivity (rs1 r); auto.
++ exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
++ predSpec Int64.eq Int64.eq_spec n (Int64.repr Int64.max_signed).
+* subst n. exploit loadimm32_correct; eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. 
+  unfold Val.cmpl; destruct (rs#r1); simpl; auto. rewrite B1. 
+  unfold Int64.lt. rewrite zlt_false. auto. 
+  change (Int64.signed (Int64.repr Int64.max_signed)) with Int64.max_signed.
+  generalize (Int64.signed_range i); lia.
+* exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. 
+  rewrite B1. unfold Val.cmpl; simpl; destruct (rs#r1); simpl; auto.
+  unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1).
+  destruct (zlt (Int64.signed n) (Int64.signed i)).
+  rewrite zlt_false by lia. auto.
+  rewrite zlt_true by lia. auto.
+  rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. 
+  assert (Int64.signed n <> Int64.max_signed).
+  { red; intros E. elim H1. rewrite <- (Int64.repr_signed n). rewrite E. auto. }
+  generalize (Int64.signed_range n); lia.
++ apply DFL.
++ apply DFL.
 Qed.
 
-Lemma compare_floats_spec:
-  forall rs n1 n2,
-  let rs' := nextinstr (compare_floats (Vfloat n1) (Vfloat n2) rs) in
-     rs'#ZF = Val.of_bool (Float.cmp Ceq n1 n2 || negb (Float.ordered n1 n2))
-  /\ rs'#CF = Val.of_bool (negb (Float.cmp Cge n1 n2))
-  /\ rs'#PF = Val.of_bool (negb (Float.ordered n1 n2))
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+Lemma transl_condimm_int64u_correct:
+  forall cmp rd r1 n k rs m,
+  r1 <> X31 ->
+  exists rs',
+     exec_straight ge fn (transl_condimm_int64u cmp rd r1 n k) rs m k rs' m
+  /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. unfold rs'; unfold compare_floats.
-  split. auto.
-  split. auto.
-  split. auto.
-  intros. Simplifs.
-Qed.
-
-Lemma compare_floats32_spec:
-  forall rs n1 n2,
-  let rs' := nextinstr (compare_floats32 (Vsingle n1) (Vsingle n2) rs) in
-     rs'#ZF = Val.of_bool (Float32.cmp Ceq n1 n2 || negb (Float32.ordered n1 n2))
-  /\ rs'#CF = Val.of_bool (negb (Float32.cmp Cge n1 n2))
-  /\ rs'#PF = Val.of_bool (negb (Float32.ordered n1 n2))
-  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+  intros. unfold transl_condimm_int64u.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+- subst n. exploit transl_cond_int64u_correct. intros (rs' & A & B & C).
+  exists rs'; split. eexact A. split; auto. rewrite B; auto.
+- assert (DFL:
+    exists rs',
+      exec_straight ge fn (loadimm64 X31 n (transl_cond_int64u cmp rd r1 X31 k)) rs m k rs' m
+   /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd
+   /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r).
+  { exploit loadimm64_correct; eauto. intros (rs1 & A1 & B1 & C1).
+    exploit transl_cond_int64u_correct; eauto. intros (rs2 & A2 & B2 & C2).
+    exists rs2; split. 
+    eapply exec_straight_trans. eexact A1. eexact A2. 
+    split. simpl in B2. rewrite B1, C1 in B2 by auto with asmgen. rewrite B2; auto.
+    intros; transitivity (rs1 r); auto. }
+  destruct cmp.
++ apply DFL.
++ apply DFL.
++ exploit (opimm64_correct Psltul Psltiul (fun v1 v2 => Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt v1 v2)) m); eauto.
+  intros (rs1 & A1 & B1 & C1).
+  exists rs1; split. eexact A1. split; auto. rewrite B1; auto.
++ apply DFL.
++ apply DFL.
++ apply DFL.
+   Qed.
+
+Lemma transl_cond_op_correct:
+  forall cond rd args k c rs m,
+  transl_cond_op cond rd args k = OK c ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ Val.lessdef (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)) rs'#rd
+  /\ forall r, r <> PC -> r <> rd -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. unfold rs'; unfold compare_floats32.
-  split. auto.
-  split. auto.
-  split. auto.
-  intros. Simplifs.
-Qed.
-
-Definition eval_extcond (xc: extcond) (rs: regset) : option bool :=
-  match xc with
-  | Cond_base c =>
-      eval_testcond c rs
-  | Cond_and c1 c2 =>
-      match eval_testcond c1 rs, eval_testcond c2 rs with
-      | Some b1, Some b2 => Some (b1 && b2)
-      | _, _ => None
-      end
-  | Cond_or c1 c2 =>
-      match eval_testcond c1 rs, eval_testcond c2 rs with
-      | Some b1, Some b2 => Some (b1 || b2)
-      | _, _ => None
-      end
-  end.
+  assert (MKTOT: forall ob, Val.of_optbool ob = Val.maketotal (option_map Val.of_bool ob)).
+  { destruct ob as [[]|]; reflexivity. }
+  intros until m; intros TR.
+  destruct cond; simpl in TR; ArgsInv.
++ (* cmp *)
+  exploit transl_cond_int32s_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
++ (* cmpu *)
+  exploit transl_cond_int32u_correct; eauto. intros (rs' & A & B & C).
+  exists rs'; repeat split; eauto. rewrite B; auto.
++ (* cmpimm *)
+  apply transl_condimm_int32s_correct; eauto with asmgen.
++ (* cmpuimm *)
+  apply transl_condimm_int32u_correct; eauto with asmgen.
++ (* cmpl *)
+  exploit transl_cond_int64s_correct; eauto. intros (rs' & A & B & C).
+  exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmplu *)
+  exploit transl_cond_int64u_correct; eauto. intros (rs' & A & B & C).
+   exists rs'; repeat split; eauto. rewrite B, MKTOT; eauto.
++ (* cmplimm *)
+  exploit transl_condimm_int64s_correct; eauto. instantiate (1 := x); eauto with asmgen. 
+  intros (rs' & A & B & C).
+  exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmpluimm *)
+  exploit transl_condimm_int64u_correct; eauto. instantiate (1 := x); eauto with asmgen. 
+  intros (rs' & A & B & C).
+   exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmpf *)
+  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
+  fold (Val.cmpf c0 (rs x) (rs x0)).
+  set (v := Val.cmpf c0 (rs x) (rs x0)).
+  destruct normal; inv EQ2.
+* econstructor; split.
+  apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
+  split; intros; Simpl.
+* econstructor; split.
+  eapply exec_straight_two.
+  eapply transl_cond_float_correct with (v := Val.notbool v); eauto.
+  simpl; reflexivity.
+  auto. auto.
+  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
++ (* notcmpf *)
+  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
+  rewrite Val.notbool_negb_3. fold (Val.cmpf c0 (rs x) (rs x0)).
+  set (v := Val.cmpf c0 (rs x) (rs x0)).
+  destruct normal; inv EQ2.
+* econstructor; split.
+  eapply exec_straight_two.
+  eapply transl_cond_float_correct with (v := v); eauto.
+  simpl; reflexivity.
+  auto. auto.
+  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
+* econstructor; split.
+  apply exec_straight_one. eapply transl_cond_float_correct with (v := Val.notbool v); eauto. auto.
+  split; intros; Simpl.
++ (* cmpfs *)
+  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
+  fold (Val.cmpfs c0 (rs x) (rs x0)).
+  set (v := Val.cmpfs c0 (rs x) (rs x0)).
+  destruct normal; inv EQ2.
+* econstructor; split.
+  apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
+  split; intros; Simpl.
+* econstructor; split.
+  eapply exec_straight_two.
+  eapply transl_cond_single_correct with (v := Val.notbool v); eauto.
+  simpl; reflexivity.
+  auto. auto.
+  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
++ (* notcmpfs *)
+  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
+  rewrite Val.notbool_negb_3. fold (Val.cmpfs c0 (rs x) (rs x0)).
+  set (v := Val.cmpfs c0 (rs x) (rs x0)).
+  destruct normal; inv EQ2.
+* econstructor; split.
+  eapply exec_straight_two.
+  eapply transl_cond_single_correct with (v := v); eauto.
+  simpl; reflexivity.
+  auto. auto.
+  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
+* econstructor; split.
+  apply exec_straight_one. eapply transl_cond_single_correct with (v := Val.notbool v); eauto. auto.
+  split; intros; Simpl.
+   Qed.
 
-Definition swap_floats {A: Type} (c: comparison) (n1 n2: A) : A :=
-  match c with
-  | Clt | Cle => n2
-  | Ceq | Cne | Cgt | Cge => n1
-  end.
+(** Some arithmetic properties. *)
 
-Lemma testcond_for_float_comparison_correct:
-  forall c n1 n2 rs,
-  eval_extcond (testcond_for_condition (Ccompf c))
-               (nextinstr (compare_floats (Vfloat (swap_floats c n1 n2))
-                                          (Vfloat (swap_floats c n2 n1)) rs)) =
-  Some(Float.cmp c n1 n2).
+Remark cast32unsigned_from_cast32signed:
+  forall i, Int64.repr (Int.unsigned i) = Int64.zero_ext 32 (Int64.repr (Int.signed i)).
 Proof.
-  intros.
-  generalize (compare_floats_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
-  set (rs' := nextinstr (compare_floats (Vfloat (swap_floats c n1 n2))
-                                        (Vfloat (swap_floats c n2 n1)) rs)).
-  intros [A [B [C D]]].
-  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
-  destruct c; simpl.
-- (* eq *)
-Transparent Float.cmp Float.ordered.
-  unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* ne *)
-  unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* lt *)
-  rewrite <- (Float.cmp_swap Clt n2 n1). simpl. unfold Float.ordered. 
-  destruct (Float.compare n2 n1) as [[]|]; reflexivity.
-- (* le *)
-  rewrite <- (Float.cmp_swap Cge n1 n2). simpl.
-  destruct (Float.compare n1 n2) as [[]|]; auto.
-- (* gt *)
- unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* ge *)
-  destruct (Float.cmp Cge n1 n2); auto.
-Opaque Float.cmp Float.ordered.
+  intros. apply Int64.same_bits_eq; intros. 
+  rewrite Int64.bits_zero_ext, !Int64.testbit_repr by tauto.
+  rewrite Int.bits_signed by tauto. fold (Int.testbit i i0).
+  change Int.zwordsize with 32.
+  destruct (zlt i0 32). auto. apply Int.bits_above. auto.
 Qed.
 
-Lemma testcond_for_neg_float_comparison_correct:
-  forall c n1 n2 rs,
-  eval_extcond (testcond_for_condition (Cnotcompf c))
-               (nextinstr (compare_floats (Vfloat (swap_floats c n1 n2))
-                                          (Vfloat (swap_floats c n2 n1)) rs)) =
-  Some(negb(Float.cmp c n1 n2)).
-Proof.
-  intros.
-  generalize (compare_floats_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
-  set (rs' := nextinstr (compare_floats (Vfloat (swap_floats c n1 n2))
-                                        (Vfloat (swap_floats c n2 n1)) rs)).
-  intros [A [B [C D]]].
-  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
-  destruct c; simpl.
-- (* eq *)
-Transparent Float.cmp Float.ordered.
-  unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* ne *)
-  unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* lt *)
-  rewrite <- (Float.cmp_swap Clt n2 n1). simpl. unfold Float.ordered. 
-  destruct (Float.compare n2 n1) as [[]|]; reflexivity.
-- (* le *)
-  rewrite <- (Float.cmp_swap Cge n1 n2). simpl.
-  destruct (Float.compare n1 n2) as [[]|]; auto.
-- (* gt *)
- unfold Float.ordered, Float.cmp; destruct (Float.compare n1 n2) as [[]|]; reflexivity.
-- (* ge *)
-  destruct (Float.cmp Cge n1 n2); auto.
-Opaque Float.cmp Float.ordered.
-Qed.
+(* Translation of arithmetic operations *)
 
-Lemma testcond_for_float32_comparison_correct:
-  forall c n1 n2 rs,
-  eval_extcond (testcond_for_condition (Ccompfs c))
-               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
-                                            (Vsingle (swap_floats c n2 n1)) rs)) =
-  Some(Float32.cmp c n1 n2).
-Proof.
-  intros.
-  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
-  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
-                                        (Vsingle (swap_floats c n2 n1)) rs)).
-  intros [A [B [C D]]].
-  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
-  destruct c; simpl.
-- (* eq *)
-Transparent Float32.cmp Float32.ordered.
-  unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* ne *)
-  unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* lt *)
-  rewrite <- (Float32.cmp_swap Clt n2 n1). simpl. unfold Float32.ordered. 
-  destruct (Float32.compare n2 n1) as [[]|]; reflexivity.
-- (* le *)
-  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
-  destruct (Float32.compare n1 n2) as [[]|]; auto.
-- (* gt *)
- unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* ge *)
-  destruct (Float32.cmp Cge n1 n2); auto.
-Opaque Float32.cmp Float32.ordered.
-Qed.
+Ltac SimplEval H :=
+  match type of H with
+  | Some _ = None _ => discriminate
+  | Some _ = Some _ => inv H
+  | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
 
-Lemma testcond_for_neg_float32_comparison_correct:
-  forall c n1 n2 rs,
-  eval_extcond (testcond_for_condition (Cnotcompfs c))
-               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
-                                            (Vsingle (swap_floats c n2 n1)) rs)) =
-  Some(negb(Float32.cmp c n1 n2)).
-Proof.
-  intros.
-  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
-  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
-                                          (Vsingle (swap_floats c n2 n1)) rs)).
-  intros [A [B [C D]]].
-  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
-  destruct c; simpl.
-- (* eq *)
-Transparent Float32.cmp Float32.ordered.
-  unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* ne *)
-  unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* lt *)
-  rewrite <- (Float32.cmp_swap Clt n2 n1). simpl. unfold Float32.ordered. 
-  destruct (Float32.compare n2 n1) as [[]|]; reflexivity.
-- (* le *)
-  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
-  destruct (Float32.compare n1 n2) as [[]|]; auto.
-- (* gt *)
- unfold Float32.ordered, Float32.cmp; destruct (Float32.compare n1 n2) as [[]|]; reflexivity.
-- (* ge *)
-  destruct (Float32.cmp Cge n1 n2); auto.
-Opaque Float32.cmp Float32.ordered.
-Qed.
+Ltac TranslOpSimpl :=
+  econstructor; split;
+  [ apply exec_straight_one; [simpl; eauto | reflexivity]
+  | split; [ apply Val.lessdef_same; Simpl; fail | intros; Simpl; fail ] ].
 
-Remark swap_floats_commut:
-  forall (A B: Type) c (f: A -> B) x y, swap_floats c (f x) (f y) = f (swap_floats c x y).
+Lemma transl_op_correct:
+  forall op args res k (rs: regset) m v c,
+  transl_op op args res k = OK c ->
+  eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ Val.lessdef v rs'#(preg_of res)
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
 Proof.
-  intros. destruct c; auto.
+  assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. }
+Opaque Int.eq.
+  intros until c; intros TR EV.
+  unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
+  (* move *)
+  { destruct (preg_of res), (preg_of m0); inv TR; TranslOpSimpl. }
+  (* intconst *)
+  { exploit loadimm32_correct; eauto. intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* longconst *)
+  { exploit loadimm64_correct; eauto. intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* floatconst *)
+  { destruct (Float.eq_dec n Float.zero).
+    + subst n. econstructor; split. 
+      apply exec_straight_one. simpl; eauto. auto.
+      split; intros; Simpl. 
+    + econstructor; split. 
+      apply exec_straight_one. simpl; eauto. auto.
+      split; intros; Simpl. }
+  (* singleconst *)
+  { destruct (Float32.eq_dec n Float32.zero).
+    + subst n. econstructor; split. 
+      apply exec_straight_one. simpl; eauto. auto.
+      split; intros; Simpl. 
+    + econstructor; split. 
+      apply exec_straight_one. simpl; eauto. auto.
+      split; intros; Simpl. }
+  (* addrsymbol *)
+  { destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
+    + set (rs1 := nextinstr (rs#x <- (Genv.symbol_address ge id Ptrofs.zero))).
+      exploit (addptrofs_correct x x ofs k rs1 m); eauto with asmgen. 
+      intros (rs2 & A & B & C).
+      exists rs2; split. 
+      apply exec_straight_step with rs1 m; auto.
+      split. replace ofs with (Ptrofs.add Ptrofs.zero ofs) by (apply Ptrofs.add_zero_l). 
+      rewrite Genv.shift_symbol_address.
+      replace (rs1 x) with (Genv.symbol_address ge id Ptrofs.zero) in B by (unfold rs1; Simpl).
+      exact B.
+      intros. rewrite C by eauto with asmgen. unfold rs1; Simpl.  
+    + TranslOpSimpl. }
+  (* stackoffset *)
+  { exploit addptrofs_correct. instantiate (1 := X2); auto with asmgen. intros (rs' & A & B & C).
+  exists rs'; split; eauto. auto with asmgen. }
+  (* cast8signed *)
+  { econstructor; split.
+  eapply exec_straight_two. simpl;eauto. simpl;eauto. auto. auto.
+  split; intros; Simpl.
+  assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
+  destruct (rs x0); auto; simpl. rewrite A; simpl. rewrite A. 
+  apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity. }
+  (* cast16signed *)
+  { econstructor; split.
+  eapply exec_straight_two. simpl;eauto. simpl;eauto. auto. auto.
+  split; intros; Simpl.
+  assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
+  destruct (rs x0); auto; simpl. rewrite A; simpl. rewrite A. 
+  apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity. }
+  (* addimm *)
+  { exploit (opimm32_correct Paddw Paddiw Val.add); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* andimm *)
+  { exploit (opimm32_correct Pandw Pandiw Val.and); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* orimm *)
+  exploit (opimm32_correct Porw Poriw Val.or); auto. instantiate (1 := x0); eauto with asmgen.
+  { intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* xorimm *)
+  { exploit (opimm32_correct Pxorw Pxoriw Val.xor); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* shrximm *)
+  { destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL; cbn.
+    {
+    exploit Val.shrx_shr_3; eauto. intros E; subst v.
+    destruct (Int.eq n Int.zero).
+  + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros; Simpl. 
+  + destruct (Int.eq n Int.one).
+    * econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      apply exec_straight_one. simpl; reflexivity. auto.
+      split; intros; Simpl.
+    * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
+      econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      apply exec_straight_one. simpl; reflexivity. auto. 
+      split; intros; Simpl.
+    }
+    destruct (Int.eq n Int.zero).
+  + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros; Simpl. 
+  + destruct (Int.eq n Int.one).
+    * econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      apply exec_straight_one. simpl; reflexivity. auto.
+      split; intros; Simpl.
+    * change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n).
+      econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      apply exec_straight_one. simpl; reflexivity. auto. 
+      split; intros; Simpl. }
+  (* longofintu *)
+  { econstructor; split.
+  eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto.
+  split; intros; Simpl. destruct (rs x0); auto. simpl. 
+  assert (A: Int.ltu (Int.repr 32) Int64.iwordsize' = true) by auto.
+  rewrite A; simpl. rewrite A. apply Val.lessdef_same. f_equal.
+  rewrite cast32unsigned_from_cast32signed. apply Int64.zero_ext_shru_shl. compute; auto. }
+  (* addlimm *)
+  { exploit (opimm64_correct Paddl Paddil Val.addl); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* andimm *)
+  { exploit (opimm64_correct Pandl Pandil Val.andl); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* orimm *)
+  { exploit (opimm64_correct Porl Poril Val.orl); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* xorimm *)
+  { exploit (opimm64_correct Pxorl Pxoril Val.xorl); auto. instantiate (1 := x0); eauto with asmgen.
+  intros (rs' & A & B & C).
+  exists rs'; split; eauto. rewrite B; auto with asmgen. }
+  (* shrxlimm *)
+  { destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+    {
+     exploit Val.shrxl_shrl_3; eauto. intros E; subst v.
+    destruct (Int.eq n Int.zero).
+  + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros; Simpl. 
+  + destruct (Int.eq n Int.one).
+    * econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      apply exec_straight_one. simpl; reflexivity. auto.
+      split; intros; Simpl.
+
+    * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
+      econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      apply exec_straight_one. simpl; reflexivity. auto. 
+      split; intros; Simpl.
+    }
+    destruct (Int.eq n Int.zero).
+  + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+    split; intros; Simpl. 
+  + destruct (Int.eq n Int.one).
+    * econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      eapply exec_straight_step. simpl; reflexivity. auto.
+      apply exec_straight_one. simpl; reflexivity. auto.
+      split; intros; Simpl.
+
+    * change (Int.repr 64) with Int64.iwordsize'. set (n' := Int.sub Int64.iwordsize' n).
+      econstructor; split.
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      eapply exec_straight_step. simpl; reflexivity. auto. 
+      apply exec_straight_one. simpl; reflexivity. auto. 
+      split; intros; Simpl. }
+  (* cond *)
+  { exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
+     exists rs'; split. eexact A. eauto with asmgen. }
+  (* Expanded instructions from RTL *)
+  9,10,19,20:
+    econstructor; split; try apply exec_straight_one; simpl; eauto;
+    split; intros; Simpl; try destruct (rs x0);
+    try rewrite Int64.add_commut;
+    try rewrite Int.add_commut; auto;
+    try rewrite Int64.and_commut;
+    try rewrite Int.and_commut; auto;
+    try rewrite Int64.or_commut;
+    try rewrite Int.or_commut; auto.
+  1-16:
+    destruct optR as [[]|]; try discriminate;
+    unfold apply_bin_oreg_ireg0, apply_bin_oreg in *; try inv EQ3; try inv EQ2;
+    try destruct (Int.eq _ _) eqn:A; try inv H0;
+    try destruct (Int64.eq _ _) eqn:A; try inv H1;
+    econstructor; split; try apply exec_straight_one; simpl; eauto;
+    split; intros; Simpl;
+    try apply Int.same_if_eq in A; subst;
+    try apply Int64.same_if_eq in A; subst;
+    unfold get_sp;
+    try destruct (rs x0); auto;
+    try destruct (rs x1); auto;
+    try destruct (rs X2); auto;
+    try destruct Archi.ptr64 eqn:B;
+    try fold (Val.add (Vint Int.zero) (get_sp (rs X2)));
+    try fold (Val.addl (Vlong Int64.zero) (get_sp (rs X2)));
+    try rewrite Val.add_commut; auto;
+    try rewrite Val.addl_commut; auto;
+    try rewrite Int.add_commut; auto;
+    try rewrite Int64.add_commut; auto;
+    replace (Ptrofs.of_int Int.zero) with (Ptrofs.zero) by auto;
+    replace (Ptrofs.of_int64 Int64.zero) with (Ptrofs.zero) by auto;
+    try rewrite Ptrofs.add_zero; auto.
+  (* mayundef *)
+  { destruct (ireg_eq x x0); inv EQ2;
+    econstructor; split;
+    try apply exec_straight_one; simpl; eauto;
+    split; unfold eval_may_undef;
+    destruct mu eqn:EQMU; simpl; intros; Simpl; auto.
+    all:
+      destruct (rs (preg_of m0)) eqn:EQM0; simpl; auto;
+      destruct (rs x0); simpl; auto; Simpl;
+      try destruct (Int.ltu _ _); simpl;
+      Simpl; auto. }
+  (* select *)
+  { econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+  split; intros; Simpl.
+  apply Val.lessdef_normalize. }
 Qed.
 
-Remark compare_floats_inv:
-  forall vx vy rs r,
-  r <> CR ZF -> r <> CR CF -> r <> CR PF -> r <> CR SF -> r <> CR OF ->
-  compare_floats vx vy rs r = rs r.
-Proof.
-  intros.
-  assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
-    simpl. Simplifs.
-  unfold compare_floats; destruct vx; destruct vy; auto. Simplifs.
-Qed.
+(** Memory accesses *)
 
-Remark compare_floats32_inv:
-  forall vx vy rs r,
-  r <> CR ZF -> r <> CR CF -> r <> CR PF -> r <> CR SF -> r <> CR OF ->
-  compare_floats32 vx vy rs r = rs r.
+Lemma indexed_memory_access_correct:
+  forall mk_instr base ofs k rs m,
+  base <> X31 ->
+  exists base' ofs' rs',
+     exec_straight_opt ge fn (indexed_memory_access mk_instr base ofs k) rs m
+                       (mk_instr base' ofs' :: k) rs' m
+  /\ Val.offset_ptr rs'#base' (eval_offset ge ofs') = Val.offset_ptr rs#base ofs
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros.
-  assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
-    simpl. Simplifs.
-  unfold compare_floats32; destruct vx; destruct vy; auto. Simplifs.
+  unfold indexed_memory_access; intros.
+  destruct Archi.ptr64 eqn:SF.
+- generalize (make_immed64_sound (Ptrofs.to_int64 ofs)); intros EQ.
+  destruct (make_immed64 (Ptrofs.to_int64 ofs)).
++ econstructor; econstructor; econstructor; split.
+  apply exec_straight_opt_refl. 
+  split; auto. simpl. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
++ econstructor; econstructor; econstructor; split. 
+  constructor. eapply exec_straight_two. 
+  simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
+  rewrite Ptrofs.add_assoc. f_equal. f_equal. 
+  rewrite <- (Ptrofs.of_int64_to_int64 SF ofs). rewrite EQ. 
+  symmetry; auto with ptrofs.
++ econstructor; econstructor; econstructor; split. 
+  constructor. eapply exec_straight_two. 
+  simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. unfold eval_offset. destruct (rs base); auto; simpl. rewrite SF. simpl.
+  rewrite Ptrofs.add_zero. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+- generalize (make_immed32_sound (Ptrofs.to_int ofs)); intros EQ.
+  destruct (make_immed32 (Ptrofs.to_int ofs)).
++ econstructor; econstructor; econstructor; split.
+  apply exec_straight_opt_refl. 
+  split; auto. simpl. subst imm. rewrite Ptrofs.of_int_to_int by auto. auto.
++ econstructor; econstructor; econstructor; split. 
+  constructor. eapply exec_straight_two. 
+  simpl; eauto. simpl; eauto. auto. auto. 
+  split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
+  rewrite Ptrofs.add_assoc. f_equal. f_equal. 
+  rewrite <- (Ptrofs.of_int_to_int SF ofs). rewrite EQ. 
+  symmetry; auto with ptrofs.
 Qed.
 
-Lemma transl_cond_correct:
-  forall cond args k c rs m,
-  transl_cond cond args k = OK c ->
+Lemma indexed_load_access_correct:
+  forall chunk (mk_instr: ireg -> offset -> instruction) rd m,
+  (forall base ofs rs,
+     exec_instr ge fn (mk_instr base ofs) rs m = exec_load ge chunk rs m rd base ofs) ->
+  forall (base: ireg) ofs k (rs: regset) v,
+  Mem.loadv chunk m (Val.offset_ptr rs#base ofs) = Some v ->
+  base <> X31 -> rd <> PC ->
   exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ match eval_condition cond (map rs (map preg_of args)) m with
-     | None => True
-     | Some b => eval_extcond (testcond_for_condition cond) rs' = Some b
-              /\ eval_extcond (testcond_for_condition (negate_condition cond)) rs' = Some (negb b)
-     end
-  /\ forall r, data_preg r = true -> rs'#r = rs r.
+     exec_straight ge fn (indexed_memory_access mk_instr base ofs k) rs m k rs' m
+  /\ rs'#rd = v
+  /\ forall r, r <> PC -> r <> X31 -> r <> rd -> rs'#r = rs#r.
 Proof.
-  unfold transl_cond; intros.
-  destruct cond; repeat (destruct args; try discriminate); monadInv H.
-- (* comp *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). rewrite (ireg_of_eq _ _ EQ1).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmp_bool c0 (rs x) (rs x0)) eqn:?; auto. split.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  rewrite Val.negate_cmp_bool, Heqo; auto.
-  intros. unfold compare_ints. Simplifs.
-- (* compu *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). rewrite (ireg_of_eq _ _ EQ1).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:?; auto. split.
-  eapply testcond_for_unsigned_comparison_32_correct; eauto.
-  eapply testcond_for_unsigned_comparison_32_correct; eauto.
-  rewrite Val.negate_cmpu_bool, Heqo; auto.
-  intros. unfold compare_ints. Simplifs.
-- (* compimm *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). destruct (Int.eq_dec n Int.zero).
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (rs x); simpl; auto. subst. rewrite Int.and_idem. split.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  rewrite Val.negate_cmp_bool; auto.
-  intros. unfold compare_ints. Simplifs.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (Val.cmp_bool c0 (rs x) (Vint n)) eqn:?; auto. split.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  eapply testcond_for_signed_comparison_32_correct; eauto.
-  rewrite Val.negate_cmp_bool, Heqo; auto.
-  intros. unfold compare_ints. Simplifs.
-- (* compuimm *)
-  simpl. rewrite (ireg_of_eq _ _ EQ).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (Vint n)) eqn:?; auto; split.
-  eapply testcond_for_unsigned_comparison_32_correct; eauto.
-  eapply testcond_for_unsigned_comparison_32_correct; eauto.
-  rewrite Val.negate_cmpu_bool, Heqo; auto.
-  intros. unfold compare_ints. Simplifs.
-- (* compl *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). rewrite (ireg_of_eq _ _ EQ1).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmpl_bool c0 (rs x) (rs x0)) eqn:?; auto. split.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  rewrite Val.negate_cmpl_bool, Heqo; auto.
-  intros. unfold compare_longs. Simplifs.
-- (* complu *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). rewrite (ireg_of_eq _ _ EQ1).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmplu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:?; auto. split.
-  eapply testcond_for_unsigned_comparison_64_correct; eauto.
-  eapply testcond_for_unsigned_comparison_64_correct; eauto.
-  rewrite Val.negate_cmplu_bool, Heqo; auto.
-  intros. unfold compare_longs. Simplifs.
-- (* compimm *)
-  simpl. rewrite (ireg_of_eq _ _ EQ). destruct (Int64.eq_dec n Int64.zero).
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (rs x); simpl; auto. subst. rewrite Int64.and_idem. split.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  rewrite Val.negate_cmpl_bool; auto.
-  intros. unfold compare_longs. Simplifs.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (Val.cmpl_bool c0 (rs x) (Vlong n)) eqn:?; auto. split.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  eapply testcond_for_signed_comparison_64_correct; eauto.
-  rewrite Val.negate_cmpl_bool, Heqo; auto.
-  intros. unfold compare_longs. Simplifs.
-- (* compuimm *)
-  simpl. rewrite (ireg_of_eq _ _ EQ).
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto.
-  split. destruct (Val.cmplu_bool (Mem.valid_pointer m) c0 (rs x) (Vlong n)) eqn:?; auto. split.
-  eapply testcond_for_unsigned_comparison_64_correct; eauto.
-  eapply testcond_for_unsigned_comparison_64_correct; eauto.
-  rewrite Val.negate_cmplu_bool, Heqo; auto.
-  intros. unfold compare_longs. Simplifs.
-- (* compf *)
-  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
-  exists (nextinstr (compare_floats (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
-  split. apply exec_straight_one.
-  destruct c0; simpl; auto.
-  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats_inv; auto with asmgen.
-  split. destruct (rs x); destruct (rs x0); simpl; auto.
-  repeat rewrite swap_floats_commut. split.
-  apply testcond_for_float_comparison_correct.
-  apply testcond_for_neg_float_comparison_correct.
-  intros. Simplifs. apply compare_floats_inv; auto with asmgen.
-- (* notcompf *)
-  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
-  exists (nextinstr (compare_floats (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
-  split. apply exec_straight_one.
-  destruct c0; simpl; auto.
-  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats_inv; auto with asmgen.
-  split. destruct (rs x); destruct (rs x0); simpl; auto.
-  repeat rewrite swap_floats_commut. split.
-  apply testcond_for_neg_float_comparison_correct.
-  rewrite negb_involutive. apply testcond_for_float_comparison_correct.
-  intros. Simplifs. apply compare_floats_inv; auto with asmgen.
-- (* compfs *)
-  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
-  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
-  split. apply exec_straight_one.
-  destruct c0; simpl; auto.
-  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen.
-  split. destruct (rs x); destruct (rs x0); simpl; auto.
-  repeat rewrite swap_floats_commut. split.
-  apply testcond_for_float32_comparison_correct.
-  apply testcond_for_neg_float32_comparison_correct.
-  intros. Simplifs. apply compare_floats32_inv; auto with asmgen.
-- (* notcompfs *)
-  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
-  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
-  split. apply exec_straight_one.
-  destruct c0; simpl; auto.
-  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen.
-  split. destruct (rs x); destruct (rs x0); simpl; auto.
-  repeat rewrite swap_floats_commut. split.
-  apply testcond_for_neg_float32_comparison_correct.
-  rewrite negb_involutive. apply testcond_for_float32_comparison_correct.
-  intros. Simplifs. apply compare_floats32_inv; auto with asmgen.
-- (* maskzero *)
-  simpl. rewrite (ireg_of_eq _ _ EQ).
-  econstructor. split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (rs x); simpl; auto.
-  generalize (compare_ints_spec rs (Vint (Int.and i n)) Vzero m).
-  intros [A B]. rewrite A. unfold Val.cmpu; simpl. destruct (Int.eq (Int.and i n) Int.zero); auto.
-  intros. unfold compare_ints. Simplifs.
-- (* masknotzero *)
-  simpl. rewrite (ireg_of_eq _ _ EQ).
-  econstructor. split. apply exec_straight_one. simpl; eauto. auto.
-  split. destruct (rs x); simpl; auto.
-  generalize (compare_ints_spec rs (Vint (Int.and i n)) Vzero m).
-  intros [A B]. rewrite A. unfold Val.cmpu; simpl. destruct (Int.eq (Int.and i n) Int.zero); auto.
-  intros. unfold compare_ints. Simplifs.
+  intros until m; intros EXEC; intros until v; intros LOAD NOT31 NOTPC.
+  exploit indexed_memory_access_correct; eauto.
+  intros (base' & ofs' & rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite EXEC.
+  unfold exec_load. rewrite B, LOAD. eauto. Simpl. 
+  split; intros; Simpl.
 Qed.
 
-Remark eval_testcond_nextinstr:
-  forall c rs, eval_testcond c (nextinstr rs) = eval_testcond c rs.
+Lemma indexed_store_access_correct:
+  forall chunk (mk_instr: ireg -> offset -> instruction) r1 m,
+  (forall base ofs rs,
+     exec_instr ge fn (mk_instr base ofs) rs m = exec_store ge chunk rs m r1 base ofs) ->
+  forall (base: ireg) ofs k (rs: regset) m',
+  Mem.storev chunk m (Val.offset_ptr rs#base ofs) (rs#r1) = Some m' ->
+  base <> X31 -> r1 <> X31 -> r1 <> PC ->
+  exists rs',
+     exec_straight ge fn (indexed_memory_access mk_instr base ofs k) rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. unfold eval_testcond. repeat rewrite nextinstr_inv; auto with asmgen.
+  intros until m; intros EXEC; intros until m'; intros STORE NOT31 NOT31' NOTPC.
+  exploit indexed_memory_access_correct; eauto.
+  intros (base' & ofs' & rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite EXEC.
+  unfold exec_store. rewrite B, C, STORE by auto. eauto. auto. 
+  intros; Simpl.
 Qed.
 
-Remark eval_testcond_set_ireg:
-  forall c rs r v, eval_testcond c (rs#(IR r) <- v) = eval_testcond c rs.
+Lemma loadind_correct:
+  forall (base: ireg) ofs ty dst k c (rs: regset) m v,
+  loadind base ofs ty dst k = OK c ->
+  Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+  base <> X31 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, r <> PC -> r <> X31 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  intros. unfold eval_testcond. repeat rewrite Pregmap.gso; auto with asmgen.
+  intros until v; intros TR LOAD NOT31. 
+  assert (A: exists mk_instr,
+                c = indexed_memory_access mk_instr base ofs k
+             /\ forall base' ofs' rs',
+                   exec_instr ge fn (mk_instr base' ofs') rs' m =
+                   exec_load ge (chunk_of_type ty) rs' m (preg_of dst) base' ofs').
+  { unfold loadind in TR. destruct ty, (preg_of dst); inv TR; econstructor; split; eauto. }
+  destruct A as (mk_instr & B & C). subst c. 
+  eapply indexed_load_access_correct; eauto with asmgen. 
 Qed.
 
-Lemma mk_setcc_base_correct:
-  forall cond rd k rs1 m,
-  exists rs2,
-  exec_straight ge fn (mk_setcc_base cond rd k) rs1 m k rs2 m
-  /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
-  /\ forall r, data_preg r = true -> r <> RAX /\ r <> RCX -> r <> rd -> rs2#r = rs1#r.
+Lemma storeind_correct:
+  forall (base: ireg) ofs ty src k c (rs: regset) m m',
+  storeind src base ofs ty k = OK c ->
+  Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+  base <> X31 ->
+  exists rs',
+     exec_straight ge fn c rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. destruct cond; simpl in *.
-- (* base *)
-  econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  split. Simplifs. intros; Simplifs.
-- (* or *)
-  assert (Val.of_optbool
-    match eval_testcond c1 rs1 with
-    | Some b1 =>
-        match eval_testcond c2 rs1 with
-        | Some b2 => Some (b1 || b2)
-        | None => None
-        end
-    | None => None
-    end =
-    Val.or (Val.of_optbool (eval_testcond c1 rs1)) (Val.of_optbool (eval_testcond c2 rs1))).
-  destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1).
-  destruct b; destruct b0; auto.
-  destruct b; auto.
-  auto.
-  rewrite H; clear H.
-  destruct (ireg_eq rd RAX).
-  subst rd. econstructor; split.
-  eapply exec_straight_three.
-  simpl; eauto.
-  simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto.
-  simpl; eauto.
-  auto. auto. auto.
-  intuition Simplifs.
-  econstructor; split.
-  eapply exec_straight_three.
-  simpl; eauto.
-  simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto.
-  simpl. eauto.
-  auto. auto. auto.
-  split. Simplifs. rewrite Val.or_commut. decEq; Simplifs.
-  intros. destruct H0; Simplifs.
-- (* and *)
-  assert (Val.of_optbool
-    match eval_testcond c1 rs1 with
-    | Some b1 =>
-        match eval_testcond c2 rs1 with
-        | Some b2 => Some (b1 && b2)
-        | None => None
-        end
-    | None => None
-    end =
-    Val.and (Val.of_optbool (eval_testcond c1 rs1)) (Val.of_optbool (eval_testcond c2 rs1))).
-  {
-    destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1).
-    destruct b; destruct b0; auto.
-    destruct b; auto.
-    auto.
-  }
-  rewrite H; clear H.
-  destruct (ireg_eq rd RAX).
-  subst rd. econstructor; split.
-  eapply exec_straight_three.
-  simpl; eauto.
-  simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto.
-  simpl; eauto.
-  auto. auto. auto.
-  intuition Simplifs.
-  econstructor; split.
-  eapply exec_straight_three.
-  simpl; eauto.
-  simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto.
-  simpl. eauto.
-  auto. auto. auto.
-  split. Simplifs. rewrite Val.and_commut. decEq; Simplifs.
-  intros. destruct H0; Simplifs.
+  intros until m'; intros TR STORE NOT31. 
+  assert (A: exists mk_instr,
+                c = indexed_memory_access mk_instr base ofs k
+             /\ forall base' ofs' rs',
+                   exec_instr ge fn (mk_instr base' ofs') rs' m =
+                   exec_store ge (chunk_of_type ty) rs' m (preg_of src) base' ofs').
+  { unfold storeind in TR. destruct ty, (preg_of src); inv TR; econstructor; split; eauto. }
+  destruct A as (mk_instr & B & C). subst c. 
+  eapply indexed_store_access_correct; eauto with asmgen. 
 Qed.
 
-Lemma mk_setcc_correct:
-  forall cond rd k rs1 m,
-  exists rs2,
-  exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m
-  /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
-  /\ forall r, data_preg r = true -> r <> RAX /\ r <> RCX -> r <> rd -> rs2#r = rs1#r.
+Lemma loadind_ptr_correct:
+  forall (base: ireg) ofs (dst: ireg) k (rs: regset) m v,
+  Mem.loadv Mptr m (Val.offset_ptr rs#base ofs) = Some v ->
+  base <> X31 ->
+  exists rs',
+     exec_straight ge fn (loadind_ptr base ofs dst k) rs m k rs' m
+  /\ rs'#dst = v
+  /\ forall r, r <> PC -> r <> X31 -> r <> dst -> rs'#r = rs#r.
 Proof.
-  intros. unfold mk_setcc. destruct (Archi.ptr64 || low_ireg rd).
-- apply mk_setcc_base_correct.
-- exploit mk_setcc_base_correct. intros [rs2 [A [B C]]].
-  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one.
-    simpl. eauto. simpl. auto.
-  intuition Simplifs.
+  intros. eapply indexed_load_access_correct; eauto with asmgen.
+  intros. unfold Mptr. destruct Archi.ptr64; auto. 
 Qed.
 
-Definition negate_extcond (xc: extcond) : extcond :=
-  match xc with
-  | Cond_base c => Cond_base (negate_testcond c)
-  | Cond_and c1 c2 => Cond_or (negate_testcond c1) (negate_testcond c2)
-  | Cond_or c1 c2 => Cond_and (negate_testcond c1) (negate_testcond c2)
-  end.
-
-Remark negate_testcond_for_condition:
-  forall cond,
-  negate_extcond (testcond_for_condition cond) = testcond_for_condition (negate_condition cond).
+Lemma storeind_ptr_correct:
+  forall (base: ireg) ofs (src: ireg) k (rs: regset) m m',
+  Mem.storev Mptr m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+  base <> X31 -> src <> X31 ->
+  exists rs',
+     exec_straight ge fn (storeind_ptr src base ofs k) rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. destruct cond; try destruct c; reflexivity.
+  intros. eapply indexed_store_access_correct with (r1 := src); eauto with asmgen.
+  intros. unfold Mptr. destruct Archi.ptr64; auto. 
 Qed.
 
-Lemma mk_sel_correct:
-  forall xc ty rd r2 k c ob rs m,
-  mk_sel xc rd r2 k = OK c ->
-  rd <> r2 ->
-  match ob with
-  | Some b => eval_extcond xc rs = Some b /\ eval_extcond (negate_extcond xc) rs = Some (negb b)
-  | None => True
-  end ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef (Val.select ob rs#rd rs#r2 ty) rs'#rd
-  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs r.
+Lemma transl_memory_access_correct:
+  forall mk_instr addr args k c (rs: regset) m v,
+  transl_memory_access mk_instr addr args k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+  exists base ofs rs',
+     exec_straight_opt ge fn c rs m (mk_instr base ofs :: k) rs' m
+  /\ Val.offset_ptr rs'#base (eval_offset ge ofs) = v
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  intros. destruct xc; monadInv H; simpl in H1.
-- econstructor; split.
-  eapply exec_straight_one. reflexivity. reflexivity.
-  set (v :=  match eval_testcond (negate_testcond c0) rs with
-     | Some true => rs r2
-     | Some false => rs rd
-     | None => Vundef
-     end).
-  split. rewrite nextinstr_inv, Pregmap.gss by eauto with asmgen.
-  destruct ob; simpl; auto. destruct H1 as [_ B]; unfold v; rewrite B. 
-  destruct b; apply Val.lessdef_normalize.
-  intros; Simplifs.
-- econstructor; split.
-  eapply exec_straight_two. 
-  reflexivity. reflexivity. reflexivity. reflexivity.
-  set (v1 :=  match eval_testcond (negate_testcond c1) rs with
-     | Some true => rs r2
-     | Some false => rs rd
-     | None => Vundef
-     end).
-  rewrite eval_testcond_nextinstr, eval_testcond_set_ireg. 
-  set (v2 :=  match eval_testcond (negate_testcond c2) rs with
-     | Some true => nextinstr rs # rd <- v1 r2
-     | Some false => nextinstr rs # rd <- v1 rd
-     | None => Vundef
-     end).
-  split. rewrite nextinstr_inv, Pregmap.gss by eauto with asmgen.
-  destruct ob; simpl; auto.
-  destruct H1 as [_ B]. 
-  destruct (eval_testcond (negate_testcond c1) rs) as [b1|]; try discriminate.
-  destruct (eval_testcond (negate_testcond c2) rs) as [b2|]; try discriminate.
-  inv B. apply negb_sym in H1. subst b.
-  replace v2 with (if b2 then rs#r2 else v1).
-  unfold v1. destruct b1, b2; apply Val.lessdef_normalize.
-  unfold v2. destruct b2; symmetry; Simplifs.
-  intros; Simplifs.
+  intros until v; intros TR EV. 
+  unfold transl_memory_access in TR; destruct addr; ArgsInv.
+- (* indexed *)
+  inv EV. apply indexed_memory_access_correct; eauto with asmgen.
+- (* global *)
+  simpl in EV. inv EV. inv TR.  econstructor; econstructor; econstructor; split.
+  constructor. apply exec_straight_one. simpl; eauto. auto. 
+  split; intros; Simpl. unfold eval_offset. apply low_high_half.
+- (* stack *)
+  inv TR. inv EV. apply indexed_memory_access_correct; eauto with asmgen.
 Qed.
 
-Lemma transl_sel_correct:
-  forall ty cond args rd r2 k c rs m,
-  transl_sel cond args rd r2 k = OK c ->
+Lemma transl_load_access_correct:
+  forall chunk (mk_instr: ireg -> offset -> instruction) addr args k c rd (rs: regset) m v v',
+  (forall base ofs rs,
+     exec_instr ge fn (mk_instr base ofs) rs m = exec_load ge chunk rs m rd base ofs) ->
+  transl_memory_access mk_instr addr args k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+  Mem.loadv chunk m v = Some v' ->
+  rd <> PC ->
   exists rs',
      exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef (Val.select (eval_condition cond (map rs (map preg_of args)) m) rs#rd rs#r2 ty) rs'#rd
-  /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs r.
+  /\ rs'#rd = v'
+  /\ forall r, r <> PC -> r <> X31 -> r <> rd -> rs'#r = rs#r.
 Proof.
-  unfold transl_sel; intros. destruct (ireg_eq rd r2); monadInv H. 
-- econstructor; split. 
-  apply exec_straight_one; reflexivity.
-  split. rewrite nextinstr_inv, Pregmap.gss by auto with asmgen. 
-  destruct eval_condition as [[]|]; simpl; auto using Val.lessdef_normalize.
-  intros; Simplifs.
-- destruct (transl_cond_correct _ _ _ _ rs m EQ0) as (rs1 & A & B & C).
-  rewrite <- negate_testcond_for_condition in B.
-  destruct (mk_sel_correct _ ty _ _ _ _ _ rs1 m EQ n B) as (rs2 & D & E & F).
-  exists rs2; split. 
-  eapply exec_straight_trans; eauto. 
-  split. rewrite ! C in E by auto with asmgen. exact E.
-  intros. rewrite F; auto.
+  intros until v'; intros INSTR TR EV LOAD NOTPC. 
+  exploit transl_memory_access_correct; eauto.
+  intros (base & ofs & rs' & A & B & C).
+  econstructor; split.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. 
+  rewrite INSTR. unfold exec_load. rewrite B, LOAD. reflexivity. Simpl. 
+  split; intros; Simpl.
 Qed.
 
-(** Translation of arithmetic operations. *)
-
-Ltac ArgsInv :=
-  match goal with
-  | [ H: Error _ = OK _ |- _ ] => discriminate
-  | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args; ArgsInv
-  | [ H: bind _ _ = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *; 
-                                    let X := fresh "EQ" in generalize (ireg_of_eq _ _ H); intros X;
-                                    clear H; ArgsInv
-  | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *; clear H; ArgsInv
-  | _ => idtac
-  end.
-
-Ltac TranslOp :=
-  econstructor; split;
-  [ apply exec_straight_one; [ simpl; eauto | auto ]
-  | split; [ Simplifs | intros; Simplifs ]].
-
-Lemma transl_op_correct:
-  forall op args res k c (rs: regset) m v,
-  transl_op op args res k = OK c ->
-  eval_operation ge (rs#RSP) op (map rs (map preg_of args)) m = Some v ->
+Lemma transl_store_access_correct:
+  forall chunk (mk_instr: ireg -> offset -> instruction) addr args k c r1 (rs: regset) m v m',
+  (forall base ofs rs,
+     exec_instr ge fn (mk_instr base ofs) rs m = exec_store ge chunk rs m r1 base ofs) ->
+  transl_memory_access mk_instr addr args k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+  Mem.storev chunk m v rs#r1 = Some m' ->
+  r1 <> PC -> r1 <> X31 ->
   exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+     exec_straight ge fn c rs m k rs' m'
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-Transparent destroyed_by_op.
-  intros until v; intros TR EV.
-  assert (SAME:
-  (exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ rs'#(preg_of res) = v
-  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) ->
-  exists rs',
-     exec_straight ge fn c rs m k rs' m
-  /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r).
-  {
-    intros [rs' [A [B C]]]. subst v. exists rs'; auto.
-  }
-
-  destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail).
-(* move *)
-  exploit mk_mov_correct; eauto. intros [rs2 [A [B C]]].
-  apply SAME. exists rs2. eauto.
-(* intconst *)
-  apply SAME. destruct (Int.eq_dec n Int.zero). subst n. TranslOp. TranslOp.
-(* longconst *)
-  apply SAME. destruct (Int64.eq_dec n Int64.zero). subst n. TranslOp. TranslOp.
-(* floatconst *)
-  apply SAME. destruct (Float.eq_dec n Float.zero). subst n. TranslOp. TranslOp.
-(* singleconst *)
-  apply SAME. destruct (Float32.eq_dec n Float32.zero). subst n. TranslOp. TranslOp.
-(* cast8signed *)
-  apply SAME. eapply mk_intconv_correct; eauto.
-(* cast8unsigned *)
-  apply SAME. eapply mk_intconv_correct; eauto.
-(* mulhs *)
-  apply SAME. TranslOp. destruct H1. Simplifs.
-(* mulhu *)
-  apply SAME. TranslOp. destruct H1. Simplifs.
-(* div *)
-  apply SAME.
-  exploit (divs_mods_exists (rs RAX) (rs RCX)). left; congruence.
-  intros (nh & nl & d & q & r & A & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vint nh))).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vint q = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* divu *)
-  apply SAME.
-  exploit (divu_modu_exists (rs RAX) (rs RCX)). left; congruence.
-  intros (n & d & q & r & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- Vzero)).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vint q = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* mod *)
-  apply SAME.
-  exploit (divs_mods_exists (rs RAX) (rs RCX)). right; congruence.
-  intros (nh & nl & d & q & r & A & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vint nh))).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vint r = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* modu *)
-  apply SAME.
-  exploit (divu_modu_exists (rs RAX) (rs RCX)). right; congruence.
-  intros (n & d & q & r & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- Vzero)).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vint r = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* shrximm *)
-  apply SAME. eapply mk_shrximm_correct; eauto.
-(* lea *)
-  exploit transl_addressing_mode_32_correct; eauto. intros EA.
-  TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss. rewrite normalize_addrmode_32_correct; auto.
-(* mullhs *)
-  apply SAME. TranslOp. destruct H1. Simplifs.
-(* mullhu *)
-  apply SAME. TranslOp. destruct H1. Simplifs.
-(* divl *)
-  apply SAME.
-  exploit (divls_modls_exists (rs RAX) (rs RCX)). left; congruence.
-  intros (nh & nl & d & q & r & A & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vlong nh))).
+  intros until m'; intros INSTR TR EV STORE NOTPC NOT31. 
+  exploit transl_memory_access_correct; eauto.
+  intros (base & ofs & rs' & A & B & C).
   econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vlong q = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* divlu *)
-  apply SAME.
-  exploit (divlu_modlu_exists (rs RAX) (rs RCX)). left; congruence.
-  intros (n & d & q & r & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vlong Int64.zero))).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vlong q = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* modl *)
-  apply SAME.
-  exploit (divls_modls_exists (rs RAX) (rs RCX)). right; congruence.
-  intros (nh & nl & d & q & r & A & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vlong nh))).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vlong r = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* modlu *)
-  apply SAME.
-  exploit (divlu_modlu_exists (rs RAX) (rs RCX)). right; congruence.
-  intros (n & d & q & r & B & C & D & E & F).
-  set (rs1 := nextinstr_nf (rs#RDX <- (Vlong Int64.zero))).
-  econstructor; split.
-  eapply exec_straight_two with (rs2 := rs1). reflexivity.
-  simpl. change (rs1 RAX) with (rs RAX); rewrite B.
-  change (rs1 RCX) with (rs RCX); rewrite C.
-  rewrite D. reflexivity. auto. auto.
-  split. change (Vlong r = v). congruence.
-  simpl; intros. destruct H2. unfold rs1; Simplifs.
-(* shrxlimm *)
-  apply SAME. eapply mk_shrxlimm_correct; eauto.
-(* leal *)
-  exploit transl_addressing_mode_64_correct; eauto. intros EA.
-  generalize (normalize_addrmode_64_correct x rs). destruct (normalize_addrmode_64 x) as [am' [delta|]]; intros EV.
-  econstructor; split. eapply exec_straight_two.
-  simpl. reflexivity.  simpl. reflexivity. auto. auto.
-  split. rewrite nextinstr_nf_inv by auto. rewrite Pregmap.gss. rewrite nextinstr_inv by auto with asmgen.
-  rewrite Pregmap.gss. rewrite <- EV; auto.
-  intros; Simplifs.
-  TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss; auto. rewrite <- EV; auto.
-(* intoffloat *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* floatofint *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* intofsingle *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* singleofint *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* longoffloat *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* floatoflong *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* longofsingle *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* singleoflong *)
-  apply SAME. TranslOp. rewrite H0; auto.
-(* condition *)
-  exploit transl_cond_correct; eauto. intros [rs2 [P [Q R]]].
-  exploit mk_setcc_correct; eauto. intros [rs3 [S [T U]]].
-  exists rs3.
-  split. eapply exec_straight_trans. eexact P. eexact S.
-  split. rewrite T. destruct (eval_condition cond rs ## (preg_of ## args) m).
-  destruct Q as [Q _]. rewrite Q. auto.
-  simpl; auto.
-  intros. transitivity (rs2 r); auto.
-(* selection *)
-  rewrite EQ1. exploit transl_sel_correct; eauto. intros (rs' & A & B & C).
-  exists rs'; split. eexact A. eauto.
+  eapply exec_straight_opt_right. eexact A. apply exec_straight_one. 
+  rewrite INSTR. unfold exec_store. rewrite B, C, STORE by auto. reflexivity. auto.
+  intros; Simpl.
 Qed.
 
-(** Translation of memory loads. *)
-
 Lemma transl_load_correct:
-  forall trap chunk addr args dest k c (rs: regset) m a v,
-  transl_load trap chunk addr args dest k = OK c ->
-  eval_addressing ge (rs#RSP) addr (map rs (map preg_of args)) = Some a ->
+  forall trap chunk addr args dst k c (rs: regset) m a v,
+  transl_load trap chunk addr args dst k = OK c ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
   Mem.loadv chunk m a = Some v ->
   exists rs',
      exec_straight ge fn c rs m k rs' m
-  /\ rs'#(preg_of dest) = v
-  /\ forall r, data_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
+  /\ rs'#(preg_of dst) = v
+  /\ forall r, r <> PC -> r <> X31 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  unfold transl_load; intros.
-  destruct trap; simpl; try discriminate.
-  monadInv H.
-  exploit transl_addressing_mode_correct; eauto. intro EA.
-  assert (EA': eval_addrmode ge x rs = a). destruct a; simpl in H1; try discriminate; inv EA; auto.
-  set (rs2 := nextinstr_nf (rs#(preg_of dest) <- v)).
-  assert (exec_load ge chunk m x rs (preg_of dest) = Next rs2 m).
-    unfold exec_load. rewrite EA'. rewrite H1. auto.
-  assert (rs2 PC = Val.offset_ptr (rs PC) Ptrofs.one).
-    transitivity (Val.offset_ptr ((rs#(preg_of dest) <- v) PC) Ptrofs.one).
-    auto. decEq. apply Pregmap.gso; auto with asmgen.
-  exists rs2. split.
-  destruct chunk; ArgsInv; apply exec_straight_one; auto.
-  split. unfold rs2. rewrite nextinstr_nf_inv1. Simplifs. apply preg_of_data.
-  intros. unfold rs2. Simplifs.
+  intros until v; intros TR EV LOAD.
+  destruct trap; try (simpl in *; discriminate).
+  assert (A: exists mk_instr,
+      transl_memory_access mk_instr addr args k = OK c
+   /\ forall base ofs rs,
+        exec_instr ge fn (mk_instr base ofs) rs m = exec_load ge chunk rs m (preg_of dst) base ofs).
+  { unfold transl_load in TR; destruct chunk; ArgsInv; econstructor; (split; [eassumption|auto]). }
+  destruct A as (mk_instr & B & C).
+  eapply transl_load_access_correct; eauto with asmgen.
 Qed.
 
 Lemma transl_store_correct:
   forall chunk addr args src k c (rs: regset) m a m',
   transl_store chunk addr args src k = OK c ->
-  eval_addressing ge (rs#RSP) addr (map rs (map preg_of args)) = Some a ->
-  Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
+  eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
+  Mem.storev chunk m a rs#(preg_of src) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r.
+  /\ forall r, r <> PC -> r <> X31 -> rs'#r = rs#r.
 Proof.
-  unfold transl_store; intros. monadInv H.
-  exploit transl_addressing_mode_correct; eauto. intro EA.
-  assert (EA': eval_addrmode ge x rs = a). destruct a; simpl in H1; try discriminate; inv EA; auto.
-  rewrite <- EA' in H1. destruct chunk; ArgsInv.
-(* int8signed *)
-  eapply mk_storebyte_correct; eauto.
-  destruct (eval_addrmode ge x rs); simpl; auto. rewrite <- Mem.store_signed_unsigned_8; auto.
-(* int8unsigned *)
-  eapply mk_storebyte_correct; eauto.
-(* int16signed *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store.
-  replace (Mem.storev Mint16unsigned m (eval_addrmode ge x rs) (rs x0))
-     with (Mem.storev Mint16signed m (eval_addrmode ge x rs) (rs x0)).
-  rewrite H1. eauto.
-  destruct (eval_addrmode ge x rs); simpl; auto. rewrite Mem.store_signed_unsigned_16; auto.
-  auto.
-  intros. Simplifs.
-(* int16unsigned *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
-(* int32 *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
-(* int64 *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
-(* float32 *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Transparent destroyed_by_store. simpl in H2. simpl. Simplifs.
-(* float64 *)
-  econstructor; split.
-  apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
+  intros until m'; intros TR EV STORE. 
+  assert (A: exists mk_instr chunk',
+      transl_memory_access mk_instr addr args k = OK c
+   /\ (forall base ofs rs,
+        exec_instr ge fn (mk_instr base ofs) rs m = exec_store ge chunk' rs m (preg_of src) base ofs)
+   /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src)).
+  { unfold transl_store in TR; destruct chunk; ArgsInv;
+    (econstructor; econstructor; split; [eassumption | split; [ intros; simpl; reflexivity | auto]]).
+    destruct a; auto. apply Mem.store_signed_unsigned_8. 
+    destruct a; auto. apply Mem.store_signed_unsigned_16. 
+  }
+  destruct A as (mk_instr & chunk' & B & C & D).
+  rewrite D in STORE; clear D. 
+  eapply transl_store_access_correct; eauto with asmgen.
+Qed.
+
+(** Function epilogues *)
+
+Lemma make_epilogue_correct:
+  forall ge0 f m stk soff cs m' ms rs k tm,
+  load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+  load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+  Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+  agree ms (Vptr stk soff) rs ->
+  Mem.extends m tm ->
+  match_stack ge0 cs ->
+  exists rs', exists tm',
+     exec_straight ge fn (make_epilogue f k) rs tm k rs' tm'
+  /\ agree ms (parent_sp cs) rs'
+  /\ Mem.extends m' tm'
+  /\ rs'#RA = parent_ra cs
+  /\ rs'#SP = parent_sp cs
+  /\ (forall r, r <> PC -> r <> RA -> r <> SP -> r <> X31 -> rs'#r = rs#r).
+Proof.
+  intros until tm; intros LP LRA FREE AG MEXT MCS.
+  exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+  exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+  exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+  exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+  exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+  unfold make_epilogue. 
+  rewrite chunk_of_Tptr in *. 
+  exploit (loadind_ptr_correct SP (fn_retaddr_ofs f) RA (Pfreeframe (fn_stacksize f) (fn_link_ofs f) :: k) rs tm).
+    rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. congruence.
+  intros (rs1 & A1 & B1 & C1).
+  econstructor; econstructor; split.
+  eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. 
+    rewrite (C1 X2) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. 
+    rewrite FREE'. eauto. auto. 
+  split. apply agree_nextinstr. apply agree_set_other; auto with asmgen. 
+    apply agree_change_sp with (Vptr stk soff).
+    apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
+    eapply parent_sp_def; eauto.
+  split. auto.
+  split. Simpl. 
+  split. Simpl. 
+  intros. Simpl. 
 Qed.
 
 End CONSTRUCTORS.
diff --git a/verilog/Builtins1.v b/verilog/Builtins1.v
index e5233ff5..6691d15c 100644
--- a/verilog/Builtins1.v
+++ b/verilog/Builtins1.v
@@ -19,37 +19,35 @@
 Require Import String Coqlib.
 Require Import AST Integers Floats Values.
 Require Import Builtins0.
+Require ExtValues.
 
 Inductive platform_builtin : Type :=
-  | BI_fmin
-  | BI_fmax.
+| BI_bits_of_float
+| BI_bits_of_double
+| BI_float_of_bits
+| BI_double_of_bits.
 
 Local Open Scope string_scope.
 
 Definition platform_builtin_table : list (string * platform_builtin) :=
-     ("__builtin_fmin", BI_fmin)
-  :: ("__builtin_fmax", BI_fmax)
+     ("__builtin_bits_of_float", BI_bits_of_float)
+  :: ("__builtin_bits_of_double", BI_bits_of_double)
+  :: ("__builtin_float_of_bits", BI_float_of_bits)
+  :: ("__builtin_double_of_bits", BI_double_of_bits)
   :: nil.
 
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
   match b with
-  | BI_fmin | BI_fmax =>
-      mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default
+  | BI_bits_of_float => mksignature (Tsingle :: nil) Tint cc_default
+  | BI_bits_of_double => mksignature (Tfloat :: nil) Tlong cc_default
+  | BI_float_of_bits => mksignature (Tint :: nil) Tsingle cc_default
+  | BI_double_of_bits => mksignature (Tlong :: nil) Tfloat cc_default
   end.
 
 Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
   match b with
-  | BI_fmin =>
-      mkbuiltin_n2t Tfloat Tfloat Tfloat
-        (fun f1 f2 => match Float.compare f1 f2 with
-                      | Some Eq | Some Lt => f1
-                      | Some Gt | None => f2
-                      end)
-  | BI_fmax =>
-      mkbuiltin_n2t Tfloat Tfloat Tfloat
-        (fun f1 f2 => match Float.compare f1 f2 with
-                      | Some Eq | Some Gt => f1
-                      | Some Lt | None => f2
-                      end)
+  | BI_bits_of_float => mkbuiltin_n1t Tsingle Tint Float32.to_bits
+  | BI_bits_of_double => mkbuiltin_n1t Tfloat Tlong Float.to_bits
+  | BI_float_of_bits => mkbuiltin_n1t Tint Tsingle Float32.of_bits
+  | BI_double_of_bits => mkbuiltin_n1t Tlong Tfloat Float.of_bits
   end.
-
diff --git a/verilog/CBuiltins.ml b/verilog/CBuiltins.ml
index a549cd25..ca0dbc6d 100644
--- a/verilog/CBuiltins.ml
+++ b/verilog/CBuiltins.ml
@@ -18,28 +18,15 @@
 
 open C
 
-let (va_list_type, va_list_scalar, size_va_list) =
-  if Archi.ptr64 then
-    if Archi.win64 then
-      (* Just a pointer *)
-      (TPtr(TVoid [], []), true, 8)
-    else
-      (* Actually a struct passed by reference; equivalent to 3 64-bit words *)
-      (TArray(TInt(IULong, []), Some 3L, []), false, 3*8)
-  else
-    (* Just a pointer *)
-    (TPtr(TVoid [], []), true, 4)
-
 let builtins = {
   builtin_typedefs = [
-    "__builtin_va_list", va_list_type;
+    "__builtin_va_list", TPtr(TVoid [], [])
   ];
   builtin_functions = [
+    (* Synchronization *)
+    "__builtin_fence",
+      (TVoid [], [], false);
     (* Float arithmetic *)
-    "__builtin_fmax",
-      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
-    "__builtin_fmin",
-      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
     "__builtin_fmadd",
       (TFloat(FDouble, []),
        [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
@@ -56,18 +43,25 @@ let builtins = {
       (TFloat(FDouble, []),
        [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
        false);
-    (* Memory accesses *)
-    "__builtin_read16_reversed",
-      (TInt(IUShort, []), [TPtr(TInt(IUShort, [AConst]), [])], false);
-    "__builtin_read32_reversed",
-      (TInt(IUInt, []), [TPtr(TInt(IUInt, [AConst]), [])], false);
-    "__builtin_write16_reversed",
-      (TVoid [], [TPtr(TInt(IUShort, []), []); TInt(IUShort, [])], false);
-    "__builtin_write32_reversed",
-      (TVoid [], [TPtr(TInt(IUInt, []), []); TInt(IUInt, [])], false);
+    "__builtin_fmax",
+      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_fmin",
+      (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_bits_of_double",
+      (TInt(IULong, []), [TFloat(FDouble, [])], false);
+    "__builtin_bits_of_float",
+      (TInt(IUInt, []), [TFloat(FFloat, [])], false);
+    "__builtin_double_of_bits",
+      (TFloat(FDouble, []), [TInt(IULong, [])], false);
+    "__builtin_float_of_bits",
+      (TFloat(FFloat, []), [TInt(IUInt, [])], false);
   ]
 }
 
+let va_list_type = TPtr(TVoid [], [])  (* to check! *)
+let size_va_list = if Archi.ptr64 then 8 else 4
+let va_list_scalar = true
+
 (* Expand memory references inside extended asm statements.  Used in C2C. *)
 
 let asm_mem_argument arg = Printf.sprintf "0(%s)" arg
diff --git a/verilog/CSE2deps.v b/verilog/CSE2deps.v
index 757966b8..c0deacf0 100644
--- a/verilog/CSE2deps.v
+++ b/verilog/CSE2deps.v
@@ -15,6 +15,7 @@ Require Import AST Integers Floats.
 Require Import Values Memory Globalenvs Events.
 Require Import Op.
 
+
 Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
      (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
   && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
@@ -26,12 +27,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
   | (Aindexed ofs), (Aindexed ofs'),
     (base :: nil), (base' :: nil) =>
     if peq base base'
-    then negb (can_swap_accesses_ofs ofs' chunk' ofs chunk)
-    else true
-  | (Aglobal symb ofs), (Aglobal symb' ofs'), nil, nil =>
-    if peq symb symb'
     then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
-    else false
+    else true
   | (Ainstack ofs), (Ainstack ofs'), _, _ =>
     negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
   | _, _, _, _ => true
diff --git a/verilog/CSE2depsproof.v b/verilog/CSE2depsproof.v
index e181b8f4..cf9e62b1 100644
--- a/verilog/CSE2depsproof.v
+++ b/verilog/CSE2depsproof.v
@@ -20,12 +20,20 @@ Require Import Registers Op RTL.
 Require Import CSE2 CSE2deps.
 Require Import Lia.
 
+Lemma ptrofs_size :
+  Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+  unfold Ptrofs.wordsize.
+  unfold Wordsize_Ptrofs.wordsize.
+  trivial.
+Qed.
+
 Lemma ptrofs_modulus :
-  Ptrofs.modulus = if Archi.ptr64
-                   then 18446744073709551616
-                   else 4294967296.
+  Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
 Proof.
-  reflexivity.
+  unfold Ptrofs.modulus.
+  rewrite ptrofs_size.
+  destruct Archi.ptr64; reflexivity.
 Qed.
 
 Section SOUNDNESS.
@@ -33,9 +41,10 @@ Section SOUNDNESS.
   Variable genv: Genv.t F V.
   Variable sp : val.
 
-Section STACK_WRITE.
+Section MEMORY_WRITE.
   Variable m m2 : mem.
   Variable chunkw chunkr : memory_chunk.
+  Variable base : val.
   
   Variable addrw addrr valw : val.
   Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
@@ -43,16 +52,17 @@ Section STACK_WRITE.
   Section INDEXED_AWAY.
   Variable ofsw ofsr : ptrofs.
   Hypothesis ADDRW : eval_addressing genv sp
-                       (Ainstack ofsw) nil = Some addrw.
+                       (Aindexed ofsw) (base :: nil) = Some addrw. 
   Hypothesis ADDRR : eval_addressing genv sp
-                       (Ainstack ofsr) nil = Some addrr.
+                       (Aindexed ofsr) (base :: nil) = Some addrr.
 
-  Lemma stack_load_store_away1 :
+  Lemma load_store_away1 :
     forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
     forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
     forall SWAPPABLE :    Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
-                       \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+                          \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
     Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
   Proof.
     intros.
     
@@ -60,10 +70,11 @@ Section STACK_WRITE.
     pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
     unfold largest_size_chunk in *.
 
+    rewrite ptrofs_modulus in *.
+    simpl in *.
     inv ADDRR.
     inv ADDRW.
-
-    destruct sp; try discriminate.
+    destruct base; try discriminate.
     eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
     exact STORE.
     right.
@@ -78,90 +89,8 @@ Section STACK_WRITE.
     all: intuition lia.
   Qed.
   
-  Theorem stack_load_store_away :
-    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
-    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
-  Proof.
-    intro SWAP.
-    unfold can_swap_accesses_ofs in SWAP.
-    repeat rewrite andb_true_iff in SWAP.
-    repeat rewrite orb_true_iff in SWAP.
-    repeat rewrite Z.leb_le in SWAP.
-    apply stack_load_store_away1.
-    all: tauto.
-  Qed.
-  End INDEXED_AWAY.
-End STACK_WRITE.
-
-Section MEMORY_WRITE.
-  Variable m m2 : mem.
-  Variable chunkw chunkr : memory_chunk.
-  Variable base : val.
-  
-  Variable addrw addrr valw : val.
-  Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
-
-  Section INDEXED_AWAY.
-  Variable ofsw ofsr : Z.
-  Hypothesis ADDRW : eval_addressing genv sp
-                       (Aindexed ofsw) (base :: nil) = Some addrw. 
-  Hypothesis ADDRR : eval_addressing genv sp
-                       (Aindexed ofsr) (base :: nil) = Some addrr.
-
-  Lemma load_store_away1 :
-    forall RANGEW : 0 <= ofsw <= Ptrofs.modulus - largest_size_chunk,
-    forall RANGER : 0 <= ofsr <= Ptrofs.modulus - largest_size_chunk,
-    forall SWAPPABLE :    ofsw + size_chunk chunkw <= ofsr
-                       \/ ofsr + size_chunk chunkr <= ofsw,
-    Mem.loadv chunkr m2 addrr =  Mem.loadv chunkr m addrr.
-  Proof.
-    intros.
-    
-    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
-    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
-    try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
-    try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
-    destruct addrr ; simpl in * ; trivial.
-    unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
-    destruct Archi.ptr64 eqn:PTR64; destruct base; simpl in *; try discriminate.
-    rewrite PTR64 in *.
-    
-    inv ADDRR.
-    inv ADDRW.
-    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
-    exact STORE.
-    right.
- 
-    all: try (destruct (Ptrofs.unsigned_add_either i0
-                (Ptrofs.of_int (Int.repr ofsr))) as [OFSR | OFSR];
-              rewrite OFSR).
-    all: try (destruct (Ptrofs.unsigned_add_either i0
-                (Ptrofs.of_int64 (Int64.repr ofsr))) as [OFSR | OFSR];
-              rewrite OFSR).
-    all: try (destruct (Ptrofs.unsigned_add_either i0
-                (Ptrofs.of_int (Int.repr ofsw))) as [OFSW | OFSW];
-              rewrite OFSW).
-    all: try (destruct (Ptrofs.unsigned_add_either i0
-                (Ptrofs.of_int64 (Int64.repr ofsw))) as [OFSW | OFSW];
-              rewrite OFSW).
-    
-    all: unfold Ptrofs.of_int64.
-    all: unfold Ptrofs.of_int.
-   
-
-    all: repeat rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; lia).
-    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia).
-    all: repeat rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia).
-    all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
-
-    all: try change Ptrofs.modulus with 4294967296.
-    all: try change Ptrofs.modulus with 18446744073709551616.
-   
-    all: intuition lia.
-  Qed.
-  
   Theorem load_store_away :
-    can_swap_accesses_ofs ofsr chunkr ofsw chunkw = true ->
+    can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
     Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
   Proof.
     intro SWAP.
@@ -173,117 +102,6 @@ Section MEMORY_WRITE.
     all: tauto.
   Qed.
   End INDEXED_AWAY.
-
-  Section DIFFERENT_GLOBALS.
-    Variable ofsw ofsr : ptrofs.
-    Hypothesis symw symr : ident.
-    Hypothesis ADDRW : eval_addressing genv sp
-                                       (Aglobal symw ofsw) nil = Some addrw. 
-    Hypothesis ADDRR : eval_addressing genv sp
-                                       (Aglobal symr ofsr) nil = Some addrr.
-
-    Lemma ptr64_cases:
-      forall {T : Type},
-      forall b : bool,
-      forall x y : T,
-        (if b then (if b then x else y) else (if b then y else x)) = x.
-    Proof.
-      destruct b; reflexivity.
-    Qed.
-
-    (* not needed
-    Lemma bool_cases_same:
-      forall {T : Type},
-      forall b : bool,
-      forall x : T,
-        (if b then x else x) = x.
-    Proof.
-      destruct b; reflexivity.
-    Qed.
-     *)
-
-    Lemma load_store_diff_globals :
-      symw <> symr ->
-      Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
-    Proof.
-      intros.
-      unfold eval_addressing in *.
-      simpl in *.
-      rewrite ptr64_cases in ADDRR.
-      rewrite ptr64_cases in ADDRW.
-      unfold Genv.symbol_address in *.
-      unfold Genv.find_symbol in *.
-      destruct ((Genv.genv_symb genv) ! symw) as [bw |] eqn:SYMW; inv ADDRW.
-      2: simpl in STORE; discriminate.
-      destruct ((Genv.genv_symb genv) ! symr) as [br |] eqn:SYMR; inv ADDRR.
-      2: reflexivity.
-      assert (br <> bw).
-      {
-        intro EQ.
-        subst br.
-        assert (symr = symw).
-        {
-          eapply Genv.genv_vars_inj; eauto.
-        }
-        congruence.
-      }
-      eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := bw).
-      - exact STORE.
-      - left. assumption.
-    Qed.
-  End DIFFERENT_GLOBALS.
-
-  Section SAME_GLOBALS.
-    Variable ofsw ofsr : ptrofs.
-    Hypothesis sym : ident.
-    Hypothesis ADDRW : eval_addressing genv sp
-                                       (Aglobal sym ofsw) nil = Some addrw. 
-    Hypothesis ADDRR : eval_addressing genv sp
-                                       (Aglobal sym ofsr) nil = Some addrr.
-    
-  Lemma load_store_glob_away1 :
-    forall RANGEW : 0 <= (Ptrofs.unsigned ofsw) <= Ptrofs.modulus - largest_size_chunk,
-    forall RANGER : 0 <= (Ptrofs.unsigned ofsr) <= Ptrofs.modulus - largest_size_chunk,
-    forall SWAPPABLE : (Ptrofs.unsigned ofsw) + size_chunk chunkw <= (Ptrofs.unsigned ofsr)
-                    \/ (Ptrofs.unsigned ofsr) + size_chunk chunkr <= (Ptrofs.unsigned ofsw),
-    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
-  Proof.
-    intros.
-    
-    pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
-    pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
-    unfold largest_size_chunk in size_chunkr_bounded, size_chunkw_bounded.
-    try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
-    try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
-    unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
-
-    rewrite ptr64_cases in ADDRR.
-    rewrite ptr64_cases in ADDRW.
-    unfold Genv.symbol_address in *.
-    inv ADDRR.
-    inv ADDRW.
-    destruct (Genv.find_symbol genv sym).
-    2: discriminate.
-    
-    eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
-    exact STORE.
-    right.
-    tauto.
-  Qed.
-
-  Lemma load_store_glob_away :
-    (can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw) = true ->
-    Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
-  Proof.
-    intro SWAP.
-    unfold can_swap_accesses_ofs in SWAP.
-    repeat rewrite andb_true_iff in SWAP.
-    repeat rewrite orb_true_iff in SWAP.
-    repeat rewrite Z.leb_le in SWAP.
-    apply load_store_glob_away1.
-    all: tauto.
-  Qed.
-  End SAME_GLOBALS.
 End MEMORY_WRITE.
 End SOUNDNESS.
 
@@ -311,21 +129,11 @@ Proof.
   simpl in OVERLAP.
   destruct (peq base base'). 2: discriminate.
   subst base'.
-  destruct (can_swap_accesses_ofs z0 chunk' z chunk) eqn:SWAP.
+  destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
   2: discriminate.
   simpl in *.
-  eapply load_store_away; eassumption.
-- (* Aglobal / Aglobal *)
-    destruct args. 2: discriminate.
-    destruct args'. 2: discriminate.
-    simpl in *.
-    destruct (peq i i1).
-    {
-      subst i1.
-      rewrite negb_false_iff in OVERLAP.
-      eapply load_store_glob_away; eassumption.
-    }
-    eapply load_store_diff_globals; eassumption.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+
 - (* Ainstack / Ainstack *)
   destruct args. 2: discriminate.
   destruct args'. 2: discriminate.
@@ -333,7 +141,7 @@ Proof.
   destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
   2: discriminate.
   cbn in *.
-  eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+  eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
 Qed.
 
 End SOUNDNESS.
diff --git a/verilog/CombineOp.v b/verilog/CombineOp.v
index 34c1c9cc..6236f38f 100644
--- a/verilog/CombineOp.v
+++ b/verilog/CombineOp.v
@@ -2,7 +2,7 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                 Xavier Leroy, INRIA Paris                           *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -14,10 +14,10 @@
   during the [CSE] phase. *)
 
 Require Import Coqlib.
-Require Import AST Integers.
-Require Import Op CSEdomain.
-
-Definition valnum := positive.
+Require Import AST.
+Require Import Integers.
+Require Import Op.
+Require Import CSEdomain.
 
 Section COMBINE.
 
@@ -26,14 +26,12 @@ Variable get: valnum -> option rhs.
 Function combine_compimm_ne_0 (x: valnum) : option(condition * list valnum) :=
   match get x with
   | Some(Op (Ocmp c) ys) => Some (c, ys)
-  | Some(Op (Oandimm n) ys) => Some (Cmasknotzero n, ys)
   | _ => None
   end.
 
 Function combine_compimm_eq_0 (x: valnum) : option(condition * list valnum) :=
   match get x with
   | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys)
-  | Some(Op (Oandimm n) ys) => Some (Cmaskzero n, ys)
   | _ => None
   end.
 
@@ -70,46 +68,31 @@ Function combine_cond (cond: condition) (args: list valnum) : option(condition *
   | _, _ => None
   end.
 
-Function combine_addr_32 (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
-  match addr, args with
-  | Aindexed n, x::nil =>
-      match get x with
-      | Some(Op (Olea a) ys) =>
-          match offset_addressing a n with Some a' => Some (a', ys) | None => None end
-      | _ => None
-      end
-  | _, _ => None
-  end.
-
-Function combine_addr_64 (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
+Function combine_addr (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
   match addr, args with
   | Aindexed n, x::nil =>
       match get x with
-      | Some(Op (Oleal a) ys) =>
-          match offset_addressing a n with Some a' => Some (a', ys) | None => None end
+      | Some(Op (Oaddimm m) ys) =>
+          if Archi.ptr64 then None else Some(Aindexed (Ptrofs.add (Ptrofs.of_int m) n), ys)
+      | Some(Op (Oaddlimm m) ys) =>
+          if Archi.ptr64 then Some(Aindexed (Ptrofs.add (Ptrofs.of_int64 m) n), ys) else None
       | _ => None
       end
   | _, _ => None
   end.
 
-Definition combine_addr (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
-  if Archi.ptr64 then combine_addr_64 addr args else combine_addr_32 addr args.
-
 Function combine_op (op: operation) (args: list valnum) : option(operation * list valnum) :=
   match op, args with
-  | Olea addr, _ =>
-      match combine_addr_32 addr args with
-      | Some(addr', args') => Some(Olea addr', args')
-      | None => None
-      end
-  | Oleal addr, _ =>
-      match combine_addr_64 addr args with
-      | Some(addr', args') => Some(Oleal addr', args')
-      | None => None
+  | Oaddimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oaddimm m) ys) => Some(Oaddimm (Int.add m n), ys)
+      | _ => None
       end
   | Oandimm n, x :: nil =>
       match get x with
-      | Some(Op (Oandimm m) ys) => Some(Oandimm (Int.and m n), ys)
+      | Some(Op (Oandimm m) ys) =>
+          Some(let p := Int.and m n in
+               if Int.eq p m then (Omove, x :: nil) else (Oandimm p, ys))
       | _ => None
       end
   | Oorimm n, x :: nil =>
@@ -122,9 +105,16 @@ Function combine_op (op: operation) (args: list valnum) : option(operation * lis
       | Some(Op (Oxorimm m) ys) => Some(Oxorimm (Int.xor m n), ys)
       | _ => None
       end
+  | Oaddlimm n, x :: nil =>
+      match get x with
+      | Some(Op (Oaddlimm m) ys) => Some(Oaddlimm (Int64.add m n), ys)
+      | _ => None
+      end
   | Oandlimm n, x :: nil =>
       match get x with
-      | Some(Op (Oandlimm m) ys) => Some(Oandlimm (Int64.and m n), ys)
+      | Some(Op (Oandlimm m) ys) =>
+          Some(let p := Int64.and m n in
+               if Int64.eq p m then (Omove, x :: nil) else (Oandlimm p, ys))
       | _ => None
       end
   | Oorlimm n, x :: nil =>
@@ -146,5 +136,3 @@ Function combine_op (op: operation) (args: list valnum) : option(operation * lis
   end.
 
 End COMBINE.
-
-
diff --git a/verilog/CombineOpproof.v b/verilog/CombineOpproof.v
index 69abbf61..a24de1e5 100644
--- a/verilog/CombineOpproof.v
+++ b/verilog/CombineOpproof.v
@@ -2,7 +2,7 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                 Xavier Leroy, INRIA Paris                           *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
@@ -15,8 +15,14 @@
 
 Require Import FunInd.
 Require Import Coqlib.
-Require Import Integers Values Memory.
-Require Import Op RTL CSEdomain.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Memory.
+Require Import Op.
+Require Import Registers.
+Require Import RTL.
+Require Import CSEdomain.
 Require Import CombineOp.
 
 Section COMBINE.
@@ -50,9 +56,6 @@ Proof.
   (* of cmp *)
   UseGetSound. rewrite <- H.
   destruct (eval_condition cond (map valu args) m); simpl; auto. destruct b; auto.
-  (* of and *)
-  UseGetSound. rewrite <- H.
-  destruct v; simpl; auto.
 Qed.
 
 Lemma combine_compimm_eq_0_sound:
@@ -66,8 +69,6 @@ Proof.
   UseGetSound. rewrite <- H.
   rewrite eval_negate_condition.
   destruct (eval_condition c (map valu args) m); simpl; auto. destruct b; auto.
-  (* of and *)
-  UseGetSound. rewrite <- H. destruct v; auto.
 Qed.
 
 Lemma combine_compimm_eq_1_sound:
@@ -102,43 +103,21 @@ Theorem combine_cond_sound:
 Proof.
   intros. functional inversion H; subst.
   (* compimm ne zero *)
-  simpl; eapply combine_compimm_ne_0_sound; eauto.
+  - simpl; eapply combine_compimm_ne_0_sound; eauto.
   (* compimm ne one *)
-  simpl; eapply combine_compimm_ne_1_sound; eauto.
+  - simpl; eapply combine_compimm_ne_1_sound; eauto.
   (* compimm eq zero *)
-  simpl; eapply combine_compimm_eq_0_sound; eauto.
+  - simpl; eapply combine_compimm_eq_0_sound; eauto.
   (* compimm eq one *)
-  simpl; eapply combine_compimm_eq_1_sound; eauto.
+  - simpl; eapply combine_compimm_eq_1_sound; eauto.
   (* compuimm ne zero *)
-  simpl; eapply combine_compimm_ne_0_sound; eauto.
+  - simpl; eapply combine_compimm_ne_0_sound; eauto.
   (* compuimm ne one *)
-  simpl; eapply combine_compimm_ne_1_sound; eauto.
+  - simpl; eapply combine_compimm_ne_1_sound; eauto.
   (* compuimm eq zero *)
-  simpl; eapply combine_compimm_eq_0_sound; eauto.
+  - simpl; eapply combine_compimm_eq_0_sound; eauto.
   (* compuimm eq one *)
-  simpl; eapply combine_compimm_eq_1_sound; eauto.
-Qed.
-
-Theorem combine_addr_32_sound:
-  forall addr args addr' args',
-  combine_addr_32 get addr args = Some(addr', args') ->
-  eval_addressing32 ge sp addr' (map valu args') = eval_addressing32 ge sp addr (map valu args).
-Proof.
-  intros. functional inversion H; subst.
-  (* indexed - lea *)
-  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7.
-  eapply eval_offset_addressing_total_32; eauto.
-Qed.
-
-Theorem combine_addr_64_sound:
-  forall addr args addr' args',
-  combine_addr_64 get addr args = Some(addr', args') ->
-  eval_addressing64 ge sp addr' (map valu args') = eval_addressing64 ge sp addr (map valu args).
-Proof.
-  intros. functional inversion H; subst.
-  (* indexed - leal *)
-  UseGetSound. simpl. unfold offset_addressing in H7. destruct (addressing_valid (offset_addressing_total a n)); inv H7.
-  eapply eval_offset_addressing_total_64; eauto.
+  - simpl; eapply combine_compimm_eq_1_sound; eauto.
 Qed.
 
 Theorem combine_addr_sound:
@@ -146,9 +125,13 @@ Theorem combine_addr_sound:
   combine_addr get addr args = Some(addr', args') ->
   eval_addressing ge sp addr' (map valu args') = eval_addressing ge sp addr (map valu args).
 Proof.
-  unfold combine_addr, eval_addressing; intros; destruct Archi.ptr64.
-  apply combine_addr_64_sound; auto.
-  apply combine_addr_32_sound; auto.
+  intros. functional inversion H; subst.
+- (* indexed - addimm *)
+  UseGetSound. simpl. rewrite <- H0. destruct v; auto. simpl; rewrite H7; simpl.
+  rewrite Ptrofs.add_assoc. auto.
+- (* indexed - addimml *)
+  UseGetSound. simpl. rewrite <- H0. destruct v; auto. simpl; rewrite H7; simpl.
+  rewrite Ptrofs.add_assoc. auto.
 Qed.
 
 Theorem combine_op_sound:
@@ -157,24 +140,34 @@ Theorem combine_op_sound:
   eval_operation ge sp op' (map valu args') m = eval_operation ge sp op (map valu args) m.
 Proof.
   intros. functional inversion H; subst.
-(* lea-lea *)
-  simpl. eapply combine_addr_32_sound; eauto.
-(* leal-leal *)
-  simpl. eapply combine_addr_64_sound; eauto.
-(* andimm - andimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.and_assoc. auto.
-(* orimm - orimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.or_assoc. auto.
-(* xorimm - xorimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.xor_assoc. auto.
-(* andimm - andimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.andl_assoc. auto.
-(* orimm - orimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.orl_assoc. auto.
-(* xorimm - xorimm *)
-  UseGetSound; simpl. rewrite <- H0. rewrite Val.xorl_assoc. auto.
-(* cmp *)
-  simpl. decEq; decEq. eapply combine_cond_sound; eauto.
+  (* addimm - addimm *)
+  - UseGetSound. FuncInv. simpl.
+    rewrite <- H0. rewrite Val.add_assoc. auto.
+  (* andimm - andimm *)
+  - UseGetSound; simpl.
+    generalize (Int.eq_spec p m0); rewrite H7; intros.
+    rewrite <- H0. rewrite Val.and_assoc. simpl. fold p. rewrite H1. auto.
+  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.and_assoc. auto.
+  (* orimm - orimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.or_assoc. auto.
+  (* xorimm - xorimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xor_assoc. auto.
+  (* addlimm - addlimm *)
+  - UseGetSound. FuncInv. simpl.
+    rewrite <- H0. rewrite Val.addl_assoc. auto.
+  (* andlimm - andlimm *)
+  - UseGetSound; simpl.
+    generalize (Int64.eq_spec p m0); rewrite H7; intros.
+    rewrite <- H0. rewrite Val.andl_assoc. simpl. fold p. rewrite H1. auto.
+  - UseGetSound; simpl.
+    rewrite <- H0. rewrite Val.andl_assoc. auto.
+  (* orlimm - orlimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.orl_assoc. auto.
+  (* xorlimm - xorlimm *)
+  - UseGetSound. simpl. rewrite <- H0. rewrite Val.xorl_assoc. auto.
+  (* cmp *)
+  - simpl. decEq; decEq. eapply combine_cond_sound; eauto.
 Qed.
 
 End COMBINE.
diff --git a/verilog/ConstpropOp.vp b/verilog/ConstpropOp.vp
index dd4b839a..aab2424d 100644
--- a/verilog/ConstpropOp.vp
+++ b/verilog/ConstpropOp.vp
@@ -13,28 +13,22 @@
 (** Strength reduction for operators and conditions.
     This is the machine-dependent part of [Constprop]. *)
 
+Require Archi.
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats.
 Require Import Op Registers.
-Require Import ValueDomain ValueAOp.
-Require SelectOp.
+Require Import ValueDomain.
 
 (** * Converting known values to constants *)
 
-Definition Olea_ptr (a: addressing) := if Archi.ptr64 then Oleal a else Olea a.
-
 Definition const_for_result (a: aval) : option operation :=
   match a with
   | I n => Some(Ointconst n)
   | L n => if Archi.ptr64 then Some(Olongconst n) else None
   | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
   | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
-  | Ptr(Gl id ofs) =>
-      if SelectOp.symbol_is_external id then
-        if Ptrofs.eq ofs Ptrofs.zero then Some (Oindirectsymbol id) else None
-      else
-        Some (Olea_ptr (Aglobal id ofs))
-  | Ptr(Stk ofs) => Some(Olea_ptr (Ainstack ofs))
+  | Ptr(Gl id ofs) => Some(Oaddrsymbol id ofs)
+  | Ptr(Stk ofs) => Some(Oaddrstack ofs)
   | _ => None
   end.
 
@@ -45,7 +39,7 @@ Definition const_for_result (a: aval) : option operation :=
   one if some of its arguments are statically known.  These are again
   large pattern-matchings expressed in indirect style. *)
 
-Nondetfunction cond_strength_reduction
+Nondetfunction cond_strength_reduction 
               (cond: condition) (args: list reg) (vl: list aval) :=
   match cond, args, vl with
   | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
@@ -64,7 +58,7 @@ Nondetfunction cond_strength_reduction
       (Ccompluimm (swap_comparison c) n1, r2 :: nil)
   | Ccomplu c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
       (Ccompluimm c n2, r1 :: nil)
-  | _, _, _ =>
+  | _, _, _ => 
       (cond, args)
   end.
 
@@ -97,131 +91,10 @@ Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) :=
       make_cmp_base c args vl
   end.
 
-Definition make_select (c: condition) (ty: typ)
-                       (r1 r2: reg) (args: list reg) (vl: list aval) :=
-  match resolve_branch (eval_static_condition c vl) with
-  | Some b => (Omove, (if b then r1 else r2) :: nil)
-  | None =>
-      let (c', args') := cond_strength_reduction c args vl in
-      (Osel c' ty, r1 :: r2 :: args')
-  end.
-
-(** For addressing modes, we need to distinguish
-- reductions that produce pointers (i.e. that produce [Aglobal], [Ainstack], [Abased] and [Abasedscaled] addressing modes), which are valid only if the pointer size is right;
-- other reductions (producing [Aindexed] or [Aindexed2] modes), which are valid independently of the pointer size.
-*)
-
-Nondetfunction addr_strength_reduction_32_generic
-                (addr: addressing) (args: list reg) (vl: list aval) :=
-  match addr, args, vl with
-  | Aindexed2 ofs, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
-      (Aindexed (Int.signed n1 + ofs), r2 :: nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
-      (Aindexed (Int.signed n2 + ofs), r1 :: nil)
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
-      (Aindexed (Int.signed n2 * sc + ofs), r1 :: nil)
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
-      (Ascaled sc (Int.signed n1 + ofs), r2 :: nil)
-  | _, _ =>
-      (addr, args)
-  end.
-
-Nondetfunction addr_strength_reduction_32
-                (addr: addressing) (args: list reg) (vl: list aval) :=
-
-  if Archi.ptr64 then addr_strength_reduction_32_generic addr args vl else
-
-  match addr, args, vl with
-
-  | Aindexed ofs, r1 :: nil, Ptr(Gl symb n) :: nil =>
-      (Aglobal symb (Ptrofs.add n (Ptrofs.repr ofs)), nil)
-  | Aindexed ofs, r1 :: nil, Ptr(Stk n) :: nil =>
-      (Ainstack (Ptrofs.add n (Ptrofs.repr ofs)), nil)
-
-  | Aindexed2 ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: I n2 :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int n2)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, I n1 :: Ptr(Gl symb n2) :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n2 (Ptrofs.of_int n1)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, Ptr(Stk n1) :: I n2 :: nil =>
-      (Ainstack (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int n2)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, I n1 :: Ptr(Stk n2) :: nil =>
-      (Ainstack (Ptrofs.add (Ptrofs.add (Ptrofs.of_int n1) n2) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: v2 :: nil =>
-      (Abased symb (Ptrofs.add n1 (Ptrofs.repr ofs)), r2 :: nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, v1 :: Ptr(Gl symb n2) :: nil =>
-      (Abased symb (Ptrofs.add n2 (Ptrofs.repr ofs)), r1 :: nil)
-
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: I n2 :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int (Int.mul n2 (Int.repr sc)))) (Ptrofs.repr ofs)), nil)
-
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: v2 :: nil =>
-      (Abasedscaled sc symb (Ptrofs.add n1 (Ptrofs.repr ofs)), r2 :: nil)
-
-  | Abased id ofs, r1 :: nil, I n1 :: nil =>
-      (Aglobal id (Ptrofs.add ofs (Ptrofs.of_int n1)), nil)
-
-  | Abasedscaled sc id ofs, r1 :: nil, I n1 :: nil =>
-      (Aglobal id (Ptrofs.add ofs (Ptrofs.of_int (Int.mul n1 (Int.repr sc)))), nil)
-
-  | _, _ =>
-      addr_strength_reduction_32_generic addr args vl
-  end.
-
-Nondetfunction addr_strength_reduction_64_generic
-                (addr: addressing) (args: list reg) (vl: list aval) :=
-  match addr, args, vl with
-  | Aindexed2 ofs, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
-      (Aindexed (Int64.signed n1 + ofs), r2 :: nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
-      (Aindexed (Int64.signed n2 + ofs), r1 :: nil)
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
-      (Aindexed (Int64.signed n2 * sc + ofs), r1 :: nil)
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
-      (Ascaled sc (Int64.signed n1 + ofs), r2 :: nil)
-  | _, _ =>
-      (addr, args)
-  end.
-
-Nondetfunction addr_strength_reduction_64
-                (addr: addressing) (args: list reg) (vl: list aval) :=
-
-  if negb Archi.ptr64 then addr_strength_reduction_64_generic addr args vl else
-
-  match addr, args, vl with
-
-  | Aindexed ofs, r1 :: nil, Ptr(Gl symb n) :: nil =>
-      (Aglobal symb (Ptrofs.add n (Ptrofs.repr ofs)), nil)
-  | Aindexed ofs, r1 :: nil, Ptr(Stk n) :: nil =>
-      (Ainstack (Ptrofs.add n (Ptrofs.repr ofs)), nil)
-
-  | Aindexed2 ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: L n2 :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int64 n2)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, L n1 :: Ptr(Gl symb n2) :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n2 (Ptrofs.of_int64 n1)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, Ptr(Stk n1) :: L n2 :: nil =>
-      (Ainstack (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int64 n2)) (Ptrofs.repr ofs)), nil)
-  | Aindexed2 ofs, r1 :: r2 :: nil, L n1 :: Ptr(Stk n2) :: nil =>
-      (Ainstack (Ptrofs.add (Ptrofs.add (Ptrofs.of_int64 n1) n2) (Ptrofs.repr ofs)), nil)
-
-  | Aindexed2scaled sc ofs, r1 :: r2 :: nil, Ptr(Gl symb n1) :: L n2 :: nil =>
-      (Aglobal symb (Ptrofs.add (Ptrofs.add n1 (Ptrofs.of_int64 (Int64.mul n2 (Int64.repr sc)))) (Ptrofs.repr ofs)), nil)
-
-  | _, _ =>
-      addr_strength_reduction_64_generic addr args vl
-  end.
-
-Definition addr_strength_reduction
-                (addr: addressing) (args: list reg) (vl: list aval) :=
-  let addr_args' := 
-    if Archi.ptr64
-    then addr_strength_reduction_64 addr args vl
-    else addr_strength_reduction_32 addr args vl in
-  if addressing_valid (fst addr_args') then addr_args' else (addr, args).
-
 Definition make_addimm (n: int) (r: reg) :=
   if Int.eq n Int.zero
   then (Omove, r :: nil)
-  else (Olea (Aindexed (Int.signed n)), r :: nil).
+  else (Oaddimm n, r :: nil).
 
 Definition make_shlimm (n: int) (r1 r2: reg) :=
   if Int.eq n Int.zero then (Omove, r1 :: nil)
@@ -238,15 +111,15 @@ Definition make_shruimm (n: int) (r1 r2: reg) :=
   else if Int.ltu n Int.iwordsize then (Oshruimm n, r1 :: nil)
   else (Oshru, r1 :: r2 :: nil).
 
-Definition make_mulimm (n: int) (r: reg) :=
+Definition make_mulimm (n: int) (r1 r2: reg) :=
   if Int.eq n Int.zero then
     (Ointconst Int.zero, nil)
   else if Int.eq n Int.one then
-    (Omove, r :: nil)
+    (Omove, r1 :: nil)
   else
     match Int.is_power2 n with
-    | Some l => (Oshlimm l, r :: nil)
-    | None => (Omulimm n, r :: nil)
+    | Some l => (Oshlimm l, r1 :: nil)
+    | None => (Omul, r1 :: r2 :: nil)
     end.
 
 Definition make_andimm (n: int) (r: reg) (a: aval) :=
@@ -264,7 +137,6 @@ Definition make_orimm (n: int) (r: reg) :=
 
 Definition make_xorimm (n: int) (r: reg) :=
   if Int.eq n Int.zero then (Omove, r :: nil)
-  else if Int.eq n Int.mone then (Onot, r :: nil)
   else (Oxorimm n, r :: nil).
 
 Definition make_divimm n (r1 r2: reg) :=
@@ -296,7 +168,7 @@ Definition make_moduimm n (r1 r2: reg) :=
 Definition make_addlimm (n: int64) (r: reg) :=
   if Int64.eq n Int64.zero
   then (Omove, r :: nil)
-  else (Oleal (Aindexed (Int64.signed n)), r :: nil).
+  else (Oaddlimm n, r :: nil).
 
 Definition make_shllimm (n: int) (r1 r2: reg) :=
   if Int.eq n Int.zero then (Omove, r1 :: nil)
@@ -313,15 +185,15 @@ Definition make_shrluimm (n: int) (r1 r2: reg) :=
   else if Int.ltu n Int64.iwordsize' then (Oshrluimm n, r1 :: nil)
   else (Oshrlu, r1 :: r2 :: nil).
 
-Definition make_mullimm (n: int64) (r: reg) :=
+Definition make_mullimm (n: int64) (r1 r2: reg) :=
   if Int64.eq n Int64.zero then
     (Olongconst Int64.zero, nil)
   else if Int64.eq n Int64.one then
-    (Omove, r :: nil)
+    (Omove, r1 :: nil)
   else
     match Int64.is_power2' n with
-    | Some l => (Oshllimm l, r :: nil)
-    | None => (Omullimm n, r :: nil)
+    | Some l => (Oshllimm l, r1 :: nil)
+    | None => (Omull, r1 :: r2 :: nil)
     end.
 
 Definition make_andlimm (n: int64) (r: reg) (a: aval) :=
@@ -336,7 +208,6 @@ Definition make_orlimm (n: int64) (r: reg) :=
 
 Definition make_xorlimm (n: int64) (r: reg) :=
   if Int64.eq n Int64.zero then (Omove, r :: nil)
-  else if Int64.eq n Int64.mone then (Onotl, r :: nil)
   else (Oxorlimm n, r :: nil).
 
 Definition make_divlimm n (r1 r2: reg) :=
@@ -371,23 +242,19 @@ Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
 
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn Ptop 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
-Definition make_cast8unsigned (r: reg) (a: aval) :=
-  if vincl a (Uns Ptop 8) then (Omove, r :: nil) else (Ocast8unsigned, r :: nil).
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn Ptop 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
-Definition make_cast16unsigned (r: reg) (a: aval) :=
-  if vincl a (Uns Ptop 16) then (Omove, r :: nil) else (Ocast16unsigned, r :: nil).
 
-Nondetfunction op_strength_reduction
+Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
   match op, args, vl with
   | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1
-  | Ocast8unsigned, r1 :: nil, v1 :: nil => make_cast8unsigned r1 v1
   | Ocast16signed, r1 :: nil, v1 :: nil => make_cast16signed r1 v1
-  | Ocast16unsigned, r1 :: nil, v1 :: nil => make_cast16unsigned r1 v1
+  | Oadd, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_addimm n1 r2
+  | Oadd, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm n2 r1
   | Osub, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg n2) r1
-  | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2
-  | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1
+  | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2 r1
+  | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1 r2
   | Odiv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divimm n2 r1 r2
   | Odivu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divuimm n2 r1 r2
   | Omodu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_moduimm n2 r1 r2
@@ -401,12 +268,11 @@ Nondetfunction op_strength_reduction
   | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
-  | Olea addr, args, vl =>
-      let (addr', args') := addr_strength_reduction_32 addr args vl in
-      (Olea addr', args')
+  | Oaddl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_addlimm n1 r2
+  | Oaddl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm n2 r1
   | Osubl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg n2) r1
-  | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2
-  | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1
+  | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2 r1
+  | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1 r2
   | Odivl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divlimm n2 r1 r2
   | Odivlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divluimm n2 r1 r2
   | Omodlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_modluimm n2 r1 r2
@@ -420,14 +286,24 @@ Nondetfunction op_strength_reduction
   | Oshll, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shllimm n2 r1 r2
   | Oshrl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrlimm n2 r1 r2
   | Oshrlu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrluimm n2 r1 r2
-  | Oleal addr, args, vl =>
-      let (addr', args') := addr_strength_reduction_64 addr args vl in
-      (Oleal addr', args')
   | Ocmp c, args, vl => make_cmp c args vl
-  | Osel c ty, r1 :: r2 :: args, v1 :: v2 :: vl => make_select c ty r1 r2 args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
   | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
+
+Nondetfunction addr_strength_reduction
+                (addr: addressing) (args: list reg) (vl: list aval) :=
+  match addr, args, vl with
+  | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil =>
+      if Archi.pic_code tt
+      then (addr, args)
+      else (Aglobal symb (Ptrofs.add n1 n), nil)
+  | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil =>
+      (Ainstack (Ptrofs.add n1 n), nil)
+  | _, _, _ =>
+      (addr, args)
+  end.
+
diff --git a/verilog/ConstpropOpproof.v b/verilog/ConstpropOpproof.v
index 09c6e91b..74dc4a05 100644
--- a/verilog/ConstpropOpproof.v
+++ b/verilog/ConstpropOpproof.v
@@ -14,7 +14,7 @@
 
 Require Import Coqlib Compopts.
 Require Import Integers Floats Values Memory Globalenvs Events.
-Require Import Op Registers RTL ValueDomain ValueAOp ValueAnalysis.
+Require Import Op Registers RTL ValueDomain.
 Require Import ConstpropOp.
 
 Section STRENGTH_REDUCTION.
@@ -81,13 +81,6 @@ Ltac SimplVM :=
   | _ => idtac
   end.
 
-Lemma eval_Olea_ptr:
-  forall a el,
-  eval_operation ge (Vptr sp Ptrofs.zero) (Olea_ptr a) el m = eval_addressing ge (Vptr sp Ptrofs.zero) a el.
-Proof.
-  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto.
-Qed.
-
 Lemma const_for_result_correct:
   forall a op v,
   const_for_result a = Some op ->
@@ -107,17 +100,9 @@ Proof.
 - (* pointer *)
   destruct p; try discriminate; SimplVM.
   + (* global *)
-    destruct (SelectOp.symbol_is_external id).
-  * revert H2; predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero; intros EQ; inv EQ.
-    exists (Genv.symbol_address ge id Ptrofs.zero); auto.
-  * inv H2. exists (Genv.symbol_address ge id ofs); split.
-    rewrite eval_Olea_ptr. apply eval_addressing_Aglobal.
-    auto.
+    inv H2. exists (Genv.symbol_address ge id ofs); auto.
   + (* stack *)
-    inv H2. exists (Vptr sp ofs); split.
-    rewrite eval_Olea_ptr. rewrite eval_addressing_Ainstack.
-    simpl. rewrite Ptrofs.add_zero_l; auto.
-    auto.
+    inv H2. exists (Vptr sp ofs); split; auto. simpl. rewrite Ptrofs.add_zero_l; auto.
 Qed.
 
 Lemma cond_strength_reduction_correct:
@@ -139,170 +124,6 @@ Proof.
 - auto.
 Qed.
 
-Lemma addr_strength_reduction_32_generic_correct:
-  forall addr args vl res,
-  vl = map (fun r => AE.get r ae) args ->
-  eval_addressing32 ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
-  let (addr', args') := addr_strength_reduction_32_generic addr args vl in
-  exists res', eval_addressing32 ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
-Proof.
-Local Opaque Val.add.
-  assert (A: forall x y, Int.repr (Int.signed x + y) = Int.add x (Int.repr y)).
-  { intros; apply Int.eqm_samerepr; auto using Int.eqm_signed_unsigned with ints. }
-  assert (B: forall x y z, Int.repr (Int.signed x * y + z) = Int.add (Int.mul x (Int.repr y)) (Int.repr z)).
-  { intros; apply Int.eqm_samerepr; apply Int.eqm_add; auto with ints.
-    unfold Int.mul; auto using Int.eqm_signed_unsigned with ints. }
-  intros until res; intros VL EA.
-  unfold addr_strength_reduction_32_generic; destruct (addr_strength_reduction_32_generic_match addr args vl);
-  simpl in *; InvApproxRegs; SimplVM; try (inv EA).
-- econstructor; split; eauto. rewrite A, Val.add_assoc, Val.add_permut. auto.
-- econstructor; split; eauto. rewrite A, Val.add_assoc. auto.
-- Local Transparent Val.add.
-  econstructor; split; eauto. simpl. rewrite B. auto.
-- econstructor; split; eauto. rewrite A, Val.add_permut. auto.
-- exists res; auto.
-Qed.
-
-Lemma addr_strength_reduction_32_correct:
-  forall addr args vl res,
-  vl = map (fun r => AE.get r ae) args ->
-  eval_addressing32 ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
-  let (addr', args') := addr_strength_reduction_32 addr args vl in
-  exists res', eval_addressing32 ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
-Proof.
-  intros until res; intros VL EA. unfold addr_strength_reduction_32.
-  destruct Archi.ptr64 eqn:SF. apply addr_strength_reduction_32_generic_correct; auto.
-  assert (A: forall n, Ptrofs.of_int (Int.repr n) = Ptrofs.repr n) by auto with ptrofs.
-  assert (B: forall symb ofs n,
-             Genv.symbol_address ge symb (Ptrofs.add ofs (Ptrofs.repr n)) =
-             Val.add (Genv.symbol_address ge symb ofs) (Vint (Int.repr n))).
-  { intros. rewrite <- A. apply Genv.shift_symbol_address_32; auto. }
-Local Opaque Val.add.
-  destruct (addr_strength_reduction_32_match addr args vl);
-  simpl in *; InvApproxRegs; SimplVM; FuncInv; subst; rewrite ?SF.
-- econstructor; split; eauto. rewrite B. apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l.
-Local Transparent Val.add.
-  inv H0; auto. rewrite H2. simpl; rewrite SF, A. auto.
-- econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B.
-  fold (Ptrofs.add n1 (Ptrofs.of_int n2)).
-  rewrite Genv.shift_symbol_address_32 by auto.
-  rewrite ! Val.add_assoc. apply Val.add_lessdef; auto.
-- econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B.
-  fold (Ptrofs.add n2 (Ptrofs.of_int n1)).
-  rewrite Genv.shift_symbol_address_32 by auto.
-  rewrite ! Val.add_assoc. rewrite Val.add_permut. apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l. rewrite Val.add_assoc.
-  eapply Val.lessdef_trans. apply Val.add_lessdef; eauto.
-  simpl. rewrite SF. rewrite Ptrofs.add_assoc. apply Val.lessdef_same; do 3 f_equal. auto with ptrofs.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l. rewrite Val.add_assoc, Val.add_permut.
-  eapply Val.lessdef_trans. apply Val.add_lessdef; eauto.
-  simpl. rewrite SF. rewrite <- (Ptrofs.add_commut n2). rewrite Ptrofs.add_assoc.
-  apply Val.lessdef_same; do 3 f_equal. auto with ptrofs.
-- econstructor; split; eauto. rewrite B. rewrite ! Val.add_assoc. rewrite (Val.add_commut (Vint (Int.repr ofs))).
-  apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite B. rewrite (Val.add_commut e#r1). rewrite ! Val.add_assoc.
-  rewrite (Val.add_commut (Vint (Int.repr ofs))). apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite B. rewrite Genv.shift_symbol_address_32 by auto.
-  rewrite ! Val.add_assoc. apply Val.add_lessdef; auto.
-- econstructor; split; eauto. rewrite B. rewrite ! Val.add_assoc.
-  rewrite (Val.add_commut (Vint (Int.repr ofs))). apply Val.add_lessdef; auto.
-- econstructor; split; eauto.
-  rewrite Genv.shift_symbol_address_32 by auto. auto.
-- econstructor; split; eauto.
-  rewrite Genv.shift_symbol_address_32 by auto. auto.
-- apply addr_strength_reduction_32_generic_correct; auto.
-Qed.
-
-Lemma addr_strength_reduction_64_generic_correct:
-  forall addr args vl res,
-  vl = map (fun r => AE.get r ae) args ->
-  eval_addressing64 ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
-  let (addr', args') := addr_strength_reduction_64_generic addr args vl in
-  exists res', eval_addressing64 ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
-Proof.
-Local Opaque Val.addl.
-  assert (A: forall x y, Int64.repr (Int64.signed x + y) = Int64.add x (Int64.repr y)).
-  { intros; apply Int64.eqm_samerepr; auto using Int64.eqm_signed_unsigned with ints. }
-  assert (B: forall x y z, Int64.repr (Int64.signed x * y + z) = Int64.add (Int64.mul x (Int64.repr y)) (Int64.repr z)).
-  { intros; apply Int64.eqm_samerepr; apply Int64.eqm_add; auto with ints.
-    unfold Int64.mul; auto using Int64.eqm_signed_unsigned with ints. }
-  intros until res; intros VL EA.
-  unfold addr_strength_reduction_64_generic; destruct (addr_strength_reduction_64_generic_match addr args vl);
-  simpl in *; InvApproxRegs; SimplVM; try (inv EA).
-- econstructor; split; eauto. rewrite A, Val.addl_assoc, Val.addl_permut. auto.
-- econstructor; split; eauto. rewrite A, Val.addl_assoc. auto.
-- Local Transparent Val.addl.
-  econstructor; split; eauto. simpl. rewrite B. auto.
-- econstructor; split; eauto. rewrite A, Val.addl_permut. auto.
-- exists res; auto.
-Qed.
-
-Lemma addr_strength_reduction_64_correct:
-  forall addr args vl res,
-  vl = map (fun r => AE.get r ae) args ->
-  eval_addressing64 ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
-  let (addr', args') := addr_strength_reduction_64 addr args vl in
-  exists res', eval_addressing64 ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
-Proof.
-  intros until res; intros VL EA. unfold addr_strength_reduction_64.
-  destruct (negb Archi.ptr64) eqn:SF. apply addr_strength_reduction_64_generic_correct; auto.
-  rewrite negb_false_iff in SF.
-  assert (A: forall n, Ptrofs.of_int64 (Int64.repr n) = Ptrofs.repr n) by auto with ptrofs.
-  assert (B: forall symb ofs n,
-             Genv.symbol_address ge symb (Ptrofs.add ofs (Ptrofs.repr n)) =
-             Val.addl (Genv.symbol_address ge symb ofs) (Vlong (Int64.repr n))).
-  { intros. rewrite <- A. apply Genv.shift_symbol_address_64; auto. }
-Local Opaque Val.addl.
-  destruct (addr_strength_reduction_64_match addr args vl);
-  simpl in *; InvApproxRegs; SimplVM; FuncInv; subst; rewrite ?SF.
-- econstructor; split; eauto. rewrite B. apply Val.addl_lessdef; auto.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l.
-Local Transparent Val.addl.
-  inv H0; auto. rewrite H2. simpl; rewrite SF, A. auto.
-- econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B.
-  fold (Ptrofs.add n1 (Ptrofs.of_int64 n2)).
-  rewrite Genv.shift_symbol_address_64 by auto.
-  rewrite ! Val.addl_assoc. apply Val.addl_lessdef; auto.
-- econstructor; split; eauto.
-  unfold Ptrofs.add at 2. rewrite B.
-  fold (Ptrofs.add n2 (Ptrofs.of_int64 n1)).
-  rewrite Genv.shift_symbol_address_64 by auto.
-  rewrite ! Val.addl_assoc. rewrite Val.addl_permut. apply Val.addl_lessdef; auto.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l. rewrite Val.addl_assoc.
-  eapply Val.lessdef_trans. apply Val.addl_lessdef; eauto.
-  simpl. rewrite SF. rewrite Ptrofs.add_assoc. apply Val.lessdef_same; do 3 f_equal. auto with ptrofs.
-- econstructor; split; eauto. rewrite Ptrofs.add_zero_l. rewrite Val.addl_assoc, Val.addl_permut.
-  eapply Val.lessdef_trans. apply Val.addl_lessdef; eauto.
-  simpl. rewrite SF. rewrite <- (Ptrofs.add_commut n2). rewrite Ptrofs.add_assoc.
-  apply Val.lessdef_same; do 3 f_equal. auto with ptrofs.
-- econstructor; split; eauto. rewrite B. rewrite Genv.shift_symbol_address_64 by auto.
-  rewrite ! Val.addl_assoc. apply Val.addl_lessdef; auto.
-- apply addr_strength_reduction_64_generic_correct; auto.
-Qed.
-
-Lemma addr_strength_reduction_correct:
-  forall addr args vl res,
-  vl = map (fun r => AE.get r ae) args ->
-  eval_addressing ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
-  let (addr', args') := addr_strength_reduction addr args vl in
-  exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
-Proof.
-  intros until res. unfold addr_strength_reduction.
-  set (aa := if Archi.ptr64
-         then addr_strength_reduction_64 addr args vl
-         else addr_strength_reduction_32 addr args vl).
-  intros.
-  destruct (addressing_valid (fst aa)).
-- unfold aa, eval_addressing in *. destruct Archi.ptr64.
-+ apply addr_strength_reduction_64_correct; auto.
-+ apply addr_strength_reduction_32_correct; auto.
-- exists res; auto.
-Qed.
-
 Lemma make_cmp_base_correct:
   forall c args vl,
   vl = map (fun r => AE.get r ae) args ->
@@ -371,28 +192,6 @@ Proof.
 - apply make_cmp_base_correct; auto.
 Qed.
 
-Lemma make_select_correct:
-  forall c ty r1 r2 args vl,
-  vl = map (fun r => AE.get r ae) args ->
-  let (op', args') := make_select c ty r1 r2 args vl in
-  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
-         /\ Val.lessdef (Val.select (eval_condition c e##args m) e#r1 e#r2 ty) v.
-Proof.
-  unfold make_select; intros.
-  destruct (resolve_branch (eval_static_condition c vl)) as [b|] eqn:RB.
-- exists (if b then e#r1 else e#r2); split.
-+ simpl. destruct b; auto.
-+ destruct (eval_condition c e##args m) as [b'|] eqn:EC; simpl; auto.
-  assert (b = b').
-  { eapply resolve_branch_sound; eauto. 
-    rewrite <- EC. apply eval_static_condition_sound with bc. 
-    subst vl. exact (aregs_sound _ _ _ args MATCH). }
-  subst b'. apply Val.lessdef_normalize.
-- generalize (cond_strength_reduction_correct c args vl H).
-  destruct (cond_strength_reduction c args vl) as [cond' args']; intros EQ.
-  econstructor; split. simpl; eauto. rewrite EQ; auto.
-Qed.
-
 Lemma make_addimm_correct:
   forall n r,
   let (op, args) := make_addimm n r in
@@ -402,7 +201,8 @@ Proof.
   predSpec Int.eq Int.eq_spec n Int.zero; intros.
   subst. exists (e#r); split; auto.
   destruct (e#r); simpl; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
-  exists (Val.add e#r (Vint n)); split; auto. simpl. rewrite Int.repr_signed; auto.
+  destruct Archi.ptr64; auto.
+  exists (Val.add e#r (Vint n)); split; auto.
 Qed.
 
 Lemma make_shlimm_correct:
@@ -448,8 +248,9 @@ Proof.
 Qed.
 
 Lemma make_mulimm_correct:
-  forall n r1,
-  let (op, args) := make_mulimm n r1 in
+  forall n r1 r2,
+  e#r2 = Vint n ->
+  let (op, args) := make_mulimm n r1 r2 in
   exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mul e#r1 (Vint n)) v.
 Proof.
   intros; unfold make_mulimm.
@@ -459,57 +260,89 @@ Proof.
   exists (e#r1); split; auto. destruct (e#r1); simpl; auto. rewrite Int.mul_one; auto.
   destruct (Int.is_power2 n) eqn:?; intros.
   rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. simpl; eauto. auto.
-  econstructor; split; eauto. auto.
+  econstructor; split; eauto. simpl. rewrite H; auto.
 Qed.
 
 Lemma make_divimm_correct:
   forall n r1 r2 v,
-  Val.divs e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divs e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  destruct (Int.ltu i (Int.repr 31)) eqn:?.
-  exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
-  exists v; auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divs_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+      destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+      rewrite Val.divs_pow2 with (y:=v) (n:=n).
+      cbn.
+      all: auto.
+    }
+    exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_divuimm_correct:
   forall n r1 r2 v,
-  Val.divu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_divuimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divuimm.
-  predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
-  destruct (e#r1) eqn:?;
-    try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
-    inv H; auto.
-  destruct (Int.is_power2 n) eqn:?.
-  econstructor; split. simpl; eauto.
-  rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
-  exists v; auto.
+  predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+  { destruct (e # r1) eqn:Er1.
+    all: try (cbn; exists (e # r1); split; auto; fail).
+    rewrite Val.divu_one.
+    cbn.
+    rewrite Er1.
+    exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+   cbn.
+   exists (Val.shru e # r1 (Vint i)); split; auto.
+   destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+   rewrite Val.divu_pow2 with (y:=v) (n:=n).
+   all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
 Qed.
 
 Lemma make_moduimm_correct:
   forall n r1 r2 v,
-  Val.modu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modu e#r1 e#r2) = v ->
   e#r2 = Vint n ->
   let (op, args) := make_moduimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_moduimm.
   destruct (Int.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq. eapply Val.modu_pow2; eauto. congruence.
-  exists v; auto.
+  { destruct (Val.modu e # r1 e # r2) eqn:MODU; cbn in H.
+    { subst v0.
+      exists v; split; auto.
+      cbn. decEq. eapply Val.modu_pow2; eauto. congruence.
+    }
+    subst v.
+    eexists; split; auto.
+    cbn. reflexivity.
+  }
+  exists v; split; auto.
+  cbn.
+  congruence.
 Qed.
 
 Lemma make_andimm_correct:
@@ -573,7 +406,8 @@ Proof.
   predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
   subst. exists (e#r); split; auto.
   destruct (e#r); simpl; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
-  exists (Val.addl e#r (Vlong n)); split; auto. simpl. rewrite Int64.repr_signed; auto.
+  destruct Archi.ptr64; auto.
+  exists (Val.addl e#r (Vlong n)); split; auto.
 Qed.
 
 Lemma make_shllimm_correct:
@@ -622,8 +456,9 @@ Proof.
 Qed.
 
 Lemma make_mullimm_correct:
-  forall n r1,
-  let (op, args) := make_mullimm n r1 in
+  forall n r1 r2,
+  e#r2 = Vlong n ->
+  let (op, args) := make_mullimm n r1 r2 in
   exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mull e#r1 (Vlong n)) v.
 Proof.
   intros; unfold make_mullimm.
@@ -636,53 +471,87 @@ Proof.
   destruct (e#r1); simpl; auto.
   erewrite Int64.is_power2'_range by eauto.
   erewrite Int64.mul_pow2' by eauto. auto.
-  econstructor; split; eauto. auto.
+  econstructor; split; eauto. simpl; rewrite H; auto.
 Qed.
 
 Lemma make_divlimm_correct:
   forall n r1 r2 v,
-  Val.divls e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divls e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divlimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divlimm.
-  destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
-  rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
-  exists v; auto.
-  exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+    destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+    {
+      cbn.
+      exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+      rewrite H0 in H.
+      destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+      {
+        subst v0.
+        rewrite Val.divls_pow2 with (y:=v) (n:=n).
+        cbn.
+        all: auto.
+      }
+      subst. auto.
+    }
+    cbn. subst. rewrite H0.
+    exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
 Qed.
 
 Lemma make_divluimm_correct:
   forall n r1 r2 v,
-  Val.divlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.divlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_divluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_divluimm.
   destruct (Int64.is_power2' n) eqn:?.
+  {
   econstructor; split. simpl; eauto.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl.
-  erewrite Int64.is_power2'_range by eauto.
-  erewrite Int64.divu_pow2' by eauto.  auto.
-  exists v; auto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.is_power2'_range by eauto.    
+    erewrite Int64.divu_pow2' by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_modluimm_correct:
   forall n r1 r2 v,
-  Val.modlu e#r1 e#r2 = Some v ->
+  Val.maketotal (Val.modlu e#r1 e#r2) = v ->
   e#r2 = Vlong n ->
   let (op, args) := make_modluimm n r1 r2 in
   exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
 Proof.
   intros; unfold make_modluimm.
   destruct (Int64.is_power2 n) eqn:?.
-  exists v; split; auto. simpl. decEq.
-  rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
-  simpl. erewrite Int64.modu_and by eauto. auto.
-  exists v; auto.
+  {
+  econstructor; split. simpl; eauto.
+  rewrite H0 in H. destruct (e#r1); inv H.
+  all: cbn; auto.
+  { 
+    destruct (Int64.eq n Int64.zero); cbn; auto.
+    erewrite Int64.modu_and by eauto.  auto.
+  }
+  }
+  exists v; split; auto.
+  cbn.
+  rewrite H.
+  reflexivity.
 Qed.
 
 Lemma make_andlimm_correct:
@@ -792,20 +661,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_cast8unsigned_correct:
-  forall r x,
-  vmatch bc e#r x ->
-  let (op, args) := make_cast8unsigned r x in
-  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.zero_ext 8 e#r) v.
-Proof.
-  intros; unfold make_cast8unsigned. destruct (vincl x (Uns Ptop 8)) eqn:INCL.
-  exists e#r; split; auto.
-  assert (V: vmatch bc e#r (Uns Ptop 8)).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite is_uns_zero_ext in H4 by auto. rewrite H4; auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma make_cast16signed_correct:
   forall r x,
   vmatch bc e#r x ->
@@ -820,20 +675,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_cast16unsigned_correct:
-  forall r x,
-  vmatch bc e#r x ->
-  let (op, args) := make_cast16unsigned r x in
-  exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.zero_ext 16 e#r) v.
-Proof.
-  intros; unfold make_cast16unsigned. destruct (vincl x (Uns Ptop 16)) eqn:INCL.
-  exists e#r; split; auto.
-  assert (V: vmatch bc e#r (Uns Ptop 16)).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite is_uns_zero_ext in H4 by auto. rewrite H4; auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -843,102 +684,134 @@ Lemma op_strength_reduction_correct:
 Proof.
   intros until v; unfold op_strength_reduction;
   case (op_strength_reduction_match op args vl); simpl; intros.
-(* cast8signed *)
+- (* cast8signed *)
   InvApproxRegs; SimplVM; inv H0. apply make_cast8signed_correct; auto.
-(* cast8unsigned *)
-  InvApproxRegs; SimplVM; inv H0. apply make_cast8unsigned_correct; auto.
-(* cast16signed *)
+- (* cast16signed *)
   InvApproxRegs; SimplVM; inv H0. apply make_cast16signed_correct; auto.
-(* cast16unsigned *)
-  InvApproxRegs; SimplVM; inv H0. apply make_cast16unsigned_correct; auto.
-(* sub *)
+- (* add 1 *)
+  rewrite Val.add_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* add 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* sub *)
   InvApproxRegs; SimplVM; inv H0. rewrite Val.sub_add_opp. apply make_addimm_correct; auto.
-(* mul *)
+- (* mul 1 *)
   rewrite Val.mul_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
+- (* mul 2*)
   InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
-(* divs *)
-  assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
-  apply make_divimm_correct; auto.
-(* divu *)
+- (* divs *)
+  assert (e#r2 = Vint n2). { clear H0. InvApproxRegs; SimplVM; auto. }
+                           apply make_divimm_correct; auto.
+  congruence.
+- (* divu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divuimm_correct; auto.
-(* modu *)
+  congruence.
+- (* modu *)
   assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_moduimm_correct; auto.
-(* and *)
+  congruence.
+- (* and 1 *)
   rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* and 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* andimm *)
   inv H; inv H0. apply make_andimm_correct; auto.
-(* or *)
+- (* or 1 *)
   rewrite Val.or_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* or 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
-(* xor *)
+- (* xor 1 *)
   rewrite Val.xor_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* xor 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
-(* shl *)
+- (* shl *)
   InvApproxRegs; SimplVM; inv H0. apply make_shlimm_correct; auto.
-(* shr *)
+- (* shr *)
   InvApproxRegs; SimplVM; inv H0. apply make_shrimm_correct; auto.
-(* shru *)
+- (* shru *)
   InvApproxRegs; SimplVM; inv H0. apply make_shruimm_correct; auto.
-(* lea *)
-  exploit addr_strength_reduction_32_correct; eauto.
-  destruct (addr_strength_reduction_32 addr args0 vl0) as [addr' args'].
-  auto.
-(* subl *)
+- (* addl 1 *)
+  rewrite Val.addl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* addl 2 *)
+  InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* subl *)
   InvApproxRegs; SimplVM; inv H0.
   replace (Val.subl e#r1 (Vlong n2)) with (Val.addl e#r1 (Vlong (Int64.neg n2))).
   apply make_addlimm_correct; auto.
-  unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto.
+  unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto. 
   rewrite Int64.sub_add_opp; auto.
   rewrite Ptrofs.sub_add_opp. do 2 f_equal. auto with ptrofs.
   rewrite Int64.sub_add_opp; auto.
-(* mull *)
+- (* mull 1 *)
   rewrite Val.mull_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
+- (* mull 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
-(* divl *)
+- (* divl *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divlimm_correct; auto.
-(* divlu *)
+  congruence.
+- (* divlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_divluimm_correct; auto.
-(* modlu *)
+  congruence.
+- (* modlu *)
   assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
   apply make_modluimm_correct; auto.
-(* andl *)
+  congruence.
+- (* andl 1 *)
   rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andl 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andlimm *)
   inv H; inv H0. apply make_andlimm_correct; auto.
-(* orl *)
+- (* orl 1 *)
   rewrite Val.orl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* orl 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
-(* xorl *)
+- (* xorl 1 *)
   rewrite Val.xorl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* xorl 2 *)
   InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
-(* shll *)
+- (* shll *)
   InvApproxRegs; SimplVM; inv H0. apply make_shllimm_correct; auto.
-(* shrl *)
+- (* shrl *)
   InvApproxRegs; SimplVM; inv H0. apply make_shrlimm_correct; auto.
-(* shrlu *)
+- (* shrlu *)
   InvApproxRegs; SimplVM; inv H0. apply make_shrluimm_correct; auto.
-(* leal *)
-  exploit addr_strength_reduction_64_correct; eauto.
-  destruct (addr_strength_reduction_64 addr args0 vl0) as [addr' args'].
-  auto.
-(* cond *)
+- (* cond *)
   inv H0. apply make_cmp_correct; auto.
-(* select *)
-  inv H0. apply make_select_correct; congruence.
-(* mulf *)
+- (* mulf 1 *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
+- (* mulf 2 *)
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
-(* mulfs *)
+- (* mulfs 1 *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+- (* mulfs 2 *)
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2).
   rewrite <- H2. apply make_mulfsimm_correct_2; auto.
-(* default *)
+- (* default *)
   exists v; auto.
 Qed.
 
+Lemma addr_strength_reduction_correct:
+  forall addr args vl res,
+  vl = map (fun r => AE.get r ae) args ->
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
+  let (addr', args') := addr_strength_reduction addr args vl in
+  exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
+Proof.
+  intros until res. unfold addr_strength_reduction.
+  destruct (addr_strength_reduction_match addr args vl); simpl;
+  intros VL EA; InvApproxRegs; SimplVM; try (inv EA).
+- destruct (Archi.pic_code tt).
++ exists (Val.offset_ptr e#r1 n); auto.
++ simpl. rewrite Genv.shift_symbol_address. econstructor; split; eauto. 
+  inv H0; simpl; auto.
+- rewrite Ptrofs.add_zero_l. econstructor; split; eauto.
+  change (Vptr sp (Ptrofs.add n1 n)) with (Val.offset_ptr (Vptr sp n1) n). 
+  inv H0; simpl; auto.
+- exists res; auto.
+Qed.
+
 End STRENGTH_REDUCTION.
diff --git a/verilog/Conventions1.v b/verilog/Conventions1.v
index b6fb2620..eeaae3c4 100644
--- a/verilog/Conventions1.v
+++ b/verilog/Conventions1.v
@@ -2,12 +2,17 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                Xavier Leroy, INRIA Paris                            *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Function calling conventions and other conventions regarding the use of
@@ -15,7 +20,6 @@
 
 Require Import Coqlib Decidableplus.
 Require Import AST Machregs Locations.
-Require Import Errors.
 
 (** * Classification of machine registers *)
 
@@ -24,65 +28,67 @@ Require Import Errors.
 - Callee-save registers, whose value is preserved across a function call.
 - Caller-save registers that can be modified during a function call.
 
-  We follow the x86-32 and x86-64 application binary interfaces (ABI)
-  in our choice of callee- and caller-save registers.
+  We follow the RISC-V application binary interface (ABI) in our choice
+  of callee- and caller-save registers.
 *)
-  
+
 Definition is_callee_save (r: mreg) : bool :=
   match r with
-  | AX | CX | DX => false
-  | BX | BP => true
-  | SI | DI => negb Archi.ptr64 || Archi.win64 (**r callee-save in ELF 64 bits *)
-  | R8 | R9 | R10 | R11 => false
-  | R12 | R13 | R14 | R15 => true
-  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 => false
-  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => Archi.win64
-  | FP0 => false
+  | R5 | R6 | R7 => false
+  | R8 | R9 => true
+  | R10 | R11 | R12 | R13 | R14 | R15 | R16 | R17 => false
+  | R18 | R19 | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27 => true
+  | R28 | R29 | R30 => false
+  | F0 | F1 | F2 | F3 | F4 | F5 | F6 | F7 => false
+  | F8 | F9 => true
+  | F10 | F11 | F12 | F13 | F14 | F15 | F16 | F17 => false
+  | F18 | F19 | F20 | F21 | F22 | F23 | F24 | F25 | F26 | F27 => true
+  | F28 | F29 | F30 | F31 => false
   end.
 
 Definition int_caller_save_regs :=
-  if Archi.ptr64
-  then if Archi.win64
-       then AX :: CX :: DX :: R8 :: R9 :: R10 :: R11 :: nil
-       else AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil
-  else AX :: CX :: DX :: nil.
+  R5  :: R6  :: R7 ::
+  R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 ::
+  R28 :: R29 :: R30 ::
+  nil.
 
 Definition float_caller_save_regs :=
-  if Archi.ptr64
-  then if Archi.win64
-       then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil
-       else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 ::
-            X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
-  else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
+  F0  :: F1  :: F2  :: F3  :: F4  :: F5  :: F6  :: F7  ::
+  F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 ::
+  F28 :: F29 :: F30 :: F31 ::
+  nil.
 
 Definition int_callee_save_regs :=
-  if Archi.ptr64
-  then if Archi.win64
-       then BX :: SI :: DI :: BP :: R12 :: R13 :: R14 :: R15 :: nil
-       else BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil
-  else BX :: SI :: DI :: BP :: nil.
+  R8  :: R9  ::
+  R18 :: R19 :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 ::
+  nil.
 
 Definition float_callee_save_regs :=
-  if Archi.ptr64 && Archi.win64
-  then X6 :: X7 :: X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
-  else nil.
+  F8  :: F9  ::
+  F18 :: F19 :: F20 :: F21 :: F22 :: F23 :: F24 :: F25 :: F26 :: F27 ::
+  nil.
 
 Definition destroyed_at_call :=
   List.filter (fun r => negb (is_callee_save r)) all_mregs.
 
+Definition dummy_int_reg   := R6.    (**r Used in [Coloring]. *)
+Definition dummy_float_reg := F0 .   (**r Used in [Coloring]. *)
+
+Definition callee_save_type := mreg_type.
+  
 Definition is_float_reg (r: mreg) :=
   match r with
-  | AX | BX | CX | DX | SI | DI | BP
-  | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 => false
-  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7
-  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 | FP0 => true
+        | R5  | R6  | R7  | R8  | R9  | R10 | R11
+  | R12 | R13 | R14 | R15 | R16 | R17 | R18 | R19
+  | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27
+  | R28 | R29 | R30 => false
+
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => true
   end.
 
-Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
-Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
-
-Definition callee_save_type := mreg_type.
-
 (** * Function calling conventions *)
 
 (** The functions in this section determine the locations (machine registers
@@ -99,43 +105,32 @@ Definition callee_save_type := mreg_type.
   of function arguments), but this leaves much liberty in choosing actual
   locations.  To ensure binary interoperability of code generated by our
   compiler with libraries compiled by another compiler, we
-  implement the standard x86-32 and x86-64 conventions. *)
+  implement the standard RISC-V conventions as found here:
+  https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+*)
 
 (** ** Location of function result *)
 
-(** In 32 bit mode, the result value of a function is passed back to the
-  caller in registers [AX] or [DX:AX] or [FP0], depending on the type
-  of the returned value.  We treat a function without result as a
-  function with one integer result. *)
-
-Definition loc_result_32 (s: signature) : rpair mreg :=
-  match proj_sig_res s with
-  | Tint | Tany32 => One AX
-  | Tfloat | Tsingle => One FP0
-  | Tany64 => One X0
-  | Tlong => Twolong DX AX
-  end.
-
-(** In 64 bit mode, he result value of a function is passed back to
-  the caller in registers [AX] or [X0]. *)
+(** The result value of a function is passed back to the caller in
+  registers [R10] or [F10] or [R10,R11], depending on the type of the
+  returned value.  We treat a function without result as a function
+  with one integer result. *)
 
-Definition loc_result_64 (s: signature) : rpair mreg :=
+Definition loc_result (s: signature) : rpair mreg :=
   match proj_sig_res s with
-  | Tint | Tlong | Tany32 | Tany64 => One AX
-  | Tfloat | Tsingle => One X0
+  | Tint | Tany32 => One R10
+  | Tfloat | Tsingle | Tany64 => One F10
+  | Tlong => if Archi.ptr64 then One R10 else Twolong R11 R10
   end.
 
-Definition loc_result :=
-  if Archi.ptr64 then loc_result_64 else loc_result_32.
-
 (** The result registers have types compatible with that given in the signature. *)
 
 Lemma loc_result_type:
   forall sig,
   subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
 Proof.
-  intros. unfold loc_result, loc_result_32, loc_result_64, mreg_type;
-  destruct Archi.ptr64; destruct (proj_sig_res sig); auto.
+  intros. unfold loc_result, mreg_type;
+  destruct (proj_sig_res sig); auto; destruct Archi.ptr64; auto.
 Qed.
 
 (** The result locations are caller-save registers *)
@@ -144,8 +139,8 @@ Lemma loc_result_caller_save:
   forall (s: signature),
   forall_rpair (fun r => is_callee_save r = false) (loc_result s).
 Proof.
-  intros. unfold loc_result, loc_result_32, loc_result_64, is_callee_save;
-  destruct Archi.ptr64; destruct (proj_sig_res s); simpl; auto.
+  intros. unfold loc_result, is_callee_save;
+  destruct (proj_sig_res s); simpl; auto; destruct Archi.ptr64; simpl; auto.
 Qed.
 
 (** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
@@ -156,13 +151,13 @@ Lemma loc_result_pair:
   | One _ => True
   | Twolong r1 r2 =>
        r1 <> r2 /\ proj_sig_res sg = Tlong
-    /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
+    /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true 
     /\ Archi.ptr64 = false
   end.
 Proof.
-  intros. 
-  unfold loc_result, loc_result_32, loc_result_64, mreg_type;
-  destruct Archi.ptr64; destruct (proj_sig_res sg); auto.
+  intros.
+  unfold loc_result; destruct (proj_sig_res sg); auto.
+  unfold mreg_type; destruct Archi.ptr64; auto.
   split; auto. congruence.
 Qed.
 
@@ -171,104 +166,146 @@ Qed.
 Lemma loc_result_exten:
   forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
 Proof.
-  intros. unfold loc_result, loc_result_32, loc_result_64, proj_sig_res.
-  destruct Archi.ptr64; rewrite H; auto.
+  intros. unfold loc_result, proj_sig_res. rewrite H; auto.  
 Qed.
 
 (** ** Location of function arguments *)
 
-(** In the x86-32 ABI, all arguments are passed on stack. (Snif.) *)
+(** The RISC-V ABI states the following conventions for passing arguments
+  to a function.  First for non-variadic functions:
 
-Fixpoint loc_arguments_32
-    (tyl: list typ) (ofs: Z) {struct tyl} : list (rpair loc) :=
-  match tyl with
-  | nil => nil
-  | ty :: tys =>
-      match ty with
-      | Tlong => Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint)
-      | _     => One (S Outgoing ofs ty)
-      end
-      :: loc_arguments_32 tys (ofs + typesize ty)
-  end.
+- RV64: pass the first 8 integer arguments in integer registers
+  (a1...a8: int_param_regs), the first 8 FP arguments in FP registers
+  (fa1...fa8: float_param_regs) then in integer registers (a1...a8),
+  and the remaining arguments on the stack, in 8-byte slots.
+
+- RV32: same, but arguments of size 64 bits that must be passed in
+  integer registers are passed in two consecutive integer registers
+  (a(i), a(i+1)), or in a(8) and on a 32-bit word on the stack.
+  Stack-allocated arguments are aligned to their natural alignment.
+
+For variadic functions, the fixed arguments are passed as described
+above, then the variadic arguments receive special treatment:
 
-(** In the x86-64 ELF ABI:
-- The first 6 integer arguments are passed in registers [DI], [SI], [DX], [CX], [R8], [R9].
-- The first 8 floating-point arguments are passed in registers [X0] to [X7].
-- Extra arguments are passed on the stack, in [Outgoing] slots.
-  Consecutive stack slots are separated by 8 bytes, even if only 4 bytes
-  of data is used in a slot.
+- RV64: FP registers are not used for passing variadic arguments.
+  All variadic arguments, including FP arguments, are passed in the
+  remaining integer registers (a1...a8), then on the stack, in 8-byte
+  slots.
+
+- RV32: likewise, but arguments of 64-bit types (integers as well
+  as floats) are passed in two consecutive aligned integer registers
+  (a(2i), a(2i+1)), or on the stack, in aligned 8-byte slots.
+
+The passing of FP arguments to variadic functions in integer registers
+doesn't quite fit CompCert's model.  We do our best by passing the FP
+arguments in registers, as usual, and reserving the corresponding
+integer registers, so that fixup code can be introduced in the
+Asmexpand pass.
 *)
 
-Definition int_param_regs_elf64 := DI :: SI :: DX :: CX :: R8 :: R9 :: nil.
-Definition float_param_regs_elf64 := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
+Definition int_param_regs :=
+  R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil.
+Definition float_param_regs :=
+  F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 :: nil.
+
+(** To evaluate FP arguments that must be passed in integer registers,
+    we can use any FP caller-save register that is not already used to pass
+    a fixed FP argument.  Since there are 8 integer registers for argument
+    passing, we need at most 8 extra more FP registers for these FP
+    arguments. *)
+
+Definition float_extra_param_regs :=
+  F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil.
+
+Definition int_arg (ri rf ofs: Z) (ty: typ)
+                   (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z int_param_regs ri with
+  | Some r =>
+      One(R r) :: rec (ri + 1) rf ofs
+  | None   =>
+      let ofs := align ofs (typesize ty) in
+      One(S Outgoing ofs ty)
+      :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
+  end.
 
-Fixpoint loc_arguments_elf64
-    (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
-  match tyl with
-  | nil => nil
-  | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
-      match list_nth_z int_param_regs_elf64 ir with
-      | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
-      | Some ireg =>
-          One (R ireg) :: loc_arguments_elf64 tys (ir + 1) fr ofs
-      end
-  | (Tfloat | Tsingle) as ty :: tys =>
-      match list_nth_z float_param_regs_elf64 fr with
+Definition float_arg (va: bool) (ri rf ofs: Z) (ty: typ)
+                     (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  match list_nth_z (if va then nil else float_param_regs) rf with
+  | Some r =>
+      One (R r) :: rec ri (rf + 1) ofs
+  | None =>
+      (* We are out of FP registers, or cannot use them because vararg,
+         so try to put the argument in an extra FP register while
+         reserving an integer register or register pair into which
+         fixup code will move the extra FP register. *)
+      let regpair := negb Archi.ptr64 && zeq (typesize ty) 2 in
+      let ri' := if va && regpair then align ri 2 else ri in
+      match list_nth_z float_extra_param_regs ri' with
+      | Some r =>
+          let ri'' := ri' + (if Archi.ptr64 then 1 else typesize ty) in
+          let ofs'' := if regpair && zeq ri' 7 then ofs + 1 else ofs in
+          One (R r) :: rec ri'' rf ofs''
       | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
-      | Some freg =>
-          One (R freg) :: loc_arguments_elf64 tys ir (fr + 1) ofs
+          (* We are out of integer registers, pass argument on stack *)
+            let ofs := align ofs (typesize ty) in
+            One(S Outgoing ofs ty)
+            :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
       end
   end.
 
-(** In the x86-64 Win64 ABI:
-- The first 4 arguments are passed in registers [RCX], [RDX], [R8], and [R9]
-  (for integer arguments) and [X0] to [X3] (for floating-point arguments).
-  Each argument "burns" both an integer register and a FP integer.
-- The first 8 floating-point arguments are passed in registers [X0] to [X7].
-- Extra arguments are passed on the stack, in [Outgoing] slots.
-  Consecutive stack slots are separated by 8 bytes, even if only 4 bytes
-  of data is used in a slot.
-- Four 8-byte words are always reserved at the bottom of the outgoing area
-  so that the callee can use them to save the registers containing the
-  first four arguments.  This is handled in the Stacking phase.
-*)
-
-Definition int_param_regs_win64 := CX :: DX :: R8 :: R9 :: nil.
-Definition float_param_regs_win64 := X0 :: X1 :: X2 :: X3 :: nil.
+Definition split_long_arg (va: bool) (ri rf ofs: Z)
+                          (rec: Z -> Z -> Z -> list (rpair loc)) :=
+  let ri := if va then align ri 2 else ri in
+  match list_nth_z int_param_regs ri, list_nth_z int_param_regs (ri + 1) with
+  | Some r1, Some r2 =>
+      Twolong (R r2) (R r1) :: rec (ri + 2) rf ofs
+  | Some r1, None =>
+      Twolong (S Outgoing ofs Tint) (R r1) :: rec (ri + 1) rf (ofs + 1)
+  | None, _ =>
+      let ofs := align ofs 2 in
+      Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
+      rec ri rf (ofs + 2)
+  end.
 
-Fixpoint loc_arguments_win64
-    (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+Fixpoint loc_arguments_rec
+    (tyl: list typ) (fixed ri rf ofs: Z) {struct tyl} : list (rpair loc) :=
   match tyl with
   | nil => nil
-  | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
-      match list_nth_z int_param_regs_win64 r with
-      | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
-      | Some ireg =>
-          One (R ireg) :: loc_arguments_win64 tys (r + 1) ofs
-      end
-  | (Tfloat | Tsingle) as ty :: tys =>
-      match list_nth_z float_param_regs_win64 r with
-      | None =>
-          One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
-      | Some freg =>
-          One (R freg) :: loc_arguments_win64 tys (r + 1) ofs
-      end
+  | (Tint | Tany32) as ty :: tys =>
+      (* pass in one integer register or on stack *)
+      int_arg ri rf ofs ty (loc_arguments_rec tys (fixed - 1))
+  | Tsingle as ty :: tys =>
+      (* pass in one FP register or on stack.
+         If vararg, reserve 1 integer register. *)
+      float_arg (zle fixed 0) ri rf ofs ty (loc_arguments_rec tys (fixed - 1))
+  | Tlong as ty :: tys =>
+      if Archi.ptr64 then
+        (* pass in one integer register or on stack *)
+        int_arg ri rf ofs ty (loc_arguments_rec tys (fixed - 1))
+      else
+        (* pass in register pair or on stack; align register pair if vararg *)
+        split_long_arg (zle fixed 0) ri rf ofs(loc_arguments_rec tys (fixed - 1))
+  | (Tfloat | Tany64) as ty :: tys =>
+      (* pass in one FP register or on stack.
+         If vararg, reserve 1 or 2 integer registers. *)
+      float_arg (zle fixed 0) ri rf ofs ty (loc_arguments_rec tys (fixed - 1))
+  end.
+
+(** Number of fixed arguments for a function with signature [s]. *)
+
+Definition fixed_arguments (s: signature) : Z :=
+  match s.(sig_cc).(cc_vararg) with
+  | Some n => n
+  | None => list_length_z s.(sig_args)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list (rpair loc) :=
-  if Archi.ptr64
-  then if Archi.win64
-       then loc_arguments_win64 s.(sig_args) 0 0
-       else loc_arguments_elf64 s.(sig_args) 0 0 0
-  else loc_arguments_32 s.(sig_args) 0.
+  loc_arguments_rec s.(sig_args) (fixed_arguments s) 0 0 0.
 
-(** Argument locations are either caller-save registers or [Outgoing]
+(** Argument locations are either non-temporary registers or [Outgoing]
   stack slots at nonnegative offsets. *)
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
@@ -278,175 +315,107 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
   | _ => False
   end.
 
-Definition loc_argument_32_charact (ofs: Z) (l: loc) : Prop :=
-  match l with
-  | S Outgoing ofs' ty => ofs' >= ofs /\ typealign ty = 1
-  | _ => False
-  end.
-
-Definition loc_argument_elf64_charact (ofs: Z) (l: loc) : Prop :=
-  match l with
-  | R r => In r int_param_regs_elf64 \/ In r float_param_regs_elf64
-  | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
-  | _ => False
-  end.
-
-Definition loc_argument_win64_charact (ofs: Z) (l: loc) : Prop :=
-  match l with
-  | R r => In r int_param_regs_win64 \/ In r float_param_regs_win64
-  | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
-  | _ => False
-  end.
-
-Remark loc_arguments_32_charact:
-  forall tyl ofs p,
-  In p (loc_arguments_32 tyl ofs) -> forall_rpair (loc_argument_32_charact ofs) p.
-Proof.
-  assert (X: forall ofs1 ofs2 l, loc_argument_32_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_32_charact ofs1 l).
-  { destruct l; simpl; intros; auto. destruct sl; auto. intuition lia. }
-  induction tyl as [ | ty tyl]; simpl loc_arguments_32; intros.
-- contradiction.
-- destruct H.
-+ destruct ty; subst p; simpl; lia.
-+ apply IHtyl in H. generalize (typesize_pos ty); intros. destruct p; simpl in *.
-* eapply X; eauto; lia.
-* destruct H; split; eapply X; eauto; lia.
-Qed.
-
-Remark loc_arguments_elf64_charact:
-  forall tyl ir fr ofs p,
-  In p (loc_arguments_elf64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_elf64_charact ofs) p.
+Lemma loc_arguments_rec_charact:
+  forall va tyl ri rf ofs p,
+  ofs >= 0 ->
+  In p (loc_arguments_rec va tyl ri rf ofs) -> forall_rpair loc_argument_acceptable p.
 Proof.
-  assert (X: forall ofs1 ofs2 l, loc_argument_elf64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_elf64_charact ofs1 l).
-  { destruct l; simpl; intros; auto. destruct sl; auto. intuition lia. }
-  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_elf64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_elf64_charact ofs1) p).
-  { destruct p; simpl; intuition eauto. }
-  assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
-  { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
-Opaque list_nth_z.
-  induction tyl; simpl loc_arguments_elf64; intros.
-  elim H.
-  assert (A: forall ty, In p
-      match list_nth_z int_param_regs_elf64 ir with
-      | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl (ir + 1) fr ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_elf64_charact ofs) p).
-  { intros. destruct (list_nth_z int_param_regs_elf64 ir) as [r|] eqn:E; destruct H1.
-    subst. left. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. lia. assumption.
-    eapply Y; eauto. lia. }
-  assert (B: forall ty, In p
-      match list_nth_z float_param_regs_elf64 fr with
-      | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl ir (fr + 1) ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_elf64_charact ofs) p).
-  { intros. destruct (list_nth_z float_param_regs_elf64 fr) as [r|] eqn:E; destruct H1.
-    subst. right. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. lia. assumption.
-    eapply Y; eauto. lia. }
-  destruct a; eauto.
-Qed.
-
-Remark loc_arguments_win64_charact:
-  forall tyl r ofs p,
-  In p (loc_arguments_win64 tyl r ofs) -> (2 | ofs) -> forall_rpair (loc_argument_win64_charact ofs) p.
-Proof.
-  assert (X: forall ofs1 ofs2 l, loc_argument_win64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_win64_charact ofs1 l).
-  { destruct l; simpl; intros; auto. destruct sl; auto. intuition lia. }
-  assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_win64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_win64_charact ofs1) p).
-  { destruct p; simpl; intuition eauto. }
-  assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
-  { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
-Opaque list_nth_z.
-  induction tyl; simpl loc_arguments_win64; intros.
-  elim H.
-  assert (A: forall ty, In p
-      match list_nth_z int_param_regs_win64 r with
-      | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_win64_charact ofs) p).
-  { intros. destruct (list_nth_z int_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
-    subst. left. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. lia. assumption.
-    eapply Y; eauto. lia. }
-  assert (B: forall ty, In p
-      match list_nth_z float_param_regs_win64 r with
-      | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
-      | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
-      end ->
-      forall_rpair (loc_argument_win64_charact ofs) p).
-  { intros. destruct (list_nth_z float_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
-    subst. right. eapply list_nth_z_in; eauto.
-    eapply IHtyl; eauto.
-    subst. split. lia. assumption.
-    eapply Y; eauto. lia. }
-  destruct a; eauto.
+  set (OK := fun (l: list (rpair loc)) =>
+             forall p, In p l -> forall_rpair loc_argument_acceptable p).
+  set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) =>
+              forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)).
+  assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (CSFX: forall r, In r float_extra_param_regs -> is_callee_save r = false).
+  { decide_goal. }
+  assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0).
+  { intros. 
+    assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos).
+    lia. }
+  assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))).
+  { intros. eapply Z.divide_trans. apply typealign_typesize.
+    apply align_divides. apply typesize_pos. }
+  assert (SK: (if Archi.ptr64 then 2 else 1) > 0).
+  { destruct Archi.ptr64; lia. }
+  assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0).
+  { intros. destruct Archi.ptr64. lia. apply typesize_pos.  }
+  assert (A: forall ri rf ofs ty f,
+             OKF f -> ofs >= 0 -> OK (int_arg ri rf ofs ty f)).
+  { intros until f; intros OF OO; red; unfold int_arg; intros.
+    destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; destruct H.
+  - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. 
+  - eapply OF; eauto. 
+  - subst p; simpl. auto using align_divides, typealign_pos.
+  - eapply OF; [idtac|eauto].
+    generalize (AL ofs ty OO) (SKK ty); lia.
+  }
+  assert (B: forall va ri rf ofs ty f,
+             OKF f -> ofs >= 0 -> OK (float_arg va ri rf ofs ty f)).
+  { intros until f; intros OF OO; red; unfold float_arg; intros.
+    destruct (list_nth_z (if va then nil else float_param_regs) rf) as [r|] eqn:NTH.
+  - destruct H.
+    + subst p; simpl. apply CSF. destruct va. simpl in NTH; discriminate. eapply list_nth_z_in; eauto.
+    + eapply OF; eauto.
+  - set (regpair := negb Archi.ptr64 && zeq (typesize ty) 2) in *.
+    set (ri' := if va && regpair then align ri 2 else ri) in *.
+    destruct (list_nth_z float_extra_param_regs ri') as [r|] eqn:NTH'; destruct H.
+    + subst p; simpl. apply CSFX. eapply list_nth_z_in; eauto.
+    + eapply OF; [|eauto]. destruct (regpair && zeq ri' 7); lia.
+    + subst p; simpl. auto.
+    + eapply OF; [|eauto]. generalize (AL ofs ty OO) (SKK ty); lia.
+  }
+  assert (C: forall va ri rf ofs f,
+             OKF f -> ofs >= 0 -> OK (split_long_arg va ri rf ofs f)).
+  { intros until f; intros OF OO; unfold split_long_arg.
+    set (ri' := if va then align ri 2 else ri).
+    set (ofs' := align ofs 2).
+    assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto).
+    destruct (list_nth_z int_param_regs ri') as [r1|] eqn:NTH1;
+    [destruct (list_nth_z int_param_regs (ri'+1)) as [r2|] eqn:NTH2 | idtac].
+  - red; simpl; intros; destruct H.
+    + subst p; split; apply CSI; eauto using list_nth_z_in.
+    + eapply OF; [idtac|eauto]. lia.
+  - red; simpl; intros; destruct H.
+    + subst p; split. split; auto using Z.divide_1_l. apply CSI; eauto using list_nth_z_in.
+    + eapply OF; [idtac|eauto]. lia.
+  - red; simpl; intros; destruct H.
+    + subst p; repeat split; auto using Z.divide_1_l. lia. 
+    + eapply OF; [idtac|eauto]. lia.
+  }
+  cut (forall tyl fixed ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec tyl fixed ri rf ofs)).
+  unfold OK. eauto.
+  induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl.
+- red; simpl; tauto.
+- destruct ty1.
++ (* int *) apply A; unfold OKF; auto.
++ (* float *) apply B; unfold OKF; auto.
++ (* long *)
+  destruct Archi.ptr64.
+  apply A; unfold OKF; auto.
+  apply C; unfold OKF; auto.
++ (* single *) apply B; unfold OKF; auto.
++ (* any32 *) apply A; unfold OKF; auto.
++ (* any64 *) apply B; unfold OKF; auto.
 Qed.
 
 Lemma loc_arguments_acceptable:
   forall (s: signature) (p: rpair loc),
   In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
 Proof.
-  unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF; [destruct Archi.win64 eqn:W64|].
-- (* WIN 64 bits *)
-  assert (A: forall r, In r int_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal).
-  assert (B: forall r, In r float_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; decide_goal).
-  assert (X: forall l, loc_argument_win64_charact 0 l -> loc_argument_acceptable l).
-  { unfold loc_argument_win64_charact, loc_argument_acceptable.
-    destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
-    intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
-    exists (2 / typealign ty); destruct ty; reflexivity.
-  }
-  exploit loc_arguments_win64_charact; eauto using Z.divide_0_r.
-  unfold forall_rpair; destruct p; intuition auto.
-- (* ELF 64 bits *)
-  assert (A: forall r, In r int_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF, W64; decide_goal).
-  assert (B: forall r, In r float_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite W64; decide_goal).
-  assert (X: forall l, loc_argument_elf64_charact 0 l -> loc_argument_acceptable l).
-  { unfold loc_argument_elf64_charact, loc_argument_acceptable.
-    destruct l as [r | [] ofs ty]; auto.  intros [C|C]; auto.
-    intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
-    exists (2 / typealign ty); destruct ty; reflexivity.
-  }
-  exploit loc_arguments_elf64_charact; eauto using Z.divide_0_r.
-  unfold forall_rpair; destruct p; intuition auto.
-
-- (* 32 bits *)
-  assert (X: forall l, loc_argument_32_charact 0 l -> loc_argument_acceptable l).
-  { destruct l as [r | [] ofs ty]; simpl; intuition auto. rewrite H2; apply Z.divide_1_l. }
-  exploit loc_arguments_32_charact; eauto.
-  unfold forall_rpair; destruct p; intuition auto.
+  unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. lia.
 Qed.
 
-Global Hint Resolve loc_arguments_acceptable: locs.
-
 Lemma loc_arguments_main:
   loc_arguments signature_main = nil.
 Proof.
-  unfold loc_arguments; destruct Archi.ptr64; auto; destruct Archi.win64; auto.
+  reflexivity.
 Qed.
 
 (** ** Normalization of function results and parameters *)
 
-(** In the x86 ABI, a return value of type "char" is returned in
-    register AL, leaving the top 24 bits of EAX unspecified.
-    Likewise, a return value of type "short" is returned in register
-    AH, leaving the top 16 bits of EAX unspecified.  Hence, return
-    values of small integer types need re-normalization after calls. *)
-
-Definition return_value_needs_normalization (t: rettype) : bool :=
-  match t with
-  | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true
-  | _ => false
-  end.
-
-(** Function parameters are passed in normalized form and do not need
-    to be re-normalized at function entry. *)
+(** No normalization needed. *)
 
+Definition return_value_needs_normalization (t: rettype) := false.
 Definition parameter_needs_normalization (t: rettype) := false.
diff --git a/verilog/ExpansionOracle.ml b/verilog/ExpansionOracle.ml
index 3b63b80d..68d4e4d2 100644
--- a/verilog/ExpansionOracle.ml
+++ b/verilog/ExpansionOracle.ml
@@ -10,8 +10,1059 @@
 (*                                                             *)
 (* *************************************************************)
 
+open RTLpathLivegenaux
 open RTLpathCommon
+open Datatypes
+open Maps
+open RTL
+open Op
+open Asmgen
+open RTLpath
+open! Integers
+open Camlcoq
+open Option
+open AST
+open DebugPrint
 
-let expanse (sb : superblock) code pm = (code, pm)
+(** Mini CSE (a dynamic numbering is applied during expansion. 
+    The CSE algorithm is inspired by the "static" one used in backend/CSE.v *)
 
-let find_last_node_reg c = ()
+(** Managing virtual registers and node index *)
+
+let reg = ref 1
+
+let node = ref 1
+
+let p2i r = P.to_int r
+
+let r2p () = P.of_int !reg
+
+let n2p () = P.of_int !node
+
+let r2pi () =
+  reg := !reg + 1;
+  r2p ()
+
+let n2pi () =
+  node := !node + 1;
+  n2p ()
+
+(** Below are the types for rhs and equations *)
+
+type rhs = Sop of operation * int list | Smove
+
+type seq = Seq of int * rhs
+
+(** This is a mini abstraction to have a simpler representation during expansion
+    - Snop will be converted to Inop
+    - (Sr r) is inserted if the value was found in register r
+    - (Sexp dest rhs args succ) represent an instruction
+      (succesor may not be defined at this point, hence the use of type option)
+    - (Sfinalcond cond args succ1 succ2 info) represents a condition (which must
+      always be the last instruction in expansion list *)
+
+type expl =
+  | Snop of P.t
+  | Sr of P.t
+  | Sexp of P.t * rhs * P.t list * node option
+  | Sfinalcond of condition * P.t list * node * node * bool option
+
+(** Record used during the "dynamic" value numbering *)
+
+type numb = {
+  mutable nnext : int;  (** Next unusued value number *)
+  mutable seqs : seq list;  (** equations *)
+  mutable nreg : (P.t, int) Hashtbl.t;  (** mapping registers to values *)
+  mutable nval : (int, P.t list) Hashtbl.t;
+      (** reverse mapping values to registers containing it *)
+}
+
+let print_list_pos l =
+  debug "[";
+  List.iter (fun i -> debug "%d;" (p2i i)) l;
+  debug "]\n"
+
+let empty_numbering () =
+  { nnext = 1; seqs = []; nreg = Hashtbl.create 100; nval = Hashtbl.create 100 }
+
+let rec get_nvalues vn = function
+  | [] -> []
+  | r :: rs ->
+      let v =
+        match Hashtbl.find_opt !vn.nreg r with
+        | Some v ->
+            debug "getnval r=%d |-> v=%d\n" (p2i r) v;
+            v
+        | None ->
+            let n = !vn.nnext in
+            debug "getnval r=%d |-> v=%d\n" (p2i r) n;
+            !vn.nnext <- !vn.nnext + 1;
+            Hashtbl.replace !vn.nreg r n;
+            Hashtbl.replace !vn.nval n [ r ];
+            n
+      in
+      let vs = get_nvalues vn rs in
+      v :: vs
+
+let get_nval_ornil vn v =
+  match Hashtbl.find_opt !vn.nval v with None -> [] | Some l -> l
+
+let forget_reg vn rd =
+  match Hashtbl.find_opt !vn.nreg rd with
+  | Some v ->
+      debug "forget_reg: r=%d |-> v=%d\n" (p2i rd) v;
+      let old_regs = get_nval_ornil vn v in
+      debug "forget_reg: old_regs are:\n";
+      print_list_pos old_regs;
+      Hashtbl.replace !vn.nval v
+        (List.filter (fun n -> not (P.eq n rd)) old_regs)
+  | None -> debug "forget_reg: no mapping for r=%d\n" (p2i rd)
+
+let update_reg vn rd v =
+  debug "update_reg: update v=%d with r=%d\n" v (p2i rd);
+  forget_reg vn rd;
+  let old_regs = get_nval_ornil vn v in
+  Hashtbl.replace !vn.nval v (rd :: old_regs)
+
+let rec find_valnum_rhs rh = function
+  | [] -> None
+  | Seq (v, rh') :: tl -> if rh = rh' then Some v else find_valnum_rhs rh tl
+
+let set_unknown vn rd =
+  debug "set_unknown: rd=%d\n" (p2i rd);
+  forget_reg vn rd;
+  Hashtbl.remove !vn.nreg rd
+
+let set_res_unknown vn res = match res with BR r -> set_unknown vn r | _ -> ()
+
+let addrhs vn rd rh =
+  match find_valnum_rhs rh !vn.seqs with
+  | Some vres ->
+      debug "addrhs: Some v=%d\n" vres;
+      Hashtbl.replace !vn.nreg rd vres;
+      update_reg vn rd vres
+  | None ->
+      let n = !vn.nnext in
+      debug "addrhs: None v=%d\n" n;
+      !vn.nnext <- !vn.nnext + 1;
+      !vn.seqs <- Seq (n, rh) :: !vn.seqs;
+      update_reg vn rd n;
+      Hashtbl.replace !vn.nreg rd n
+
+let addsop vn v op rd =
+  debug "addsop\n";
+  if op = Omove then (
+    update_reg vn rd (List.hd v);
+    Hashtbl.replace !vn.nreg rd (List.hd v))
+  else addrhs vn rd (Sop (op, v))
+
+let rec kill_mem_operations = function
+  | (Seq (v, Sop (op, vl)) as eq) :: tl ->
+      if op_depends_on_memory op then kill_mem_operations tl
+      else eq :: kill_mem_operations tl
+  | [] -> []
+  | eq :: tl -> eq :: kill_mem_operations tl
+
+let reg_valnum vn v =
+  debug "reg_valnum: trying to find a mapping for v=%d\n" v;
+  match Hashtbl.find !vn.nval v with
+  | [] -> None
+  | r :: rs ->
+      debug "reg_valnum: found a mapping r=%d\n" (p2i r);
+      Some r
+
+let rec reg_valnums vn = function
+  | [] -> Some []
+  | v :: vs -> (
+      match (reg_valnum vn v, reg_valnums vn vs) with
+      | Some r, Some rs -> Some (r :: rs)
+      | _, _ -> None)
+
+let find_rhs vn rh =
+  match find_valnum_rhs rh !vn.seqs with
+  | None -> None
+  | Some vres -> reg_valnum vn vres
+
+(** Functions to perform the dynamic reduction during CSE *)
+
+let extract_arg l =
+  if List.length l > 0 then
+    match List.hd l with
+    | Sr r -> (r, List.tl l)
+    | Sexp (rd, _, _, _) -> (rd, l)
+    | _ -> failwith "extract_arg: final instruction arg can not be extracted"
+  else failwith "extract_arg: trying to extract on an empty list"
+
+let extract_final vn fl fdest succ =
+  if List.length fl > 0 then
+    match List.hd fl with
+    | Sr r ->
+        if not (P.eq r fdest) then (
+          let v = get_nvalues vn [ r ] in
+          addsop vn v Omove fdest;
+          Sexp (fdest, Smove, [ r ], Some succ) :: List.tl fl)
+        else Snop succ :: List.tl fl
+    | Sexp (rd, rh, args, None) ->
+        assert (rd = fdest);
+        Sexp (fdest, rh, args, Some succ) :: List.tl fl
+    | _ -> fl
+  else failwith "extract_final: trying to extract on an empty list"
+
+let addinst vn op args rd =
+  let v = get_nvalues vn args in
+  let rh = Sop (op, v) in
+  match find_rhs vn rh with
+  | Some r ->
+      debug "addinst: rhs found with r=%d\n" (p2i r);
+      Sr r
+  | None ->
+      addsop vn v op rd;
+      Sexp (rd, rh, args, None)
+
+(** Expansion functions *)
+
+type immt =
+  | Addiw
+  | Addil
+  | Andiw
+  | Andil
+  | Oriw
+  | Oril
+  | Xoriw
+  | Xoril
+  | Sltiw
+  | Sltiuw
+  | Sltil
+  | Sltiul
+
+let load_hilo32 vn dest hi lo =
+  let op1 = OEluiw hi in
+  if Int.eq lo Int.zero then [ addinst vn op1 [] dest ]
+  else
+    let r = r2pi () in
+    let op2 = OEaddiw (None, lo) in
+    let i1 = addinst vn op1 [] r in
+    let r', l = extract_arg [ i1 ] in
+    let i2 = addinst vn op2 [ r' ] dest in
+    i2 :: l
+
+let load_hilo64 vn dest hi lo =
+  let op1 = OEluil hi in
+  if Int64.eq lo Int64.zero then [ addinst vn op1 [] dest ]
+  else
+    let r = r2pi () in
+    let op2 = OEaddil (None, lo) in
+    let i1 = addinst vn op1 [] r in
+    let r', l = extract_arg [ i1 ] in
+    let i2 = addinst vn op2 [ r' ] dest in
+    i2 :: l
+
+let loadimm32 vn dest n =
+  match make_immed32 n with
+  | Imm32_single imm ->
+      let op1 = OEaddiw (Some X0_R, imm) in
+      [ addinst vn op1 [] dest ]
+  | Imm32_pair (hi, lo) -> load_hilo32 vn dest hi lo
+
+let loadimm64 vn dest n =
+  match make_immed64 n with
+  | Imm64_single imm ->
+      let op1 = OEaddil (Some X0_R, imm) in
+      [ addinst vn op1 [] dest ]
+  | Imm64_pair (hi, lo) -> load_hilo64 vn dest hi lo
+  | Imm64_large imm ->
+      let op1 = OEloadli imm in
+      [ addinst vn op1 [] dest ]
+
+let get_opimm optR imm = function
+  | Addiw -> OEaddiw (optR, imm)
+  | Andiw -> OEandiw imm
+  | Oriw -> OEoriw imm
+  | Xoriw -> OExoriw imm
+  | Sltiw -> OEsltiw imm
+  | Sltiuw -> OEsltiuw imm
+  | Addil -> OEaddil (optR, imm)
+  | Andil -> OEandil imm
+  | Oril -> OEoril imm
+  | Xoril -> OExoril imm
+  | Sltil -> OEsltil imm
+  | Sltiul -> OEsltiul imm
+
+let opimm32 vn a1 dest n optR op opimm =
+  match make_immed32 n with
+  | Imm32_single imm -> [ addinst vn (get_opimm optR imm opimm) [ a1 ] dest ]
+  | Imm32_pair (hi, lo) ->
+      let r = r2pi () in
+      let l = load_hilo32 vn r hi lo in
+      let r', l' = extract_arg l in
+      let i = addinst vn op [ a1; r' ] dest in
+      i :: l'
+
+let opimm64 vn a1 dest n optR op opimm =
+  match make_immed64 n with
+  | Imm64_single imm -> [ addinst vn (get_opimm optR imm opimm) [ a1 ] dest ]
+  | Imm64_pair (hi, lo) ->
+      let r = r2pi () in
+      let l = load_hilo64 vn r hi lo in
+      let r', l' = extract_arg l in
+      let i = addinst vn op [ a1; r' ] dest in
+      i :: l'
+  | Imm64_large imm ->
+      let r = r2pi () in
+      let op1 = OEloadli imm in
+      let i1 = addinst vn op1 [] r in
+      let r', l' = extract_arg [ i1 ] in
+      let i2 = addinst vn op [ a1; r' ] dest in
+      i2 :: l'
+
+let addimm32 vn a1 dest n optR = opimm32 vn a1 dest n optR Oadd Addiw
+
+let andimm32 vn a1 dest n = opimm32 vn a1 dest n None Oand Andiw
+
+let orimm32 vn a1 dest n = opimm32 vn a1 dest n None Oor Oriw
+
+let xorimm32 vn a1 dest n = opimm32 vn a1 dest n None Oxor Xoriw
+
+let sltimm32 vn a1 dest n = opimm32 vn a1 dest n None (OEsltw None) Sltiw
+
+let sltuimm32 vn a1 dest n = opimm32 vn a1 dest n None (OEsltuw None) Sltiuw
+
+let addimm64 vn a1 dest n optR = opimm64 vn a1 dest n optR Oaddl Addil
+
+let andimm64 vn a1 dest n = opimm64 vn a1 dest n None Oandl Andil
+
+let orimm64 vn a1 dest n = opimm64 vn a1 dest n None Oorl Oril
+
+let xorimm64 vn a1 dest n = opimm64 vn a1 dest n None Oxorl Xoril
+
+let sltimm64 vn a1 dest n = opimm64 vn a1 dest n None (OEsltl None) Sltil
+
+let sltuimm64 vn a1 dest n = opimm64 vn a1 dest n None (OEsltul None) Sltiul
+
+let is_inv_cmp = function Cle | Cgt -> true | _ -> false
+
+let make_optR is_x0 is_inv =
+  if is_x0 then if is_inv then Some X0_L else Some X0_R else None
+
+let cbranch_int32s is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbeqw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbnew optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgew optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgew optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int32u is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbequw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbneuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgeuw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltuw optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgeuw optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int64s is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbeql optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbnel optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltl optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgel optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltl optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgel optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cbranch_int64u is_x0 cmp a1 a2 info succ1 succ2 k =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> Sfinalcond (CEbequl optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cne -> Sfinalcond (CEbneul optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Clt -> Sfinalcond (CEbltul optR, [ a1; a2 ], succ1, succ2, info) :: k
+  | Cle -> Sfinalcond (CEbgeul optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cgt -> Sfinalcond (CEbltul optR, [ a2; a1 ], succ1, succ2, info) :: k
+  | Cge -> Sfinalcond (CEbgeul optR, [ a1; a2 ], succ1, succ2, info) :: k
+
+let cond_int32s vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEseqw optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsnew optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltw optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltw optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltw optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltw optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int32u vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEsequw optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsneuw optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltuw optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltuw optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltuw optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltuw optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int64s vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEseql optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsnel optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltl optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltl optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltl optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltl optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let cond_int64u vn is_x0 cmp a1 a2 dest =
+  let optR = make_optR is_x0 (is_inv_cmp cmp) in
+  match cmp with
+  | Ceq -> [ addinst vn (OEsequl optR) [ a1; a2 ] dest ]
+  | Cne -> [ addinst vn (OEsneul optR) [ a1; a2 ] dest ]
+  | Clt -> [ addinst vn (OEsltul optR) [ a1; a2 ] dest ]
+  | Cle ->
+      let r = r2pi () in
+      let op = OEsltul optR in
+      let i1 = addinst vn op [ a2; a1 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+  | Cgt -> [ addinst vn (OEsltul optR) [ a2; a1 ] dest ]
+  | Cge ->
+      let r = r2pi () in
+      let op = OEsltul optR in
+      let i1 = addinst vn op [ a1; a2 ] r in
+      let r', l = extract_arg [ i1 ] in
+      addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let is_normal_cmp = function Cne -> false | _ -> true
+
+let cond_float vn cmp f1 f2 dest =
+  match cmp with
+  | Ceq -> [ addinst vn OEfeqd [ f1; f2 ] dest ]
+  | Cne -> [ addinst vn OEfeqd [ f1; f2 ] dest ]
+  | Clt -> [ addinst vn OEfltd [ f1; f2 ] dest ]
+  | Cle -> [ addinst vn OEfled [ f1; f2 ] dest ]
+  | Cgt -> [ addinst vn OEfltd [ f2; f1 ] dest ]
+  | Cge -> [ addinst vn OEfled [ f2; f1 ] dest ]
+
+let cond_single vn cmp f1 f2 dest =
+  match cmp with
+  | Ceq -> [ addinst vn OEfeqs [ f1; f2 ] dest ]
+  | Cne -> [ addinst vn OEfeqs [ f1; f2 ] dest ]
+  | Clt -> [ addinst vn OEflts [ f1; f2 ] dest ]
+  | Cle -> [ addinst vn OEfles [ f1; f2 ] dest ]
+  | Cgt -> [ addinst vn OEflts [ f2; f1 ] dest ]
+  | Cge -> [ addinst vn OEfles [ f2; f1 ] dest ]
+
+let expanse_cbranchimm_int32s vn cmp a1 n info succ1 succ2 =
+  if Int.eq n Int.zero then cbranch_int32s true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm32 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int32s false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int32u vn cmp a1 n info succ1 succ2 =
+  if Int.eq n Int.zero then cbranch_int32u true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm32 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int32u false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int64s vn cmp a1 n info succ1 succ2 =
+  if Int64.eq n Int64.zero then
+    cbranch_int64s true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm64 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int64s false cmp a1 r' info succ1 succ2 l'
+
+let expanse_cbranchimm_int64u vn cmp a1 n info succ1 succ2 =
+  if Int64.eq n Int64.zero then
+    cbranch_int64u true cmp a1 a1 info succ1 succ2 []
+  else
+    let r = r2pi () in
+    let l = loadimm64 vn r n in
+    let r', l' = extract_arg l in
+    cbranch_int64u false cmp a1 r' info succ1 succ2 l'
+
+let expanse_condimm_int32s vn cmp a1 n dest =
+  if Int.eq n Int.zero then cond_int32s vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Ceq | Cne ->
+        let r = r2pi () in
+        let l = xorimm32 vn a1 r n in
+        let r', l' = extract_arg l in
+        cond_int32s vn true cmp r' r' dest @ l'
+    | Clt -> sltimm32 vn a1 dest n
+    | Cle ->
+        if Int.eq n (Int.repr Int.max_signed) then
+          let l = loadimm32 vn dest Int.one in
+          let r, l' = extract_arg l in
+          addinst vn (OEmayundef MUint) [ a1; r ] dest :: l'
+        else sltimm32 vn a1 dest (Int.add n Int.one)
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm32 vn r n in
+        let r', l' = extract_arg l in
+        cond_int32s vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int32u vn cmp a1 n dest =
+  if Int.eq n Int.zero then cond_int32u vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Clt -> sltuimm32 vn a1 dest n
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm32 vn r n in
+        let r', l' = extract_arg l in
+        cond_int32u vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int64s vn cmp a1 n dest =
+  if Int64.eq n Int64.zero then cond_int64s vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Ceq | Cne ->
+        let r = r2pi () in
+        let l = xorimm64 vn a1 r n in
+        let r', l' = extract_arg l in
+        cond_int64s vn true cmp r' r' dest @ l'
+    | Clt -> sltimm64 vn a1 dest n
+    | Cle ->
+        if Int64.eq n (Int64.repr Int64.max_signed) then
+          let l = loadimm32 vn dest Int.one in
+          let r, l' = extract_arg l in
+          addinst vn (OEmayundef MUlong) [ a1; r ] dest :: l'
+        else sltimm64 vn a1 dest (Int64.add n Int64.one)
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm64 vn r n in
+        let r', l' = extract_arg l in
+        cond_int64s vn false cmp a1 r' dest @ l'
+
+let expanse_condimm_int64u vn cmp a1 n dest =
+  if Int64.eq n Int64.zero then cond_int64u vn true cmp a1 a1 dest
+  else
+    match cmp with
+    | Clt -> sltuimm64 vn a1 dest n
+    | _ ->
+        let r = r2pi () in
+        let l = loadimm64 vn r n in
+        let r', l' = extract_arg l in
+        cond_int64u vn false cmp a1 r' dest @ l'
+
+let expanse_cond_fp vn cnot fn_cond cmp f1 f2 dest =
+  let normal = is_normal_cmp cmp in
+  let normal' = if cnot then not normal else normal in
+  let insn = fn_cond vn cmp f1 f2 dest in
+  if normal' then insn
+  else
+    let r', l = extract_arg insn in
+    addinst vn (OExoriw Int.one) [ r' ] dest :: l
+
+let expanse_cbranch_fp vn cnot fn_cond cmp f1 f2 info succ1 succ2 =
+  let r = r2pi () in
+  let normal = is_normal_cmp cmp in
+  let normal' = if cnot then not normal else normal in
+  let insn = fn_cond vn cmp f1 f2 r in
+  let r', l = extract_arg insn in
+  if normal' then
+    Sfinalcond (CEbnew (Some X0_R), [ r'; r' ], succ1, succ2, info) :: l
+  else Sfinalcond (CEbeqw (Some X0_R), [ r'; r' ], succ1, succ2, info) :: l
+
+(** Form a list containing both sources and destination regs of an instruction *)
+
+let get_regindent = function Coq_inr _ -> [] | Coq_inl r -> [ r ]
+
+let get_regs_inst = function
+  | Inop _ -> []
+  | Iop (_, args, dest, _) -> dest :: args
+  | Iload (_, _, _, args, dest, _) -> dest :: args
+  | Istore (_, _, args, src, _) -> src :: args
+  | Icall (_, t, args, dest, _) -> dest :: (get_regindent t @ args)
+  | Itailcall (_, t, args) -> get_regindent t @ args
+  | Ibuiltin (_, args, dest, _) ->
+      AST.params_of_builtin_res dest @ AST.params_of_builtin_args args
+  | Icond (_, args, _, _, _) -> args
+  | Ijumptable (arg, _) -> [ arg ]
+  | Ireturn (Some r) -> [ r ]
+  | _ -> []
+
+(** Modify pathmap according to the size of the expansion list *)
+
+let write_pathmap initial esize pm' =
+  debug "write_pathmap: initial=%d, esize=%d\n" (p2i initial) esize;
+  let path = get_some @@ PTree.get initial !pm' in
+  let npsize = Camlcoq.Nat.of_int (esize + Camlcoq.Nat.to_int path.psize) in
+  let path' =
+    {
+      psize = npsize;
+      input_regs = path.input_regs;
+      pre_output_regs = path.pre_output_regs;
+      output_regs = path.output_regs;
+    }
+  in
+  pm' := PTree.set initial path' !pm'
+
+(** Write a single instruction in the tree and update order *)
+
+let write_inst target_node inst code' new_order =
+  code' := PTree.set (P.of_int target_node) inst !code';
+  new_order := P.of_int target_node :: !new_order
+
+(** Return olds args if the CSE numbering is empty *)
+
+let get_arguments vn vals args =
+  match reg_valnums vn vals with Some args' -> args' | None -> args
+
+(** Update the code tree with the expansion list *)
+
+let rec write_tree vn exp initial current code' new_order fturn =
+  debug "wt: node is %d\n" !node;
+  let target_node, next_node =
+    if fturn then (P.to_int initial, current) else (current, current - 1)
+  in
+  match exp with
+  | Sr r :: _ ->
+      failwith "write_tree: there are still some symbolic values in the list"
+  | Sexp (rd, Sop (op, vals), args, None) :: k ->
+      let args = get_arguments vn vals args in
+      let inst = Iop (op, args, rd, P.of_int next_node) in
+      write_inst target_node inst code' new_order;
+      write_tree vn k initial next_node code' new_order false
+  | [ Snop succ ] ->
+      let inst = Inop succ in
+      write_inst target_node inst code' new_order
+  | [ Sexp (rd, Sop (op, vals), args, Some succ) ] ->
+      let args = get_arguments vn vals args in
+      let inst = Iop (op, args, rd, succ) in
+      write_inst target_node inst code' new_order
+  | [ Sexp (rd, Smove, args, Some succ) ] ->
+      let inst = Iop (Omove, args, rd, succ) in
+      write_inst target_node inst code' new_order
+  | [ Sfinalcond (cond, args, succ1, succ2, info) ] ->
+      let inst = Icond (cond, args, succ1, succ2, info) in
+      write_inst target_node inst code' new_order
+  | [] -> ()
+  | _ -> failwith "write_tree: invalid list"
+
+(** Main expansion function - TODO gourdinl to split? *)
+let expanse (sb : superblock) code pm =
+  debug "#### New superblock for expansion oracle\n";
+  let new_order = ref [] in
+  let liveins = ref sb.liveins in
+  let exp = ref [] in
+  let was_branch = ref false in
+  let was_exp = ref false in
+  let code' = ref code in
+  let pm' = ref pm in
+  let vn = ref (empty_numbering ()) in
+  Array.iter
+    (fun n ->
+      was_branch := false;
+      was_exp := false;
+      let inst = get_some @@ PTree.get n code in
+      (if !Clflags.option_fexpanse_rtlcond then
+       match inst with
+       (* Expansion of conditions - Ocmp *)
+       | Iop (Ocmp (Ccomp c), a1 :: a2 :: nil, dest, succ) ->
+           debug "Iop/Ccomp\n";
+           exp := cond_int32s vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompu c), a1 :: a2 :: nil, dest, succ) ->
+           debug "Iop/Ccompu\n";
+           exp := cond_int32u vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompimm (c, imm)), a1 :: nil, dest, succ) ->
+           debug "Iop/Ccompimm\n";
+           exp := expanse_condimm_int32s vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompuimm (c, imm)), a1 :: nil, dest, succ) ->
+           debug "Iop/Ccompuimm\n";
+           exp := expanse_condimm_int32u vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompl c), a1 :: a2 :: nil, dest, succ) ->
+           debug "Iop/Ccompl\n";
+           exp := cond_int64s vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccomplu c), a1 :: a2 :: nil, dest, succ) ->
+           debug "Iop/Ccomplu\n";
+           exp := cond_int64u vn false c a1 a2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccomplimm (c, imm)), a1 :: nil, dest, succ) ->
+           debug "Iop/Ccomplimm\n";
+           exp := expanse_condimm_int64s vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompluimm (c, imm)), a1 :: nil, dest, succ) ->
+           debug "Iop/Ccompluimm\n";
+           exp := expanse_condimm_int64u vn c a1 imm dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompf c), f1 :: f2 :: nil, dest, succ) ->
+           debug "Iop/Ccompf\n";
+           exp := expanse_cond_fp vn false cond_float c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Cnotcompf c), f1 :: f2 :: nil, dest, succ) ->
+           debug "Iop/Cnotcompf\n";
+           exp := expanse_cond_fp vn true cond_float c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Ccompfs c), f1 :: f2 :: nil, dest, succ) ->
+           debug "Iop/Ccompfs\n";
+           exp := expanse_cond_fp vn false cond_single c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocmp (Cnotcompfs c), f1 :: f2 :: nil, dest, succ) ->
+           debug "Iop/Cnotcompfs\n";
+           exp := expanse_cond_fp vn true cond_single c f1 f2 dest;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       (* Expansion of branches - Ccomp *)
+       | Icond (Ccomp c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccomp\n";
+           exp := cbranch_int32s false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompu c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompu\n";
+           exp := cbranch_int32u false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompimm\n";
+           exp := expanse_cbranchimm_int32s vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompuimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompuimm\n";
+           exp := expanse_cbranchimm_int32u vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompl c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompl\n";
+           exp := cbranch_int64s false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccomplu c, a1 :: a2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccomplu\n";
+           exp := cbranch_int64u false c a1 a2 info succ1 succ2 [];
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccomplimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccomplimm\n";
+           exp := expanse_cbranchimm_int64s vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompluimm (c, imm), a1 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompluimm\n";
+           exp := expanse_cbranchimm_int64u vn c a1 imm info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompf c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompf\n";
+           exp :=
+             expanse_cbranch_fp vn false cond_float c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Cnotcompf c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Cnotcompf\n";
+           exp := expanse_cbranch_fp vn true cond_float c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Ccompfs c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Ccompfs\n";
+           exp :=
+             expanse_cbranch_fp vn false cond_single c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | Icond (Cnotcompfs c, f1 :: f2 :: nil, succ1, succ2, info) ->
+           debug "Icond/Cnotcompfs\n";
+           exp :=
+             expanse_cbranch_fp vn true cond_single c f1 f2 info succ1 succ2;
+           was_branch := true;
+           was_exp := true
+       | _ -> ());
+      (if !Clflags.option_fexpanse_others && not !was_exp then
+       match inst with
+       | Iop (Ofloatconst f, nil, dest, succ) -> (
+           match make_immed64 (Floats.Float.to_bits f) with
+           | Imm64_single _ | Imm64_large _ -> ()
+           | Imm64_pair (hi, lo) ->
+               debug "Iop/Ofloatconst\n";
+               let r = r2pi () in
+               let l = load_hilo64 vn r hi lo in
+               let r', l' = extract_arg l in
+               exp := addinst vn Ofloat_of_bits [ r' ] dest :: l';
+               exp := extract_final vn !exp dest succ;
+               was_exp := true)
+       | Iop (Osingleconst f, nil, dest, succ) -> (
+           match make_immed32 (Floats.Float32.to_bits f) with
+           | Imm32_single imm -> ()
+           | Imm32_pair (hi, lo) ->
+               debug "Iop/Osingleconst\n";
+               let r = r2pi () in
+               let l = load_hilo32 vn r hi lo in
+               let r', l' = extract_arg l in
+               exp := addinst vn Osingle_of_bits [ r' ] dest :: l';
+               exp := extract_final vn !exp dest succ;
+               was_exp := true)
+       | Iop (Ointconst n, nil, dest, succ) ->
+           debug "Iop/Ointconst\n";
+           exp := loadimm32 vn dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Olongconst n, nil, dest, succ) ->
+           debug "Iop/Olongconst\n";
+           exp := loadimm64 vn dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oaddimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oaddimm\n";
+           exp := addimm32 vn a1 dest n None;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oaddlimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oaddlimm\n";
+           exp := addimm64 vn a1 dest n None;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oandimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oandimm\n";
+           exp := andimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oandlimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oandlimm\n";
+           exp := andimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oorimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oorimm\n";
+           exp := orimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oorlimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oorlimm\n";
+           exp := orimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oxorimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oxorimm\n";
+           exp := xorimm32 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oxorlimm n, a1 :: nil, dest, succ) ->
+           debug "Iop/Oxorlimm\n";
+           exp := xorimm64 vn a1 dest n;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast8signed, a1 :: nil, dest, succ) ->
+           debug "Iop/cast8signed\n";
+           let op = Oshlimm (Int.repr (Z.of_sint 24)) in
+           let r = r2pi () in
+           let i1 = addinst vn op [ a1 ] r in
+           let r', l = extract_arg [ i1 ] in
+           exp :=
+             addinst vn (Oshrimm (Int.repr (Z.of_sint 24))) [ r' ] dest :: l;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast16signed, a1 :: nil, dest, succ) ->
+           debug "Iop/cast16signed\n";
+           let op = Oshlimm (Int.repr (Z.of_sint 16)) in
+           let r = r2pi () in
+           let i1 = addinst vn op [ a1 ] r in
+           let r', l = extract_arg [ i1 ] in
+           exp :=
+             addinst vn (Oshrimm (Int.repr (Z.of_sint 16))) [ r' ] dest :: l;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Ocast32unsigned, a1 :: nil, dest, succ) ->
+           debug "Iop/Ocast32unsigned\n";
+           let r1 = r2pi () in
+           let r2 = r2pi () in
+           let op1 = Ocast32signed in
+           let i1 = addinst vn op1 [ a1 ] r1 in
+           let r1', l1 = extract_arg [ i1 ] in
+
+           let op2 = Oshllimm (Int.repr (Z.of_sint 32)) in
+           let i2 = addinst vn op2 [ r1' ] r2 in
+           let r2', l2 = extract_arg (i2 :: l1) in
+
+           let op3 = Oshrluimm (Int.repr (Z.of_sint 32)) in
+           exp := addinst vn op3 [ r2' ] dest :: l2;
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oshrximm n, a1 :: nil, dest, succ) ->
+           if Int.eq n Int.zero then (
+             debug "Iop/Oshrximm1\n";
+             exp := [ addinst vn (OEmayundef (MUshrx n)) [ a1; a1 ] dest ])
+           else if Int.eq n Int.one then (
+             debug "Iop/Oshrximm2\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let op1 = Oshruimm (Int.repr (Z.of_sint 31)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oadd in
+             let i2 = addinst vn op2 [ a1; r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oshrimm Int.one in
+             let i3 = addinst vn op3 [ r2' ] dest in
+             let r3, l3 = extract_arg (i3 :: l2) in
+             exp := addinst vn (OEmayundef (MUshrx n)) [ r3; r3 ] dest :: l3)
+           else (
+             debug "Iop/Oshrximm3\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let r3 = r2pi () in
+             let op1 = Oshrimm (Int.repr (Z.of_sint 31)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oshruimm (Int.sub Int.iwordsize n) in
+             let i2 = addinst vn op2 [ r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oadd in
+             let i3 = addinst vn op3 [ a1; r2' ] r3 in
+             let r3', l3 = extract_arg (i3 :: l2) in
+
+             let op4 = Oshrimm n in
+             let i4 = addinst vn op4 [ r3' ] dest in
+             let r4, l4 = extract_arg (i4 :: l3) in
+             exp := addinst vn (OEmayundef (MUshrx n)) [ r4; r4 ] dest :: l4);
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | Iop (Oshrxlimm n, a1 :: nil, dest, succ) ->
+           if Int.eq n Int.zero then (
+             debug "Iop/Oshrxlimm1\n";
+             exp := [ addinst vn (OEmayundef (MUshrxl n)) [ a1; a1 ] dest ])
+           else if Int.eq n Int.one then (
+             debug "Iop/Oshrxlimm2\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let op1 = Oshrluimm (Int.repr (Z.of_sint 63)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oaddl in
+             let i2 = addinst vn op2 [ a1; r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oshrlimm Int.one in
+             let i3 = addinst vn op3 [ r2' ] dest in
+             let r3, l3 = extract_arg (i3 :: l2) in
+             exp := addinst vn (OEmayundef (MUshrxl n)) [ r3; r3 ] dest :: l3)
+           else (
+             debug "Iop/Oshrxlimm3\n";
+             let r1 = r2pi () in
+             let r2 = r2pi () in
+             let r3 = r2pi () in
+             let op1 = Oshrlimm (Int.repr (Z.of_sint 63)) in
+             let i1 = addinst vn op1 [ a1 ] r1 in
+             let r1', l1 = extract_arg [ i1 ] in
+
+             let op2 = Oshrluimm (Int.sub Int64.iwordsize' n) in
+             let i2 = addinst vn op2 [ r1' ] r2 in
+             let r2', l2 = extract_arg (i2 :: l1) in
+
+             let op3 = Oaddl in
+             let i3 = addinst vn op3 [ a1; r2' ] r3 in
+             let r3', l3 = extract_arg (i3 :: l2) in
+
+             let op4 = Oshrlimm n in
+             let i4 = addinst vn op4 [ r3' ] dest in
+             let r4, l4 = extract_arg (i4 :: l3) in
+             exp := addinst vn (OEmayundef (MUshrxl n)) [ r4; r4 ] dest :: l4);
+           exp := extract_final vn !exp dest succ;
+           was_exp := true
+       | _ -> ());
+      (* Update the CSE numbering *)
+      (if not !was_exp then
+       match inst with
+       | Iop (op, args, dest, succ) ->
+           let v = get_nvalues vn args in
+           addsop vn v op dest
+       | Iload (_, _, _, _, dst, _) -> set_unknown vn dst
+       | Istore (chk, addr, args, src, s) ->
+           !vn.seqs <- kill_mem_operations !vn.seqs
+       | Icall (_, _, _, _, _) | Itailcall (_, _, _) | Ibuiltin (_, _, _, _) ->
+           vn := empty_numbering ()
+       | _ -> ());
+      (* Update code, liveins, pathmap, and order of the superblock for one expansion *)
+      if !was_exp then (
+        (if !was_branch && List.length !exp > 1 then
+         let lives = PTree.get n !liveins in
+         match lives with
+         | Some lives ->
+             let new_branch_pc = P.of_int (!node + 1) in
+             liveins := PTree.set new_branch_pc lives !liveins;
+             liveins := PTree.remove n !liveins
+         | _ -> ());
+        node := !node + List.length !exp - 1;
+        write_pathmap sb.instructions.(0) (List.length !exp - 1) pm';
+        write_tree vn (List.rev !exp) n !node code' new_order true)
+      else new_order := n :: !new_order)
+    sb.instructions;
+  sb.instructions <- Array.of_list (List.rev !new_order);
+  sb.liveins <- !liveins;
+  (!code', !pm')
+
+(** Compute the last used node and reg indexs *)
+
+let rec find_last_node_reg = function
+  | [] -> ()
+  | (pc, i) :: k ->
+      let rec traverse_list var = function
+        | [] -> ()
+        | e :: t ->
+            let e' = p2i e in
+            if e' > !var then var := e';
+            traverse_list var t
+      in
+      traverse_list node [ pc ];
+      traverse_list reg (get_regs_inst i);
+      find_last_node_reg k
diff --git a/verilog/ExtValues.v b/verilog/ExtValues.v
new file mode 100644
index 00000000..edf359ef
--- /dev/null
+++ b/verilog/ExtValues.v
@@ -0,0 +1,123 @@
+Require Import Coqlib.
+Require Import Integers.
+Require Import Values.
+Require Import Floats.
+Require Import Memory.
+Require Import Lia.
+    
+Definition bits_of_float x :=
+  match x with
+  | Vfloat f => Vlong (Float.to_bits f)
+  | _ => Vundef
+  end.
+
+Definition bits_of_single x :=
+  match x with
+  | Vsingle f => Vint (Float32.to_bits f)
+  | _ => Vundef
+  end.
+
+Definition float_of_bits x :=
+  match x with
+  | Vlong f => Vfloat (Float.of_bits f)
+  | _ => Vundef
+  end.
+
+Definition single_of_bits x :=
+  match x with
+  | Vint f => Vsingle (Float32.of_bits f)
+  | _ => Vundef
+  end.
+
+Definition bitwise_select_long b vtrue vfalse :=
+  Int64.or (Int64.and (Int64.neg b) vtrue)
+           (Int64.and (Int64.sub b Int64.one) vfalse).
+
+Lemma bitwise_select_long_true :
+  forall vtrue vfalse,
+    bitwise_select_long Int64.one vtrue vfalse = vtrue.
+Proof.
+  intros. unfold bitwise_select_long. cbn.
+  change (Int64.neg Int64.one) with Int64.mone.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_mone.
+  rewrite Int64.sub_idem.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_zero.
+  apply Int64.or_zero.
+Qed.
+
+Lemma bitwise_select_long_false :
+  forall vtrue vfalse,
+    bitwise_select_long Int64.zero vtrue vfalse = vfalse.
+Proof.
+  intros. unfold bitwise_select_long. cbn.
+  rewrite Int64.neg_zero.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_zero.
+  rewrite Int64.sub_zero_r.
+  change (Int64.neg Int64.one) with Int64.mone.
+  rewrite Int64.and_commut.
+  rewrite Int64.and_mone.
+  rewrite Int64.or_commut.
+  apply Int64.or_zero.
+Qed.
+
+Definition select01_long (vb : val) (vtrue : val) (vfalse : val) : val :=
+  match vb with
+  | (Vint b) =>
+    if Int.eq b Int.one
+    then vtrue
+    else if Int.eq b Int.zero
+         then vfalse
+         else Vundef
+  | _ => Vundef
+  end.
+
+Lemma normalize_select01:
+  forall x y z, Val.normalize (select01_long x y z) AST.Tlong = select01_long x (Val.normalize y AST.Tlong) (Val.normalize z AST.Tlong).
+Proof.
+  unfold select01_long.
+  intros.
+  destruct x; cbn; trivial.
+  destruct (Int.eq i Int.one); trivial.
+  destruct (Int.eq i Int.zero); trivial.
+Qed.
+
+Lemma select01_long_true:
+  forall vt vf,
+    select01_long Vtrue vt vf = vt.
+Proof.
+  intros. unfold select01_long. cbn.
+  rewrite Int.eq_true. reflexivity.
+Qed.
+
+Lemma select01_long_false:
+  forall vt vf,
+    select01_long Vfalse vt vf = vf.
+Proof.
+  intros. unfold select01_long. cbn.
+  rewrite Int.eq_true.
+  rewrite Int.eq_false. reflexivity.
+  cbv. discriminate.
+Qed.
+
+Lemma float_bits_normalize:
+  forall v1,
+  ExtValues.float_of_bits (Val.normalize (ExtValues.bits_of_float v1) AST.Tlong) =
+  Val.normalize v1 AST.Tfloat.
+Proof.
+  destruct v1; cbn; trivial.
+  f_equal.
+  apply Float.of_to_bits.
+Qed.
+
+Lemma single_bits_normalize:
+  forall v1,
+  ExtValues.single_of_bits (Val.normalize (ExtValues.bits_of_single v1) AST.Tint) =
+  Val.normalize v1 AST.Tsingle.
+Proof.
+  destruct v1; cbn; trivial.
+  f_equal.
+  apply Float32.of_to_bits.
+Qed.
diff --git a/verilog/Machregs.v b/verilog/Machregs.v
index 6f3064b8..d469e594 100644
--- a/verilog/Machregs.v
+++ b/verilog/Machregs.v
@@ -3,11 +3,16 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 Require Import String.
@@ -22,33 +27,49 @@ Require Import Op.
 
 (** The following type defines the machine registers that can be referenced
   as locations.  These include:
-- Integer registers that can be allocated to RTL pseudo-registers.
-- Floating-point registers that can be allocated to RTL pseudo-registers.
-- The special [FP0] register denoting the top of the X87 float stack.
+- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
+- Floating-point registers that can be allocated to RTL pseudo-registers
+  ([Fxx]).
 
-  The type [mreg] does not include special-purpose or reserved
-  machine registers such as the stack pointer and the condition codes. *)
+  The type [mreg] does not include reserved machine registers such as
+  the zero register (x0), the link register (x1), the stack pointer
+  (x2), the global pointer (x3), and the thread pointer (x4).
+  Finally, register x31 is reserved for use as a temporary by the
+  assembly-code generator [Asmgen].
+*)
 
 Inductive mreg: Type :=
-  (** Allocatable integer regs *)
-  | AX | BX | CX | DX | SI | DI | BP
-  | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15  (**r only in 64-bit mode *)
-  (** Allocatable float regs *)
-  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7
-  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15  (**r only in 64-bit mode *)
-  (** Special float reg *)
-  | FP0.
+ (** Allocatable integer regs. *)
+              | R5:  mreg | R6:  mreg | R7:  mreg
+  | R8:  mreg | R9:  mreg | R10: mreg | R11: mreg
+  | R12: mreg | R13: mreg | R14: mreg | R15: mreg
+  | R16: mreg | R17: mreg | R18: mreg | R19: mreg
+  | R20: mreg | R21: mreg | R22: mreg | R23: mreg
+  | R24: mreg | R25: mreg | R26: mreg | R27: mreg
+  | R28: mreg | R29: mreg | R30: mreg
+  (** Allocatable double-precision float regs *)
+  | F0:  mreg | F1:  mreg | F2:  mreg | F3:  mreg
+  | F4:  mreg | F5:  mreg | F6:  mreg | F7:  mreg
+  | F8:  mreg | F9:  mreg | F10: mreg | F11: mreg
+  | F12: mreg | F13: mreg | F14: mreg | F15: mreg
+  | F16: mreg | F17: mreg | F18: mreg | F19: mreg
+  | F20: mreg | F21: mreg | F22: mreg | F23: mreg
+  | F24: mreg | F25: mreg | F26: mreg | F27: mreg
+  | F28: mreg | F29: mreg | F30: mreg | F31: mreg.
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Defined.
 Global Opaque mreg_eq.
 
 Definition all_mregs :=
-     AX :: BX :: CX :: DX :: SI :: DI :: BP
-  :: R8 :: R9 :: R10 :: R11 :: R12 :: R13 :: R14 :: R15
-  :: X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7
-  :: X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15
-  :: FP0 :: nil.
+    R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: R13 :: R14 :: R15
+ :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: R23
+ :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30
+ :: F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7
+ :: F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: F14 :: F15
+ :: F16 :: F17 :: F18 :: F19 :: F20 :: F21 :: F22 :: F23
+ :: F24 :: F25 :: F26 :: F27 :: F28 :: F29 :: F30 :: F31
+ :: nil.
 
 Lemma all_mregs_complete:
   forall (r: mreg), In r all_mregs.
@@ -58,7 +79,7 @@ Proof.
 Qed.
 
 Instance Decidable_eq_mreg : forall (x y: mreg), Decidable (eq x y) := Decidable_eq mreg_eq.
-
+  
 Instance Finite_mreg : Finite mreg := {
   Finite_elements := all_mregs;
   Finite_elements_spec := all_mregs_complete
@@ -66,25 +87,40 @@ Instance Finite_mreg : Finite mreg := {
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | AX | BX | CX | DX | SI | DI | BP => if Archi.ptr64 then Tany64 else Tany32
-  | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 => Tany64
-  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 => Tany64
-  | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => Tany64
-  | FP0 => Tany64
+        | R5  | R6  | R7  | R8  | R9  | R10 | R11
+  | R12 | R13 | R14 | R15 | R16 | R17 | R18 | R19
+  | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27
+  | R28 | R29 | R30 => if Archi.ptr64 then Tany64 else Tany32
+
+  | F0  | F1  | F2  | F3  | F4  | F5  | F6  | F7
+  | F8  | F9  | F10 | F11 | F12 | F13 | F14 | F15
+  | F16 | F17 | F18 | F19 | F20 | F21 | F22 | F23
+  | F24 | F25 | F26 | F27 | F28 | F29 | F30 | F31 => Tany64
   end.
 
-Local Open Scope positive_scope.
+Open Scope positive_scope.
 
 Module IndexedMreg <: INDEXED_TYPE.
   Definition t := mreg.
   Definition eq := mreg_eq.
   Definition index (r: mreg): positive :=
     match r with
-    | AX => 1 | BX => 2 | CX => 3 | DX => 4 | SI => 5 | DI => 6 | BP => 7
-    | R8 => 8 | R9 => 9 | R10 => 10 | R11 => 11 | R12 => 12 | R13 => 13 | R14 => 14 | R15 => 15
-    | X0 => 16 | X1 => 17 | X2 => 18 | X3 => 19 | X4 => 20 | X5 => 21 | X6 => 22 | X7 => 23
-    | X8 => 24 | X9 => 25 | X10 => 26 | X11 => 27 | X12 => 28 | X13 => 29 | X14 => 30 | X15 => 31
-    | FP0 => 32
+                | R5  =>  1 | R6  =>  2 | R7  =>  3
+    | R8  =>  4 | R9  =>  5 | R10 =>  6 | R11 =>  7
+    | R12 =>  8 | R13 =>  9 | R14 => 10 | R15 => 11
+    | R16 => 12 | R17 => 13 | R18 => 14 | R19 => 15
+    | R20 => 16 | R21 => 17 | R22 => 18 | R23 => 19
+    | R24 => 20 | R25 => 21 | R26 => 22 | R27 => 23
+    | R28 => 24 | R29 => 25 | R30 => 26
+
+    | F0  => 28 | F1  => 29 | F2  => 30 | F3  => 31
+    | F4  => 32 | F5  => 33 | F6  => 34 | F7  => 35
+    | F8  => 36 | F9  => 37 | F10 => 38 | F11 => 39
+    | F12 => 40 | F13 => 41 | F14 => 42 | F15 => 43
+    | F16 => 44 | F17 => 45 | F18 => 46 | F19 => 47
+    | F20 => 48 | F21 => 49 | F22 => 50 | F23 => 51
+    | F24 => 52 | F25 => 53 | F26 => 54 | F27 => 55
+    | F28 => 56 | F29 => 57 | F30 => 58 | F31 => 59
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -93,25 +129,30 @@ Module IndexedMreg <: INDEXED_TYPE.
   Qed.
 End IndexedMreg.
 
-Definition is_stack_reg (r: mreg) : bool :=
-  match r with FP0 => true | _ => false end.
+Definition is_stack_reg (r: mreg) : bool := false.
 
 (** ** Names of registers *)
 
 Local Open Scope string_scope.
 
 Definition register_names :=
-  ("RAX", AX) :: ("RBX", BX) :: ("RCX", CX) :: ("RDX", DX) ::
-  ("RSI", SI) :: ("RDI", DI) :: ("RBP", BP) ::
-  ("EAX", AX) :: ("EBX", BX) :: ("ECX", CX) :: ("EDX", DX) ::
-  ("ESI", SI) :: ("EDI", DI) :: ("EBP", BP) ::
-  ("R8", R8) :: ("R9", R9) :: ("R10", R10) :: ("R11", R11) ::
-  ("R12", R12) :: ("R13", R13) :: ("R14", R14) :: ("R15", R15) ::
-  ("XMM0", X0) :: ("XMM1", X1) :: ("XMM2", X2) :: ("XMM3", X3) ::
-  ("XMM4", X4) :: ("XMM5", X5) :: ("XMM6", X6) :: ("XMM7", X7) ::
-  ("XMM8", X8) :: ("XMM9", X9) :: ("XMM10", X10) :: ("XMM11", X11) ::
-  ("XMM12", X12) :: ("XMM13", X13) :: ("XMM14", X14) :: ("XMM15", X15) ::
-  ("ST0", FP0) :: nil.
+                  ("X5",  R5)  :: ("X6",  R6)  :: ("X7",  R7)  ::
+  ("X8",  R8)  :: ("X9",  R9)  :: ("X10", R10) :: ("X11", R11) ::
+  ("X12", R12) :: ("X13", R13) :: ("X14", R14) :: ("X15", R15) ::
+  ("X16", R16) :: ("X17", R17) :: ("X18", R18) :: ("X19", R19) ::
+  ("X20", R20) :: ("X21", R21) :: ("X22", R22) :: ("X23", R23) ::
+  ("X24", R24) :: ("X25", R25) :: ("X26", R26) :: ("X27", R27) ::
+  ("X28", R28) :: ("X29", R29) :: ("X30", R30) ::
+
+  ("F0",  F0)  :: ("F1",  F1)  :: ("F2",  F2)  :: ("F3",  F3)  ::
+  ("F4",  F4)  :: ("F5",  F5)  :: ("F6",  F6)  :: ("F7",  F7)  ::
+  ("F8",  F8)  :: ("F9",  F9)  :: ("F10", F10) :: ("F11", F11) ::
+  ("F12", F12) :: ("F13", F13) :: ("F14", F14) :: ("F15", F15) ::
+  ("F16", F16) :: ("F17", F17) :: ("F18", F18) :: ("F19", F19) ::
+  ("F20", F20) :: ("F21", F21) :: ("F22", F22) :: ("F23", F23) ::
+  ("F24", F24) :: ("F25", F25) :: ("F26", F26) :: ("F27", F27) ::
+  ("F28", F28) :: ("F29", F29) :: ("F30", F30) :: ("F31", F31) ::
+  nil.
 
 Definition register_by_name (s: string) : option mreg :=
   let fix assoc (l: list (string * mreg)) : option mreg :=
@@ -125,39 +166,19 @@ Definition register_by_name (s: string) : option mreg :=
 
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
-  | Ocast8signed | Ocast8unsigned => AX :: nil
-  | Omulhs => AX :: DX :: nil
-  | Omulhu => AX :: DX :: nil
-  | Odiv => AX :: DX :: nil
-  | Odivu => AX :: DX :: nil
-  | Omod => AX :: DX :: nil
-  | Omodu => AX :: DX :: nil
-  | Oshrximm _ => CX :: nil
-  | Omullhs => AX :: DX :: nil
-  | Omullhu => AX :: DX :: nil
-  | Odivl => AX :: DX :: nil
-  | Odivlu => AX :: DX :: nil
-  | Omodl => AX :: DX :: nil
-  | Omodlu => AX :: DX :: nil
-  | Oshrxlimm _ => DX :: nil
-  | Ocmp _ => AX :: CX :: nil
+  | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle
+  | Olongoffloat | Olonguoffloat | Olongofsingle | Olonguofsingle
+      => F6 :: nil
   | _ => nil
   end.
 
-Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
-  nil.
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := nil.
 
-Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
-  match chunk with
-  | Mint8signed | Mint8unsigned => if Archi.ptr64 then nil else AX :: CX :: nil
-  | _ => nil
-  end.
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil.
 
-Definition destroyed_by_cond (cond: condition): list mreg :=
-  nil.
+Definition destroyed_by_cond (cond: condition): list mreg := nil.
 
-Definition destroyed_by_jumptable: list mreg :=
-  AX :: DX :: nil.
+Definition destroyed_by_jumptable: list mreg := R5 :: nil.
 
 Fixpoint destroyed_by_clobber (cl: list string): list mreg :=
   match cl with
@@ -171,80 +192,56 @@ Fixpoint destroyed_by_clobber (cl: list string): list mreg :=
 
 Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
-  | EF_memcpy sz al =>
-      if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil
-  | EF_vstore (Mint8unsigned|Mint8signed) =>
-      if Archi.ptr64 then nil else AX :: CX :: nil
-  | EF_builtin name sg =>
-      if string_dec name "__builtin_va_start" then AX :: nil
-      else if string_dec name "__builtin_write16_reversed"
-           || string_dec name "__builtin_write32_reversed"
-      then CX :: DX :: nil
-      else nil
   | EF_inline_asm txt sg clob => destroyed_by_clobber clob
+  | EF_memcpy sz al => R5 :: R6 :: R7 :: F0 :: nil
+  | EF_builtin name sg =>
+      if string_dec name "__builtin_clz"
+      || string_dec name "__builtin_clzl"
+      || string_dec name "__builtin_clzll" then
+        R5 :: R8 :: R9 :: nil
+      else if string_dec name "__builtin_ctz"
+      || string_dec name "__builtin_ctzl"
+      || string_dec name "__builtin_ctzll" then
+        R6 :: R8 :: R9 :: nil
+      else
+        nil
   | _ => nil
   end.
 
-Definition destroyed_at_function_entry: list mreg :=
-  (* must include [destroyed_by_setstack ty] *)
-  AX :: FP0 :: nil.
+Definition destroyed_by_setstack (ty: typ): list mreg := nil.
 
-Definition destroyed_by_setstack (ty: typ): list mreg :=
-  match ty with
-  | Tfloat | Tsingle => FP0 :: nil
-  | _ => nil
-  end.
+Definition destroyed_at_function_entry: list mreg := R30 :: nil.
 
-Definition destroyed_at_indirect_call: list mreg :=
-  AX :: nil.
+Definition temp_for_parent_frame: mreg := R30.
 
-Definition temp_for_parent_frame: mreg :=
-  AX.
+Definition destroyed_at_indirect_call: list mreg :=
+  R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil.
 
-Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
-  match op with
-  | Omulhs => (Some AX :: None :: nil, Some DX)
-  | Omulhu => (Some AX :: None :: nil, Some DX)
-  | Odiv => (Some AX :: Some CX :: nil, Some AX)
-  | Odivu => (Some AX :: Some CX :: nil, Some AX)
-  | Omod => (Some AX :: Some CX :: nil, Some DX)
-  | Omodu => (Some AX :: Some CX :: nil, Some DX)
-  | Oshl => (None :: Some CX :: nil, None)
-  | Oshr => (None :: Some CX :: nil, None)
-  | Oshru => (None :: Some CX :: nil, None)
-  | Oshrximm _ => (Some AX :: nil, Some AX)
-  | Omullhs => (Some AX :: None :: nil, Some DX)
-  | Omullhu => (Some AX :: None :: nil, Some DX)
-  | Odivl => (Some AX :: Some CX :: nil, Some AX)
-  | Odivlu => (Some AX :: Some CX :: nil, Some AX)
-  | Omodl => (Some AX :: Some CX :: nil, Some DX)
-  | Omodlu => (Some AX :: Some CX :: nil, Some DX)
-  | Oshll => (None :: Some CX :: nil, None)
-  | Oshrl => (None :: Some CX :: nil, None)
-  | Oshrlu => (None :: Some CX :: nil, None)
-  | Oshrxlimm _ => (Some AX :: nil, Some AX)
-  | _ => (nil, None)
-  end.
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := (nil, None).
 
-Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) :=
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) :=
   match ef with
-  | EF_memcpy sz al =>
-     if zle sz 32 then (Some AX :: Some DX :: nil, nil) else (Some DI :: Some SI :: nil, nil)
   | EF_builtin name sg =>
-     if string_dec name "__builtin_negl" then
-       (Some DX :: Some AX :: nil, Some DX :: Some AX :: nil)
-     else if string_dec name "__builtin_addl"
-          || string_dec name "__builtin_subl" then
-       (Some DX :: Some AX :: Some CX :: Some BX :: nil, Some DX :: Some AX :: nil)
-     else if string_dec name "__builtin_mull" then
-       (Some AX :: Some DX :: nil, Some DX :: Some AX :: nil)
-     else if string_dec name "__builtin_va_start" then
-       (Some DX :: nil, nil)
-     else if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then
-       (Some AX :: Some DX :: nil, Some DX :: Some AX :: nil)
-     else
-       (nil, nil)
-  | _ => (nil, nil)
+      if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then
+        (Some R6 :: Some R5 :: nil, Some R5 :: Some R6 :: nil)
+      else if string_dec name "__builtin_clz"
+           || string_dec name "__builtin_clzl" then
+        (Some R5 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_clzll" then
+        if Archi.ptr64
+        then (Some R5 :: nil, Some R7 :: nil)
+        else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_ctz"
+           || string_dec name "__builtin_ctzl" then
+        (Some R6 :: nil, Some R7 :: nil)
+      else if string_dec name "__builtin_ctzll" then
+        if Archi.ptr64
+        then (Some R6 :: nil, Some R7 :: nil)
+        else (Some R6 :: Some R5 :: nil, Some R7 :: nil)
+      else
+        (nil, nil)
+  | _ =>
+    (nil, nil)
   end.
 
 Global Opaque
@@ -255,110 +252,23 @@ Global Opaque
 
 (** Two-address operations.  Return [true] if the first argument and
   the result must be in the same location *and* are unconstrained
-  by [mregs_for_operation]. *)
+  by [mregs_for_operation].  There are two: the pseudo [Ocast32signed],
+  because it expands to a no-op owing to the representation of 32-bit
+  integers as their 64-bit sign extension; and [Ocast32unsigned],
+  because it builds on the same magic no-op. *)
 
 Definition two_address_op (op: operation) : bool :=
   match op with
-  | Omove => false
-  | Ointconst _ => false
-  | Olongconst _ => false
-  | Ofloatconst _ => false
-  | Osingleconst _ => false
-  | Oindirectsymbol _ => false
-  | Ocast8signed => false
-  | Ocast8unsigned => false
-  | Ocast16signed => false
-  | Ocast16unsigned => false
-  | Oneg => true
-  | Osub => true
-  | Omul => true
-  | Omulimm _ => true
-  | Omulhs => false
-  | Omulhu => false
-  | Odiv => false
-  | Odivu => false
-  | Omod => false
-  | Omodu => false
-  | Oand => true
-  | Oandimm _ => true
-  | Oor => true
-  | Oorimm _ => true
-  | Oxor => true
-  | Oxorimm _ => true
-  | Onot => true
-  | Oshl => true
-  | Oshlimm _ => true
-  | Oshr => true
-  | Oshrimm _ => true
-  | Oshrximm _ => false
-  | Oshru => true
-  | Oshruimm _ => true
-  | Ororimm _ => true
-  | Oshldimm _ => true
-  | Olea addr => false
-  | Omakelong => true
-  | Olowlong => true
-  | Ohighlong => true
-  | Ocast32signed => false
-  | Ocast32unsigned => false
-  | Onegl => true
-  | Oaddlimm _ => true
-  | Osubl => true
-  | Omull => true
-  | Omullimm _ => true
-  | Omullhs => false
-  | Omullhu => false
-  | Odivl => false
-  | Odivlu => false
-  | Omodl => false
-  | Omodlu => false
-  | Oandl => true
-  | Oandlimm _ => true
-  | Oorl => true
-  | Oorlimm _ => true
-  | Oxorl => true
-  | Oxorlimm _ => true
-  | Onotl => true
-  | Oshll => true
-  | Oshllimm _ => true
-  | Oshrl => true
-  | Oshrlimm _ => true
-  | Oshrxlimm _ => false
-  | Oshrlu => true
-  | Oshrluimm _ => true
-  | Ororlimm _ => true
-  | Oleal addr => false
-  | Onegf => true
-  | Oabsf => true
-  | Oaddf => true
-  | Osubf => true
-  | Omulf => true
-  | Odivf => true
-  | Onegfs => true
-  | Oabsfs => true
-  | Oaddfs => true
-  | Osubfs => true
-  | Omulfs => true
-  | Odivfs => true
-  | Osingleoffloat => false
-  | Ofloatofsingle => false
-  | Ointoffloat => false
-  | Ofloatofint => false
-  | Ointofsingle => false
-  | Osingleofint => false
-  | Olongoffloat => false
-  | Ofloatoflong => false
-  | Olongofsingle => false
-  | Osingleoflong => false
-  | Ocmp c => false
-  | Osel c op => true
+  | Ocast32signed | Ocast32unsigned => true
+  | _ => false
   end.
 
-(* Constraints on constant propagation for builtins *)
+(** Constraints on constant propagation for builtins *)
 
 Definition builtin_constraints (ef: external_function) :
                                        list builtin_arg_constraint :=
   match ef with
+  | EF_builtin id sg => nil
   | EF_vload _ => OK_addressing :: nil
   | EF_vstore _ => OK_addressing :: OK_default :: nil
   | EF_memcpy _ _ => OK_addrstack :: OK_addrstack :: nil
diff --git a/verilog/Machregsaux.ml b/verilog/Machregsaux.ml
index 46d08290..e3e47946 100644
--- a/verilog/Machregsaux.ml
+++ b/verilog/Machregsaux.ml
@@ -19,4 +19,4 @@ let class_of_type = function
   | AST.Tfloat | AST.Tsingle -> 1
   | AST.Tany32 | AST.Tany64 -> assert false
 
-let nr_regs = [| 29; 32 |]
+let nr_regs = [| 26; 32|]
diff --git a/verilog/Machregsaux.mli b/verilog/Machregsaux.mli
index 23ac1c9a..bb3777bf 100644
--- a/verilog/Machregsaux.mli
+++ b/verilog/Machregsaux.mli
@@ -15,6 +15,6 @@
 val is_scratch_register: string -> bool
 
 val class_of_type: AST.typ -> int
-
+  
 (* Number of registers in each class *)
 val nr_regs : int array
diff --git a/verilog/NeedOp.v b/verilog/NeedOp.v
index 775a23db..6041a34d 100644
--- a/verilog/NeedOp.v
+++ b/verilog/NeedOp.v
@@ -2,55 +2,31 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                 Xavier Leroy, INRIA Paris                           *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
-(** Neededness analysis for x86_64 operators *)
-
 Require Import Coqlib.
-Require Import AST Integers Floats Values Memory Globalenvs.
-Require Import Op NeedDomain RTL.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs.
+Require Import Op RTL.
+Require Import NeedDomain.
+
+(** Neededness analysis for RISC-V operators *)
 
 Definition op1 (nv: nval) := nv :: nil.
 Definition op2 (nv: nval) := nv :: nv :: nil.
 
-Definition needs_of_condition (cond: condition): list nval :=
-  match cond with
-  | Cmaskzero n | Cmasknotzero n => op1 (maskzero n)
-  | _ => nil
-  end.
-
-Definition needs_of_addressing_32 (addr: addressing) (nv: nval): list nval :=
-  match addr with
-  | Aindexed n => op1 (modarith nv)
-  | Aindexed2 n => op2 (modarith nv)
-  | Ascaled sc ofs => op1 (modarith (modarith nv))
-  | Aindexed2scaled sc ofs => op2 (modarith nv)
-  | Aglobal s ofs => nil
-  | Abased s ofs => op1 (modarith nv)
-  | Abasedscaled sc s ofs => op1 (modarith (modarith nv))
-  | Ainstack ofs => nil
-  end.
-
-Definition needs_of_addressing_64 (addr: addressing) (nv: nval): list nval :=
-  match addr with
-  | Aindexed n => op1 (default nv)
-  | Aindexed2 n => op2 (default nv)
-  | Ascaled sc ofs => op1 (default nv)
-  | Aindexed2scaled sc ofs => op2 (default nv)
-  | Aglobal s ofs => nil
-  | Abased s ofs => op1 (default nv)
-  | Abasedscaled sc s ofs => op1 (default nv)
-  | Ainstack ofs => nil
-  end.
-
-Definition needs_of_addressing (addr: addressing) (nv: nval): list nval :=
-  if Archi.ptr64 then needs_of_addressing_64 addr nv else needs_of_addressing_32 addr nv.
+Definition needs_of_condition (cond: condition): list nval := nil.
 
 Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   match op with
@@ -59,15 +35,15 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Olongconst n => nil
   | Ofloatconst n => nil
   | Osingleconst n => nil
-  | Oindirectsymbol id => nil
+  | Oaddrsymbol id ofs => nil
+  | Oaddrstack ofs => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
-  | Ocast8unsigned => op1 (zero_ext 8 nv)
   | Ocast16signed => op1 (sign_ext 16 nv)
-  | Ocast16unsigned => op1 (zero_ext 16 nv)
+  | Oadd => op2 (modarith nv)
+  | Oaddimm n => op1 (modarith nv)
   | Oneg => op1 (modarith nv)
   | Osub => op2 (default nv)
   | Omul => op2 (modarith nv)
-  | Omulimm n => op1 (modarith nv)
   | Omulhs | Omulhu | Odiv | Odivu | Omod | Omodu => op2 (default nv)
   | Oand => op2 (bitwise nv)
   | Oandimm n => op1 (andimm nv n)
@@ -75,60 +51,87 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Oorimm n => op1 (orimm nv n)
   | Oxor => op2 (bitwise nv)
   | Oxorimm n => op1 (bitwise nv)
-  | Onot => op1 (bitwise nv)
-  | Oshl => op2 (default nv)
+  | Oshl | Oshr | Oshru => op2 (default nv)
   | Oshlimm n => op1 (shlimm nv n)
-  | Oshr => op2 (default nv)
   | Oshrimm n => op1 (shrimm nv n)
-  | Oshrximm n => op1 (default nv)
-  | Oshru => op2 (default nv)
   | Oshruimm n => op1 (shruimm nv n)
-  | Ororimm n => op1 (ror nv n)
-  | Oshldimm n => op1 (default nv)
-  | Olea addr => needs_of_addressing_32 addr nv
+  | Oshrximm n => op1 (default nv)
   | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocast32signed => op1 (default nv)
   | Ocast32unsigned => op1 (default nv)
+  | Oaddl => op2 (default nv)
+  | Oaddlimm n => op1 (default nv)
   | Onegl => op1 (default nv)
-  | Oaddlimm _ => op1 (default nv)
   | Osubl => op2 (default nv)
   | Omull => op2 (default nv)
-  | Omullimm _ => op1 (default nv)
   | Omullhs | Omullhu | Odivl | Odivlu | Omodl | Omodlu => op2 (default nv)
   | Oandl => op2 (default nv)
-  | Oandlimm _ => op1 (default nv)
+  | Oandlimm n => op1 (default nv)
   | Oorl => op2 (default nv)
-  | Oorlimm _ => op1 (default nv)
+  | Oorlimm n => op1 (default nv)
   | Oxorl => op2 (default nv)
-  | Oxorlimm _ => op1 (default nv)
-  | Onotl => op1 (default nv)
-  | Oshll => op2 (default nv)
-  | Oshllimm _ => op1 (default nv)
-  | Oshrl => op2 (default nv)
-  | Oshrlimm _ => op1 (default nv)
+  | Oxorlimm n => op1 (default nv)
+  | Oshll | Oshrl | Oshrlu => op2 (default nv)
+  | Oshllimm n => op1 (default nv)
+  | Oshrlimm n => op1 (default nv)
+  | Oshrluimm n => op1 (default nv)
   | Oshrxlimm n => op1 (default nv)
-  | Oshrlu => op2 (default nv)
-  | Oshrluimm _ => op1 (default nv)
-  | Ororlimm _ => op1 (default nv)
-  | Oleal addr => needs_of_addressing_64 addr nv
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
   | Onegfs | Oabsfs => op1 (default nv)
   | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
-  | Osingleoffloat | Ofloatofsingle => op1 (default nv)
-  | Ointoffloat | Ofloatofint | Ointofsingle | Osingleofint => op1 (default nv)
-  | Olongoffloat | Ofloatoflong | Olongofsingle | Osingleoflong => op1 (default nv)
+  | Ofloatofsingle | Osingleoffloat => op1 (default nv)
+  | Ointoffloat | Ointuoffloat | Ofloatofint | Ofloatofintu => op1 (default nv)
+  | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv)
+  | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
+  | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv)
   | Ocmp c => needs_of_condition c
-  | Osel c ty => nv :: nv :: needs_of_condition c
+  | OEseqw _ => op2 (default nv)
+  | OEsnew _ => op2 (default nv)
+  | OEsequw _ => op2 (default nv)
+  | OEsneuw _ => op2 (default nv)
+  | OEsltw _ => op2 (default nv)
+  | OEsltuw _ => op2 (default nv)
+  | OEsltiw _ => op1 (default nv)
+  | OEsltiuw _ => op1 (default nv)
+  | OExoriw _ => op1 (bitwise nv)
+  | OEluiw _ => op1 (default nv)
+  | OEaddiw _ _ => op1 (default nv)
+  | OEandiw n => op1 (andimm nv n)
+  | OEoriw n => op1 (orimm nv n)
+  | OEseql _ => op2 (default nv)
+  | OEsnel _ => op2 (default nv)
+  | OEsequl _ => op2 (default nv)
+  | OEsneul _ => op2 (default nv)
+  | OEsltl _ => op2 (default nv)
+  | OEsltul _ => op2 (default nv)
+  | OEsltil _ => op1 (default nv)
+  | OEsltiul _ => op1 (default nv)
+  | OExoril _ => op1 (default nv)
+  | OEluil _ => op1 (default nv)
+  | OEaddil _ _ => op1 (default nv)
+  | OEandil _ => op1 (default nv)
+  | OEoril _ => op1 (default nv)
+  | OEloadli _ => op1 (default nv)
+  | OEmayundef _ => op2 (default nv)
+  | OEfeqd => op2 (default nv)
+  | OEfltd => op2 (default nv)
+  | OEfled => op2 (default nv)
+  | OEfeqs => op2 (default nv)
+  | OEflts => op2 (default nv)
+  | OEfles => op2 (default nv)
+  | Obits_of_single => op1 (default nv)
+  | Obits_of_float => op1 (default nv)
+  | Osingle_of_bits => op1 (default nv)
+  | Ofloat_of_bits => op1 (default nv)
+  | Oselectl => All :: nv :: nv :: nil
   end.
 
 Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   match op with
   | Ocast8signed => sign_ext_redundant 8 nv
-  | Ocast8unsigned => zero_ext_redundant 8 nv
   | Ocast16signed => sign_ext_redundant 16 nv
-  | Ocast16unsigned => zero_ext_redundant 16 nv
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
   | _ => false
@@ -160,40 +163,10 @@ Lemma needs_of_condition_sound:
   vagree_list args args' (needs_of_condition cond) ->
   eval_condition cond args' m' = Some b.
 Proof.
-  intros. destruct cond; simpl in H;
-  try (eapply default_needs_of_condition_sound; eauto; fail);
-  simpl in *; FuncInv; InvAgree.
-- eapply maskzero_sound; eauto.
-- destruct (Val.maskzero_bool v n) as [b'|] eqn:MZ; try discriminate.
-  erewrite maskzero_sound; eauto.
+  intros. unfold needs_of_condition in H0.
+  eapply default_needs_of_condition_sound; eauto.
 Qed.
 
-Lemma needs_of_addressing_32_sound:
-  forall sp addr args v nv args',
-  eval_addressing32 ge (Vptr sp Ptrofs.zero) addr args = Some v ->
-  vagree_list args args' (needs_of_addressing_32 addr nv) ->
-  exists v',
-     eval_addressing32 ge (Vptr sp Ptrofs.zero) addr args' = Some v'
-  /\ vagree v v' nv.
-Proof.
-  unfold needs_of_addressing_32; intros.
-  destruct addr; simpl in *; FuncInv; InvAgree; TrivialExists;
-  auto using add_sound, mul_sound with na.
-  apply add_sound; auto with na. apply add_sound; rewrite modarith_idem; auto.
-  apply add_sound; auto. apply add_sound; rewrite modarith_idem; auto with na.
-  apply mul_sound; rewrite modarith_idem; auto with na.
-Qed.
-
-(*
-Lemma needs_of_addressing_64_sound:
-  forall sp addr args v nv args',
-  eval_addressing64 ge (Vptr sp Ptrofs.zero) addr args = Some v ->
-  vagree_list args args' (needs_of_addressing_64 addr nv) ->
-  exists v',
-     eval_addressing64 ge (Vptr sp Ptrofs.zero) addr args' = Some v'
-  /\ vagree v v' nv.
-*)
-
 Lemma needs_of_operation_sound:
   forall op args v nv args',
   eval_operation ge (Vptr sp Ptrofs.zero) op args m = Some v ->
@@ -205,37 +178,42 @@ Lemma needs_of_operation_sound:
 Proof.
   unfold needs_of_operation; intros; destruct op; try (eapply default_needs_of_operation_sound; eauto; fail);
   simpl in *; FuncInv; InvAgree; TrivialExists.
-- apply sign_ext_sound; auto. compute; auto.
-- apply zero_ext_sound; auto. lia.
-- apply sign_ext_sound; auto. compute; auto.
-- apply zero_ext_sound; auto. lia.
+- apply sign_ext_sound; auto. compute; auto. 
+- apply sign_ext_sound; auto. compute; auto. 
+- apply add_sound; auto.
+- apply add_sound; auto with na.
 - apply neg_sound; auto.
 - apply mul_sound; auto.
-- apply mul_sound; auto with na.
 - apply and_sound; auto.
 - apply andimm_sound; auto.
 - apply or_sound; auto.
 - apply orimm_sound; auto.
 - apply xor_sound; auto.
 - apply xor_sound; auto with na.
-- apply notint_sound; auto.
 - apply shlimm_sound; auto.
 - apply shrimm_sound; auto.
 - apply shruimm_sound; auto.
-- apply ror_sound; auto.
-- eapply needs_of_addressing_32_sound; eauto.
-- change (eval_addressing64 ge (Vptr sp Ptrofs.zero) a args')
-    with (eval_operation ge (Vptr sp Ptrofs.zero) (Oleal a) args' m').
-  eapply default_needs_of_operation_sound; eauto.
-  destruct a; simpl in H0; auto.
-- destruct (eval_condition cond args m) as [b|] eqn:EC; simpl in H2.
-  erewrite needs_of_condition_sound by eauto.
-  subst v; simpl. auto with na.
-  subst v; auto with na.
-- destruct (eval_condition c args m) as [b|] eqn:EC.
-  erewrite needs_of_condition_sound by eauto.
-  apply select_sound; auto.
-  simpl; auto with na.
+- fold (Val.and (Vint n) v0);
+  fold (Val.and (Vint n) v2);
+  rewrite (Val.and_commut (Vint n) v0);
+  rewrite (Val.and_commut (Vint n) v2);
+  apply andimm_sound; auto.
+- fold (Val.or (Vint n) v0);
+  fold (Val.or (Vint n) v2);
+  rewrite (Val.or_commut (Vint n) v0);
+  rewrite (Val.or_commut (Vint n) v2);
+  apply orimm_sound; auto.
+- apply xor_sound; auto with na.
+- (* selectl *)
+  unfold ExtValues.select01_long.
+  destruct v0; auto with na.
+  assert (Val.lessdef (Vint i) v4) as LESSDEF by auto with na.
+  inv LESSDEF.
+  destruct (Int.eq i Int.one).
+  { apply normalize_sound; auto. }
+  destruct (Int.eq i Int.zero).
+  { apply normalize_sound; auto. }
+  cbn. auto with na.
 Qed.
 
 Lemma operation_is_redundant_sound:
@@ -247,13 +225,9 @@ Lemma operation_is_redundant_sound:
 Proof.
   intros. destruct op; simpl in *; try discriminate; inv H1; FuncInv; subst.
 - apply sign_ext_redundant_sound; auto. lia.
-- apply zero_ext_redundant_sound; auto. lia.
 - apply sign_ext_redundant_sound; auto. lia.
-- apply zero_ext_redundant_sound; auto. lia.
 - apply andimm_redundant_sound; auto.
 - apply orimm_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
-
-
diff --git a/verilog/Op.v b/verilog/Op.v
index caa63235..9f94828f 100644
--- a/verilog/Op.v
+++ b/verilog/Op.v
@@ -3,11 +3,16 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Operators and addressing modes.  The abstract syntax and dynamic
@@ -17,55 +22,66 @@
 - [operation]: arithmetic and logical operations;
 - [addressing]: addressing modes for load and store operations.
 
-  These types are X86-64-specific and correspond roughly to what the
+  These types are processor-specific and correspond roughly to what the
   processor can compute in one instruction.  In other terms, these
   types reflect the state of the program after instruction selection.
   For a processor-independent set of operations, see the abstract
   syntax and dynamic semantics of the Cminor language.
 *)
-Require Import BoolEqual.
-Require Import Coqlib.
-Require Import AST.
-Require Import Integers.
-Require Import Floats.
-Require Import Values.
-Require Import Memory.
-Require Import Globalenvs.
-Require Import Events.
+
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require ExtValues.
 
 Set Implicit Arguments.
 
 (** Conditions (boolean-valued operators). *)
 
+(** Type to modelize the use of a special register in arith operations *)
+
+Inductive oreg: Type :=
+  | X0_L: oreg
+  | X0_R: oreg.
+
 Inductive condition : Type :=
-  | Ccomp (c: comparison)      (**r signed integer comparison *)
-  | Ccompu (c: comparison)     (**r unsigned integer comparison *)
+  | Ccomp (c: comparison)       (**r signed integer comparison *)
+  | Ccompu (c: comparison)      (**r unsigned integer comparison *)
   | Ccompimm (c: comparison) (n: int) (**r signed integer comparison with a constant *)
   | Ccompuimm (c: comparison) (n: int)  (**r unsigned integer comparison with a constant *)
   | Ccompl (c: comparison)      (**r signed 64-bit integer comparison *)
   | Ccomplu (c: comparison)     (**r unsigned 64-bit integer comparison *)
   | Ccomplimm (c: comparison) (n: int64) (**r signed 64-bit integer comparison with a constant *)
   | Ccompluimm (c: comparison) (n: int64)  (**r unsigned 64-bit integer comparison with a constant *)
-  | Ccompf (c: comparison)     (**r 64-bit floating-point comparison *)
-  | Cnotcompf (c: comparison)  (**r negation of a floating-point comparison *)
-  | Ccompfs (c: comparison)    (**r 32-bit floating-point comparison *)
-  | Cnotcompfs (c: comparison) (**r negation of a floating-point comparison *)
-  | Cmaskzero (n: int)         (**r test [(arg & constant) == 0] *)
-  | Cmasknotzero (n: int).     (**r test [(arg & constant) != 0] *)
-
-(** Addressing modes.  [r1], [r2], etc, are the arguments to the
-  addressing. *)
-
-Inductive addressing: Type :=
-  | Aindexed: Z -> addressing       (**r Address is [r1 + offset] *)
-  | Aindexed2: Z -> addressing      (**r Address is [r1 + r2 + offset] *)
-  | Ascaled: Z -> Z -> addressing   (**r Address is [r1 * scale + offset] *)
-  | Aindexed2scaled: Z -> Z -> addressing
-                                    (**r Address is [r1 + r2 * scale + offset] *)
-  | Aglobal: ident -> ptrofs -> addressing (**r Address is [symbol + offset] *)
-  | Abased: ident -> ptrofs -> addressing  (**r Address is [symbol + offset + r1] *)
-  | Abasedscaled: Z -> ident -> ptrofs -> addressing  (**r Address is [symbol + offset + r1 * scale] *)
-  | Ainstack: ptrofs -> addressing. (**r Address is [stack_pointer + offset] *)
+  | Ccompf (c: comparison)      (**r 64-bit floating-point comparison *)
+  | Cnotcompf (c: comparison)   (**r negation of a floating-point comparison *)
+  | Ccompfs (c: comparison)     (**r 32-bit floating-point comparison *)
+  | Cnotcompfs (c: comparison)  (**r negation of a floating-point comparison *)
+  (* Expansed branches *)
+  | CEbeqw (optR: option oreg)                      (**r branch-if-equal signed *)
+  | CEbnew (optR: option oreg)                      (**r branch-if-not-equal signed *)
+  | CEbequw (optR: option oreg)                     (**r branch-if-equal unsigned *)
+  | CEbneuw (optR: option oreg)                     (**r branch-if-not-equal unsigned *)
+  | CEbltw (optR: option oreg)                      (**r branch-if-less signed *)
+  | CEbltuw (optR: option oreg)                     (**r branch-if-less unsigned *)
+  | CEbgew (optR: option oreg)                      (**r branch-if-greater-or-equal signed *)
+  | CEbgeuw (optR: option oreg)                     (**r branch-if-greater-or-equal unsigned *)
+  | CEbeql (optR: option oreg)                      (**r branch-if-equal signed *)
+  | CEbnel (optR: option oreg)                      (**r branch-if-not-equal signed *)
+  | CEbequl (optR: option oreg)                     (**r branch-if-equal unsigned *)
+  | CEbneul (optR: option oreg)                     (**r branch-if-not-equal unsigned *)
+  | CEbltl (optR: option oreg)                      (**r branch-if-less signed *)
+  | CEbltul (optR: option oreg)                     (**r branch-if-less unsigned *)
+  | CEbgel (optR: option oreg)                      (**r branch-if-greater-or-equal signed *)
+  | CEbgeul (optR: option oreg).                    (**r branch-if-greater-or-equal unsigned *)
+
+(* This type will define the eval function of a OEmayundef operation. *)
+
+Inductive mayundef: Type :=
+  | MUint: mayundef
+  | MUlong: mayundef
+  | MUshrx: int -> mayundef
+  | MUshrxl: int -> mayundef.
 
 (** Arithmetic and logical operations.  In the descriptions, [rd] is the
   result of the operation and [r1], [r2], etc, are the arguments. *)
@@ -76,16 +92,16 @@ Inductive operation : Type :=
   | Olongconst (n: int64)    (**r [rd] is set to the given integer constant *)
   | Ofloatconst (n: float)   (**r [rd] is set to the given float constant *)
   | Osingleconst (n: float32)(**r [rd] is set to the given float constant *)
-  | Oindirectsymbol (id: ident) (**r [rd] is set to the address of the symbol *)
+  | Oaddrsymbol (id: ident) (ofs: ptrofs)  (**r [rd] is set to the address of the symbol plus the given offset *)
+  | Oaddrstack (ofs: ptrofs) (**r [rd] is set to the stack pointer plus the given offset *)
 (*c 32-bit integer arithmetic: *)
   | Ocast8signed             (**r [rd] is 8-bit sign extension of [r1] *)
-  | Ocast8unsigned           (**r [rd] is 8-bit zero extension of [r1] *)
   | Ocast16signed            (**r [rd] is 16-bit sign extension of [r1] *)
-  | Ocast16unsigned          (**r [rd] is 16-bit zero extension of [r1] *)
-  | Oneg                     (**r [rd = - r1] *)
+  | Oadd                     (**r [rd = r1 + r2] *)
+  | Oaddimm (n: int)         (**r [rd = r1 + n] *)
+  | Oneg                     (**r [rd = - r1]   *)                     
   | Osub                     (**r [rd = r1 - r2] *)
   | Omul                     (**r [rd = r1 * r2] *)
-  | Omulimm (n: int)         (**r [rd = r1 * n] *)
   | Omulhs                   (**r [rd = high part of r1 * r2, signed] *)
   | Omulhu                   (**r [rd = high part of r1 * r2, unsigned] *)
   | Odiv                     (**r [rd = r1 / r2] (signed) *)
@@ -98,28 +114,24 @@ Inductive operation : Type :=
   | Oorimm (n: int)          (**r [rd = r1 | n] *)
   | Oxor                     (**r [rd = r1 ^ r2] *)
   | Oxorimm (n: int)         (**r [rd = r1 ^ n] *)
-  | Onot                     (**r [rd = ~r1] *)
   | Oshl                     (**r [rd = r1 << r2] *)
   | Oshlimm (n: int)         (**r [rd = r1 << n] *)
   | Oshr                     (**r [rd = r1 >> r2] (signed) *)
   | Oshrimm (n: int)         (**r [rd = r1 >> n] (signed) *)
-  | Oshrximm (n: int)        (**r [rd = r1 / 2^n] (signed) *)
   | Oshru                    (**r [rd = r1 >> r2] (unsigned) *)
   | Oshruimm (n: int)        (**r [rd = r1 >> n] (unsigned) *)
-  | Ororimm (n: int)         (**r rotate right immediate *)
-  | Oshldimm (n: int)        (**r [rd = r1 << n | r2 >> (32-n)] *)
-  | Olea (a: addressing)     (**r effective address *)
+  | Oshrximm (n: int)        (**r [rd = r1 / 2^n] (signed) *)
 (*c 64-bit integer arithmetic: *)
   | Omakelong                (**r [rd = r1 << 32 | r2] *)
   | Olowlong                 (**r [rd = low-word(r1)] *)
   | Ohighlong                (**r [rd = high-word(r1)] *)
   | Ocast32signed            (**r [rd] is 32-bit sign extension of [r1] *)
   | Ocast32unsigned          (**r [rd] is 32-bit zero extension of [r1] *)
-  | Onegl                    (**r [rd = - r1] *)
+  | Oaddl                    (**r [rd = r1 + r2] *)
   | Oaddlimm (n: int64)      (**r [rd = r1 + n] *)
+  | Onegl                    (**r [rd = - r1] *)
   | Osubl                    (**r [rd = r1 - r2] *)
   | Omull                    (**r [rd = r1 * r2] *)
-  | Omullimm (n: int64)      (**r [rd = r1 * n] *)
   | Omullhs                  (**r [rd = high part of r1 * r2, signed] *)
   | Omullhu                  (**r [rd = high part of r1 * r2, unsigned] *)
   | Odivl                    (**r [rd = r1 / r2] (signed) *)
@@ -132,16 +144,13 @@ Inductive operation : Type :=
   | Oorlimm (n: int64)       (**r [rd = r1 | n] *)
   | Oxorl                    (**r [rd = r1 ^ r2] *)
   | Oxorlimm (n: int64)      (**r [rd = r1 ^ n] *)
-  | Onotl                    (**r [rd = ~r1] *)
   | Oshll                    (**r [rd = r1 << r2] *)
   | Oshllimm (n: int)        (**r [rd = r1 << n] *)
   | Oshrl                    (**r [rd = r1 >> r2] (signed) *)
   | Oshrlimm (n: int)        (**r [rd = r1 >> n] (signed) *)
-  | Oshrxlimm (n: int)       (**r [rd = r1 / 2^n] (signed) *)
   | Oshrlu                   (**r [rd = r1 >> r2] (unsigned) *)
   | Oshrluimm (n: int)       (**r [rd = r1 >> n] (unsigned) *)
-  | Ororlimm (n: int)        (**r rotate right immediate *)
-  | Oleal (a: addressing)    (**r effective address *)
+  | Oshrxlimm (n: int)       (**r [rd = r1 / 2^n] (signed) *)
 (*c Floating-point arithmetic: *)
   | Onegf                    (**r [rd = - r1] *)
   | Oabsf                    (**r [rd = abs(r1)] *)
@@ -159,84 +168,150 @@ Inductive operation : Type :=
   | Ofloatofsingle           (**r [rd] is [r1] extended to double-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat              (**r [rd = signed_int_of_float64(r1)] *)
+  | Ointuoffloat             (**r [rd = unsigned_int_of_float64(r1)] *)
   | Ofloatofint              (**r [rd = float64_of_signed_int(r1)] *)
+  | Ofloatofintu             (**r [rd = float64_of_unsigned_int(r1)] *)
   | Ointofsingle             (**r [rd = signed_int_of_float32(r1)] *)
+  | Ointuofsingle            (**r [rd = unsigned_int_of_float32(r1)] *)
   | Osingleofint             (**r [rd = float32_of_signed_int(r1)] *)
+  | Osingleofintu            (**r [rd = float32_of_unsigned_int(r1)] *)
   | Olongoffloat             (**r [rd = signed_long_of_float64(r1)] *)
+  | Olonguoffloat            (**r [rd = unsigned_long_of_float64(r1)] *)
   | Ofloatoflong             (**r [rd = float64_of_signed_long(r1)] *)
+  | Ofloatoflongu            (**r [rd = float64_of_unsigned_long(r1)] *)
   | Olongofsingle            (**r [rd = signed_long_of_float32(r1)] *)
+  | Olonguofsingle           (**r [rd = unsigned_long_of_float32(r1)] *)
   | Osingleoflong            (**r [rd = float32_of_signed_long(r1)] *)
+  | Osingleoflongu           (**r [rd = float32_of_unsigned_int(r1)] *)
 (*c Boolean tests: *)
-  | Ocmp (cond: condition)  (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
-  | Osel: condition -> typ -> operation.
-                             (**r [rd = rs1] if condition holds, [rd = rs2] otherwise. *)
+  | Ocmp (cond: condition)   (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+  (* Expansed conditions *)
+  | OEseqw (optR: option oreg)                  (**r [rd <- rs1 == rs2] signed *)
+  | OEsnew (optR: option oreg)                  (**r [rd <- rs1 != rs2] signed *)
+  | OEsequw (optR: option oreg)                 (**r [rd <- rs1 == rs2] unsigned *)
+  | OEsneuw (optR: option oreg)                 (**r [rd <- rs1 != rs2] unsigned *)
+  | OEsltw (optR: option oreg)                  (**r set-less-than *)
+  | OEsltuw (optR: option oreg)                 (**r set-less-than unsigned *)
+  | OEsltiw (n: int)                            (**r set-less-than immediate *)
+  | OEsltiuw (n: int)                           (**r set-less-than unsigned immediate *)
+  | OEaddiw (optR: option oreg) (n: int)        (**r add immediate *)
+  | OEandiw (n: int)                            (**r and immediate *)
+  | OEoriw (n: int)                             (**r or immediate *)
+  | OExoriw (n: int)                            (**r xor immediate *)
+  | OEluiw (n: int)                             (**r load upper-immediate *)
+  | OEseql (optR: option oreg)                  (**r [rd <- rs1 == rs2] signed *)
+  | OEsnel (optR: option oreg)                  (**r [rd <- rs1 != rs2] signed *)
+  | OEsequl (optR: option oreg)                 (**r [rd <- rs1 == rs2] unsigned *)
+  | OEsneul (optR: option oreg)                 (**r [rd <- rs1 != rs2] unsigned *)
+  | OEsltl (optR: option oreg)                  (**r set-less-than *)
+  | OEsltul (optR: option oreg)                 (**r set-less-than unsigned *)
+  | OEsltil (n: int64)                          (**r set-less-than immediate *)
+  | OEsltiul (n: int64)                         (**r set-less-than unsigned immediate *)
+  | OEaddil (optR: option oreg) (n: int64)      (**r add immediate *)
+  | OEandil (n: int64)                          (**r and immediate *)
+  | OEoril (n: int64)                           (**r or immediate *)
+  | OExoril (n: int64)                          (**r xor immediate *)
+  | OEluil (n: int64)                           (**r load upper-immediate *)
+  | OEloadli (n: int64)                         (**r load an immediate int64 *)
+  | OEmayundef (mu: mayundef)
+  | OEfeqd                                      (**r compare equal *)
+  | OEfltd                                      (**r compare less-than *)
+  | OEfled                                      (**r compare less-than/equal *)
+  | OEfeqs                                      (**r compare equal *)
+  | OEflts                                      (**r compare less-than *)
+  | OEfles                                      (**r compare less-than/equal *)
+  | Obits_of_single
+  | Obits_of_float
+  | Osingle_of_bits
+  | Ofloat_of_bits
+  | Oselectl.
+
+(** Addressing modes.  [r1], [r2], etc, are the arguments to the
+  addressing. *)
+
+Inductive addressing: Type :=
+  | Aindexed: ptrofs -> addressing    (**r Address is [r1 + offset] *)
+  | Aglobal: ident -> ptrofs -> addressing  (**r Address is global plus offset *)
+  | Ainstack: ptrofs -> addressing.   (**r Address is [stack_pointer + offset] *)
 
 (** Comparison functions (used in modules [CSE] and [Allocation]). *)
 
+Definition oreg_eq: forall (x y: oreg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
 Definition eq_condition (x y: condition) : {x=y} + {x<>y}.
 Proof.
-  generalize Int.eq_dec Int64.eq_dec; intro.
+  generalize Int.eq_dec Int64.eq_dec bool_dec oreg_eq; intros.
   assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
   decide equality.
+  all: destruct optR, optR0; decide equality.
 Defined.
 
 Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}.
 Proof.
-  generalize ident_eq Ptrofs.eq_dec zeq; intros.
+  generalize ident_eq Ptrofs.eq_dec; intros.
   decide equality.
 Defined.
 
+Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Proof.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition bool_dec Val.eq oreg_eq; intros.
+  decide equality.
+  all: try destruct optR, optR0; try decide equality.
+Defined.
+
+(* Alternate definition: 
 Definition beq_operation: forall (x y: operation), bool.
 Proof.
-  generalize Int.eq_dec Int64.eq_dec Float.eq_dec Float32.eq_dec ident_eq typ_eq eq_addressing eq_condition; boolean_equality.
+  generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition; boolean_equality.
 Defined.
 
 Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
 Proof.
   decidable_equality_from beq_operation.
 Defined.
+*)
 
 Global Opaque eq_condition eq_addressing eq_operation.
 
-(** In addressing modes, offsets are 32-bit signed integers, even in
-    64-bit mode.  The following function checks that an addressing
-    mode is valid, i.e. that the offsets are in range.
-    The check always succeeds in 32-bit mode because offsets are
-    always 32-bit integers and are normalized as 32-bit signed integers
-    during code generation (see [Asmgen.normalize_addrmode_32]).
-
-    Moreover, in 64-bit mode, we use RIP-relative addressing for
-    access to globals.  (This is the "small code model" from the
-    x86_64 ELF ABI.)  Thus, for addressing global variables,
-    the offset from the variable plus the RIP-relative offset
-    must fit in 32 bits.  The "small code model" guarantees that
-    this will fit if the offset is between [-2^24] and [2^24-1],
-    under the assumption that no global variable is bigger than
-    [2^24] bytes. *)
-
-Definition offset_in_range (n: Z) : bool := 
-  zle Int.min_signed n && zle n Int.max_signed.
-
-Definition ptroffset_min := -16777216.      (**r [-2^24] *)
-Definition ptroffset_max := 16777215.       (**r [2^24 - 1] *)
-
-Definition ptroffset_in_range (n: ptrofs) : bool := 
-  let n := Ptrofs.signed n in zle ptroffset_min n && zle n ptroffset_max.
-
-Definition addressing_valid (a: addressing) : bool :=
-  if Archi.ptr64 then
-    match a with
-    | Aindexed n => offset_in_range n
-    | Aindexed2 n => offset_in_range n
-    | Ascaled sc ofs => offset_in_range ofs
-    | Aindexed2scaled sc ofs => offset_in_range ofs
-    | Aglobal s ofs => ptroffset_in_range ofs
-    | Abased s ofs => ptroffset_in_range ofs
-    | Abasedscaled sc s ofs => ptroffset_in_range ofs
-    | Ainstack ofs => offset_in_range (Ptrofs.signed ofs)
-    end
-  else true.
+(** Generic function to evaluate an instruction according to the given specific register *)
+  
+Definition zero32 := (Vint Int.zero).
+Definition zero64 := (Vlong Int64.zero).
   
+Definition apply_bin_oreg {B} (optR: option oreg) (sem: val -> val -> B) (v1 v2 vz: val): B :=
+  match optR with
+  | None => sem v1 v2
+  | Some X0_L => sem vz v1
+  | Some X0_R => sem v1 vz
+  end.
+
+(** Mayundef evaluation according to the above defined type *)
+
+Definition eval_may_undef (mu: mayundef) (v1 v2: val): val :=
+  match mu with
+  | MUint => match v1, v2 with
+             | Vint _, Vint _ => v2
+             | _, _ => Vundef
+             end
+  | MUlong => match v1, v2 with
+              | Vlong _, Vint _ => v2
+              | _, _ => Vundef
+              end
+  | MUshrx i =>
+      match v1, v2 with
+      | Vint _, Vint _ =>
+          if Int.ltu i (Int.repr 31) then v2 else Vundef
+      | _, _ => Vundef
+      end
+  | MUshrxl i =>
+      match v1, v2 with
+      | Vlong _, Vlong _ =>
+          if Int.ltu i (Int.repr 63) then v2 else Vundef
+      | _, _ => Vundef
+      end
+  end.
+
 (** * Evaluation functions *)
 
 (** Evaluation of conditions, operators and addressing modes applied
@@ -258,60 +333,34 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
   | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
   | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
-  | Cmaskzero n, v1 :: nil => Val.maskzero_bool v1 n
-  | Cmasknotzero n, v1 :: nil => option_map negb (Val.maskzero_bool v1 n)
+  (* Expansed branches *)
+  | CEbeqw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Ceq) v1 v2 zero32
+  | CEbnew optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Cne) v1 v2 zero32
+  | CEbequw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Ceq) v1 v2 zero32
+  | CEbneuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Cne) v1 v2 zero32
+  | CEbltw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Clt) v1 v2 zero32
+  | CEbltuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Clt) v1 v2 zero32
+  | CEbgew optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmp_bool Cge) v1 v2 zero32
+  | CEbgeuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpu_bool (Mem.valid_pointer m) Cge) v1 v2 zero32
+  | CEbeql optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Ceq) v1 v2 zero64
+  | CEbnel optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Cne) v1 v2 zero64
+  | CEbequl optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Ceq) v1 v2 zero64
+  | CEbneul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Cne) v1 v2 zero64
+  | CEbltl optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Clt) v1 v2 zero64
+  | CEbltul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Clt) v1 v2 zero64
+  | CEbgel optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmpl_bool Cge) v1 v2 zero64
+  | CEbgeul optR, v1 :: v2 :: nil => apply_bin_oreg optR (Val.cmplu_bool (Mem.valid_pointer m) Cge) v1 v2 zero64
   | _, _ => None
   end.
 
-Definition eval_addressing32
-    (F V: Type) (genv: Genv.t F V) (sp: val)
-    (addr: addressing) (vl: list val) : option val :=
-  match addr, vl with
-  | Aindexed n, v1::nil =>
-      Some (Val.add v1 (Vint (Int.repr n)))
-  | Aindexed2 n, v1::v2::nil =>
-      Some (Val.add (Val.add v1 v2) (Vint (Int.repr n)))
-  | Ascaled sc ofs, v1::nil =>
-      Some (Val.add (Val.mul v1 (Vint (Int.repr sc))) (Vint (Int.repr ofs)))
-  | Aindexed2scaled sc ofs, v1::v2::nil =>
-      Some(Val.add v1 (Val.add (Val.mul v2 (Vint (Int.repr sc))) (Vint (Int.repr ofs))))
-  | Aglobal s ofs, nil =>
-      if Archi.ptr64 then None else Some (Genv.symbol_address genv s ofs)
-  | Abased s ofs, v1::nil =>
-      if Archi.ptr64 then None else Some (Val.add (Genv.symbol_address genv s ofs) v1)
-  | Abasedscaled sc s ofs, v1::nil =>
-      if Archi.ptr64 then None else Some (Val.add (Genv.symbol_address genv s ofs) (Val.mul v1 (Vint (Int.repr sc))))
-  | Ainstack ofs, nil =>
-      if Archi.ptr64 then None else Some(Val.offset_ptr sp ofs)
-  | _, _ => None
-  end.
+(** Assert sp is a pointer *)
 
-Definition eval_addressing64
-    (F V: Type) (genv: Genv.t F V) (sp: val)
-    (addr: addressing) (vl: list val) : option val :=
-  match addr, vl with
-  | Aindexed n, v1::nil =>
-      Some (Val.addl v1 (Vlong (Int64.repr n)))
-  | Aindexed2 n, v1::v2::nil =>
-      Some (Val.addl (Val.addl v1 v2) (Vlong (Int64.repr n)))
-  | Ascaled sc ofs, v1::nil =>
-      Some (Val.addl (Val.mull v1 (Vlong (Int64.repr sc))) (Vlong (Int64.repr ofs)))
-  | Aindexed2scaled sc ofs, v1::v2::nil =>
-      Some(Val.addl v1 (Val.addl (Val.mull v2 (Vlong (Int64.repr sc))) (Vlong (Int64.repr ofs))))
-  | Aglobal s ofs, nil =>
-      if Archi.ptr64 then Some (Genv.symbol_address genv s ofs) else None
-  | Ainstack ofs, nil =>
-      if Archi.ptr64 then Some(Val.offset_ptr sp ofs) else None
-  | _, _ => None
+Definition get_sp sp :=
+  match sp with
+  | Vptr _ _ => sp
+  | _ => Vundef
   end.
 
-Definition eval_addressing
-    (F V: Type) (genv: Genv.t F V) (sp: val)
-    (addr: addressing) (vl: list val) : option val :=
-  if Archi.ptr64
-  then eval_addressing64 genv sp addr vl
-  else eval_addressing32 genv sp addr vl.
-
 Definition eval_operation
     (F V: Type) (genv: Genv.t F V) (sp: val)
     (op: operation) (vl: list val) (m: mem): option val :=
@@ -321,126 +370,169 @@ Definition eval_operation
   | Olongconst n, nil => Some (Vlong n)
   | Ofloatconst n, nil => Some (Vfloat n)
   | Osingleconst n, nil => Some (Vsingle n)
-  | Oindirectsymbol id, nil => Some (Genv.symbol_address genv id Ptrofs.zero)
+  | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs)
+  | Oaddrstack ofs, nil => Some (Val.offset_ptr sp ofs)
   | Ocast8signed, v1 :: nil => Some (Val.sign_ext 8 v1)
-  | Ocast8unsigned, v1 :: nil => Some (Val.zero_ext 8 v1)
   | Ocast16signed, v1 :: nil => Some (Val.sign_ext 16 v1)
-  | Ocast16unsigned, v1 :: nil => Some (Val.zero_ext 16 v1)
-  | Oneg, v1::nil => Some (Val.neg v1)
-  | Osub, v1::v2::nil => Some (Val.sub v1 v2)
-  | Omul, v1::v2::nil => Some (Val.mul v1 v2)
-  | Omulimm n, v1::nil => Some (Val.mul v1 (Vint n))
+  | Oadd, v1 :: v2 :: nil => Some (Val.add v1 v2)
+  | Oaddimm n, v1 :: nil => Some (Val.add v1 (Vint n))
+  | Oneg, v1 :: nil => Some (Val.neg v1)
+  | Osub, v1 :: v2 :: nil => Some (Val.sub v1 v2)
+  | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2)
   | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2)
-  | Odiv, v1::v2::nil => Val.divs v1 v2
-  | Odivu, v1::v2::nil => Val.divu v1 v2
-  | Omod, v1::v2::nil => Val.mods v1 v2
-  | Omodu, v1::v2::nil => Val.modu v1 v2
-  | Oand, v1::v2::nil => Some(Val.and v1 v2)
-  | Oandimm n, v1::nil => Some (Val.and v1 (Vint n))
-  | Oor, v1::v2::nil => Some(Val.or v1 v2)
-  | Oorimm n, v1::nil => Some (Val.or v1 (Vint n))
-  | Oxor, v1::v2::nil => Some(Val.xor v1 v2)
-  | Oxorimm n, v1::nil => Some (Val.xor v1 (Vint n))
-  | Onot, v1::nil => Some(Val.notint v1)
-  | Oshl, v1::v2::nil => Some (Val.shl v1 v2)
-  | Oshlimm n, v1::nil => Some (Val.shl v1 (Vint n))
-  | Oshr, v1::v2::nil => Some (Val.shr v1 v2)
-  | Oshrimm n, v1::nil => Some (Val.shr v1 (Vint n))
-  | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
-  | Oshru, v1::v2::nil => Some (Val.shru v1 v2)
-  | Oshruimm n, v1::nil => Some (Val.shru v1 (Vint n))
-  | Ororimm n, v1::nil => Some (Val.ror v1 (Vint n))
-  | Oshldimm n, v1::v2::nil => Some (Val.or (Val.shl v1 (Vint n))
-                                            (Val.shru v2 (Vint (Int.sub Int.iwordsize n))))
-  | Olea addr, _ => eval_addressing32 genv sp addr vl
-  | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
-  | Olowlong, v1::nil => Some(Val.loword v1)
-  | Ohighlong, v1::nil => Some(Val.hiword v1)
+  | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+  | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
+  | Omod, v1 :: v2 :: nil => Some (Val.maketotal (Val.mods v1 v2))
+  | Omodu, v1 :: v2 :: nil => Some (Val.maketotal (Val.modu v1 v2))
+  | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
+  | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
+  | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2)
+  | Oorimm n, v1 :: nil => Some (Val.or v1 (Vint n))
+  | Oxor, v1 :: v2 :: nil => Some (Val.xor v1 v2)
+  | Oxorimm n, v1 :: nil => Some (Val.xor v1 (Vint n))
+  | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
+  | Oshlimm n, v1 :: nil => Some (Val.shl v1 (Vint n))
+  | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
+  | Oshrimm n, v1 :: nil => Some (Val.shr v1 (Vint n))
+  | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
+  | Oshruimm n, v1 :: nil => Some (Val.shru v1 (Vint n))
+  | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
+  | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some (Val.loword v1)
+  | Ohighlong, v1::nil => Some (Val.hiword v1)
   | Ocast32signed, v1 :: nil => Some (Val.longofint v1)
   | Ocast32unsigned, v1 :: nil => Some (Val.longofintu v1)
-  | Onegl, v1::nil => Some (Val.negl v1)
+  | Oaddl, v1 :: v2 :: nil => Some (Val.addl v1 v2)
   | Oaddlimm n, v1::nil => Some (Val.addl v1 (Vlong n))
+  | Onegl, v1::nil => Some (Val.negl v1)
   | Osubl, v1::v2::nil => Some (Val.subl v1 v2)
   | Omull, v1::v2::nil => Some (Val.mull v1 v2)
-  | Omullimm n, v1::nil => Some (Val.mull v1 (Vlong n))
   | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
   | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
-  | Odivl, v1::v2::nil => Val.divls v1 v2
-  | Odivlu, v1::v2::nil => Val.divlu v1 v2
-  | Omodl, v1::v2::nil => Val.modls v1 v2
-  | Omodlu, v1::v2::nil => Val.modlu v1 v2
+  | Odivl, v1::v2::nil => Some (Val.maketotal (Val.divls v1 v2))
+  | Odivlu, v1::v2::nil => Some (Val.maketotal (Val.divlu v1 v2))
+  | Omodl, v1::v2::nil => Some (Val.maketotal (Val.modls v1 v2))
+  | Omodlu, v1::v2::nil => Some (Val.maketotal (Val.modlu v1 v2))
   | Oandl, v1::v2::nil => Some(Val.andl v1 v2)
   | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n))
   | Oorl, v1::v2::nil => Some(Val.orl v1 v2)
   | Oorlimm n, v1::nil => Some (Val.orl v1 (Vlong n))
   | Oxorl, v1::v2::nil => Some(Val.xorl v1 v2)
   | Oxorlimm n, v1::nil => Some (Val.xorl v1 (Vlong n))
-  | Onotl, v1::nil => Some(Val.notl v1)
   | Oshll, v1::v2::nil => Some (Val.shll v1 v2)
   | Oshllimm n, v1::nil => Some (Val.shll v1 (Vint n))
   | Oshrl, v1::v2::nil => Some (Val.shrl v1 v2)
   | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n))
-  | Oshrxlimm n, v1::nil => Val.shrxl v1 (Vint n)
   | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2)
   | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n))
-  | Ororlimm n, v1::nil => Some (Val.rorl v1 (Vint n))
-  | Oleal addr, _ => eval_addressing64 genv sp addr vl
-  | Onegf, v1::nil => Some(Val.negf v1)
-  | Oabsf, v1::nil => Some(Val.absf v1)
-  | Oaddf, v1::v2::nil => Some(Val.addf v1 v2)
-  | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
-  | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
-  | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
-  | Onegfs, v1::nil => Some(Val.negfs v1)
-  | Oabsfs, v1::nil => Some(Val.absfs v1)
-  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
-  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
-  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
-  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
-  | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
-  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
-  | Ointoffloat, v1::nil => Val.intoffloat v1
-  | Ofloatofint, v1::nil => Val.floatofint v1
-  | Ointofsingle, v1::nil => Val.intofsingle v1
-  | Osingleofint, v1::nil => Val.singleofint v1
-  | Olongoffloat, v1::nil => Val.longoffloat v1
-  | Ofloatoflong, v1::nil => Val.floatoflong v1
-  | Olongofsingle, v1::nil => Val.longofsingle v1
-  | Osingleoflong, v1::nil => Val.singleoflong v1
-  | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
-  | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
+  | Oshrxlimm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
+  | Onegf, v1::nil => Some (Val.negf v1)
+  | Oabsf, v1::nil => Some (Val.absf v1)
+  | Oaddf, v1::v2::nil => Some (Val.addf v1 v2)
+  | Osubf, v1::v2::nil => Some (Val.subf v1 v2)
+  | Omulf, v1::v2::nil => Some (Val.mulf v1 v2)
+  | Odivf, v1::v2::nil => Some (Val.divf v1 v2)
+  | Onegfs, v1::nil => Some (Val.negfs v1)
+  | Oabsfs, v1::nil => Some (Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some (Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some (Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some (Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2)
+  | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
+  | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+  | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+  | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+  | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+  | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+  | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+  | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+  | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+  | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+  | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+  | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+  | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+  | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+  | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+  | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+  | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
+  | Obits_of_single, v1::nil => Some (ExtValues.bits_of_single v1)
+  | Obits_of_float, v1::nil => Some (ExtValues.bits_of_float v1)
+  | Osingle_of_bits, v1::nil => Some (ExtValues.single_of_bits v1)
+  | Ofloat_of_bits, v1::nil => Some (ExtValues.float_of_bits v1)
+  | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
+  (* Expansed conditions *)
+  | OEseqw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Ceq) v1 v2 zero32)
+  | OEsnew optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Cne) v1 v2 zero32)
+  | OEsequw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Ceq) v1 v2 zero32)
+  | OEsneuw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Cne) v1 v2 zero32)
+  | OEsltw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmp Clt) v1 v2 zero32)
+  | OEsltuw optR, v1::v2::nil => Some (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) Clt) v1 v2 zero32)
+  | OEsltiw n, v1::nil => Some (Val.cmp Clt v1 (Vint n))
+  | OEsltiuw n, v1::nil => Some (Val.cmpu (Mem.valid_pointer m) Clt v1 (Vint n))
+  | OExoriw n, v1::nil => Some (Val.xor v1 (Vint n))
+  | OEluiw n, nil => Some (Val.shl (Vint n) (Vint (Int.repr 12)))
+  | OEaddiw optR n, nil => Some (apply_bin_oreg optR Val.add (Vint n) Vundef zero32)
+  | OEaddiw optR n, v1::nil => Some (apply_bin_oreg optR Val.add v1 (Vint n) Vundef)
+  | OEandiw n, v1::nil => Some (Val.and (Vint n) v1)
+  | OEoriw n, v1::nil => Some (Val.or (Vint n) v1)
+  | OEseql optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Ceq) v1 v2 zero64))
+  | OEsnel optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Cne) v1 v2 zero64))
+  | OEsequl optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Ceq) v1 v2 zero64))
+  | OEsneul optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Cne) v1 v2 zero64))
+  | OEsltl optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmpl Clt) v1 v2 zero64))
+  | OEsltul optR, v1::v2::nil => Some (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) Clt) v1 v2 zero64))
+  | OEsltil n, v1::nil => Some (Val.maketotal (Val.cmpl Clt v1 (Vlong n)))
+  | OEsltiul n, v1::nil => Some (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Clt v1 (Vlong n)))
+  | OExoril n, v1::nil => Some (Val.xorl v1 (Vlong n))
+  | OEluil n, nil => Some (Vlong (Int64.sign_ext 32 (Int64.shl n (Int64.repr 12))))
+  | OEaddil optR n, nil => Some (apply_bin_oreg optR Val.addl (Vlong n) Vundef zero64)
+  | OEaddil optR n, v1::nil => Some (apply_bin_oreg optR Val.addl v1 (Vlong n) Vundef)
+  | OEandil n, v1::nil => Some (Val.andl (Vlong n) v1)
+  | OEoril n, v1::nil => Some (Val.orl (Vlong n) v1)
+  | OEloadli n, nil => Some (Vlong n)
+  | OEmayundef mu, v1 :: v2 :: nil => Some (eval_may_undef mu v1 v2)
+  | OEfeqd, v1::v2::nil => Some (Val.cmpf Ceq v1 v2)
+  | OEfltd, v1::v2::nil => Some (Val.cmpf Clt v1 v2)
+  | OEfled, v1::v2::nil => Some (Val.cmpf Cle v1 v2)
+  | OEfeqs, v1::v2::nil => Some (Val.cmpfs Ceq v1 v2)
+  | OEflts, v1::v2::nil => Some (Val.cmpfs Clt v1 v2)
+  | OEfles, v1::v2::nil => Some (Val.cmpfs Cle v1 v2)
+  | Oselectl, vb::vt::vf::nil => Some (Val.normalize (ExtValues.select01_long vb vt vf) Tlong)
   | _, _ => None
   end.
 
-Remark eval_addressing_Aglobal:
-  forall (F V: Type) (genv: Genv.t F V) sp id ofs,
-  eval_addressing genv sp (Aglobal id ofs) nil = Some (Genv.symbol_address genv id ofs).
-Proof.
-  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto.
-Qed.
+Definition eval_addressing
+    (F V: Type) (genv: Genv.t F V) (sp: val)
+    (addr: addressing) (vl: list val) : option val :=
+  match addr, vl with
+  | Aindexed n, v1 :: nil => Some (Val.offset_ptr v1 n)
+  | Aglobal s ofs, nil => Some (Genv.symbol_address genv s ofs)
+  | Ainstack n, nil => Some (Val.offset_ptr sp n)
+  | _, _ => None
+  end.
 
 Remark eval_addressing_Ainstack:
   forall (F V: Type) (genv: Genv.t F V) sp ofs,
   eval_addressing genv sp (Ainstack ofs) nil = Some (Val.offset_ptr sp ofs).
 Proof.
-  intros. unfold eval_addressing, eval_addressing32, eval_addressing64; destruct Archi.ptr64; auto.
+  intros. reflexivity.
 Qed.
 
 Remark eval_addressing_Ainstack_inv:
   forall (F V: Type) (genv: Genv.t F V) sp ofs vl v,
   eval_addressing genv sp (Ainstack ofs) vl = Some v -> vl = nil /\ v = Val.offset_ptr sp ofs.
 Proof.
-  unfold eval_addressing, eval_addressing32, eval_addressing64;
-  intros; destruct Archi.ptr64; destruct vl; inv H; auto.
+  unfold eval_addressing; intros; destruct vl; inv H; auto.
 Qed.
 
 Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
-      destruct x; simpl in H; FuncInv
+      destruct x; cbn in H; FuncInv
   | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
-      destruct v; simpl in H; FuncInv
+      destruct v; cbn in H; FuncInv
   | H: (if Archi.ptr64 then _ else _) = Some _ |- _ =>
       destruct Archi.ptr64 eqn:?; FuncInv
   | H: (Some _ = Some _) |- _ =>
@@ -467,25 +559,32 @@ Definition type_of_condition (c: condition) : list typ :=
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
   | Ccompfs _ => Tsingle :: Tsingle :: nil
   | Cnotcompfs _ => Tsingle :: Tsingle :: nil
-  | Cmaskzero _ => Tint :: nil
-  | Cmasknotzero _ => Tint :: nil
+  | CEbeqw _ => Tint :: Tint :: nil
+  | CEbnew _ => Tint :: Tint :: nil
+  | CEbequw _ => Tint :: Tint :: nil
+  | CEbneuw _ => Tint :: Tint :: nil
+  | CEbltw _ => Tint :: Tint :: nil
+  | CEbltuw _ => Tint :: Tint :: nil
+  | CEbgew _ => Tint :: Tint :: nil
+  | CEbgeuw _ => Tint :: Tint :: nil
+  | CEbeql _ => Tlong :: Tlong :: nil
+  | CEbnel _ => Tlong :: Tlong :: nil
+  | CEbequl _ => Tlong :: Tlong :: nil
+  | CEbneul _ => Tlong :: Tlong :: nil
+  | CEbltl _ => Tlong :: Tlong :: nil
+  | CEbltul _ => Tlong :: Tlong :: nil
+  | CEbgel _ => Tlong :: Tlong :: nil
+  | CEbgeul _ => Tlong :: Tlong :: nil
   end.
 
-Definition type_of_addressing_gen (tyA: typ) (addr: addressing): list typ :=
-  match addr with
-  | Aindexed _ => tyA :: nil
-  | Aindexed2 _ => tyA :: tyA :: nil
-  | Ascaled _ _ => tyA :: nil
-  | Aindexed2scaled _ _ => tyA :: tyA :: nil
-  | Aglobal _ _ => nil
-  | Abased _ _ => tyA :: nil
-  | Abasedscaled _ _ _ => tyA :: nil
-  | Ainstack _ => nil
-  end.
+(** The type of mayundef and addsp is dynamic *)
 
-Definition type_of_addressing := type_of_addressing_gen Tptr.
-Definition type_of_addressing32 := type_of_addressing_gen Tint.
-Definition type_of_addressing64 := type_of_addressing_gen Tlong.
+Definition type_of_mayundef mu :=
+  match mu with
+  | MUint | MUshrx _ => (Tint :: Tint :: nil, Tint)
+  | MUlong => (Tlong :: Tint :: nil, Tint)
+  | MUshrxl _ => (Tlong :: Tlong :: nil, Tlong)
+  end.
 
 Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
@@ -494,15 +593,15 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Olongconst _ => (nil, Tlong)
   | Ofloatconst f => (nil, Tfloat)
   | Osingleconst f => (nil, Tsingle)
-  | Oindirectsymbol _ => (nil, Tptr)
+  | Oaddrsymbol _ _ => (nil, Tptr)
+  | Oaddrstack _ => (nil, Tptr)
   | Ocast8signed => (Tint :: nil, Tint)
-  | Ocast8unsigned => (Tint :: nil, Tint)
   | Ocast16signed => (Tint :: nil, Tint)
-  | Ocast16unsigned => (Tint :: nil, Tint)
+  | Oadd => (Tint :: Tint :: nil, Tint)
+  | Oaddimm _ => (Tint :: nil, Tint)
   | Oneg => (Tint :: nil, Tint)
   | Osub => (Tint :: Tint :: nil, Tint)
   | Omul => (Tint :: Tint :: nil, Tint)
-  | Omulimm _ => (Tint :: nil, Tint)
   | Omulhs => (Tint :: Tint :: nil, Tint)
   | Omulhu => (Tint :: Tint :: nil, Tint)
   | Odiv => (Tint :: Tint :: nil, Tint)
@@ -515,27 +614,23 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Oorimm _ => (Tint :: nil, Tint)
   | Oxor => (Tint :: Tint :: nil, Tint)
   | Oxorimm _ => (Tint :: nil, Tint)
-  | Onot => (Tint :: nil, Tint)
   | Oshl => (Tint :: Tint :: nil, Tint)
   | Oshlimm _ => (Tint :: nil, Tint)
   | Oshr => (Tint :: Tint :: nil, Tint)
   | Oshrimm _ => (Tint :: nil, Tint)
-  | Oshrximm _ => (Tint :: nil, Tint)
   | Oshru => (Tint :: Tint :: nil, Tint)
   | Oshruimm _ => (Tint :: nil, Tint)
-  | Ororimm _ => (Tint :: nil, Tint)
-  | Oshldimm _ => (Tint :: Tint :: nil, Tint)
-  | Olea addr => (type_of_addressing32 addr, Tint)
+  | Oshrximm _ => (Tint :: nil, Tint)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
   | Ohighlong => (Tlong :: nil, Tint)
   | Ocast32signed => (Tint :: nil, Tlong)
   | Ocast32unsigned => (Tint :: nil, Tlong)
-  | Onegl => (Tlong :: nil, Tlong)
+  | Oaddl => (Tlong :: Tlong :: nil, Tlong)
   | Oaddlimm _ => (Tlong :: nil, Tlong)
+  | Onegl => (Tlong :: nil, Tlong)
   | Osubl => (Tlong :: Tlong :: nil, Tlong)
   | Omull => (Tlong :: Tlong :: nil, Tlong)
-  | Omullimm _ => (Tlong :: nil, Tlong)
   | Omullhs => (Tlong :: Tlong :: nil, Tlong)
   | Omullhu => (Tlong :: Tlong :: nil, Tlong)
   | Odivl => (Tlong :: Tlong :: nil, Tlong)
@@ -548,16 +643,13 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Oorlimm _ => (Tlong :: nil, Tlong)
   | Oxorl => (Tlong :: Tlong :: nil, Tlong)
   | Oxorlimm _ => (Tlong :: nil, Tlong)
-  | Onotl => (Tlong :: nil, Tlong)
   | Oshll => (Tlong :: Tint :: nil, Tlong)
   | Oshllimm _ => (Tlong :: nil, Tlong)
   | Oshrl => (Tlong :: Tint :: nil, Tlong)
   | Oshrlimm _ => (Tlong :: nil, Tlong)
-  | Oshrxlimm _ => (Tlong :: nil, Tlong)
   | Oshrlu => (Tlong :: Tint :: nil, Tlong)
   | Oshrluimm _ => (Tlong :: nil, Tlong)
-  | Ororlimm _ => (Tlong :: nil, Tlong)
-  | Oleal addr => (type_of_addressing64 addr, Tlong)
+  | Oshrxlimm _ => (Tlong :: nil, Tlong)
   | Onegf => (Tfloat :: nil, Tfloat)
   | Oabsf => (Tfloat :: nil, Tfloat)
   | Oaddf => (Tfloat :: Tfloat :: nil, Tfloat)
@@ -573,15 +665,70 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
   | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
+  | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Ofloatofintu => (Tint :: nil, Tfloat)
   | Ointofsingle => (Tsingle :: nil, Tint)
+  | Ointuofsingle => (Tsingle :: nil, Tint)
   | Osingleofint => (Tint :: nil, Tsingle)
+  | Osingleofintu => (Tint :: nil, Tsingle)
   | Olongoffloat => (Tfloat :: nil, Tlong)
+  | Olonguoffloat => (Tfloat :: nil, Tlong)
   | Ofloatoflong => (Tlong :: nil, Tfloat)
+  | Ofloatoflongu => (Tlong :: nil, Tfloat)
   | Olongofsingle => (Tsingle :: nil, Tlong)
+  | Olonguofsingle => (Tsingle :: nil, Tlong)
   | Osingleoflong => (Tlong :: nil, Tsingle)
+  | Osingleoflongu => (Tlong :: nil, Tsingle)
   | Ocmp c => (type_of_condition c, Tint)
-  | Osel c ty => (ty :: ty :: type_of_condition c, ty)
+  | OEseqw _ => (Tint :: Tint :: nil, Tint)
+  | OEsnew _ => (Tint :: Tint :: nil, Tint)
+  | OEsequw _ => (Tint :: Tint :: nil, Tint)
+  | OEsneuw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltuw _ => (Tint :: Tint :: nil, Tint)
+  | OEsltiw _ => (Tint :: nil, Tint)
+  | OEsltiuw _ => (Tint :: nil, Tint)
+  | OExoriw _ => (Tint :: nil, Tint)
+  | OEluiw _ => (nil, Tint)
+  | OEaddiw None _ => (Tint :: nil, Tint)
+  | OEaddiw (Some _) _ => (nil, Tint)
+  | OEandiw _ => (Tint :: nil, Tint)
+  | OEoriw _ => (Tint :: nil, Tint)
+  | OEseql _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsnel _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsequl _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsneul _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltl _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltul _ => (Tlong :: Tlong :: nil, Tint)
+  | OEsltil _ => (Tlong :: nil, Tint)
+  | OEsltiul _ => (Tlong :: nil, Tint)
+  | OEandil _ => (Tlong :: nil, Tlong)
+  | OEoril _ => (Tlong :: nil, Tlong)
+  | OExoril _ => (Tlong :: nil, Tlong)
+  | OEluil _ => (nil, Tlong)
+  | OEaddil None _ => (Tlong :: nil, Tlong)
+  | OEaddil (Some _) _ => (nil, Tlong)
+  | OEloadli _ => (nil, Tlong)
+  | OEmayundef mu => type_of_mayundef mu
+  | OEfeqd => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfltd => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfled => (Tfloat :: Tfloat :: nil, Tint)
+  | OEfeqs => (Tsingle :: Tsingle :: nil, Tint)
+  | OEflts => (Tsingle :: Tsingle :: nil, Tint)
+  | OEfles => (Tsingle :: Tsingle :: nil, Tint)
+  | Obits_of_single => (Tsingle :: nil, Tint)
+  | Obits_of_float => (Tfloat :: nil, Tlong)
+  | Osingle_of_bits => (Tint :: nil, Tsingle)
+  | Ofloat_of_bits => (Tlong :: nil, Tfloat)
+  | Oselectl => (Tint :: Tlong :: Tlong :: nil, Tlong)
+  end.
+
+Definition type_of_addressing (addr: addressing) : list typ :=
+  match addr with
+  | Aindexed _ => Tptr :: nil
+  | Aglobal _ _ => nil
+  | Ainstack _ => nil
   end.
 
 (** Weak type soundness results for [eval_operation]:
@@ -605,33 +752,12 @@ Proof.
   intros. unfold Val.has_type, Val.addl. destruct Archi.ptr64, v1, v2; auto.
 Qed.
 
-Lemma type_of_addressing64_sound:
-  forall addr vl sp v,
-  eval_addressing64 genv sp addr vl = Some v ->
-  Val.has_type v Tlong.
+Remark type_mayundef:
+  forall mu v1 v2, Val.has_type (eval_may_undef mu v1 v2) (snd (type_of_mayundef mu)).
 Proof.
-  intros. destruct addr; simpl in H; FuncInv; subst; simpl; auto using type_addl.
-- unfold Genv.symbol_address; destruct (Genv.find_symbol genv i); simpl; auto.
-- destruct sp; simpl; auto.
-Qed.
-
-Lemma type_of_addressing32_sound:
-  forall addr vl sp v,
-  eval_addressing32 genv sp addr vl = Some v ->
-  Val.has_type v Tint.
-Proof.
-  intros. destruct addr; simpl in H; FuncInv; subst; simpl; auto using type_add.
-- unfold Genv.symbol_address; destruct (Genv.find_symbol genv i); simpl; auto.
-- destruct sp; simpl; auto.
-Qed.
-
-Corollary type_of_addressing_sound:
-  forall addr vl sp v,
-  eval_addressing genv sp addr vl = Some v ->
-  Val.has_type v Tptr.
-Proof.
-  unfold eval_addressing, Tptr; intros.
-  destruct Archi.ptr64; eauto using type_of_addressing64_sound, type_of_addressing32_sound.
+  intros. unfold eval_may_undef.
+  destruct mu eqn:EQMU, v1, v2; simpl; auto.
+  all: destruct Int.ltu; simpl; auto.
 Qed.
 
 Lemma type_of_operation_sound:
@@ -639,124 +765,287 @@ Lemma type_of_operation_sound:
   op <> Omove ->
   eval_operation genv sp op vl m = Some v ->
   Val.has_type v (snd (type_of_operation op)).
-Proof with (try exact I; try reflexivity).
+Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
   intros.
-  destruct op; simpl in H0; FuncInv; subst; simpl.
-  congruence.
-  exact I.
-  exact I.
-  exact I.
-  exact I.
-  unfold Genv.symbol_address; destruct (Genv.find_symbol genv id)...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  unfold Val.sub, Val.has_type; destruct Archi.ptr64, v0, v1... destruct (eq_block b b0)...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0. destruct (Int.eq i0 Int.zero); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0. destruct (Int.eq i0 Int.zero); inv H2...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
-  destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
-  destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
-  destruct v0...
-  destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
-  destruct v1; simpl... destruct (Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize)...
-  eapply type_of_addressing32_sound; eauto.
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  destruct v0...
-  unfold Val.addl, Val.has_type; destruct Archi.ptr64, v0...
-  unfold Val.subl, Val.has_type; destruct Archi.ptr64, v0, v1... destruct (eq_block b b0)...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
-  destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero); inv H2...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
-  destruct v0; inv H0. destruct (Int.ltu n (Int.repr 63)); inv H2...
-  destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
-  destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
-  destruct v0...
-  eapply type_of_addressing64_sound; eauto.
-  destruct v0...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0; destruct v1...
-  destruct v0...
-  destruct v0...
-  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
-  destruct v0; simpl in H0; inv H0...
-  destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
-  destruct v0; simpl in H0; inv H0...
-  destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
-  destruct v0; simpl in H0; inv H0...
-  destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
-  destruct v0; simpl in H0; inv H0...
-  destruct (eval_condition cond vl m); simpl... destruct b...
-  unfold Val.select. destruct (eval_condition c vl m). apply Val.normalize_type. exact I.
+  destruct op; simpl; simpl in H0; FuncInv; subst; simpl.
+  (* move *)
+  - simpl in H; congruence.
+  (* intconst, longconst, floatconst, singleconst *)
+  - exact I.
+  - exact I.
+  - exact I.
+  - exact I.
+  (* addrsymbol *)
+  - unfold Genv.symbol_address. destruct (Genv.find_symbol genv id)...
+  (* addrstack *)
+  - destruct sp... apply Val.Vptr_has_type.
+  (* castsigned *)
+  - destruct v0...
+  - destruct v0...
+  (* add, addimm *)
+  - apply type_add.
+  - apply type_add.
+  (* neg, sub *)
+  - destruct v0...
+  - unfold Val.sub. destruct v0; destruct v1... 
+    unfold Val.has_type; destruct Archi.ptr64...
+    destruct Archi.ptr64... destruct (eq_block b b0)...
+  (* mul, mulhs, mulhu *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* div, divu *)
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
+  (* mod, modu *)
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero
+         || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int.eq i0 Int.zero); cbn; trivial.
+  (* and, andimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* or, orimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* xor, xorimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* shl, shlimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  (* shr, shrimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  (* shru, shruimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
+  - destruct v0; simpl... destruct (Int.ltu n Int.iwordsize)...
+  (* shrx *)
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
+  (* makelong, lowlong, highlong *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  - destruct v0...
+  (* cast32 *)
+  - destruct v0...
+  - destruct v0...
+  (* addl, addlimm *)
+  - apply type_addl.
+  - apply type_addl.
+  (* negl, subl *)
+  - destruct v0...
+  - unfold Val.subl. destruct v0; destruct v1... 
+    unfold Val.has_type; destruct Archi.ptr64...
+    destruct Archi.ptr64... destruct (eq_block b b0)...
+  (* mull, mullhs, mullhu *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* divl, divlu *)
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
+  (* modl, modlu *)
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn; trivial.
+  - destruct v0; destruct v1; cbn; trivial.
+    destruct (Int64.eq i0 Int64.zero); cbn; trivial.
+  (* andl, andlimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* orl, orlimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* xorl, xorlimm *)
+  - destruct v0; destruct v1...
+  - destruct v0...
+  (* shll, shllimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  (* shr, shrimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  (* shru, shruimm *)
+  - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
+  - destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')...
+  (* shrxl *)
+  - destruct v0; cbn; trivial.
+    destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
+  (* negf, absf *)
+  - destruct v0...
+  - destruct v0...
+  (* addf, subf *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* mulf, divf *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* negfs, absfs *)
+  - destruct v0...
+  - destruct v0...
+  (* addfs, subfs *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* mulfs, divfs *)
+  - destruct v0; destruct v1...
+  - destruct v0; destruct v1...
+  (* singleoffloat, floatofsingle *)
+  - destruct v0...
+  - destruct v0...
+  (* intoffloat, intuoffloat *)
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_intu f); cbn; trivial.
+  (* floatofint, floatofintu *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* intofsingle, intuofsingle *)
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_int f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_intu f); cbn; trivial.
+  (* singleofint, singleofintu *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* longoffloat, longuoffloat *)
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float.to_longu f); cbn; trivial.
+  (* floatoflong, floatoflongu *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* longofsingle, longuofsingle *)
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_long f); cbn; trivial.
+  - destruct v0; cbn; trivial.
+    destruct (Float32.to_longu f); cbn; trivial.
+  (* singleoflong, singleoflongu *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* cmp *)
+  - destruct (eval_condition cond vl m)... destruct b...
+  (* OEseqw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsnew *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsequw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsneuw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsltw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmp;
+    destruct Val.cmp_bool... all: destruct b...
+  (* OEsltuw *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpu;
+    destruct Val.cmpu_bool... all: destruct b...
+  (* OEsltiw *)
+  - unfold Val.cmp; destruct Val.cmp_bool...
+    all: destruct b...
+  (* OEsltiuw *)
+  - unfold Val.cmpu; destruct Val.cmpu_bool... destruct b...
+  (* OEaddiw *)
+  - destruct optR as [[]|]; simpl in *; trivial.
+  - destruct optR as [[]|]; simpl in *; trivial;
+    apply type_add.
+  (* OEandiw *)
+  - destruct v0...
+  (* OEoriw *)
+  - destruct v0...
+  (* OExoriw *)
+  - destruct v0...
+  (* OEluiw *)
+  - destruct (Int.ltu _ _); cbn; trivial.
+  (* OEseql *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsnel *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsequl *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsneul *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsltl *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmpl;
+    destruct Val.cmpl_bool... all: destruct b...
+  (* OEsltul *)
+  - destruct optR as [[]|]; simpl; unfold Val.cmplu;
+    destruct Val.cmplu_bool... all: destruct b...
+  (* OEsltil *)
+  - unfold Val.cmpl; destruct Val.cmpl_bool...
+    all: destruct b...
+  (* OEsltiul *)
+  - unfold Val.cmplu; destruct Val.cmplu_bool... destruct b...
+  (* OEaddil *)
+  - destruct optR as [[]|]; simpl in *; trivial.
+  - destruct optR as [[]|]; simpl in *; trivial;
+    apply type_addl.
+  (* OEandil *)
+  - destruct v0...
+  (* OEoril *)
+  - destruct v0...
+  (* OExoril *)
+  - destruct v0...
+  (* OEluil *)
+  - simpl; trivial.
+  (* OEloadli *)
+  - trivial.
+  (* OEmayundef *)
+  - apply type_mayundef.
+  (* OEfeqd *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfltd *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfled *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float.cmp; cbn; auto.
+  (* OEfeqs *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* OEflts *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* OEfles *)
+  - destruct v0; destruct v1; cbn; auto.
+    destruct Float32.cmp; cbn; auto.
+  (* Bits_of_single, float *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* single, float of bits *)
+  - destruct v0; cbn; trivial.
+  - destruct v0; cbn; trivial.
+  (* selectl *)
+  - destruct v0; cbn; trivial.
+    destruct Int.eq; cbn.
+    apply Val.normalize_type.
+    destruct Int.eq; cbn; trivial.
+    apply Val.normalize_type.
 Qed.
 
-
+(* This should not be simplified to "false" because it breaks proofs elsewhere. *)
 Definition is_trapping_op (op : operation) :=
   match op with
-  | Odiv | Odivl | Odivu | Odivlu
-  | Omod | Omodl | Omodu | Omodlu
-  | Oshrximm _ | Oshrxlimm _
-  | Ointoffloat
-  | Ointofsingle
-  | Olongoffloat
-  | Olongofsingle 
-  | Osingleofint
-  | Osingleoflong
-  | Ofloatofint
-  | Ofloatoflong
-  | Olea _ | Oleal _ (* TODO this is suboptimal *) => true
+  | Omove => false
   | _ => false
   end.
 
@@ -772,11 +1061,14 @@ Lemma is_trapping_op_sound:
     eval_operation genv sp op vl m <> None.
 Proof.
   unfold args_of_operation.
-  destruct op; destruct eq_operation; intros; simpl in *; try congruence.
+  destruct op eqn:E; destruct eq_operation; intros; simpl in *; try congruence.
   all: try (destruct vl as [ | vh1 vl1]; try discriminate).
   all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
   all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
   all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+  all: try destruct optR as [[]|]; simpl in H0; try discriminate.
+  all: try destruct Archi.ptr64; simpl in *; try discriminate.
+  all: try destruct mu; simpl in *; try discriminate.
 Qed.
 End SOUNDNESS.
 
@@ -820,8 +1112,22 @@ Definition negate_condition (cond: condition): condition :=
   | Cnotcompf c => Ccompf c
   | Ccompfs c => Cnotcompfs c
   | Cnotcompfs c => Ccompfs c
-  | Cmaskzero n => Cmasknotzero n
-  | Cmasknotzero n => Cmaskzero n
+  | CEbeqw optR => CEbnew optR
+  | CEbnew optR => CEbeqw optR
+  | CEbequw optR => CEbneuw optR
+  | CEbneuw optR => CEbequw optR
+  | CEbltw optR => CEbgew optR
+  | CEbltuw optR => CEbgeuw optR
+  | CEbgew optR => CEbltw optR
+  | CEbgeuw optR => CEbltuw optR
+  | CEbeql optR => CEbnel optR
+  | CEbnel optR => CEbeql optR
+  | CEbequl optR => CEbneul optR
+  | CEbneul optR => CEbequl optR
+  | CEbltl optR => CEbgel optR
+  | CEbltul optR => CEbgeul optR
+  | CEbgel optR => CEbltl optR
+  | CEbgeul optR => CEbltul optR
   end.
 
 Lemma eval_negate_condition:
@@ -841,8 +1147,39 @@ Proof.
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
   repeat (destruct vl; auto).
   repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
-  destruct vl; auto. destruct vl; auto.
-  destruct vl; auto. destruct vl; auto. destruct (Val.maskzero_bool v n) as [[]|]; auto.
+
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmp_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpu_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Cne) with (negate_comparison Ceq) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Ceq) with (negate_comparison Cne) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Cge) with (negate_comparison Clt) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmpl_bool.
+  repeat (destruct vl; auto); replace (Clt) with (negate_comparison Cge) by auto; destruct optR as [[]|];
+  apply Val.negate_cmplu_bool.
 Qed.
 
 (** Shifting stack-relative references.  This is used in [Stacking]. *)
@@ -855,8 +1192,7 @@ Definition shift_stack_addressing (delta: Z) (addr: addressing) :=
 
 Definition shift_stack_operation (delta: Z) (op: operation) :=
   match op with
-  | Olea  addr => Olea  (shift_stack_addressing delta addr)
-  | Oleal addr => Oleal (shift_stack_addressing delta addr)
+  | Oaddrstack ofs => Oaddrstack (Ptrofs.add ofs (Ptrofs.repr delta))
   | _ => op
   end.
 
@@ -869,29 +1205,8 @@ Qed.
 Lemma type_shift_stack_operation:
   forall delta op, type_of_operation (shift_stack_operation delta op) = type_of_operation op.
 Proof.
-  intros. destruct op; auto; simpl; decEq; destruct a; auto.
-Qed.
-
-Lemma eval_shift_stack_addressing32:
-  forall F V (ge: Genv.t F V) sp addr vl delta,
-  eval_addressing32 ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
-  eval_addressing32 ge (Vptr sp (Ptrofs.repr delta)) addr vl.
-Proof.
-  intros.
-  assert (A: forall i, Ptrofs.add Ptrofs.zero (Ptrofs.add i (Ptrofs.repr delta)) = Ptrofs.add (Ptrofs.repr delta) i).
-  { intros. rewrite Ptrofs.add_zero_l. apply Ptrofs.add_commut. }
-  destruct addr; simpl; rewrite ?A; reflexivity.
-Qed.
-
-Lemma eval_shift_stack_addressing64:
-  forall F V (ge: Genv.t F V) sp addr vl delta,
-  eval_addressing64 ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
-  eval_addressing64 ge (Vptr sp (Ptrofs.repr delta)) addr vl.
-Proof.
-  intros.
-  assert (A: forall i, Ptrofs.add Ptrofs.zero (Ptrofs.add i (Ptrofs.repr delta)) = Ptrofs.add (Ptrofs.repr delta) i).
-  { intros. rewrite Ptrofs.add_zero_l. apply Ptrofs.add_commut. }
-  destruct addr; simpl; rewrite ?A; reflexivity.
+  intros. destruct op; auto;
+  try destruct optR as [[]|]; simpl; auto.
 Qed.
 
 Lemma eval_shift_stack_addressing:
@@ -899,8 +1214,8 @@ Lemma eval_shift_stack_addressing:
   eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
   eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl.
 Proof.
-  intros. unfold eval_addressing.
-  destruct Archi.ptr64; auto using eval_shift_stack_addressing32, eval_shift_stack_addressing64.
+  intros. destruct addr; simpl; auto. destruct vl; auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
 Qed.
 
 Lemma eval_shift_stack_operation:
@@ -908,70 +1223,21 @@ Lemma eval_shift_stack_operation:
   eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m =
   eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m.
 Proof.
-  intros. destruct op; simpl; auto using eval_shift_stack_addressing32, eval_shift_stack_addressing64.
+  intros. destruct op eqn:E; simpl; auto; destruct vl; auto.
+  rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
 Qed.
 
 (** Offset an addressing mode [addr] by a quantity [delta], so that
   it designates the pointer [delta] bytes past the pointer designated
-  by [addr].  This may be undefined if an offset overflows, in which case
-  [None] is returned.  *)
+  by [addr].  May be undefined, in which case [None] is returned. *)
 
-Definition offset_addressing_total (addr: addressing) (delta: Z) : addressing :=
+Definition offset_addressing (addr: addressing) (delta: Z) : option addressing :=
   match addr with
-  | Aindexed n => Aindexed (n + delta)
-  | Aindexed2 n => Aindexed2 (n + delta)
-  | Ascaled sc n => Ascaled sc (n + delta)
-  | Aindexed2scaled sc n => Aindexed2scaled sc (n + delta)
-  | Aglobal s n => Aglobal s (Ptrofs.add n (Ptrofs.repr delta))
-  | Abased s n => Abased s (Ptrofs.add n (Ptrofs.repr delta))
-  | Abasedscaled sc s n => Abasedscaled sc s (Ptrofs.add n (Ptrofs.repr delta))
-  | Ainstack n => Ainstack (Ptrofs.add n (Ptrofs.repr delta))
+  | Aindexed n => Some(Aindexed (Ptrofs.add n (Ptrofs.repr delta)))
+  | Aglobal id n => Some(Aglobal id (Ptrofs.add n (Ptrofs.repr delta)))
+  | Ainstack n => Some(Ainstack (Ptrofs.add n (Ptrofs.repr delta)))
   end.
 
-Definition offset_addressing (addr: addressing) (delta: Z) : option addressing :=
-  let addr' := offset_addressing_total addr delta in
-  if addressing_valid addr' then Some addr' else None.
-
-Lemma eval_offset_addressing_total_32:
-  forall (F V: Type) (ge: Genv.t F V) sp addr args delta v,
-  eval_addressing32 ge sp addr args = Some v ->
-  eval_addressing32 ge sp (offset_addressing_total addr delta) args = Some(Val.add v (Vint (Int.repr delta))).
-Proof.
-  assert (A: forall x y, Int.add (Int.repr x) (Int.repr y) = Int.repr (x + y)).
-  { intros. apply Int.eqm_samerepr; auto with ints. }
-  assert (B: forall delta, Archi.ptr64 = false -> Ptrofs.repr delta = Ptrofs.of_int (Int.repr delta)).
-  { intros; symmetry; auto with ptrofs. }
-  intros. destruct addr; simpl in *; FuncInv; subst; simpl.
-- rewrite <- A, ! Val.add_assoc; auto.
-- rewrite <- A, ! Val.add_assoc; auto.
-- rewrite <- A, ! Val.add_assoc; auto.
-- rewrite <- A, ! Val.add_assoc; auto.
-- rewrite B, Genv.shift_symbol_address_32 by auto. auto.
-- rewrite B, Genv.shift_symbol_address_32 by auto. rewrite ! Val.add_assoc. do 2 f_equal. apply Val.add_commut.
-- rewrite B, Genv.shift_symbol_address_32 by auto. rewrite ! Val.add_assoc. do 2 f_equal. apply Val.add_commut.
-- destruct sp; simpl; auto. rewrite Heqb. rewrite Ptrofs.add_assoc. do 4 f_equal. symmetry; auto with ptrofs.
-Qed.
-
-Lemma eval_offset_addressing_total_64:
-  forall (F V: Type) (ge: Genv.t F V) sp addr args delta v,
-  eval_addressing64 ge sp addr args = Some v ->
-  eval_addressing64 ge sp (offset_addressing_total addr delta) args = Some(Val.addl v (Vlong (Int64.repr delta))).
-Proof.
-  assert (A: forall x y, Int64.add (Int64.repr x) (Int64.repr y) = Int64.repr (x + y)).
-  { intros. apply Int64.eqm_samerepr; auto with ints. }
-  assert (B: forall delta, Archi.ptr64 = true -> Ptrofs.repr delta = Ptrofs.of_int64 (Int64.repr delta)).
-  { intros; symmetry; auto with ptrofs. }
-  intros. destruct addr; simpl in *; FuncInv; subst; simpl.
-- rewrite <- A, ! Val.addl_assoc; auto.
-- rewrite <- A, ! Val.addl_assoc; auto.
-- rewrite <- A, ! Val.addl_assoc; auto.
-- rewrite <- A, ! Val.addl_assoc; auto.
-- rewrite B, Genv.shift_symbol_address_64 by auto. auto.
-- destruct sp; simpl; auto. rewrite Heqb. rewrite Ptrofs.add_assoc. do 4 f_equal. symmetry; auto with ptrofs.
-Qed.
-
-(** The following lemma is used only in [Allocproof] in cases where [Archi.ptr64 = false]. *)
-
 Lemma eval_offset_addressing:
   forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
   offset_addressing addr delta = Some addr' ->
@@ -979,8 +1245,17 @@ Lemma eval_offset_addressing:
   Archi.ptr64 = false ->
   eval_addressing ge sp addr' args = Some(Val.add v (Vint (Int.repr delta))).
 Proof.
-  intros. unfold offset_addressing in H. destruct (addressing_valid (offset_addressing_total addr delta)); inv H.
-  unfold eval_addressing in *; rewrite H1 in *. apply eval_offset_addressing_total_32; auto.
+  intros.
+  assert (A: forall x n,
+             Val.offset_ptr x (Ptrofs.add n (Ptrofs.repr delta)) =
+             Val.add (Val.offset_ptr x n) (Vint (Int.repr delta))).
+  { intros; destruct x; simpl; auto. rewrite H1. 
+    rewrite Ptrofs.add_assoc. f_equal; f_equal; f_equal. symmetry; auto with ptrofs. }
+  destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
+- rewrite A; auto.
+- unfold Genv.symbol_address. destruct (Genv.find_symbol ge i); auto. 
+  simpl. rewrite H1. f_equal; f_equal; f_equal. symmetry; auto with ptrofs.
+- rewrite A; auto.
 Qed.
 
 (** Operations that are so cheap to recompute that CSE should not factor them out. *)
@@ -988,41 +1263,55 @@ Qed.
 Definition is_trivial_op (op: operation) : bool :=
   match op with
   | Omove => true
-  | Ointconst _ => true
-  | Olongconst _ => true
-  | Olea (Aglobal _ _) => true
-  | Olea (Ainstack _) => true
-  | Oleal (Aglobal _ _) => true
-  | Oleal (Ainstack _) => true
+  | Ointconst n => Int.eq (Int.sign_ext 12 n) n
+  | Olongconst n => Int64.eq (Int64.sign_ext 12 n) n
+  | Oaddrstack _ => true
   | _ => false
   end.
 
 (** Operations that depend on the memory state. *)
 
-Definition cond_depends_on_memory (c: condition) : bool :=
-  match c with
+Definition cond_depends_on_memory (cond : condition) : bool :=
+  match cond with
   | Ccompu _ => negb Archi.ptr64
   | Ccompuimm _ _ => negb Archi.ptr64
   | Ccomplu _ => Archi.ptr64
   | Ccompluimm _ _ => Archi.ptr64
+  | CEbequw _ => negb Archi.ptr64
+  | CEbneuw _ => negb Archi.ptr64
+  | CEbltuw _ => negb Archi.ptr64
+  | CEbgeuw _ => negb Archi.ptr64
+  | CEbequl _ => Archi.ptr64
+  | CEbneul _ => Archi.ptr64
+  | CEbltul _ => Archi.ptr64
+  | CEbgeul _ => Archi.ptr64
   | _ => false
   end.
 
 Definition op_depends_on_memory (op: operation) : bool :=
   match op with
-  | Ocmp c => cond_depends_on_memory c
-  | Osel c ty => cond_depends_on_memory c
+  | Ocmp cmp => cond_depends_on_memory cmp
+  | OEsequw _ => negb Archi.ptr64
+  | OEsneuw _ => negb Archi.ptr64
+  | OEsltiuw _ => negb Archi.ptr64
+  | OEsltuw _ => negb Archi.ptr64
+  | OEsequl _ => Archi.ptr64
+  | OEsneul _ => Archi.ptr64
+  | OEsltul _ => Archi.ptr64
+  | OEsltiul _ => Archi.ptr64
   | _ => false
   end.
 
 Lemma cond_depends_on_memory_correct:
-  forall c args m1 m2,
-  cond_depends_on_memory c = false ->
-  eval_condition c args m1 = eval_condition c args m2.
+  forall cond args m1 m2,
+  cond_depends_on_memory cond = false ->
+  eval_condition cond args m1 = eval_condition cond args m2.
 Proof.
-  intros until m2. 
-  destruct c; simpl; intros SF; auto; rewrite ? negb_false_iff in SF;
-  unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+  intros until m2.
+  destruct cond; cbn; try congruence.
+  all: unfold Val.cmpu_bool, Val.cmplu_bool.
+  all: destruct Archi.ptr64; cbn; intro SF; try discriminate.
+  all: reflexivity.
 Qed.
 
 Lemma op_depends_on_memory_correct:
@@ -1030,11 +1319,14 @@ Lemma op_depends_on_memory_correct:
   op_depends_on_memory op = false ->
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
-  intros until m2. destruct op; simpl; try congruence; intros C.
-- f_equal; f_equal; apply cond_depends_on_memory_correct; auto.
-- destruct args; auto. destruct args; auto.
-  rewrite (cond_depends_on_memory_correct c args m1 m2 C).
-  auto.
+  intros until m2. destruct op; simpl; try congruence.
+  intro DEPEND.
+  f_equal. f_equal. apply cond_depends_on_memory_correct; trivial.
+  all: intros; repeat (destruct args; auto);
+       unfold Val.cmpu, Val.cmpu_bool, Val.cmplu, Val.cmplu_bool;
+       try destruct optR as [[]|]; simpl;
+       destruct v; try destruct v0; simpl; auto;
+       try apply negb_false_iff in H; try rewrite H; auto.
 Qed.
 
 Lemma cond_valid_pointer_eq:
@@ -1044,38 +1336,35 @@ Lemma cond_valid_pointer_eq:
 Proof.
   intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
   all: repeat (destruct args; simpl; try congruence);
-    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+       try destruct optR as [[]|]; simpl;
+       try destruct v, v0; try rewrite !MEM; auto;
+       try erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
 Qed.
-
+ 
 Lemma op_valid_pointer_eq:
   forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
   (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
   eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
 Proof.
-  intros until m2. destruct op eqn:OP; simpl; try congruence.
-  - intros MEM; destruct cond; simpl; try congruence;
-    repeat (destruct args; simpl; try congruence);
-    erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
-  - intro MEM; destruct c; simpl; try congruence;
-      repeat (destruct args; simpl; try congruence);
-      erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+  intros until m2. destruct op; simpl; try congruence.
+  intro MEM; erewrite cond_valid_pointer_eq; eauto.
+  all: intros MEM; repeat (destruct args; simpl; try congruence);
+       try destruct optR as [[]|]; simpl; try destruct v, v0; try rewrite !MEM; auto;
+       unfold Val.cmpu, Val.cmplu;
+       erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
 Qed.
 
 (** Global variables mentioned in an operation or addressing mode *)
 
 Definition globals_addressing (addr: addressing) : list ident :=
   match addr with
-  | Aglobal s n => s :: nil
-  | Abased s n => s :: nil
-  | Abasedscaled sc s n => s :: nil
+  | Aglobal s ofs => s :: nil
   | _ => nil
   end.
 
 Definition globals_operation (op: operation) : list ident :=
   match op with
-  | Oindirectsymbol s => s :: nil
-  | Olea addr => globals_addressing addr
-  | Oleal addr => globals_addressing addr
+  | Oaddrsymbol s ofs => s :: nil
   | _ => nil
   end.
 
@@ -1094,30 +1383,13 @@ Variable ge2: Genv.t F2 V2.
 Hypothesis agree_on_symbols:
   forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s.
 
-Lemma eval_addressing32_preserved:
-  forall sp addr vl,
-  eval_addressing32 ge2 sp addr vl = eval_addressing32 ge1 sp addr vl.
-Proof.
-  intros.
-  unfold eval_addressing32, Genv.symbol_address; destruct addr; try rewrite agree_on_symbols;
-  reflexivity.
-Qed.
-
-Lemma eval_addressing64_preserved:
-  forall sp addr vl,
-  eval_addressing64 ge2 sp addr vl = eval_addressing64 ge1 sp addr vl.
-Proof.
-  intros.
-  unfold eval_addressing64, Genv.symbol_address; destruct addr; try rewrite agree_on_symbols;
-  reflexivity.
-Qed.
-
 Lemma eval_addressing_preserved:
   forall sp addr vl,
   eval_addressing ge2 sp addr vl = eval_addressing ge1 sp addr vl.
 Proof.
   intros.
-  unfold eval_addressing; destruct Archi.ptr64; auto using eval_addressing32_preserved, eval_addressing64_preserved.
+  unfold eval_addressing; destruct addr; auto. destruct vl; auto. 
+  unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
 Qed.
 
 Lemma eval_operation_preserved:
@@ -1125,8 +1397,8 @@ Lemma eval_operation_preserved:
   eval_operation ge2 sp op vl m = eval_operation ge1 sp op vl m.
 Proof.
   intros.
-  unfold eval_operation; destruct op; auto using eval_addressing32_preserved, eval_addressing64_preserved.
-  unfold Genv.symbol_address. rewrite agree_on_symbols. auto.
+  unfold eval_operation; destruct op; auto. destruct vl; auto.
+  unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
 Qed.
 
 End GENV_TRANSF.
@@ -1186,6 +1458,90 @@ Ltac InvInject :=
   | _ => idtac
   end.
 
+Lemma eval_cmpu_bool_inj': forall b c v v' v0 v0',
+  Val.inject f v v' ->
+  Val.inject f v0 v0' ->
+  Val.cmpu_bool (Mem.valid_pointer m1) c v v0 = Some b ->
+  Val.cmpu_bool (Mem.valid_pointer m2) c v' v0' = Some b.
+Proof.
+  intros.
+  eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+Qed.
+
+Lemma eval_cmpu_bool_inj: forall c v v' v0 v'0,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.cmpu (Mem.valid_pointer m1) c v v0)
+  (Val.cmpu (Mem.valid_pointer m2) c v' v'0).
+Proof.
+  intros until v'0. intros HV1 HV2.
+  unfold Val.cmpu; 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto.
+  exploit eval_cmpu_bool_inj'. eapply HV1. eapply HV2. eapply Heqo.
+  intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmpu_bool_inj_opt: forall c v v' v0 v'0 optR,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m1) c) v v0 zero32)
+  (apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m2) c) v' v'0 zero32).
+Proof.
+  intros until optR. intros HV1 HV2.
+  destruct optR as [[]|]; simpl; unfold zero32, Val.cmpu; 
+  destruct (Val.cmpu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto;
+  assert (HVI: Val.inject f (Vint Int.zero) (Vint Int.zero)) by apply Val.inject_int.
+  + exploit eval_cmpu_bool_inj'. eapply HVI. eapply HV1. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmpu_bool_inj'. eapply HV1. eapply HVI. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmpu_bool_inj'. eapply HV1. instantiate (1:=v'0).
+    eauto. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmplu_bool_inj': forall b c v v' v0 v0',
+  Val.inject f v v' ->
+  Val.inject f v0 v0' ->
+  Val.cmplu_bool (Mem.valid_pointer m1) c v v0 = Some b ->
+  Val.cmplu_bool (Mem.valid_pointer m2) c v' v0' = Some b.
+Proof.
+  intros.
+  eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+Qed.
+
+Lemma eval_cmplu_bool_inj: forall c v v' v0 v'0,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.maketotal (Val.cmplu (Mem.valid_pointer m1) c v v0))
+  (Val.maketotal (Val.cmplu (Mem.valid_pointer m2) c v' v'0)).
+Proof.
+  intros until v'0. intros HV1 HV2.
+  unfold Val.cmplu; 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto.
+  exploit eval_cmplu_bool_inj'. eapply HV1. eapply HV2. eapply Heqo.
+  intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
+Lemma eval_cmplu_bool_inj_opt: forall c v v' v0 v'0 optR,
+  Val.inject f v v' ->
+  Val.inject f v0 v'0 ->
+  Val.inject f (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m1) c) v v0 zero64))
+  (Val.maketotal (apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m2) c) v' v'0 zero64)).
+Proof.
+  intros until optR. intros HV1 HV2.
+  destruct optR as [[]|]; simpl; unfold zero64, Val.cmplu; 
+  destruct (Val.cmplu_bool (Mem.valid_pointer m1) c _ _) eqn:?; eauto;
+  assert (HVI: Val.inject f (Vlong Int64.zero) (Vlong Int64.zero)) by apply Val.inject_long.
+  + exploit eval_cmplu_bool_inj'. eapply HVI. eapply HV1. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmplu_bool_inj'. eapply HV1. eapply HVI. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+  + exploit eval_cmplu_bool_inj'. eapply HV1. instantiate (1:=v'0).
+    eauto. eapply Heqo.
+    intros EQ; rewrite EQ; destruct b; simpl; constructor; eauto.
+Qed.
+
 Lemma eval_condition_inj:
   forall cond vl1 vl2 b,
   Val.inject_list f vl1 vl2 ->
@@ -1193,6 +1549,9 @@ Lemma eval_condition_inj:
   eval_condition cond vl2 m2 = Some b.
 Proof.
   intros. destruct cond; simpl in H0; FuncInv; InvInject; simpl; auto.
+  all: assert (HVI32: Val.inject f (Vint Int.zero) (Vint Int.zero)) by apply Val.inject_int;
+       assert (HVI64: Val.inject f (Vlong Int64.zero) (Vlong Int64.zero)) by apply Val.inject_long;
+       try unfold zero32, zero64.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
 - inv H3; simpl in H0; inv H0; auto.
@@ -1205,8 +1564,38 @@ Proof.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
 - inv H3; inv H2; simpl in H0; inv H0; auto.
-- inv H3; try discriminate; auto.
-- inv H3; try discriminate; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmpu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
+- destruct optR as [[]|]; simpl;
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+- destruct optR as [[]|]; unfold apply_bin_oreg in *;
+  eapply eval_cmplu_bool_inj'; eauto.
 Qed.
 
 Ltac TrivialExists :=
@@ -1216,34 +1605,308 @@ Ltac TrivialExists :=
   | _ => idtac
   end.
 
-Lemma eval_addressing32_inj:
-  forall addr sp1 vl1 sp2 vl2 v1,
-  (forall id ofs,
-      In id (globals_addressing addr) ->
-      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
-  Val.inject f sp1 sp2 ->
-  Val.inject_list f vl1 vl2 ->
-  eval_addressing32 ge1 sp1 addr vl1 = Some v1 ->
-  exists v2, eval_addressing32 ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
-Proof.
-  assert (A: forall v1 v2 v1' v2', Val.inject f v1 v1' -> Val.inject f v2 v2' -> Val.inject f (Val.mul v1 v2) (Val.mul v1' v2')).
-  { intros. inv H; simpl; auto. inv H0; auto. }
-  intros. destruct addr; simpl in *; FuncInv; InvInject; TrivialExists; eauto using Val.add_inject, Val.offset_ptr_inject with coqlib.
-Qed.
-
-Lemma eval_addressing64_inj:
-  forall addr sp1 vl1 sp2 vl2 v1,
+Lemma eval_operation_inj:
+  forall op sp1 vl1 sp2 vl2 v1,
   (forall id ofs,
-      In id (globals_addressing addr) ->
+      In id (globals_operation op) ->
       Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
   Val.inject f sp1 sp2 ->
   Val.inject_list f vl1 vl2 ->
-  eval_addressing64 ge1 sp1 addr vl1 = Some v1 ->
-  exists v2, eval_addressing64 ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
+  eval_operation ge1 sp1 op vl1 m1 = Some v1 ->
+  exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2.
 Proof.
-  assert (A: forall v1 v2 v1' v2', Val.inject f v1 v1' -> Val.inject f v2 v2' -> Val.inject f (Val.mull v1 v2) (Val.mull v1' v2')).
-  { intros. inv H; simpl; auto. inv H0; auto. }
-  intros. destruct addr; simpl in *; FuncInv; InvInject; TrivialExists; eauto using Val.addl_inject, Val.offset_ptr_inject with coqlib.
+  intros until v1; intros GL; intros. destruct op; simpl in H1; simpl; FuncInv; InvInject; TrivialExists.
+  (* addrsymbol *)
+  - apply GL; simpl; auto.
+  (* addrstack *)
+  - apply Val.offset_ptr_inject; auto. 
+  (* castsigned *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* add, addimm *)
+  - apply Val.add_inject; auto.
+  - apply Val.add_inject; auto.
+  (* neg, sub *)
+  - inv H4; simpl; auto.
+  - apply Val.sub_inject; auto.
+  (* mul, mulhs, mulhu *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* div, divu *)
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  (* mod, modu *)
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero
+              || Int.eq i (Int.repr (-2147483648)) && Int.eq i0 Int.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int.eq i0 Int.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_int.
+  (* and, andimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* or, orimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* xor, xorimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* shl, shlimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  (* shr, shrimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  (* shru, shruimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
+  (* shrx *)
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 31)); cbn.
+    apply Val.inject_int.
+    apply Val.val_inject_undef.
+  (* makelong, highlong, lowlong *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* cast32 *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addl, addlimm *)
+  - apply Val.addl_inject; auto.
+  - apply Val.addl_inject; auto.
+  (* negl, subl *)
+  - inv H4; simpl; auto.
+  - apply Val.subl_inject; auto.
+  (* mull, mullhs, mullhu *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* divl, divlu *)
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  (* modl, modlu *)
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero
+         || Int64.eq i (Int64.repr (-9223372036854775808)) &&
+            Int64.eq i0 Int64.mone); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  - inv H4; inv H2; cbn.
+    all: try apply Val.val_inject_undef.
+    destruct (Int64.eq i0 Int64.zero); cbn.
+    apply Val.val_inject_undef.
+    apply Val.inject_long.
+  (* andl, andlimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* orl, orlimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* xorl, xorlimm *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; simpl; auto.
+  (* shll, shllimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  (* shr, shrimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  (* shru, shruimm *)
+  - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+  - inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+  (* shrx *)
+  - inv H4; cbn; try apply Val.val_inject_undef.
+    destruct (Int.ltu n (Int.repr 63)); cbn.
+    apply Val.inject_long.
+    apply Val.val_inject_undef.
+  (* negf, absf *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addf, subf *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* mulf, divf *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* negfs, absfs *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* addfs, subfs *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* mulfs, divfs *)
+  - inv H4; inv H2; simpl; auto.
+  - inv H4; inv H2; simpl; auto.
+  (* singleoffloat, floatofsingle *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* intoffloat, intuoffloat *)
+  - inv H4; cbn; auto.
+    destruct (Float.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_intu f0); cbn; auto.
+  (* floatofint, floatofintu *)
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
+  (* intofsingle, intuofsingle *)
+  - inv H4; cbn; auto.
+    destruct (Float32.to_int f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_intu f0); cbn; auto.
+  (* singleofint, singleofintu *)
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
+  (* longoffloat, longuoffloat *)
+  - inv H4; cbn; auto.
+    destruct (Float.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float.to_longu f0); cbn; auto.
+  (* floatoflong, floatoflongu *)
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
+  (* longofsingle, longuofsingle *)
+  - inv H4; cbn; auto.
+    destruct (Float32.to_long f0); cbn; auto.
+  - inv H4; cbn; auto.
+    destruct (Float32.to_longu f0); cbn; auto.
+  (* singleoflong, singleoflongu *)
+  - inv H4; cbn; auto.
+  - inv H4; cbn; auto.
+  (* cmp *)
+  - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
+    exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
+    destruct b; simpl; constructor.
+    simpl; constructor.
+  (* OEseqw *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsnew *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsequw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsneuw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsltw *)
+  - destruct optR as [[]|]; simpl; unfold zero32, Val.cmp;
+    inv H4; inv H2; simpl; try destruct (Int.lt _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsltuw *)
+  - apply eval_cmpu_bool_inj_opt; auto.
+  (* OEsltiw *)
+  - inv H4; simpl; cbn; auto; try destruct (Int.lt _ _); apply Val.inject_int.
+  (* OEsltiuw *)
+  - apply eval_cmpu_bool_inj; auto.
+  (* OEaddiw *)
+  - destruct optR as [[]|]; auto; simpl.
+    rewrite Int.add_zero_l; auto.
+    rewrite Int.add_commut, Int.add_zero_l; auto.
+  - destruct optR as [[]|]; auto; simpl;
+    eapply Val.add_inject; auto.
+  (* OEandiw *)
+  - inv H4; cbn; auto.
+  (* OEoriw *)
+  - inv H4; cbn; auto.
+  (* OExoriw *)
+  - inv H4; simpl; auto.
+  (* OEluiw *)
+  - destruct (Int.ltu _ _); auto.
+  (* OEseql *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsnel *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.eq _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsequl *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsneul *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsltl *)
+  - destruct optR as [[]|]; simpl; unfold zero64, Val.cmpl;
+    inv H4; inv H2; simpl; try destruct (Int64.lt _ _); simpl; cbn; auto;
+    try apply Val.inject_int.
+  (* OEsltul *)
+  - apply eval_cmplu_bool_inj_opt; auto.
+  (* OEsltil *)
+  - inv H4; simpl; cbn; auto; try destruct (Int64.lt _ _); apply Val.inject_int.
+  (* OEsltiul *)
+  - apply eval_cmplu_bool_inj; auto.
+  (* OEaddil *)
+  - destruct optR as [[]|]; auto; simpl.
+    rewrite Int64.add_zero_l; auto.
+    rewrite Int64.add_commut, Int64.add_zero_l; auto.
+  - destruct optR as [[]|]; auto; simpl;
+    eapply Val.addl_inject; auto.
+  (* OEandil *)
+  - inv H4; cbn; auto.
+  (* OEoril *)
+  - inv H4; cbn; auto.
+  (* OExoril *)
+  - inv H4; simpl; auto.
+  (* OEmayundef *)
+  - destruct mu; inv H4; inv H2; simpl; auto;
+    try destruct (Int.ltu _ _); simpl; auto.
+    all: eapply Val.inject_ptr; eauto.
+  (* OEfeqd *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfltd *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfled *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfeqs *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEflts *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* OEfles *)
+  - inv H4; inv H2; cbn; simpl; auto.
+    destruct Float32.cmp; unfold Vtrue, Vfalse; cbn; auto.
+  (* Bits_of_single, double *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* single, double of bits *)
+  - inv H4; simpl; auto.
+  - inv H4; simpl; auto.
+  (* selectl *)
+  - inv H4; trivial. cbn.
+    destruct (Int.eq i Int.one).
+    + auto using Val.normalize_inject.
+    + destruct (Int.eq i Int.zero); cbn; auto using Val.normalize_inject.
 Qed.
 
 Lemma eval_addressing_inj:
@@ -1256,7 +1919,10 @@ Lemma eval_addressing_inj:
   eval_addressing ge1 sp1 addr vl1 = Some v1 ->
   exists v2, eval_addressing ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
 Proof.
-  unfold eval_addressing; intros. destruct Archi.ptr64; eauto using eval_addressing32_inj, eval_addressing64_inj.
+  intros. destruct addr; simpl in H2; simpl; FuncInv; InvInject; TrivialExists.
+  apply Val.offset_ptr_inject; auto.
+  apply H; simpl; auto.
+  apply Val.offset_ptr_inject; auto. 
 Qed.
 
 Lemma eval_addressing_inj_none:
@@ -1273,127 +1939,6 @@ Proof.
   destruct addr; simpl in *;
   inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
 Qed.
-
-Lemma eval_operation_inj:
-  forall op sp1 vl1 sp2 vl2 v1,
-  (forall id ofs,
-      In id (globals_operation op) ->
-      Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
-  Val.inject f sp1 sp2 ->
-  Val.inject_list f vl1 vl2 ->
-  eval_operation ge1 sp1 op vl1 m1 = Some v1 ->
-  exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2.
-Proof.
-  intros until v1; intros GL; intros. destruct op; simpl in H1; simpl; FuncInv; InvInject; TrivialExists.
-  apply GL; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  apply Val.sub_inject; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
-  inv H4; simpl in H1; try discriminate. simpl.
-  destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int.iwordsize); auto.
-  inv H2; simpl; auto. destruct (Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize); auto.
-  eapply eval_addressing32_inj; eauto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  apply Val.addl_inject; auto.
-  apply Val.subl_inject; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. TrivialExists.
-  inv H4; inv H3; simpl in H1; inv H1. simpl.
-    destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
-  inv H4; simpl in H1; try discriminate. simpl. destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
-  inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
-  inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
-  inv H4; simpl; auto.
-  eapply eval_addressing64_inj; eauto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; inv H2; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl; auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
-  exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
-  exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
-  exists (Vlong i); auto.
-  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
-  exists (Vlong i); auto.
-  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
-  subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
-  exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
-  destruct b; simpl; constructor.
-  simpl; constructor.
-  apply Val.select_inject; auto.  
-  destruct (eval_condition c vl1 m1) eqn:?; auto.
-  right; symmetry; eapply eval_condition_inj; eauto.
-Qed.
-
 End EVAL_COMPAT.
 
 (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1512,7 +2057,6 @@ Proof.
   inv H0; trivial; try discriminate;
   inv H2; trivial; try discriminate.
 Qed.
-
 End EVAL_LESSDEF.
 
 (** Compatibility of the evaluation functions with memory injections. *)
@@ -1562,9 +2106,10 @@ Proof.
   rewrite eval_shift_stack_addressing.
   eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
 Qed.
 
+
 Lemma eval_addressing_inject_none:
   forall addr vl1 vl2,
   Val.inject_list f vl1 vl2 ->
@@ -1595,7 +2140,7 @@ Proof.
   intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
   intros; eapply Mem.different_pointers_inject; eauto.
   intros. apply symbol_address_inject.
-  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+  econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. 
 Qed.
 
 End EVAL_INJECT.
@@ -1609,8 +2154,8 @@ Definition builtin_arg_ok_1
   | OK_const, (BA_int _ | BA_long _ | BA_float _ | BA_single _) => true
   | OK_addrstack, BA_addrstack _ => true
   | OK_addressing, BA_addrstack _ => true
-  | OK_addressing, BA_addrglobal _ _ => true
-  | OK_addressing, BA_addptr (BA _) (BA_int _ | BA_long _) => true
+  | OK_addressing, BA_addptr (BA _) (BA_int _) => true
+  | OK_addressing, BA_addptr (BA _) (BA_long _) => true
   | _, _ => false
   end.
 
@@ -1619,4 +2164,4 @@ Definition builtin_arg_ok
   match ba with
   | (BA _ | BA_splitlong (BA _) (BA _)) => true
   | _ => builtin_arg_ok_1 ba c
-  end.  
+  end.
diff --git a/verilog/OpWeights.ml b/verilog/OpWeights.ml
new file mode 100644
index 00000000..66cf6ce9
--- /dev/null
+++ b/verilog/OpWeights.ml
@@ -0,0 +1,308 @@
+open Op
+open PrepassSchedulingOracleDeps
+
+module FU74 = struct
+  (* Attempt at modeling the FU74 (HiFive Unmatched board) core *)
+
+  let resource_bounds = [| 2; 1; 1; 1; 1 |]
+  (* issue ; LSU ; BU ; FPU ; IMUL/IDIV *)
+
+  let nr_non_pipelined_units = 1
+
+  (* divider *)
+
+  let latency_of_op (op : operation) (nargs : int) =
+    match op with
+    | OEmayundef _ -> 0
+    | Omove | Ointconst _ | Olongconst _
+    | Oaddrsymbol (_, _)
+    | Oaddrstack _ | Ocast8signed | Ocast16signed | Oadd | Oaddimm _ | Oneg
+    | Osub | Oand | Oandimm _ | Oor | Oorimm _ | Oxor | Oxorimm _ | Oshl
+    | Oshlimm _ | Oshr | Oshrimm _ | Oshru | Oshruimm _ | Oshrximm _ | Olowlong
+    | Ocast32signed | Ocast32unsigned | Oaddl | Oaddlimm _ | Onegl | Osubl
+    | Oandl | Oandlimm _ | Oorl | Oorlimm _ | Oxorl | Oxorlimm _ | Oshll
+    | Oshllimm _ | Oshrl | Oshrlimm _ | Oshrlu | Oshrluimm _ | Oshrxlimm _
+    | Oselectl | Obits_of_single | Obits_of_float | OEseqw _ | OEsnew _
+    | OEsequw _ | OEsneuw _ | OEsltw _ | OEsltuw _ | OEsltiw _ | OEsltiuw _
+    | OEaddiw (_, _)
+    | OEandiw _ | OEoriw _ | OExoriw _ | OEluiw _ | OEseql _ | OEsnel _
+    | OEsequl _ | OEsneul _ | OEsltl _ | OEsltul _ | OEsltil _ | OEsltiul _
+    | OEaddil (_, _)
+    | OEandil _ | OEoril _ | OExoril _ | OEluil _ | OEloadli _ ->
+        1
+    | Osingleconst _ | Ofloatconst _ | Onegf | Oabsf | Onegfs | Oabsfs
+    | Osingleoffloat | Ofloatofsingle | Ofloatofint | Ofloatofintu
+    | Osingleofint | Osingleofintu | Osingle_of_bits ->
+        2
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> 3
+    | Omulf -> 7
+    | Omulfs -> 5
+    | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle | Olongoffloat
+    | Olonguoffloat | Olongofsingle | Olonguofsingle | Osingleoflong
+    | Osingleoflongu | OEfeqd | OEfltd | OEfled | OEfeqs | OEflts | OEfles ->
+        4
+    | Ofloatoflong | Ofloatoflongu | Ofloat_of_bits -> 6
+    | Oaddf | Osubf | Oaddfs | Osubfs -> 7
+    | Ocmp cond -> (
+        match cond with
+        | Ccomp _ | Ccompu _ | Ccompimm _ | Ccompuimm _ | Ccompl _ | Ccomplu _
+        | Ccomplimm _ | Ccompluimm _ | CEbeqw _ | CEbnew _ | CEbequw _
+        | CEbneuw _ | CEbltw _ | CEbltuw _ | CEbgew _ | CEbgeuw _ | CEbeql _
+        | CEbnel _ | CEbequl _ | CEbneul _ | CEbltl _ | CEbltul _ | CEbgel _
+        | CEbgeul _ ->
+            1
+        | Ccompf _ | Cnotcompf _ | Ccompfs _ | Cnotcompfs _ -> 4)
+    | Odiv | Odivu | Omod | Omodu | Odivl | Odivlu | Omodl | Omodlu | Odivf
+    | Odivfs ->
+        68
+    | _ -> 1
+
+  let resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | OEmayundef _ -> [| 0; 0; 0; 0; 0 |]
+    | Omove | Ointconst _ | Olongconst _
+    | Oaddrsymbol (_, _)
+    | Oaddrstack _ | Ocast8signed | Ocast16signed | Oadd | Oaddimm _ | Oneg
+    | Osub | Oand | Oandimm _ | Oor | Oorimm _ | Oxor | Oxorimm _ | Oshl
+    | Oshlimm _ | Oshr | Oshrimm _ | Oshru | Oshruimm _ | Oshrximm _ | Olowlong
+    | Ocast32signed | Ocast32unsigned | Oaddl | Oaddlimm _ | Onegl | Osubl
+    | Oandl | Oandlimm _ | Oorl | Oorlimm _ | Oxorl | Oxorlimm _ | Oshll
+    | Oshllimm _ | Oshrl | Oshrlimm _ | Oshrlu | Oshrluimm _ | Oshrxlimm _
+    | Oselectl | Obits_of_single | Obits_of_float | OEseqw _ | OEsnew _
+    | OEsequw _ | OEsneuw _ | OEsltw _ | OEsltuw _ | OEsltiw _ | OEsltiuw _
+    | OEaddiw (_, _)
+    | OEandiw _ | OEoriw _ | OExoriw _ | OEluiw _ | OEseql _ | OEsnel _
+    | OEsequl _ | OEsneul _ | OEsltl _ | OEsltul _ | OEsltil _ | OEsltiul _
+    | OEaddil (_, _)
+    | OEandil _ | OEoril _ | OExoril _ | OEluil _ | OEloadli _ ->
+        [| 1; 0; 0; 0; 0 |]
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu | Odiv | Odivu | Omod
+    | Omodu | Odivl | Odivlu | Omodl | Omodlu ->
+        [| 1; 0; 0; 0; 1 |]
+    | Omulf | Omulfs | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle
+    | Olongoffloat | Olonguoffloat | Olongofsingle | Olonguofsingle
+    | Osingleoflong | Osingleoflongu | OEfeqd | OEfltd | OEfled | OEfeqs
+    | OEflts | OEfles | Ofloatoflong | Ofloatoflongu | Ofloat_of_bits | Oaddf
+    | Osubf | Oaddfs | Osubfs | Osingleconst _ | Ofloatconst _ | Onegf | Oabsf
+    | Onegfs | Oabsfs | Osingleoffloat | Ofloatofsingle | Ofloatofint
+    | Ofloatofintu | Osingleofint | Osingleofintu | Osingle_of_bits | Odivf
+    | Odivfs ->
+        [| 1; 0; 0; 1; 0 |]
+    | Ocmp cond -> (
+        match cond with
+        | Ccomp _ | Ccompu _ | Ccompimm _ | Ccompuimm _ | Ccompl _ | Ccomplu _
+        | Ccomplimm _ | Ccompluimm _ | CEbeqw _ | CEbnew _ | CEbequw _
+        | CEbneuw _ | CEbltw _ | CEbltuw _ | CEbgew _ | CEbgeuw _ | CEbeql _
+        | CEbnel _ | CEbequl _ | CEbneul _ | CEbltl _ | CEbltul _ | CEbgel _
+        | CEbgeul _ ->
+            [| 1; 0; 1; 0; 0 |]
+        | Ccompf _ | Cnotcompf _ | Ccompfs _ | Cnotcompfs _ ->
+            [| 1; 0; 1; 1; 0 |])
+    | _ -> [| 1; 0; 0; 0; 0 |]
+
+  let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Odiv | Odivu | Omod | Omodu | Odivl | Odivlu | Omodl | Omodlu | Odivf
+    | Odivfs ->
+        [| 68 |]
+    | _ -> [| -1 |]
+
+  let resources_of_cond (cond : condition) (nargs : int) =
+    match cond with
+    | Ccompf _ |Cnotcompf _|Ccompfs _|Cnotcompfs _ -> [| 1; 0; 1; 1; 0 |]
+    | _ -> [| 1; 0; 1; 0; 0 |]
+
+  let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3
+
+  let latency_of_call _ _ = 6
+
+  let resources_of_load trap chunk addressing nargs = [| 1; 1; 0; 0; 0 |]
+
+  let resources_of_store chunk addressing nargs = [| 1; 1; 0; 0; 0 |]
+
+  let resources_of_call _ _ = resource_bounds
+
+  let resources_of_builtin _ = resource_bounds
+end
+
+module Rocket = struct
+  (* Attempt at modeling the Rocket core *)
+
+  let resource_bounds = [| 1 |]
+
+  let nr_non_pipelined_units = 1
+
+  (* divider *)
+
+  let latency_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> 4
+    | Onegf -> 1 (*r [rd = - r1] *)
+    | Oabsf (*r [rd = abs(r1)] *)
+    | Oaddf (*r [rd = r1 + r2] *)
+    | Osubf (*r [rd = r1 - r2] *)
+    | Omulf ->
+        6 (*r [rd = r1 * r2] *)
+    | Onegfs -> 1 (*r [rd = - r1] *)
+    | Oabsfs (*r [rd = abs(r1)] *)
+    | Oaddfs (*r [rd = r1 + r2] *)
+    | Osubfs (*r [rd = r1 - r2] *)
+    | Omulfs ->
+        4 (*r [rd = r1 * r2] *)
+    | Osingleoffloat (*r [rd] is [r1] truncated to single-precision float *)
+    | Ofloatofsingle (*r [rd] is [r1] extended to double-precision float *)
+    (*c Conversions between int and float: *)
+    | Ofloatconst _ | Osingleconst _
+    | Ointoffloat (*r [rd = signed_int_of_float64(r1)] *)
+    | Ointuoffloat (*r [rd = unsigned_int_of_float64(r1)] *)
+    | Ofloatofint (*r [rd = float64_of_signed_int(r1)] *)
+    | Ofloatofintu (*r [rd = float64_of_unsigned_int(r1)] *)
+    | Ointofsingle (*r [rd = signed_int_of_float32(r1)] *)
+    | Ointuofsingle (*r [rd = unsigned_int_of_float32(r1)] *)
+    | Osingleofint (*r [rd = float32_of_signed_int(r1)] *)
+    | Osingleofintu (*r [rd = float32_of_unsigned_int(r1)] *)
+    | Olongoffloat (*r [rd = signed_long_of_float64(r1)] *)
+    | Olonguoffloat (*r [rd = unsigned_long_of_float64(r1)] *)
+    | Ofloatoflong (*r [rd = float64_of_signed_long(r1)] *)
+    | Ofloatoflongu (*r [rd = float64_of_unsigned_long(r1)] *)
+    | Olongofsingle (*r [rd = signed_long_of_float32(r1)] *)
+    | Olonguofsingle (*r [rd = unsigned_long_of_float32(r1)] *)
+    | Osingleoflong (*r [rd = float32_of_signed_long(r1)] *)
+    | Osingleoflongu ->
+        2 (*r [rd = float32_of_unsigned_int(r1)] *)
+    | OEfeqd | OEfltd | OEfeqs | OEflts | OEfles | OEfled | Obits_of_single
+    | Obits_of_float | Osingle_of_bits | Ofloat_of_bits ->
+        2
+    | OEloadli _ -> 2
+    | Odiv | Odivu | Odivl | Odivlu -> 16
+    | Odivfs -> 35
+    | Odivf -> 50
+    | Ocmp cond -> (
+        match cond with
+        | Ccomp _ | Ccompu _ | Ccompimm _ | Ccompuimm _ | Ccompl _ | Ccomplu _
+        | Ccomplimm _ | Ccompluimm _ | CEbeqw _ | CEbnew _ | CEbequw _
+        | CEbneuw _ | CEbltw _ | CEbltuw _ | CEbgew _ | CEbgeuw _ | CEbeql _
+        | CEbnel _ | CEbequl _ | CEbneul _ | CEbltl _ | CEbltul _ | CEbgel _
+        | CEbgeul _ ->
+            1
+        | Ccompf _ | Cnotcompf _ -> 2
+        | Ccompfs _ | Cnotcompfs _ -> 2)
+    | OEmayundef _ -> 0
+    | _ -> 1
+
+  let resources_of_op (op : operation) (nargs : int) =
+    match op with OEmayundef _ -> [| 0 |] | _ -> resource_bounds
+
+  let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Odiv | Odivu -> [| 29 |]
+    | Odivfs -> [| 20 |]
+    | Odivl | Odivlu | Odivf -> [| 50 |]
+    | _ -> [| -1 |]
+
+  let resources_of_cond (cond : condition) (nargs : int) = resource_bounds
+
+  let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3
+
+  let latency_of_call _ _ = 6
+
+  let resources_of_load trap chunk addressing nargs = resource_bounds
+
+  let resources_of_store chunk addressing nargs = resource_bounds
+
+  let resources_of_call _ _ = resource_bounds
+
+  let resources_of_builtin _ = resource_bounds
+end
+
+module SweRV_EH1 = struct
+  (* Attempt at modeling SweRV EH1
+     [| issues ; LSU ; multiplier |] *)
+  let resource_bounds = [| 2; 1; 1 |]
+
+  let nr_non_pipelined_units = 1
+
+  (* divider *)
+
+  let latency_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> 3
+    | Odiv | Odivu | Odivl | Odivlu -> 16
+    | _ -> 1
+
+  let resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Omul | Omulhs | Omulhu | Omull | Omullhs | Omullhu -> [| 1; 0; 1 |]
+    | Odiv | Odivu | Odivl | Odivlu -> [| 0; 0; 0 |]
+    | _ -> [| 1; 0; 0 |]
+
+  let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+    match op with
+    | Odiv | Odivu -> [| 29 |]
+    | Odivfs -> [| 20 |]
+    | Odivl | Odivlu | Odivf -> [| 50 |]
+    | _ -> [| -1 |]
+
+  let resources_of_cond (cond : condition) (nargs : int) = [| 1; 0; 0 |]
+
+  let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3
+
+  let latency_of_call _ _ = 6
+
+  let resources_of_load trap chunk addressing nargs = [| 1; 1; 0 |]
+
+  let resources_of_store chunk addressing nargs = [| 1; 1; 0 |]
+
+  let resources_of_call _ _ = resource_bounds
+
+  let resources_of_builtin _ = resource_bounds
+end
+
+let get_opweights () : opweights =
+  match !Clflags.option_mtune with
+  | "rocket" | "" ->
+      {
+        pipelined_resource_bounds = Rocket.resource_bounds;
+        nr_non_pipelined_units = Rocket.nr_non_pipelined_units;
+        latency_of_op = Rocket.latency_of_op;
+        resources_of_op = Rocket.resources_of_op;
+        non_pipelined_resources_of_op = Rocket.non_pipelined_resources_of_op;
+        latency_of_load = Rocket.latency_of_load;
+        resources_of_load = Rocket.resources_of_load;
+        resources_of_store = Rocket.resources_of_store;
+        resources_of_cond = Rocket.resources_of_cond;
+        latency_of_call = Rocket.latency_of_call;
+        resources_of_call = Rocket.resources_of_call;
+        resources_of_builtin = Rocket.resources_of_builtin;
+      }
+  | "SweRV_EH1" | "EH1" ->
+      {
+        pipelined_resource_bounds = SweRV_EH1.resource_bounds;
+        nr_non_pipelined_units = SweRV_EH1.nr_non_pipelined_units;
+        latency_of_op = SweRV_EH1.latency_of_op;
+        resources_of_op = SweRV_EH1.resources_of_op;
+        non_pipelined_resources_of_op = SweRV_EH1.non_pipelined_resources_of_op;
+        latency_of_load = SweRV_EH1.latency_of_load;
+        resources_of_load = SweRV_EH1.resources_of_load;
+        resources_of_store = SweRV_EH1.resources_of_store;
+        resources_of_cond = SweRV_EH1.resources_of_cond;
+        latency_of_call = SweRV_EH1.latency_of_call;
+        resources_of_call = SweRV_EH1.resources_of_call;
+        resources_of_builtin = SweRV_EH1.resources_of_builtin;
+      }
+  | "FU74" | "sifive-u74" ->
+      {
+        pipelined_resource_bounds = FU74.resource_bounds;
+        nr_non_pipelined_units = FU74.nr_non_pipelined_units;
+        latency_of_op = FU74.latency_of_op;
+        resources_of_op = FU74.resources_of_op;
+        non_pipelined_resources_of_op = FU74.non_pipelined_resources_of_op;
+        latency_of_load = FU74.latency_of_load;
+        resources_of_load = FU74.resources_of_load;
+        resources_of_store = FU74.resources_of_store;
+        resources_of_cond = FU74.resources_of_cond;
+        latency_of_call = FU74.latency_of_call;
+        resources_of_call = FU74.resources_of_call;
+        resources_of_builtin = FU74.resources_of_builtin;
+      }
+  | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx)
diff --git a/verilog/PrepassSchedulingOracle.ml b/verilog/PrepassSchedulingOracle.ml
index 42a3da23..53a81095 100644
--- a/verilog/PrepassSchedulingOracle.ml
+++ b/verilog/PrepassSchedulingOracle.ml
@@ -1,6 +1,485 @@
+open AST
 open RTL
+open Maps
+open InstructionScheduler
 open Registers
+open PrepassSchedulingOracleDeps
+   
+let use_alias_analysis () = false
+                          
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64 
+| Many64 -> 8;;
 
-(* Do not do anything *)
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+  let last_reg_reads : int list PTree.t ref = ref PTree.empty
+  and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+  and last_mem_reads : int list ref = ref []
+  and last_mem_write : int option ref = ref None
+  and last_branch : int option ref = ref None
+  and last_non_pipelined_op : int array = Array.make
+                                        opweights.nr_non_pipelined_units ( -1 )
+  and latency_constraints : latency_constraint list ref = ref [] in
+  let add_constraint instr_from instr_to latency =
+    assert (instr_from <= instr_to);
+    assert (latency >= 0);
+    if instr_from = instr_to
+    then (if latency = 0
+          then ()
+          else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+    else
+      latency_constraints :=
+        { instr_from = instr_from;
+          instr_to = instr_to;
+          latency = latency
+        }:: !latency_constraints
+  and get_last_reads reg =
+    match PTree.get reg !last_reg_reads
+     with Some l -> l
+        | None -> [] in
+  let add_input_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Read after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        last_mem_reads := i :: !last_mem_reads
+      end
+  and add_output_mem i =
+    if not (use_alias_analysis ())
+    then
+      begin
+        begin
+          (* Write after write *)
+          match !last_mem_write with
+          | None -> ()
+          | Some j -> add_constraint j i 1
+        end;
+        (* Write after read *)
+        List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+        last_mem_write := Some i;
+        last_mem_reads := []
+      end
+  and add_input_reg i reg =
+    begin
+      (* Read after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, latency) -> add_constraint j i latency
+    end;
+    last_reg_reads := PTree.set reg
+                       (i :: get_last_reads reg)
+                       !last_reg_reads
+  and add_output_reg i latency reg =
+    begin
+      (* Write after write *)
+      match PTree.get reg !last_reg_write with
+      | None -> ()
+      | Some (j, _) -> add_constraint j i 1
+    end;
+    begin
+      (* Write after read *)
+      List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+    end;
+    last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+    last_reg_reads := PTree.remove reg !last_reg_reads
+    in
+  let add_input_regs i regs = List.iter (add_input_reg i) regs in
+  let rec add_builtin_res i (res : reg builtin_res) =
+    match res with
+    | BR r -> add_output_reg i 10 r
+    | BR_none -> ()
+    | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+                               add_builtin_res i lo in
+  let rec add_builtin_arg i (ba : reg builtin_arg) =
+    match ba with
+    | BA r -> add_input_reg i r
+    | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+    | BA_loadstack(_,_) -> add_input_mem i
+    | BA_addrstack _ -> ()
+    | BA_loadglobal(_, _, _) -> add_input_mem i
+    | BA_addrglobal _ -> ()
+    | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+                              add_builtin_arg i lo
+    | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+                           add_builtin_arg i a2 in
+  let irreversible_action i =
+    match !last_branch with
+    | None -> ()
+    | Some j -> add_constraint j i 1 in
+  let set_branch i =
+    irreversible_action i;
+    last_branch := Some i in
+  let add_non_pipelined_resources i resources =
+    Array.iter2
+      (fun latency last ->
+        if latency >= 0 && last >= 0 then add_constraint last i latency)
+      resources last_non_pipelined_op;
+    Array.iteri (fun rsc latency ->
+        if latency >= 0
+        then last_non_pipelined_op.(rsc) <- i) resources
+  in
+  Array.iteri
+    begin
+      fun i (insn, other_uses) ->
+      List.iter (fun use ->
+          add_input_reg i use)
+        (Regset.elements other_uses);
+      
+      match insn with
+      | Inop _ -> ()
+      | Iop(op, inputs, output, _) ->
+         add_non_pipelined_resources i
+           (opweights.non_pipelined_resources_of_op op (List.length inputs));
+         (if Op.is_trapping_op op then irreversible_action i);
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+      | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+         (if trap=TRAP then irreversible_action i);
+         add_input_mem i;
+         add_input_regs i addr_regs;
+         add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+      | Istore(chunk, addressing, addr_regs, input, _) ->
+         irreversible_action i;
+         add_input_regs i addr_regs;
+         add_input_reg i input;
+         add_output_mem i
+      | Icall(signature, ef, inputs, output, _) ->
+         set_branch i;
+         (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         add_output_reg i (opweights.latency_of_call signature ef) output;
+         add_output_mem i;
+         failwith "Icall"
+      | Itailcall(signature, ef, inputs) ->
+         set_branch i;
+        (match ef with
+          | Datatypes.Coq_inl r -> add_input_reg i r
+          | Datatypes.Coq_inr symbol -> ()
+         );
+         add_input_mem i;
+         add_input_regs i inputs;
+         failwith "Itailcall"
+      | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+         set_branch i;
+         add_input_mem i;
+         List.iter (add_builtin_arg i) builtin_inputs;
+         add_builtin_res i builtin_output;
+         add_output_mem i;
+         failwith "Ibuiltin"
+      | Icond(cond, inputs, _, _, _) ->
+         set_branch i;
+         add_input_mem i;
+         add_input_regs i inputs
+      | Ijumptable(input, _) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ijumptable"
+      | Ireturn(Some input) ->
+         set_branch i;
+         add_input_reg i input;
+         failwith "Ireturn"
+      | Ireturn(None) ->
+         set_branch i;
+         failwith "Ireturn none"
+    end seqa;
+  !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+  | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+  | Iop(op, inputs, output, _) ->
+     opweights.resources_of_op op (List.length inputs)
+  | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+     opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+  | Istore(chunk, addressing, addr_regs, input, _) ->
+     opweights.resources_of_store chunk addressing (List.length addr_regs)
+  | Icall(signature, ef, inputs, output, _) ->
+     opweights.resources_of_call signature ef
+  | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+     opweights.resources_of_builtin ef
+  | Icond(cond, args, _, _ , _) ->
+     opweights.resources_of_cond cond (List.length args)
+  | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+    
+let print_sequence pp (seqa : instruction array) =
+  Array.iteri (
+      fun i (insn : instruction) ->
+      PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+               
+type 'a symbolic_term_node =
+  | STop of Op.operation * 'a list
+  | STinitial_reg of int
+  | STother of int;;
+
+type symbolic_term = {
+    hash_id : unique_id;
+    hash_ct : symbolic_term symbolic_term_node
+  };;
+
+let rec print_term channel term =
+  match term.hash_ct with
+  | STop(op, args) ->
+     PrintOp.print_operation print_term channel (op, args)
+  | STinitial_reg n -> Printf.fprintf channel "x%d" n
+  | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+    st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+    mutable st_next_id : unique_id };;
+
+let hash_init () = {
+    st_table = Hashtbl.create 20;
+    st_next_id = 0
+  };;
+
+let ground_to_id = function
+  | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+  | STinitial_reg r -> STinitial_reg r
+  | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+  let grounded = ground_to_id term in
+  match Hashtbl.find_opt table.st_table grounded with
+  | Some x -> x
+  | None ->
+     let term' = { hash_id = table.st_next_id;
+                   hash_ct = term } in
+     (if table.st_next_id = max_int then failwith "hash: max_int");
+     table.st_next_id <- table.st_next_id + 1;
+     Hashtbl.add table.st_table grounded term';
+     term';;
+
+type access = {
+    base : symbolic_term;
+    offset : int64;
+    length : int
+  };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+  match addressing, args with
+  | (Op.Aindexed ofs), [reg] -> Some
+     { base = get_reg reg;
+       offset = Camlcoq.camlint64_of_ptrofs ofs;
+       length = length_of_chunk chunk
+     }
+  | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+  let regs = ref PTree.empty
+  and table = hash_init() in
+  let assign reg term = regs := PTree.set reg term !regs
+  and hash term = hash_node table term in
+  let get_reg reg =
+    match PTree.get reg !regs with
+    | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+    | Some x -> x in
+  let targets = Array.make (Array.length seqa) None in
+  Array.iteri
+    begin
+      fun i insn ->
+      match insn with
+      | Iop(Op.Omove, [input], output, _) ->
+         assign output (get_reg input)
+      | Iop(op, inputs, output, _) ->
+         assign output (hash (STop(op, List.map get_reg inputs)))
+
+      | Iload(trap, chunk, addressing, args, output, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access;
+         assign output (hash (STother(i)))
+        
+      | Icall(_, _, _, output, _)
+      | Ibuiltin(_, _, BR output, _) -> 
+         assign output (hash (STother(i)))
+        
+      | Istore(chunk, addressing, args, va, _) ->
+         let access = access_of_addressing get_reg chunk addressing args in
+         targets.(i) <- access
+                                          
+      | Inop _ -> ()
+      | Ibuiltin(_, _, BR_none, _) -> ()
+      | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+      | Itailcall (_, _, _)
+      |Icond (_, _, _, _, _)
+      |Ijumptable (_, _)
+      |Ireturn _ -> ()
+    end seqa;
+  targets;;
+
+let print_access channel = function
+  | None -> Printf.fprintf channel "any"
+  | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+  let targets = symbolic_execution seqa in
+  Array.iteri
+    (fun i insn ->
+      match insn with
+      | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+                      i print_access targets.(i) 
+      | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+                       i print_access targets.(i)
+      | _ -> ()
+    ) seqa;;
+
+let may_overlap a0 b0 =
+  match a0, b0 with
+  | (None, _)  | (_ , None) -> true
+  | (Some a), (Some b) ->
+     if term_equal a.base b.base
+     then (max a.offset b.offset) <
+          (min (Int64.add (Int64.of_int a.length) a.offset)
+               (Int64.add (Int64.of_int b.length) b.offset))
+     else match a.base.hash_ct, b.base.hash_ct with
+          | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+            STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+             (ida=idb) &&
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | STop(Op.Oaddrstack _, []),
+            STop(Op.Oaddrsymbol _, [])
+          | STop(Op.Oaddrsymbol _, []),
+            STop(Op.Oaddrstack _, []) -> false
+          | STop(Op.Oaddrstack(ofsa),[]),
+            STop(Op.Oaddrstack(ofsb),[]) ->
+               let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+               and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+               (max ao bo) <
+               (min (Int64.add (Int64.of_int a.length) ao)
+                  (Int64.add (Int64.of_int b.length) bo))
+          | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+  let targets = symbolic_execution seqa
+  and deps = ref [] in
+  let add_constraint instr_from instr_to latency =
+    deps := { instr_from = instr_from;
+              instr_to = instr_to;
+              latency = latency
+            }:: !deps in
+  for i=0 to (Array.length seqa)-1
+  do
+    for j=0 to i-1
+    do
+      match seqa.(j), seqa.(i) with
+      | (Istore _), ((Iload _) | (Istore _)) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 1
+      | (Iload _), (Istore _) ->
+         if may_overlap targets.(j) targets.(i)
+         then add_constraint j i 0
+      | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+      | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+         add_constraint j i 1
+      | (Inop _ | Iop _), _
+      | _, (Inop _ | Iop _)
+      | (Iload _), (Iload _) -> ()
+    done
+  done;
+  !deps;;
+ *)
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+      (typing : RTLtyping.regenv) reference_counting seqa =
+  let simple_deps = get_simple_dependencies opweights seqa in
+  { max_latency = -1;
+    resource_bounds = opweights.pipelined_resource_bounds;
+    live_regs_entry = live_entry_regs;
+    typing = typing;
+    reference_counting = Some reference_counting;
+      instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+    latency_constraints =
+      (* if (use_alias_analysis ())
+      then (get_alias_dependencies seqa) @ simple_deps
+      else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+  let nr_instructions = get_nr_instructions problem in
+  assert(nr_instructions = (Array.length early_ones));
+  match list_scheduler problem with
+  | Some fwd_schedule ->
+     let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+     let constraints' = ref problem.latency_constraints in
+     Array.iteri (fun i is_early ->
+         if is_early then
+           constraints' :=  {
+             instr_from = i;
+             instr_to = nr_instructions ;
+             latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+       early_ones;
+     validated_scheduler reverse_list_scheduler
+       { problem with latency_constraints = !constraints' }
+  | None -> None;;
+  
+let prepass_scheduler_by_name name problem early_ones =
+  match name with
+  | "zigzag" -> zigzag_scheduler problem early_ones
+  | _ -> scheduler_by_name name problem
+  
 let schedule_sequence (seqa : (instruction*Regset.t) array)
-      live_regs_entry typing reference = None
+      (live_regs_entry : Registers.Regset.t)
+      (typing : RTLtyping.regenv)
+      reference =
+  let opweights = OpWeights.get_opweights () in
+  try
+    if (Array.length seqa) <= 1
+    then None
+    else
+      begin
+      let nr_instructions = Array.length seqa in
+      (if !Clflags.option_debug_compcert > 6
+       then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+      let problem = define_problem opweights live_regs_entry
+                      typing reference seqa in
+      (if !Clflags.option_debug_compcert > 7
+       then (print_sequence stdout (Array.map fst seqa);
+             print_problem stdout problem));
+      match prepass_scheduler_by_name
+              (!Clflags.option_fprepass_sched)
+              problem
+              (Array.map (fun (ins, _) ->
+                   match ins with
+                   | Icond _ -> true
+                   | _ -> false) seqa) with
+      | None -> Printf.printf "no solution in prepass scheduling\n";
+                None
+      | Some solution ->
+         let positions = Array.init nr_instructions (fun i -> i) in
+         Array.sort (fun i j ->
+             let si = solution.(i) and sj = solution.(j) in
+             if si < sj then -1
+             else if si > sj then 1
+             else i - j) positions;
+         Some positions
+    end
+  with (Failure s) ->
+    Printf.printf "failure in prepass scheduling: %s\n" s;
+    None;;
+                                                            
diff --git a/verilog/PrepassSchedulingOracleDeps.ml b/verilog/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/verilog/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+  
+type opweights =
+  {
+    pipelined_resource_bounds : int array;
+    nr_non_pipelined_units : int;
+    latency_of_op : Op.operation -> int -> int;
+    resources_of_op : Op.operation -> int -> int array;
+    non_pipelined_resources_of_op : Op.operation -> int -> int array;
+    latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+    resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+    resources_of_cond : Op.condition -> int -> int array;
+    latency_of_call : AST.signature -> called_function -> int;
+    resources_of_call : AST.signature -> called_function -> int array;
+    resources_of_builtin : AST.external_function -> int array
+  };;
diff --git a/verilog/PrintOp.ml b/verilog/PrintOp.ml
index 6aa4d450..0d47192a 100644
--- a/verilog/PrintOp.ml
+++ b/verilog/PrintOp.ml
@@ -3,11 +3,16 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Pretty-printing of operators, conditions, addressing modes *)
@@ -25,6 +30,21 @@ let comparison_name = function
   | Cgt -> ">"
   | Cge -> ">="
 
+let mu_name pp = function
+  | MUint -> fprintf pp "MUint"
+  | MUlong -> fprintf pp "MUlong"
+  | MUshrx i -> fprintf pp "MUshrx(%ld)" (camlint_of_coqint i)
+  | MUshrxl i -> fprintf pp "MUshrxl(%ld)" (camlint_of_coqint i)
+
+let get_optR_s c reg pp r1 r2 = function
+  | None -> fprintf pp "(%a %s %a)" reg r1 (comparison_name c) reg r2
+  | Some X0_L -> fprintf pp "(X0 %s %a)" (comparison_name c) reg r1
+  | Some X0_R -> fprintf pp "(%a %s X0)" reg r1 (comparison_name c)
+
+let get_optR_a pp = function
+  | None -> failwith "PrintOp: None in get_optR_a instruction (problem with RTL expansions?)"
+  | Some X0_L | Some X0_R -> fprintf pp "X0"
+
 let print_condition reg pp = function
   | (Ccomp c, [r1;r2]) ->
       fprintf pp "%a %ss %a" reg r1 (comparison_name c) reg r2
@@ -33,7 +53,9 @@ let print_condition reg pp = function
   | (Ccompimm(c, n), [r1]) ->
       fprintf pp "%a %ss %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
   | (Ccompuimm(c, n), [r1]) ->
-      fprintf pp "%a %su %lu" reg r1 (comparison_name c) (camlint_of_coqint n)
+      fprintf pp "%a %su %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
+  | (Ccompf c, [r1;r2]) ->
+      fprintf pp "%a %sf %a" reg r1 (comparison_name c) reg r2
   | (Ccompl c, [r1;r2]) ->
       fprintf pp "%a %sls %a" reg r1 (comparison_name c) reg r2
   | (Ccomplu c, [r1;r2]) ->
@@ -42,53 +64,66 @@ let print_condition reg pp = function
       fprintf pp "%a %sls %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
   | (Ccompluimm(c, n), [r1]) ->
       fprintf pp "%a %slu %Lu" reg r1 (comparison_name c) (camlint64_of_coqint n)
-  | (Ccompf c, [r1;r2]) ->
-      fprintf pp "%a %sf %a" reg r1 (comparison_name c) reg r2
   | (Cnotcompf c, [r1;r2]) ->
       fprintf pp "%a not(%sf) %a" reg r1 (comparison_name c) reg r2
   | (Ccompfs c, [r1;r2]) ->
       fprintf pp "%a %sfs %a" reg r1 (comparison_name c) reg r2
   | (Cnotcompfs c, [r1;r2]) ->
       fprintf pp "%a not(%sfs) %a" reg r1 (comparison_name c) reg r2
-  | (Cmaskzero n, [r1]) ->
-      fprintf pp "%a & 0x%lx == 0" reg r1 (camlint_of_coqint n)
-  | (Cmasknotzero n, [r1]) ->
-      fprintf pp "%a & 0x%lx != 0" reg r1 (camlint_of_coqint n)
+  | (CEbeqw optR, [r1;r2]) ->
+      fprintf pp "CEbeqw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbnew optR, [r1;r2]) ->
+      fprintf pp "CEbnew"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbequw optR, [r1;r2]) ->
+      fprintf pp "CEbequw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbneuw optR, [r1;r2]) ->
+      fprintf pp "CEbneuw"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbltw optR, [r1;r2]) ->
+      fprintf pp "CEbltw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbltuw optR, [r1;r2]) ->
+      fprintf pp "CEbltuw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbgew optR, [r1;r2]) ->
+      fprintf pp "CEbgew"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbgeuw optR, [r1;r2]) ->
+      fprintf pp "CEbgeuw"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbeql optR, [r1;r2]) ->
+      fprintf pp "CEbeql"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbnel optR, [r1;r2]) ->
+      fprintf pp "CEbnel"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbequl optR, [r1;r2]) ->
+      fprintf pp "CEbequl"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | (CEbneul optR, [r1;r2]) ->
+      fprintf pp "CEbneul"; (get_optR_s Cne reg pp r1 r2 optR)
+  | (CEbltl optR, [r1;r2]) ->
+      fprintf pp "CEbltl"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbltul optR, [r1;r2]) ->
+      fprintf pp "CEbltul"; (get_optR_s Clt reg pp r1 r2 optR)
+  | (CEbgel optR, [r1;r2]) ->
+      fprintf pp "CEbgel"; (get_optR_s Cge reg pp r1 r2 optR)
+  | (CEbgeul optR, [r1;r2]) ->
+      fprintf pp "CEbgeul"; (get_optR_s Cge reg pp r1 r2 optR)
   | _ ->
       fprintf pp "<bad condition>"
 
-let print_addressing reg pp = function
-  | Aindexed n, [r1] ->
-      fprintf pp "%a + %s" reg r1 (Z.to_string n)
-  | Aindexed2 n, [r1; r2] ->
-      fprintf pp "%a + %a + %s" reg r1 reg r2 (Z.to_string n)
-  | Ascaled(sc,n), [r1] ->
-      fprintf pp "%a * %s + %s" reg r1 (Z.to_string sc) (Z.to_string n)
-  | Aindexed2scaled(sc, n), [r1; r2] ->
-      fprintf pp "%a + %a * %s + %s" reg r1 reg r2 (Z.to_string sc) (Z.to_string n)
-  | Aglobal(id, ofs), [] -> fprintf pp "%s + %s" (extern_atom id) (Z.to_string ofs)
-  | Abased(id, ofs), [r1] -> fprintf pp "%s + %s + %a" (extern_atom id) (Z.to_string ofs) reg r1
-  | Abasedscaled(sc,id, ofs), [r1] -> fprintf pp "%s + %s + %a * %ld" (extern_atom id) (Z.to_string ofs) reg r1 (camlint_of_coqint sc)
-  | Ainstack ofs, [] -> fprintf pp "stack(%s)" (Z.to_string ofs)
-  | _ -> fprintf pp "<bad addressing>"
-
 let print_operation reg pp = function
   | Omove, [r1] -> reg pp r1
-  | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
-  | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
-  | Ofloatconst n, [] -> fprintf pp "%.15F" (camlfloat_of_coqfloat n)
-  | Osingleconst n, [] -> fprintf pp "%.15Ff" (camlfloat_of_coqfloat32 n)
-  | Oindirectsymbol id, [] -> fprintf pp "&%s" (extern_atom id)
+  | Ointconst n, [] -> fprintf pp "Ointconst(%ld)" (camlint_of_coqint n)
+  | Olongconst n, [] -> fprintf pp "Olongconst(%LdL)" (camlint64_of_coqint n)
+  | Ofloatconst n, [] -> fprintf pp "Ofloatconst(%F)" (camlfloat_of_coqfloat n)
+  | Osingleconst n, [] -> fprintf pp "Osingleconst(%Ff)" (camlfloat_of_coqfloat32 n)
+  | Oaddrsymbol(id, ofs), [] ->
+      fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+  | Oaddrstack ofs, [] ->
+      fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
   | Ocast8signed, [r1] -> fprintf pp "int8signed(%a)" reg r1
-  | Ocast8unsigned, [r1] -> fprintf pp "int8unsigned(%a)" reg r1
   | Ocast16signed, [r1] -> fprintf pp "int16signed(%a)" reg r1
-  | Ocast16unsigned, [r1] -> fprintf pp "int16unsigned(%a)" reg r1
-  | Oneg, [r1] -> fprintf pp "(- %a)" reg r1
+  | Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+  | Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n)
+  | Oneg, [r1] -> fprintf pp "-(%a)" reg r1
   | Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2
   | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
-  | Omulimm n, [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint n)
-  | Omulhs, [r1;r2] -> fprintf pp "mulhs(%a,%a)" reg r1 reg r2
-  | Omulhu, [r1;r2] -> fprintf pp "mulhu(%a,%a)" reg r1 reg r2
+  | Omulhs, [r1;r2] -> fprintf pp "%a *hs %a" reg r1 reg r2
+  | Omulhu, [r1;r2] -> fprintf pp "%a *hu %a" reg r1 reg r2
   | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
   | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2
   | Omod, [r1;r2] -> fprintf pp "%a %%s %a" reg r1 reg r2
@@ -99,28 +134,26 @@ let print_operation reg pp = function
   | Oorimm n, [r1] ->  fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n)
   | Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2
   | Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n)
-  | Onot, [r1] -> fprintf pp "not(%a)" reg r1
   | Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2
   | Oshlimm n, [r1] -> fprintf pp "%a << %ld" reg r1 (camlint_of_coqint n)
   | Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2
   | Oshrimm n, [r1] -> fprintf pp "%a >>s %ld" reg r1 (camlint_of_coqint n)
-  | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
   | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
   | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
-  | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n)
-  | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n)
-  | Olea addr, args -> print_addressing reg pp (addr, args); fprintf pp " (int)"
+  | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
+
   | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
   | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
   | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocast32signed, [r1] -> fprintf pp "long32signed(%a)" reg r1
   | Ocast32unsigned, [r1] -> fprintf pp "long32unsigned(%a)" reg r1
-  | Onegl, [r1] -> fprintf pp "(-l %a)" reg r1
+  | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
+  | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onegl, [r1] -> fprintf pp "-l (%a)" reg r1
   | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
   | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
-  | Omullimm n, [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint n)
-  | Omullhs, [r1;r2] -> fprintf pp "mullhs(%a,%a)" reg r1 reg r2
-  | Omullhu, [r1;r2] -> fprintf pp "mullhu(%a,%a)" reg r1 reg r2
+  | Omullhs, [r1;r2] -> fprintf pp "%a *lhs %a" reg r1 reg r2
+  | Omullhu, [r1;r2] -> fprintf pp "%a *lhu %a" reg r1 reg r2
   | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
   | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2
   | Omodl, [r1;r2] -> fprintf pp "%a %%ls %a" reg r1 reg r2
@@ -131,16 +164,14 @@ let print_operation reg pp = function
   | Oorlimm n, [r1] ->  fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
   | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
   | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
-  | Onotl, [r1] -> fprintf pp "notl(%a)" reg r1
   | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
-  | Oshllimm n, [r1] -> fprintf pp "%a <<l %ld" reg r1 (camlint_of_coqint n)
+  | Oshllimm n, [r1] -> fprintf pp "%a <<l %Ld" reg r1 (camlint64_of_coqint n)
   | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
   | Oshrlimm n, [r1] -> fprintf pp "%a >>ls %ld" reg r1 (camlint_of_coqint n)
-  | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
   | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
   | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n)
-  | Ororlimm n, [r1] -> fprintf pp "%a rorl %ld" reg r1 (camlint_of_coqint n)
-  | Oleal addr, args -> print_addressing reg pp (addr, args); fprintf pp " (long)"
+  | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
+
   | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
   | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
   | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2
@@ -156,18 +187,68 @@ let print_operation reg pp = function
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
   | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
+  | Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
-  | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
-  | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
+  | Ofloatofintu, [r1] -> fprintf pp "floatofintu(%a)" reg r1
   | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+  | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
   | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
+  | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
+  | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
+  | Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1
+  | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
+  | Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1
   | Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1
+  | Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1
   | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
+  | Osingleoflongu, [r1] -> fprintf pp "singleoflongu(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
-  | Osel (c, ty), r1::r2::args ->
-      fprintf pp "%a ?%s %a : %a"
-         (print_condition reg) (c, args)
-         (PrintAST.name_of_type ty) reg r1 reg r2
+  | OEseqw optR, [r1;r2] -> fprintf pp "OEseqw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsnew optR, [r1;r2] -> fprintf pp "OEsnew"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsequw optR, [r1;r2] -> fprintf pp "OEsequw"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsneuw optR, [r1;r2] -> fprintf pp "OEsneuw"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsltw optR, [r1;r2] -> fprintf pp "OEsltw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltuw optR, [r1;r2] -> fprintf pp "OEsltuw"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltiw n, [r1] -> fprintf pp "OEsltiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEsltiuw n, [r1] -> fprintf pp "OEsltiuw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OExoriw n, [r1] -> fprintf pp "OExoriw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEluiw n, _ -> fprintf pp "OEluiw(%ld)" (camlint_of_coqint n)
+  | OEaddiw (optR, n), [] -> fprintf pp "OEaddiw(%a,%ld)" get_optR_a optR (camlint_of_coqint n)
+  | OEaddiw (optR, n), [r1] -> fprintf pp "OEaddiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEandiw n, [r1] -> fprintf pp "OEandiw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEoriw n, [r1] -> fprintf pp "OEoriw(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEseql optR, [r1;r2] -> fprintf pp "OEseql"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsnel optR, [r1;r2] -> fprintf pp "OEsnel"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsequl optR, [r1;r2] -> fprintf pp "OEsequl"; (get_optR_s Ceq reg pp r1 r2 optR)
+  | OEsneul optR, [r1;r2] -> fprintf pp "OEsneul"; (get_optR_s Cne reg pp r1 r2 optR)
+  | OEsltl optR, [r1;r2] -> fprintf pp "OEsltl"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltul optR, [r1;r2] -> fprintf pp "OEsltul"; (get_optR_s Clt reg pp r1 r2 optR)
+  | OEsltil n, [r1] -> fprintf pp "OEsltil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEsltiul n, [r1] -> fprintf pp "OEsltiul(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OExoril n, [r1] -> fprintf pp "OExoril(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEluil n, _ -> fprintf pp "OEluil(%ld)" (camlint_of_coqint n)
+  | OEaddil (optR, n), [] -> fprintf pp "OEaddil(%a,%ld)" get_optR_a optR (camlint_of_coqint n)
+  | OEaddil (optR, n), [r1] -> fprintf pp "OEaddil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEandil n, [r1] -> fprintf pp "OEandil(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEoril n, [r1] -> fprintf pp "OEoril(%a,%ld)" reg r1 (camlint_of_coqint n)
+  | OEloadli n, _ -> fprintf pp "OEloadli(%ld)" (camlint_of_coqint n)
+  | OEmayundef mu, [r1;r2] -> fprintf pp "OEmayundef (%a,%a,%a)" mu_name mu reg r1 reg r2
+  | OEfeqd, [r1;r2] -> fprintf pp "OEfeqd(%a,%s,%a)" reg r1 (comparison_name Ceq) reg r2
+  | OEfltd, [r1;r2] -> fprintf pp "OEfltd(%a,%s,%a)" reg r1 (comparison_name Clt) reg r2
+  | OEfled, [r1;r2] -> fprintf pp "OEfled(%a,%s,%a)" reg r1 (comparison_name Cle) reg r2
+  | OEfeqs, [r1;r2] -> fprintf pp "OEfeqs(%a,%s,%a)" reg r1 (comparison_name Ceq) reg r2
+  | OEflts, [r1;r2] -> fprintf pp "OEflts(%a,%s,%a)" reg r1 (comparison_name Clt) reg r2
+  | OEfles, [r1;r2] -> fprintf pp "OEfles(%a,%s,%a)" reg r1 (comparison_name Cle) reg r2
+  | Obits_of_single, [r1] -> fprintf pp "bits_of_single(%a)" reg r1
+  | Obits_of_float, [r1] -> fprintf pp "bits_of_float(%a)" reg r1
+  | Osingle_of_bits, [r1] -> fprintf pp "single_of_bits(%a)" reg r1
+  | Ofloat_of_bits, [r1] -> fprintf pp "float_of_bits(%a)" reg r1
+  | Oselectl, [rb;rt;rf] -> fprintf pp "selectl(b:%a, t:%a, f:%a)" reg rb reg rt reg rf
   | _ -> fprintf pp "<bad operator>"
 
-
+let print_addressing reg pp = function
+  | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
+  | Aglobal(id, ofs), [] ->
+      fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+  | Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
+  | _ -> fprintf pp "<bad addressing>"
diff --git a/verilog/RTLpathSE_simplify.v b/verilog/RTLpathSE_simplify.v
index 55bf0e52..2739bc5d 120000..100644
--- a/verilog/RTLpathSE_simplify.v
+++ b/verilog/RTLpathSE_simplify.v
@@ -1 +1,2092 @@
-../aarch64/RTLpathSE_simplify.v
-\ No newline at end of file
+Require Import Coqlib Floats Values Memory.
+Require Import Integers.
+Require Import Op Registers.
+Require Import RTLpathSE_theory.
+Require Import RTLpathSE_simu_specs.
+Require Import Asmgen Asmgenproof1.
+Require Import Lia.
+
+(** Useful functions for conditions/branches expansion *)
+
+Definition is_inv_cmp_int (cmp: comparison) : bool :=
+  match cmp with | Cle | Cgt => true | _ => false end.
+
+Definition is_inv_cmp_float (cmp: comparison) : bool :=
+  match cmp with | Cge | Cgt => true | _ => false end.
+
+Definition make_optR (is_x0 is_inv: bool) : option oreg :=
+  if is_x0 then
+    (if is_inv then Some (X0_L)
+    else Some (X0_R))
+  else None.
+
+(** Functions to manage lists of "fake" values *)
+
+Definition make_lhsv_cmp (is_inv: bool) (hv1 hv2: hsval) : list_hsval :=
+  let (hvfirst, hvsec) := if is_inv then (hv1, hv2) else (hv2, hv1) in
+  let lhsv := fScons hvfirst fSnil in
+  fScons hvsec lhsv.
+
+Definition make_lhsv_single (hvs: hsval) : list_hsval :=
+  fScons hvs fSnil.
+
+(** * Expansion functions *)
+
+(** ** Immediate loads *)
+
+Definition load_hilo32 (hi lo: int) :=
+  if Int.eq lo Int.zero then
+    fSop (OEluiw hi) fSnil
+  else
+    let hvs := fSop (OEluiw hi) fSnil in
+    let hl := make_lhsv_single hvs in
+    fSop (OEaddiw None lo) hl.
+
+Definition load_hilo64 (hi lo: int64) :=
+  if Int64.eq lo Int64.zero then
+    fSop (OEluil hi) fSnil
+  else
+    let hvs := fSop (OEluil hi) fSnil in
+    let hl := make_lhsv_single hvs in
+    fSop (OEaddil None lo) hl.
+
+Definition loadimm32 (n: int) :=
+  match make_immed32 n with
+  | Imm32_single imm =>
+      fSop (OEaddiw (Some X0_R) imm) fSnil
+  | Imm32_pair hi lo => load_hilo32 hi lo
+  end.
+
+Definition loadimm64 (n: int64) :=
+  match make_immed64 n with
+  | Imm64_single imm =>
+      fSop (OEaddil (Some X0_R) imm) fSnil
+  | Imm64_pair hi lo => load_hilo64 hi lo
+  | Imm64_large imm => fSop (OEloadli imm) fSnil
+  end.
+
+Definition opimm32 (hv1: hsval) (n: int) (op: operation) (opimm: int -> operation) :=
+  match make_immed32 n with
+  | Imm32_single imm =>
+      let hl := make_lhsv_single hv1 in
+      fSop (opimm imm) hl
+  | Imm32_pair hi lo =>
+      let hvs := load_hilo32 hi lo in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  end.
+
+Definition opimm64 (hv1: hsval) (n: int64) (op: operation) (opimm: int64 -> operation) :=
+  match make_immed64 n with
+  | Imm64_single imm =>
+      let hl := make_lhsv_single hv1 in
+      fSop (opimm imm) hl
+  | Imm64_pair hi lo =>
+      let hvs := load_hilo64 hi lo in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  | Imm64_large imm =>
+      let hvs := fSop (OEloadli imm) fSnil in
+      let hl := make_lhsv_cmp false hv1 hvs in
+      fSop op hl
+  end.
+
+Definition addimm32 (hv1: hsval) (n: int) (or: option oreg) := opimm32 hv1 n Oadd (OEaddiw or).
+Definition andimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oand OEandiw.
+Definition orimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oor OEoriw.
+Definition xorimm32 (hv1: hsval) (n: int) := opimm32 hv1 n Oxor OExoriw.
+Definition sltimm32 (hv1: hsval) (n: int) := opimm32 hv1 n (OEsltw None) OEsltiw.
+Definition sltuimm32 (hv1: hsval) (n: int) := opimm32 hv1 n (OEsltuw None) OEsltiuw.
+Definition addimm64 (hv1: hsval) (n: int64) (or: option oreg) := opimm64 hv1 n Oaddl (OEaddil or).
+Definition andimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oandl OEandil.
+Definition orimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oorl OEoril.
+Definition xorimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n Oxorl OExoril.
+Definition sltimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n (OEsltl None) OEsltil.
+Definition sltuimm64 (hv1: hsval) (n: int64) := opimm64 hv1 n (OEsltul None) OEsltiul.
+
+(** ** Comparisons intructions *)
+
+Definition cond_int32s (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEseqw optR) lhsv
+  | Cne => fSop (OEsnew optR) lhsv
+  | Clt | Cgt => fSop (OEsltw optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltw optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int32u (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEsequw optR) lhsv
+  | Cne => fSop (OEsneuw optR) lhsv
+  | Clt | Cgt => fSop (OEsltuw optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltuw optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int64s (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEseql optR) lhsv
+  | Cne => fSop (OEsnel optR) lhsv
+  | Clt | Cgt => fSop (OEsltl optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltl optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition cond_int64u (cmp: comparison) (lhsv: list_hsval) (optR: option oreg) :=
+  match cmp with
+  | Ceq => fSop (OEsequl optR) lhsv
+  | Cne => fSop (OEsneul optR) lhsv
+  | Clt | Cgt => fSop (OEsltul optR) lhsv
+  | Cle | Cge =>
+      let hvs := (fSop (OEsltul optR) lhsv) in
+      let hl := make_lhsv_single hvs in
+      fSop (OExoriw Int.one) hl
+  end.
+
+Definition expanse_condimm_int32s (cmp: comparison) (hv1: hsval) (n: int) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int.eq n Int.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int32s cmp hl optR
+  else
+    match cmp with
+    | Ceq | Cne =>
+        let optR := make_optR true is_inv in
+        let hvs := xorimm32 hv1 n in
+        let hl := make_lhsv_cmp false hvs hvs in
+        cond_int32s cmp hl optR
+    | Clt => sltimm32 hv1 n
+    | Cle =>
+        if Int.eq n (Int.repr Int.max_signed) then
+          let hvs := loadimm32 Int.one in
+          let hl := make_lhsv_cmp false hv1 hvs in
+          fSop (OEmayundef MUint) hl
+        else sltimm32 hv1 (Int.add n Int.one)
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm32 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int32s cmp hl optR
+    end.
+
+Definition expanse_condimm_int32u (cmp: comparison) (hv1: hsval) (n: int) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int.eq n Int.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int32u cmp hl optR
+  else
+    match cmp with
+    | Clt => sltuimm32 hv1 n
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm32 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int32u cmp hl optR
+    end.
+
+Definition expanse_condimm_int64s (cmp: comparison) (hv1: hsval) (n: int64) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int64.eq n Int64.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int64s cmp hl optR
+  else
+    match cmp with
+    | Ceq | Cne =>
+        let optR := make_optR true is_inv in
+        let hvs := xorimm64 hv1 n in
+        let hl := make_lhsv_cmp false hvs hvs in
+        cond_int64s cmp hl optR
+    | Clt => sltimm64 hv1 n
+    | Cle =>
+        if Int64.eq n (Int64.repr Int64.max_signed) then
+          let hvs := loadimm32 Int.one in
+          let hl := make_lhsv_cmp false hv1 hvs in
+          fSop (OEmayundef MUlong) hl
+        else sltimm64 hv1 (Int64.add n Int64.one)
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm64 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int64s cmp hl optR
+    end.
+
+Definition expanse_condimm_int64u (cmp: comparison) (hv1: hsval) (n: int64) :=
+  let is_inv := is_inv_cmp_int cmp in
+  if Int64.eq n Int64.zero then
+    let optR := make_optR true is_inv in
+    let hl := make_lhsv_cmp is_inv hv1 hv1 in
+    cond_int64u cmp hl optR
+  else
+    match cmp with
+    | Clt => sltuimm64 hv1 n
+    | _ =>
+        let optR := make_optR false is_inv in
+        let hvs := loadimm64 n in
+        let hl := make_lhsv_cmp is_inv hv1 hvs in
+        cond_int64u cmp hl optR
+    end.
+
+Definition cond_float (cmp: comparison) (lhsv: list_hsval) :=
+  match cmp with
+  | Ceq | Cne => fSop OEfeqd lhsv
+  | Clt | Cgt => fSop OEfltd lhsv
+  | Cle | Cge => fSop OEfled lhsv
+  end.
+
+Definition cond_single (cmp: comparison) (lhsv: list_hsval) :=
+  match cmp with
+  | Ceq | Cne => fSop OEfeqs lhsv
+  | Clt | Cgt => fSop OEflts lhsv
+  | Cle | Cge => fSop OEfles lhsv
+  end.
+
+Definition is_normal_cmp cmp :=
+  match cmp with | Cne => false | _ => true end.
+
+Definition expanse_cond_fp (cnot: bool) fn_cond cmp (lhsv: list_hsval) :=
+  let normal := is_normal_cmp cmp in
+  let normal' := if cnot then negb normal else normal in
+  let hvs := fn_cond cmp lhsv in
+  let hl := make_lhsv_single hvs in
+  if normal' then hvs else fSop (OExoriw Int.one) hl.
+
+(** ** Branches instructions *)
+
+Definition transl_cbranch_int32s (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbeqw optR
+  | Cne => CEbnew optR
+  | Clt => CEbltw optR
+  | Cle => CEbgew optR
+  | Cgt => CEbltw optR
+  | Cge => CEbgew optR
+  end.
+
+Definition transl_cbranch_int32u (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbequw optR
+  | Cne => CEbneuw optR
+  | Clt => CEbltuw optR
+  | Cle => CEbgeuw optR
+  | Cgt => CEbltuw optR
+  | Cge => CEbgeuw optR
+  end.
+
+Definition transl_cbranch_int64s (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbeql optR
+  | Cne => CEbnel optR
+  | Clt => CEbltl optR
+  | Cle => CEbgel optR
+  | Cgt => CEbltl optR
+  | Cge => CEbgel optR
+  end.
+
+Definition transl_cbranch_int64u (cmp: comparison) (optR: option oreg) :=
+  match cmp with
+  | Ceq => CEbequl optR
+  | Cne => CEbneul optR
+  | Clt => CEbltul optR
+  | Cle => CEbgeul optR
+  | Cgt => CEbltul optR
+  | Cge => CEbgeul optR
+  end.
+
+Definition expanse_cbranch_fp (cnot: bool) fn_cond cmp (lhsv: list_hsval) : (condition * list_hsval) :=
+  let normal := is_normal_cmp cmp in
+  let normal' := if cnot then negb normal else normal in
+  let hvs := fn_cond cmp lhsv in
+  let hl := make_lhsv_cmp false hvs hvs in
+  if normal' then ((CEbnew (Some X0_R)), hl) else ((CEbeqw (Some X0_R)), hl).
+
+(** * Target simplifications using "fake" values *)
+
+Definition target_op_simplify (op: operation) (lr: list reg) (hst: hsistate_local): option hsval :=
+  match op, lr with
+  | Ocmp (Ccomp c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int32s c lhsv optR)
+  | Ocmp (Ccompu c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int32u c lhsv optR)
+  | Ocmp (Ccompimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int32s c hv1 imm)
+  | Ocmp (Ccompuimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int32u c hv1 imm)
+  | Ocmp (Ccompl c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int64s c lhsv optR)
+  | Ocmp (Ccomplu c), a1 :: a2 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hv2 := fsi_sreg_get hst a2 in
+      let is_inv := is_inv_cmp_int c in
+      let optR := make_optR false is_inv in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond_int64u c lhsv optR)
+  | Ocmp (Ccomplimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int64s c hv1 imm)
+  | Ocmp (Ccompluimm c imm), a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (expanse_condimm_int64u c hv1 imm)
+  | Ocmp (Ccompf c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp false cond_float c lhsv)
+  | Ocmp (Cnotcompf c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp true cond_float c lhsv)
+  | Ocmp (Ccompfs c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp false cond_single c lhsv)
+  | Ocmp (Cnotcompfs c), f1 :: f2 :: nil =>
+      let hv1 := fsi_sreg_get hst f1 in
+      let hv2 := fsi_sreg_get hst f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cond_fp true cond_single c lhsv)
+  | Ofloatconst f, nil =>
+      let hvs := loadimm64 (Float.to_bits f) in
+      let hl := make_lhsv_single hvs in
+      Some (fSop (Ofloat_of_bits) hl)
+  | Osingleconst f, nil =>
+      let hvs := loadimm32 (Float32.to_bits f) in
+      let hl := make_lhsv_single hvs in
+      Some (fSop (Osingle_of_bits) hl)
+  | Ointconst n, nil =>
+      Some (loadimm32 n)
+  | Olongconst n, nil =>
+      Some (loadimm64 n)
+  | Oaddimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (addimm32 hv1 n None)
+  | Oaddlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (addimm64 hv1 n None)
+  | Oandimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (andimm32 hv1 n)
+  | Oandlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (andimm64 hv1 n)
+  | Oorimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (orimm32 hv1 n)
+  | Oorlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (orimm64 hv1 n)
+  | Oxorimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (xorimm32 hv1 n)
+  | Oxorlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      Some (xorimm64 hv1 n)
+  | Ocast8signed, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let hvs := fSop (Oshlimm (Int.repr 24)) hl in
+      let hl' := make_lhsv_single hvs in
+      Some (fSop (Oshrimm (Int.repr 24)) hl')
+  | Ocast16signed, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let hvs := fSop (Oshlimm (Int.repr 16)) hl in
+      let hl' := make_lhsv_single hvs in
+      Some (fSop (Oshrimm (Int.repr 16)) hl')
+  | Ocast32unsigned, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      let cast32s_s := fSop Ocast32signed hl in
+      let cast32s_l := make_lhsv_single cast32s_s in
+      let sllil_s := fSop (Oshllimm (Int.repr 32)) cast32s_l in
+      let sllil_l := make_lhsv_single sllil_s in
+      Some (fSop (Oshrluimm (Int.repr 32)) sllil_l)
+  | Oshrximm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      if Int.eq n Int.zero then
+        let lhl := make_lhsv_cmp false hv1 hv1 in
+        Some (fSop (OEmayundef (MUshrx n)) lhl)
+      else
+        if Int.eq n Int.one then
+          let srliw_s := fSop (Oshruimm (Int.repr 31)) hl in
+          let srliw_l := make_lhsv_cmp false hv1 srliw_s in
+          let addw_s := fSop Oadd srliw_l in
+          let addw_l := make_lhsv_single addw_s in
+          let sraiw_s := fSop (Oshrimm Int.one) addw_l in
+          let sraiw_l := make_lhsv_cmp false sraiw_s sraiw_s in
+          Some (fSop (OEmayundef (MUshrx n)) sraiw_l)
+        else 
+          let sraiw_s := fSop (Oshrimm (Int.repr 31)) hl in
+          let sraiw_l := make_lhsv_single sraiw_s in
+          let srliw_s := fSop (Oshruimm (Int.sub Int.iwordsize n)) sraiw_l in
+          let srliw_l := make_lhsv_cmp false hv1 srliw_s in
+          let addw_s := fSop Oadd srliw_l in
+          let addw_l := make_lhsv_single addw_s in
+          let sraiw_s' := fSop (Oshrimm n) addw_l in
+          let sraiw_l' := make_lhsv_cmp false sraiw_s' sraiw_s' in
+          Some (fSop (OEmayundef (MUshrx n)) sraiw_l')
+  | Oshrxlimm n, a1 :: nil =>
+      let hv1 := fsi_sreg_get hst a1 in
+      let hl := make_lhsv_single hv1 in
+      if Int.eq n Int.zero then
+        let lhl := make_lhsv_cmp false hv1 hv1 in
+        Some (fSop (OEmayundef (MUshrxl n)) lhl)
+      else
+        if Int.eq n Int.one then
+          let srlil_s := fSop (Oshrluimm (Int.repr 63)) hl in
+          let srlil_l := make_lhsv_cmp false hv1 srlil_s in
+          let addl_s := fSop Oaddl srlil_l in
+          let addl_l := make_lhsv_single addl_s in
+          let srail_s := fSop (Oshrlimm Int.one) addl_l in
+          let srail_l := make_lhsv_cmp false srail_s srail_s in
+          Some (fSop (OEmayundef (MUshrxl n)) srail_l)
+        else
+          let srail_s := fSop (Oshrlimm (Int.repr 63)) hl in
+          let srail_l := make_lhsv_single srail_s in
+          let srlil_s := fSop (Oshrluimm (Int.sub Int64.iwordsize' n)) srail_l in
+          let srlil_l := make_lhsv_cmp false hv1 srlil_s in
+          let addl_s := fSop Oaddl srlil_l in
+          let addl_l := make_lhsv_single addl_s in
+          let srail_s' := fSop (Oshrlimm n) addl_l in
+          let srail_l' := make_lhsv_cmp false srail_s' srail_s' in
+          Some (fSop (OEmayundef (MUshrxl n)) srail_l')
+  | _, _ => None
+  end.
+
+Definition target_cbranch_expanse (prev: hsistate_local) (cond: condition) (args: list reg) : option (condition * list_hsval) :=
+  match cond, args with
+  | (Ccomp c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int32s c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccompu c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int32u c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccompimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int.eq n Int.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int32s c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm32 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int32s c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompuimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int.eq n Int.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int32u c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm32 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int32u c (make_optR false is_inv) in
+         Some (cond, lhsv))
+  | (Ccompl c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int64s c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccomplu c), (a1 :: a2 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let cond := transl_cbranch_int64u c (make_optR false is_inv) in
+      let hv1 := fsi_sreg_get prev a1 in
+      let hv2 := fsi_sreg_get prev a2 in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (cond, lhsv)
+  | (Ccomplimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int64.eq n Int64.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int64s c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm64 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int64s c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompluimm c n), (a1 :: nil) =>
+      let is_inv := is_inv_cmp_int c in
+      let hv1 := fsi_sreg_get prev a1 in
+      (if Int64.eq n Int64.zero then
+        let lhsv := make_lhsv_cmp is_inv hv1 hv1 in
+        let cond := transl_cbranch_int64u c (make_optR true is_inv) in
+        Some (cond, lhsv)
+      else
+        let hvs := loadimm64 n in
+        let lhsv := make_lhsv_cmp is_inv hv1 hvs in
+        let cond := transl_cbranch_int64u c (make_optR false is_inv) in
+        Some (cond, lhsv))
+  | (Ccompf c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp false cond_float c lhsv)
+  | (Cnotcompf c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp true cond_float c lhsv)
+  | (Ccompfs c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp false cond_single c lhsv)
+  | (Cnotcompfs c), (f1 :: f2 :: nil) =>
+      let hv1 := fsi_sreg_get prev f1 in
+      let hv2 := fsi_sreg_get prev f2 in
+      let is_inv := is_inv_cmp_float c in
+      let lhsv := make_lhsv_cmp is_inv hv1 hv2 in
+      Some (expanse_cbranch_fp true cond_single c lhsv)
+  | _, _ => None
+   end.
+
+(** * Auxiliary lemmas on comparisons *)
+
+(** ** Signed ints *)
+
+Lemma xor_neg_ltle_cmp: forall v1 v2,
+  Some (Val.xor (Val.cmp Clt v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmp_bool Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  unfold Val.cmp; simpl;
+  try rewrite Int.eq_sym;
+  try destruct (Int.eq _ _); try destruct (Int.lt _ _) eqn:ELT ; simpl;
+  try rewrite Int.xor_one_one; try rewrite Int.xor_zero_one;
+  auto.
+Qed.
+
+(** ** Unsigned ints *)
+
+Lemma xor_neg_ltle_cmpu: forall mptr v1 v2,
+  Some (Val.xor (Val.cmpu (Mem.valid_pointer mptr) Clt v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer mptr) Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  unfold Val.cmpu; simpl;
+  try rewrite Int.eq_sym;
+  try destruct (Int.eq _ _); try destruct (Int.ltu _ _) eqn:ELT ; simpl;
+  try rewrite Int.xor_one_one; try rewrite Int.xor_zero_one;
+  auto.
+  1,2:
+    unfold Val.cmpu, Val.cmpu_bool;
+    destruct Archi.ptr64; try destruct (_ && _); try destruct (_ || _);
+    try destruct (eq_block _ _); auto.
+  unfold Val.cmpu, Val.cmpu_bool; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+Remark ltu_12_wordsize:
+  Int.ltu (Int.repr 12) Int.iwordsize = true.
+Proof.
+  unfold Int.iwordsize, Int.zwordsize. simpl.
+  unfold Int.ltu. apply zlt_true.
+  rewrite !Int.unsigned_repr; try cbn; try lia.
+Qed.
+
+(** ** Signed longs *)
+
+Lemma xor_neg_ltle_cmpl: forall v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmpl Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpl_bool Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.lt _ _); auto.
+Qed.
+
+Lemma xor_neg_ltge_cmpl: forall v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmpl Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpl_bool Cge v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.lt _ _); auto.
+Qed.
+
+Lemma xorl_zero_eq_cmpl: forall c v1 v2,
+  c = Ceq \/ c = Cne ->
+  Some
+    (Val.maketotal
+     (option_map Val.of_bool
+       (Val.cmpl_bool c (Val.xorl v1 v2) (Vlong Int64.zero)))) =
+  Some (Val.of_optbool (Val.cmpl_bool c v1 v2)).
+Proof.
+  intros. destruct c; inv H; try discriminate;
+  destruct v1, v2; simpl; auto;
+  destruct (Int64.eq i i0) eqn:EQ0.
+  1,3:
+    apply Int64.same_if_eq in EQ0; subst;
+    rewrite Int64.xor_idem;
+    rewrite Int64.eq_true; trivial.
+  1,2:
+    destruct (Int64.eq (Int64.xor i i0) Int64.zero) eqn:EQ1; simpl; try congruence;
+    rewrite Int64.xor_is_zero in EQ1; congruence.
+Qed.
+
+Lemma cmp_ltle_add_one: forall v n,
+  Int.eq n (Int.repr Int.max_signed) = false ->
+  Some (Val.of_optbool (Val.cmp_bool Clt v (Vint (Int.add n Int.one)))) =
+  Some (Val.of_optbool (Val.cmp_bool Cle v (Vint n))).
+Proof.
+  intros v n EQMAX. unfold Val.cmp_bool; destruct v; simpl; auto.
+  unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1).
+  destruct (zlt (Int.signed n) (Int.signed i)).
+  rewrite zlt_false by lia. auto.
+  rewrite zlt_true by lia. auto.
+  rewrite Int.add_signed. symmetry; apply Int.signed_repr. 
+  specialize (Int.eq_spec n (Int.repr Int.max_signed)).
+  rewrite EQMAX; simpl; intros.
+  assert (Int.signed n <> Int.max_signed).
+  { red; intros E. elim H. rewrite <- (Int.repr_signed n). rewrite E. auto. }
+  generalize (Int.signed_range n); lia.
+Qed.
+
+Lemma cmpl_ltle_add_one: forall v n,
+  Int64.eq n (Int64.repr Int64.max_signed) = false ->
+  Some (Val.of_optbool (Val.cmpl_bool Clt v (Vlong (Int64.add n Int64.one)))) =
+  Some (Val.of_optbool (Val.cmpl_bool Cle v (Vlong n))).
+Proof.
+  intros v n EQMAX. unfold Val.cmpl_bool; destruct v; simpl; auto.
+  unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1).
+  destruct (zlt (Int64.signed n) (Int64.signed i)).
+  rewrite zlt_false by lia. auto.
+  rewrite zlt_true by lia. auto.
+  rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. 
+  specialize (Int64.eq_spec n (Int64.repr Int64.max_signed)).
+  rewrite EQMAX; simpl; intros.
+  assert (Int64.signed n <> Int64.max_signed).
+  { red; intros E. elim H. rewrite <- (Int64.repr_signed n). rewrite E. auto. }
+  generalize (Int64.signed_range n); lia.
+Qed.
+
+Remark lt_maxsgn_false_int: forall i,
+  Int.lt (Int.repr Int.max_signed) i = false.
+Proof.
+  intros; unfold Int.lt.
+  specialize  Int.signed_range with i; intros.
+  rewrite zlt_false; auto. destruct H.
+  rewrite Int.signed_repr; try (cbn; lia).
+  apply Z.le_ge. trivial.
+Qed.
+
+Remark lt_maxsgn_false_long: forall i,
+  Int64.lt (Int64.repr Int64.max_signed) i = false.
+Proof.
+  intros; unfold Int64.lt.
+  specialize  Int64.signed_range with i; intros.
+  rewrite zlt_false; auto. destruct H.
+  rewrite Int64.signed_repr; try (cbn; lia).
+  apply Z.le_ge. trivial.
+Qed.
+
+(** ** Unsigned longs *)
+
+Lemma xor_neg_ltle_cmplu: forall mptr v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmplu (Mem.valid_pointer mptr) Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer mptr) Cle v2 v1)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.ltu _ _); auto.
+  1,2: unfold Val.cmplu; simpl; auto;
+  destruct (Archi.ptr64); simpl;
+  try destruct (eq_block _ _); simpl;
+  try destruct (_ && _); simpl;
+  try destruct (Ptrofs.cmpu _ _);
+  try destruct cmp; simpl; auto.
+  unfold Val.cmplu; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+Lemma xor_neg_ltge_cmplu: forall mptr v1 v2,
+  Some (Val.xor (Val.maketotal (Val.cmplu (Mem.valid_pointer mptr) Clt v1 v2)) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer mptr) Cge v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence.
+  destruct (Int64.ltu _ _); auto.
+  1,2: unfold Val.cmplu; simpl; auto;
+  destruct (Archi.ptr64); simpl;
+  try destruct (eq_block _ _); simpl;
+  try destruct (_ && _); simpl;
+  try destruct (Ptrofs.cmpu _ _);
+  try destruct cmp; simpl; auto.
+  unfold Val.cmplu; simpl;
+  destruct Archi.ptr64; try destruct (_ || _); simpl; auto;
+  destruct (eq_block b b0); destruct (eq_block b0 b);
+  try congruence;
+  try destruct (_ || _); simpl; try destruct (Ptrofs.ltu _ _);
+  simpl; auto;
+  repeat destruct (_ && _); simpl; auto.
+Qed.
+
+(** ** Floats *)
+
+Lemma xor_neg_eqne_cmpf: forall v1 v2,
+  Some (Val.xor (Val.cmpf Ceq v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpf_bool Cne v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence;
+  unfold Val.cmpf; simpl.
+  rewrite Float.cmp_ne_eq.
+  destruct (Float.cmp _ _ _); simpl; auto.
+Qed.
+
+(** ** Singles *)
+
+Lemma xor_neg_eqne_cmpfs: forall v1 v2,
+  Some (Val.xor (Val.cmpfs Ceq v1 v2) (Vint Int.one)) =
+  Some (Val.of_optbool (Val.cmpfs_bool Cne v1 v2)).
+Proof.
+  intros. eapply f_equal.
+  destruct v1, v2; simpl; try congruence;
+  unfold Val.cmpfs; simpl.
+  rewrite Float32.cmp_ne_eq.
+  destruct (Float32.cmp _ _ _); simpl; auto.
+Qed.
+
+(** ** More useful lemmas *)
+
+Lemma xor_neg_optb: forall v,
+  Some (Val.xor (Val.of_optbool (option_map negb v))
+    (Vint Int.one)) = Some (Val.of_optbool v).
+Proof.
+  intros.
+  destruct v; simpl; trivial.
+  destruct b; simpl; auto.
+Qed.
+
+Lemma xor_neg_optb': forall v,
+  Some (Val.xor (Val.of_optbool v) (Vint Int.one)) =
+  Some (Val.of_optbool (option_map negb v)).
+Proof.
+  intros.
+  destruct v; simpl; trivial.
+  destruct b; simpl; auto.
+Qed.
+
+Lemma optbool_mktotal: forall v,
+  Val.maketotal (option_map Val.of_bool v) =
+  Val.of_optbool v.
+Proof.
+  intros.
+  destruct v; simpl; auto.
+Qed.
+
+(* TODO gourdinl move to common/Values ? *)
+Theorem swap_cmpf_bool:
+  forall c x y,
+  Val.cmpf_bool (swap_comparison c) x y = Val.cmpf_bool c y x.
+Proof.
+  destruct x; destruct y; simpl; auto. rewrite Float.cmp_swap. auto.
+Qed.
+
+Theorem swap_cmpfs_bool:
+  forall c x y,
+  Val.cmpfs_bool (swap_comparison c) x y = Val.cmpfs_bool c y x.
+Proof.
+  destruct x; destruct y; simpl; auto. rewrite Float32.cmp_swap. auto.
+Qed.
+
+(** * Intermediates lemmas on each expanded instruction *)
+
+Lemma simplify_ccomp_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int32s c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmp_bool c v v0)).
+Proof.
+  intros.
+  unfold cond_int32s in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmp.
+  - apply xor_neg_ltle_cmp.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmp_bool; trivial.
+  - replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmp_bool.
+    rewrite xor_neg_optb; trivial.
+Qed.
+
+Lemma simplify_ccompu_correct ge sp hst st c r r0 rs0 m m0 v v0: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int32u c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer m) c v v0)).
+Proof.
+  intros.
+  erewrite (cmpu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)).
+  2: eauto.
+  unfold cond_int32u in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpu.
+  - apply xor_neg_ltle_cmpu.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmpu_bool; trivial.
+  - replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmpu_bool.
+    rewrite xor_neg_optb; trivial.
+Qed.
+
+Lemma simplify_ccompimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int32s c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmp_bool c v (Vint n))).
+Proof.
+  intros.
+  unfold expanse_condimm_int32s, cond_int32s in *; destruct c;
+  intros; destruct (Int.eq n Int.zero) eqn:EQIMM; simpl;
+  try apply Int.same_if_eq in EQIMM; subst;
+  unfold loadimm32, sltimm32, xorimm32, opimm32, load_hilo32;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmp, zero32.
+  all:
+    try apply xor_neg_ltle_cmp; 
+    try apply xor_neg_ltge_cmp; trivial.
+  4: 
+    try destruct (Int.eq n (Int.repr Int.max_signed)) eqn:EQMAX; subst;
+    try apply Int.same_if_eq in EQMAX; subst; simpl.
+  4:
+    intros; try (specialize make_immed32_sound with (Int.one);
+    destruct (make_immed32 Int.one) eqn:EQMKI_A1); intros; simpl.
+  6:
+    intros; try (specialize make_immed32_sound with (Int.add n Int.one);
+    destruct (make_immed32 (Int.add n Int.one)) eqn:EQMKI_A2); intros; simpl.
+  1,2,3,8,9:
+    intros; try (specialize make_immed32_sound with (n);
+    destruct (make_immed32 n) eqn:EQMKI); intros; simpl.
+  all: 
+    try destruct (Int.eq lo Int.zero) eqn:EQLO32;
+    try apply Int.same_if_eq in EQLO32; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    try rewrite (Int.add_commut _ Int.zero), Int.add_zero_l in H; subst;
+    unfold Val.cmp, eval_may_undef, zero32, Val.add; simpl;
+    destruct v; auto.
+  all:
+    try rewrite ltu_12_wordsize;
+    try rewrite <- H;
+    try (apply cmp_ltle_add_one; auto);
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int.add_commut;
+    try rewrite <- H; try rewrite cmp_ltle_add_one;
+    try rewrite Int.add_zero_l;
+    try (
+    simpl; trivial;
+    try rewrite Int.xor_is_zero;
+    try destruct (Int.lt _ _) eqn:EQLT; trivial;
+    try rewrite lt_maxsgn_false_int in EQLT;
+    simpl; trivial; try discriminate; fail).
+Qed.
+
+Lemma simplify_ccompuimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int32u c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpu_bool (Mem.valid_pointer m) c v (Vint n))).
+Proof.
+  intros.
+  assert (HMEM: Val.cmpu_bool (Mem.valid_pointer m) c v (Vint n) =
+          Val.cmpu_bool (Mem.valid_pointer m0) c v (Vint n)).
+  erewrite (cmpu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)); eauto.
+  unfold expanse_condimm_int32u, cond_int32u in *; destruct c;
+  intros; destruct (Int.eq n Int.zero) eqn:EQIMM; simpl;
+  try apply Int.same_if_eq in EQIMM; subst;
+  unfold loadimm32, sltuimm32, opimm32, load_hilo32;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1; trivial;
+  try rewrite xor_neg_ltle_cmpu;
+  unfold Val.cmpu, zero32.
+  all:
+    try (specialize make_immed32_sound with n;
+    destruct (make_immed32 n) eqn:EQMKI);
+    try destruct (Int.eq lo Int.zero) eqn:EQLO;
+    try apply Int.same_if_eq in EQLO; subst;
+    intros; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    rewrite HMEM;
+    unfold eval_may_undef, Val.cmpu;
+    destruct v; simpl; auto;
+    try rewrite EQIMM; try destruct (Archi.ptr64) eqn:EQARCH; simpl;
+    try rewrite ltu_12_wordsize; trivial;
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int.add_zero_l;
+    try destruct (Int.ltu _ _) eqn:EQLTU; simpl;
+    try rewrite EQLTU; simpl; try rewrite EQIMM;
+    try rewrite EQARCH; trivial.
+Qed.
+
+Lemma simplify_ccompl_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int64s c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpl_bool c v v0)).
+Proof.
+  intros.
+  unfold cond_int64s in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpl.
+  1,2,3: rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltle_cmpl.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmpl_bool; trivial.
+    rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltge_cmpl.
+Qed.
+
+Lemma simplify_ccomplu_correct ge sp hst st c r r0 rs0 m m0 v v0: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj
+       (cond_int64u c
+          (make_lhsv_cmp (is_inv_cmp_int c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)) None)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) c v v0)).
+Proof.
+  intros.
+  erewrite (cmplu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)).
+  2: eauto.
+  unfold cond_int64u in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmplu.
+  1,2,3: rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltle_cmplu.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmplu_bool; trivial.
+    rewrite optbool_mktotal; trivial.
+  - apply xor_neg_ltge_cmplu.
+Qed.
+
+Lemma simplify_ccomplimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int64s c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpl_bool c v (Vlong n))).
+Proof.
+  intros.
+  unfold expanse_condimm_int64s, cond_int64s in *; destruct c;
+  intros; destruct (Int64.eq n Int64.zero) eqn:EQIMM; simpl;
+  try apply Int64.same_if_eq in EQIMM; subst;
+  unfold loadimm32, loadimm64, sltimm64, xorimm64, opimm64, load_hilo32, load_hilo64;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmpl, zero64.
+  all:
+    try apply xor_neg_ltle_cmpl; 
+    try apply xor_neg_ltge_cmpl;
+    try rewrite optbool_mktotal; trivial.
+  4: 
+    try destruct (Int64.eq n (Int64.repr Int64.max_signed)) eqn:EQMAX; subst;
+    try apply Int64.same_if_eq in EQMAX; subst; simpl.
+  4:
+    intros; try (specialize make_immed32_sound with (Int.one);
+    destruct (make_immed32 Int.one) eqn:EQMKI_A1); intros; simpl.
+  6:
+    intros; try (specialize make_immed64_sound with (Int64.add n Int64.one);
+    destruct (make_immed64 (Int64.add n Int64.one)) eqn:EQMKI_A2); intros; simpl.
+  1,2,3,9,10:
+    intros; try (specialize make_immed64_sound with (n);
+    destruct (make_immed64 n) eqn:EQMKI); intros; simpl.
+  all: 
+    try destruct (Int.eq lo Int.zero) eqn:EQLO32;
+    try apply Int.same_if_eq in EQLO32; subst;
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO64;
+    try apply Int64.same_if_eq in EQLO64; subst;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    try rewrite (Int64.add_commut _ Int64.zero), Int64.add_zero_l in H; subst;
+    try fold (Val.cmpl Clt v (Vlong imm));
+    try rewrite xor_neg_ltge_cmpl; trivial;
+    try rewrite xor_neg_ltle_cmpl; trivial;
+    unfold Val.cmpl, Val.addl;
+    try rewrite xorl_zero_eq_cmpl; trivial;
+    try rewrite optbool_mktotal; trivial;
+    unfold eval_may_undef, zero32, Val.add; simpl;
+    destruct v; auto.
+  1,2,3,4,5,6,7,8,9,10,11,12:
+      try rewrite <- optbool_mktotal; trivial;
+      try rewrite Int64.add_commut, Int64.add_zero_l in *;
+      try fold (Val.cmpl Clt (Vlong i) (Vlong imm));
+      try fold (Val.cmpl Clt (Vlong i) (Vlong (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12)))));
+      try fold (Val.cmpl Clt (Vlong i) (Vlong (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)));
+      try rewrite xor_neg_ltge_cmpl; trivial;
+      try rewrite xor_neg_ltle_cmpl; trivial.
+  6:
+    rewrite <- H;
+    try apply cmpl_ltle_add_one; auto.
+  all:
+    try rewrite <- H;
+    try apply cmpl_ltle_add_one; auto;
+    try rewrite <- cmpl_ltle_add_one; auto;
+    try rewrite ltu_12_wordsize;
+    try rewrite Int.add_commut, Int.add_zero_l in *;
+    try rewrite Int64.add_commut, Int64.add_zero_l in *;
+    try rewrite Int64.add_zero_l;
+    simpl; try rewrite lt_maxsgn_false_long;
+    try (rewrite <- H; trivial; fail);
+    simpl; trivial.
+Qed.
+
+Lemma simplify_ccompluimm_correct ge sp hst st c r n rs0 m m0 v: forall
+  (SMEM : forall (m : mem) (b : Values.block) (ofs : Z),
+           seval_smem ge sp (si_smem st) rs0 m0 = Some m ->
+           Mem.valid_pointer m b ofs = Mem.valid_pointer m0 b ofs)
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OK2 : seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+  seval_sval ge sp
+    (hsval_proj (expanse_condimm_int64u c (fsi_sreg_get hst r) n)) rs0 m0 =
+  Some (Val.of_optbool (Val.cmplu_bool (Mem.valid_pointer m) c v (Vlong n))).
+Proof.
+  intros.
+  assert (HMEM: Val.cmplu_bool (Mem.valid_pointer m) c v (Vlong n) =
+          Val.cmplu_bool (Mem.valid_pointer m0) c v (Vlong n)).
+  erewrite (cmplu_bool_valid_pointer_eq (Mem.valid_pointer m) (Mem.valid_pointer m0)); eauto.
+  unfold expanse_condimm_int64u, cond_int64u in *; destruct c;
+  intros; destruct (Int64.eq n Int64.zero) eqn:EQIMM; simpl;
+  unfold loadimm64, sltuimm64, opimm64, load_hilo64;
+  try erewrite !fsi_sreg_get_correct; eauto;
+  try rewrite OKv1;
+  unfold Val.cmplu, zero64.
+  (* Simplify make immediate and decompose subcases *)
+  all:
+    try (specialize make_immed64_sound with n;
+    destruct (make_immed64 n) eqn:EQMKI);
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+    try erewrite fSop_correct; eauto; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1;
+    try rewrite OK2;
+    rewrite HMEM.
+  (* Ceq, Cne, Clt = itself *)
+  all: intros; try apply Int64.same_if_eq in EQIMM; subst; trivial.
+  (* Cle = xor (Clt) *)
+  all: try apply xor_neg_ltle_cmplu; trivial.
+  (* Others subcases with swap/negation *)
+  all:
+    unfold Val.cmplu, eval_may_undef, zero64, Val.addl;
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l in *; trivial;
+    try rewrite Int64.add_zero_l;
+    try (rewrite <- xor_neg_ltle_cmplu; unfold Val.cmplu;
+    trivial; fail);
+    try (replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite Val.swap_cmplu_bool; trivial; fail);
+    try (replace (Clt) with (negate_comparison Cge) by auto;
+    rewrite Val.negate_cmplu_bool; rewrite xor_neg_optb; trivial; fail);
+    try rewrite optbool_mktotal; trivial.
+  all:
+    try destruct v; simpl; auto;
+    try destruct (Archi.ptr64); simpl;
+    try rewrite EQIMM;
+    try rewrite HMEM; trivial;
+    try destruct (Int64.ltu _ _);
+    try rewrite <- xor_neg_ltge_cmplu; unfold Val.cmplu;
+    try rewrite <- optbool_mktotal; trivial.
+Qed.
+
+Lemma simplify_ccompf_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp false cond_float c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpf_bool c v v0)).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpf.
+  - apply xor_neg_eqne_cmpf.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite swap_cmpf_bool; trivial.
+  - replace (Cle) with (swap_comparison Cge) by auto;
+    rewrite swap_cmpf_bool; trivial.
+Qed.
+
+Lemma simplify_cnotcompf_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp true cond_float c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (option_map negb (Val.cmpf_bool c v v0))).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpf.
+  1,3,4: apply xor_neg_optb'.
+  all: destruct v, v0; simpl; trivial.
+  rewrite Float.cmp_ne_eq; rewrite negb_involutive; trivial.
+  1: replace (Clt) with (swap_comparison Cgt) by auto; rewrite <- Float.cmp_swap; simpl.
+  2: replace (Cle) with (swap_comparison Cge) by auto; rewrite <- Float.cmp_swap; simpl.
+  all: destruct (Float.cmp _ _ _); trivial.
+Qed.
+
+Lemma simplify_ccompfs_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp false cond_single c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (Val.cmpfs_bool c v v0)).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpfs.
+  - apply xor_neg_eqne_cmpfs.
+  - replace (Clt) with (swap_comparison Cgt) by auto;
+    rewrite swap_cmpfs_bool; trivial.
+  - replace (Cle) with (swap_comparison Cge) by auto;
+    rewrite swap_cmpfs_bool; trivial.
+Qed.
+
+Lemma simplify_cnotcompfs_correct ge sp hst st c r r0 rs0 m0 v v0: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (OKv1 : seval_sval ge sp (si_sreg st r) rs0 m0 = Some v)
+  (OKv2 : seval_sval ge sp (si_sreg st r0) rs0 m0 = Some v0),
+  seval_sval ge sp
+    (hsval_proj
+       (expanse_cond_fp true cond_single c
+          (make_lhsv_cmp (is_inv_cmp_float c) (fsi_sreg_get hst r)
+             (fsi_sreg_get hst r0)))) rs0 m0 =
+  Some (Val.of_optbool (option_map negb (Val.cmpfs_bool c v v0))).
+Proof.
+  intros.
+  unfold expanse_cond_fp in *; destruct c; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  rewrite OKv1, OKv2; trivial;
+  unfold Val.cmpfs.
+  1,3,4: apply xor_neg_optb'.
+  all: destruct v, v0; simpl; trivial.
+  rewrite Float32.cmp_ne_eq; rewrite negb_involutive; trivial.
+  1: replace (Clt) with (swap_comparison Cgt) by auto; rewrite <- Float32.cmp_swap; simpl.
+  2: replace (Cle) with (swap_comparison Cge) by auto; rewrite <- Float32.cmp_swap; simpl.
+  all: destruct (Float32.cmp _ _ _); trivial.
+Qed.
+
+Lemma simplify_floatconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil =>
+        Some
+          (fSop Ofloat_of_bits
+             (make_lhsv_single (loadimm64 (Float.to_bits n))))
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Ofloatconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm64, load_hilo64; simpl;
+  specialize make_immed64_sound with (Float.to_bits n);
+  destruct (make_immed64 (Float.to_bits n)) eqn:EQMKI; intros;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+  simpl.
+  - try rewrite Int64.add_commut, Int64.add_zero_l; inv H;
+    try rewrite Float.of_to_bits; trivial.
+  - apply Int64.same_if_eq in EQLO; subst.
+    try rewrite Int64.add_commut, Int64.add_zero_l in H.
+    rewrite <- H; try rewrite Float.of_to_bits; trivial.
+  - rewrite <- H; try rewrite Float.of_to_bits; trivial.
+  - rewrite <- H; try rewrite Float.of_to_bits; trivial.
+Qed.
+
+Lemma simplify_singleconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil =>
+        Some
+          (fSop Osingle_of_bits
+             (make_lhsv_single (loadimm32 (Float32.to_bits n))))
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Osingleconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm32, load_hilo32; simpl;
+  specialize make_immed32_sound with (Float32.to_bits n);
+  destruct (make_immed32 (Float32.to_bits n)) eqn:EQMKI; intros;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO;
+  simpl.
+  { try rewrite Int.add_commut, Int.add_zero_l; inv H;
+    try rewrite Float32.of_to_bits; trivial. }
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l in H; simpl;
+    rewrite ltu_12_wordsize; simpl; try rewrite <- H;
+    try rewrite Float32.of_to_bits; trivial.
+Qed.
+
+Lemma simplify_addimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (addimm32 (fsi_sreg_get hst a1) n None)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oaddimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold addimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.add (Vint imm) v); rewrite Val.add_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_addlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (addimm64 (fsi_sreg_get hst a1) n None)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oaddlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold addimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.addl (Vlong imm) v); rewrite Val.addl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_andimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (andimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oandimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold andimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.and (Vint imm) v); rewrite Val.and_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_andlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (andimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oandlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold andimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.andl (Vlong imm) v); rewrite Val.andl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_orimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (orimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oorimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold orimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.or (Vint imm) v); rewrite Val.or_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_orlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (orimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oorlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold orimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.orl (Vlong imm) v); rewrite Val.orl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_xorimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (xorimm32 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oxorimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold xorimm32, opimm32, load_hilo32, make_lhsv_cmp; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.xor (Vint imm) v); rewrite Val.xor_commut; trivial.
+  all:
+    try apply Int.same_if_eq in EQLO; subst;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_xorlimm_correct ge sp rs0 m0 lr n hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+    | nil => None
+    | a1 :: nil => Some (xorimm64 (fsi_sreg_get hst a1) n)
+    | a1 :: _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oxorlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold xorimm64, opimm64, load_hilo64, make_lhsv_cmp; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  fold (Val.xorl (Vlong imm) v); rewrite Val.xorl_commut; trivial.
+  all:
+    try apply Int64.same_if_eq in EQLO; subst;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite ltu_12_wordsize; trivial.
+Qed.
+
+Lemma simplify_intconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil => Some (loadimm32 n)
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Ointconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm32, load_hilo32, make_lhsv_single; simpl;
+  specialize make_immed32_sound with (n);
+  destruct (make_immed32 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int.eq lo Int.zero) eqn:EQLO; simpl;
+  try apply Int.same_if_eq in EQLO; subst;
+  try rewrite Int.add_commut, Int.add_zero_l;
+  try rewrite ltu_12_wordsize; try rewrite H; trivial.
+Qed.
+
+Lemma simplify_longconst_correct ge sp rs0 m0 args m n fsv lr st: forall
+  (H : match lr with
+    | nil => Some (loadimm64 n)
+    | _ :: _ => None
+    end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Olongconst n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  unfold loadimm64, load_hilo64, make_lhsv_single; simpl;
+  specialize make_immed64_sound with (n);
+  destruct (make_immed64 (n)) eqn:EQMKI; intros; simpl;
+  try destruct (Int64.eq lo Int64.zero) eqn:EQLO; simpl;
+  try apply Int64.same_if_eq in EQLO; subst;
+  try rewrite Int64.add_commut, Int64.add_zero_l;
+  try rewrite Int64.add_commut;
+  try rewrite ltu_12_wordsize; try rewrite H; trivial.
+Qed.
+
+Lemma simplify_cast8signed_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           Some
+             (fSop (Oshrimm (Int.repr 24))
+                (make_lhsv_single
+                   (fSop (Oshlimm (Int.repr 24))
+                      (make_lhsv_single (fsi_sreg_get hst a1)))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast8signed args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shr, Val.shl, Val.sign_ext;
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
+  rewrite A. rewrite Int.sign_ext_shr_shl; simpl; trivial. cbn; lia.
+Qed.
+
+Lemma simplify_cast16signed_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           Some
+             (fSop (Oshrimm (Int.repr 16))
+                (make_lhsv_single
+                   (fSop (Oshlimm (Int.repr 16))
+                      (make_lhsv_single (fsi_sreg_get hst a1)))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast16signed args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shr, Val.shl, Val.sign_ext;
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
+  rewrite A. rewrite Int.sign_ext_shr_shl; simpl; trivial. cbn; lia.
+Qed.
+
+Lemma simplify_shrximm_correct ge sp rs0 m0 lr hst fsv st args m n: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           if Int.eq n Int.zero
+           then
+            Some
+              (fSop (OEmayundef (MUshrx n))
+                 (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                    (fsi_sreg_get hst a1)))
+           else
+            if Int.eq n Int.one
+            then
+             Some
+               (fSop (OEmayundef (MUshrx n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.repr 31))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))
+                     (fSop (Oshrimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.repr 31))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))))
+            else
+             Some
+               (fSop (OEmayundef (MUshrx n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrimm n)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.sub Int.iwordsize n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrimm (Int.repr 31))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))
+                     (fSop (Oshrimm n)
+                        (make_lhsv_single
+                           (fSop Oadd
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshruimm (Int.sub Int.iwordsize n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrimm (Int.repr 31))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oshrximm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence).
+  assert (A: Int.ltu Int.zero (Int.repr 31) = true) by auto.
+  assert (B: Int.ltu (Int.repr 31) Int.iwordsize = true) by auto.
+  assert (C: Int.ltu Int.one Int.iwordsize = true) by auto.
+  destruct (Int.eq n Int.zero) eqn:EQ0;
+  destruct (Int.eq n Int.one) eqn:EQ1.
+  { apply Int.same_if_eq in EQ0.
+    apply Int.same_if_eq in EQ1; subst. discriminate. }
+  all:
+    simpl in OK1; inv OK1; inv H; simpl;
+    erewrite !fsi_sreg_get_correct; eauto;
+    destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1;
+    destruct (Val.shrx v (Vint n)) eqn:TOTAL; cbn;
+    unfold eval_may_undef.
+  2,4,6:
+    unfold Val.shrx in TOTAL;
+    destruct v; simpl in TOTAL; simpl; try congruence;
+    try rewrite B; simpl; try rewrite C; simpl;
+    try destruct (Val.shr _ _);
+    destruct (Int.ltu n (Int.repr 31)); try congruence.
+  - destruct v; simpl in TOTAL; try congruence;
+    apply Int.same_if_eq in EQ0; subst;
+    rewrite A, Int.shrx_zero in TOTAL;
+    [auto | cbn; lia].
+  - apply Int.same_if_eq in EQ1; subst;
+    unfold Val.shr, Val.shru, Val.shrx, Val.add; simpl;
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B, C.
+    rewrite Int.shrx1_shr in TOTAL; auto.
+  - exploit Val.shrx_shr_2; eauto. rewrite EQ0.
+    intros; subst.
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B in *.
+    destruct Int.ltu eqn:EQN0 in TOTAL; try discriminate.
+    simpl in *.
+    destruct Int.ltu eqn:EQN1 in TOTAL; try discriminate.
+    replace Int.iwordsize with (Int.repr 32) in * by auto.
+    rewrite !EQN1. simpl in *.
+    destruct Int.ltu eqn:EQN2 in TOTAL; try discriminate.
+    rewrite !EQN2. rewrite EQN0.
+    reflexivity.
+Qed.
+
+Lemma simplify_shrxlimm_correct ge sp rs0 m0 lr hst fsv st args m n: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+       | nil => None
+       | a1 :: nil =>
+           if Int.eq n Int.zero
+           then
+            Some
+              (fSop (OEmayundef (MUshrxl n))
+                 (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                    (fsi_sreg_get hst a1)))
+           else
+            if Int.eq n Int.one
+            then
+             Some
+               (fSop (OEmayundef (MUshrxl n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrlimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.repr 63))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))
+                     (fSop (Oshrlimm Int.one)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.repr 63))
+                                    (make_lhsv_single (fsi_sreg_get hst a1)))))))))
+            else
+             Some
+               (fSop (OEmayundef (MUshrxl n))
+                  (make_lhsv_cmp false
+                     (fSop (Oshrlimm n)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.sub Int64.iwordsize' n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrlimm (Int.repr 63))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))
+                     (fSop (Oshrlimm n)
+                        (make_lhsv_single
+                           (fSop Oaddl
+                              (make_lhsv_cmp false (fsi_sreg_get hst a1)
+                                 (fSop (Oshrluimm (Int.sub Int64.iwordsize' n))
+                                    (make_lhsv_single
+                                       (fSop (Oshrlimm (Int.repr 63))
+                                          (make_lhsv_single
+                                             (fsi_sreg_get hst a1)))))))))))
+       | a1 :: _ :: _ => None
+       end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp (Oshrxlimm n) args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence).
+  assert (A: Int.ltu Int.zero (Int.repr 63) = true) by auto.
+  assert (B: Int.ltu (Int.repr 63) Int64.iwordsize' = true) by auto.
+  assert (C: Int.ltu Int.one Int64.iwordsize' = true) by auto.
+  destruct (Int.eq n Int.zero) eqn:EQ0;
+  destruct (Int.eq n Int.one) eqn:EQ1.
+  { apply Int.same_if_eq in EQ0.
+    apply Int.same_if_eq in EQ1; subst. discriminate. }
+  all:
+    simpl in OK1; inv OK1; inv H; simpl;
+    erewrite !fsi_sreg_get_correct; eauto;
+    destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1;
+    destruct (Val.shrxl v (Vint n)) eqn:TOTAL; cbn;
+    unfold eval_may_undef.
+  2,4,6:
+    unfold Val.shrxl in TOTAL;
+    destruct v; simpl in TOTAL; simpl; try congruence;
+    try rewrite B; simpl; try rewrite C; simpl;
+    try destruct (Val.shrl _ _);
+    destruct (Int.ltu n (Int.repr 63)); try congruence.
+  - destruct v; simpl in TOTAL; try congruence;
+    apply Int.same_if_eq in EQ0; subst;
+    rewrite A, Int64.shrx'_zero in *.
+    assumption.
+  - apply Int.same_if_eq in EQ1; subst;
+    unfold Val.shrl, Val.shrlu, Val.shrxl, Val.addl; simpl;
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B, C.
+    rewrite Int64.shrx'1_shr' in TOTAL; auto.
+  - exploit Val.shrxl_shrl_2; eauto. rewrite EQ0.
+    intros; subst.
+    destruct v; simpl in *; try discriminate; trivial.
+    rewrite B in *.
+    destruct Int.ltu eqn:EQN0 in TOTAL; try discriminate.
+    simpl in *.
+    destruct Int.ltu eqn:EQN1 in TOTAL; try discriminate.
+    replace Int64.iwordsize' with (Int.repr 64) in * by auto.
+    rewrite !EQN1. simpl in *.
+    destruct Int.ltu eqn:EQN2 in TOTAL; try discriminate.
+    rewrite !EQN2. rewrite EQN0.
+    reflexivity.
+Qed.
+
+Lemma simplify_cast32unsigned_correct ge sp rs0 m0 lr hst fsv st args m: forall
+  (SREG: forall r: positive,
+          hsi_sreg_eval ge sp hst r rs0 m0 =
+          seval_sval ge sp (si_sreg st r) rs0 m0)
+  (H : match lr with
+      | nil => None
+      | a1 :: nil =>
+          Some
+            (fSop (Oshrluimm (Int.repr 32))
+               (make_lhsv_single
+                  (fSop (Oshllimm (Int.repr 32))
+                     (make_lhsv_single
+                        (fSop Ocast32signed
+                           (make_lhsv_single (fsi_sreg_get hst a1)))))))
+      | a1 :: _ :: _ => None
+      end = Some fsv)
+  (OK1 : seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args),
+  seval_sval ge sp (hsval_proj fsv) rs0 m0 =
+  eval_operation ge sp Ocast32unsigned args m.
+Proof.
+  intros.
+  repeat (destruct lr; simpl; try congruence);
+  simpl in OK1; inv OK1; inv H; simpl;
+  erewrite !fsi_sreg_get_correct; eauto;
+  destruct (seval_sval ge sp (si_sreg st p) rs0 m0) eqn:OKv1; try congruence; inv H1.
+  unfold Val.shrlu, Val.shll, Val.longofint, Val.longofintu.
+  destruct v; simpl; auto.
+  assert (A: Int.ltu (Int.repr 32) Int64.iwordsize' = true) by auto.
+  rewrite A. rewrite Int64.shru'_shl'; auto.
+  replace (Int.ltu (Int.repr 32) (Int.repr 32)) with (false) by auto.
+  rewrite cast32unsigned_from_cast32signed.
+  replace Int64.zwordsize with 64 by auto.
+  rewrite Int.unsigned_repr; cbn; try lia.
+  replace (Int.sub (Int.repr 32) (Int.repr 32)) with (Int.zero) by auto.
+  rewrite Int64.shru'_zero. reflexivity.
+Qed.
+
+(** * Main proof of simplification *)
+
+Lemma target_op_simplify_correct op lr hst fsv ge sp rs0 m0 st args m: forall
+   (H: target_op_simplify op lr hst = Some fsv)
+   (REF: hsilocal_refines ge sp rs0 m0 hst st)
+   (OK0: hsok_local ge sp rs0 m0 hst)
+   (OK1: seval_list_sval ge sp (list_sval_inj (map (si_sreg st) lr)) rs0 m0 = Some args)
+   (OK2: seval_smem ge sp (si_smem st) rs0 m0 = Some m),
+   seval_sval ge sp (hsval_proj fsv) rs0 m0 = eval_operation ge sp op args m.
+Proof.
+  unfold target_op_simplify; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ? ? ?.
+  destruct op; try congruence.
+  eapply simplify_intconst_correct; eauto.
+  eapply simplify_longconst_correct; eauto.
+  eapply simplify_floatconst_correct; eauto. 
+  eapply simplify_singleconst_correct; eauto. 
+  eapply simplify_cast8signed_correct; eauto.
+  eapply simplify_cast16signed_correct; eauto.
+  eapply simplify_addimm_correct; eauto.
+  eapply simplify_andimm_correct; eauto.
+  eapply simplify_orimm_correct; eauto.
+  eapply simplify_xorimm_correct; eauto.
+  eapply simplify_shrximm_correct; eauto.
+  eapply simplify_cast32unsigned_correct; eauto.
+  eapply simplify_addlimm_correct; eauto.
+  eapply simplify_andlimm_correct; eauto.
+  eapply simplify_orlimm_correct; eauto.
+  eapply simplify_xorlimm_correct; eauto.
+  eapply simplify_shrxlimm_correct; eauto.
+  (* Ocmp expansions *)
+  destruct cond; repeat (destruct lr; simpl; try congruence);
+  simpl in OK1;
+  try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+  try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+  inv H; inv OK1.
+  - eapply simplify_ccomp_correct; eauto.
+  - eapply simplify_ccompu_correct; eauto.
+  - eapply simplify_ccompimm_correct; eauto.
+  - eapply simplify_ccompuimm_correct; eauto.
+  - eapply simplify_ccompl_correct; eauto.
+  - eapply simplify_ccomplu_correct; eauto.
+  - eapply simplify_ccomplimm_correct; eauto.
+  - eapply simplify_ccompluimm_correct; eauto.
+  - eapply simplify_ccompf_correct; eauto.
+  - eapply simplify_cnotcompf_correct; eauto.
+  - eapply simplify_ccompfs_correct; eauto.
+  - eapply simplify_cnotcompfs_correct; eauto.
+Qed.
+
+Lemma target_cbranch_expanse_correct hst c l ge sp rs0 m0 st c' l': forall
+  (TARGET: target_cbranch_expanse hst c l = Some (c', l'))
+  (LREF : hsilocal_refines ge sp rs0 m0 hst st)
+  (OK: hsok_local ge sp rs0 m0 hst),
+  seval_condition ge sp c' (hsval_list_proj l') (si_smem st) rs0 m0 =
+  seval_condition ge sp c (list_sval_inj (map (si_sreg st) l)) (si_smem st) rs0 m0.
+Proof.
+  unfold target_cbranch_expanse, seval_condition; simpl.
+  intros H (LREF & SREF & SREG & SMEM) ?.
+  destruct c; try congruence;
+  repeat (destruct l; simpl in H; try congruence).
+  1,2,5,6:
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    try replace (Cle) with (swap_comparison Cge) by auto;
+    try replace (Clt) with (swap_comparison Cgt) by auto;
+    try rewrite Val.swap_cmp_bool; trivial;
+    try rewrite Val.swap_cmpu_bool; trivial;
+    try rewrite Val.swap_cmpl_bool; trivial;
+    try rewrite Val.swap_cmplu_bool; trivial.
+  1,2,3,4:
+    try destruct (Int.eq n Int.zero) eqn: EQIMM;
+    try apply Int.same_if_eq in EQIMM;
+    try destruct (Int64.eq n Int64.zero) eqn: EQIMM;
+    try apply Int64.same_if_eq in EQIMM;
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    unfold loadimm32, load_hilo32, Val.cmp, Val.cmpu, zero32;
+    unfold loadimm64, load_hilo64, Val.cmpl, Val.cmplu, zero64;
+    intros; try (specialize make_immed32_sound with (n);
+    destruct (make_immed32 n) eqn:EQMKI); intros; simpl;
+    intros; try (specialize make_immed64_sound with (n);
+    destruct (make_immed64 n) eqn:EQMKI); intros; simpl;
+    try rewrite EQLO; simpl;
+    try destruct (Int.eq lo Int.zero) eqn:EQLO;
+    try destruct (Int64.eq lo Int64.zero) eqn:EQLO;
+    try apply Int.same_if_eq in EQLO; simpl; trivial;
+    try apply Int64.same_if_eq in EQLO; simpl; trivial;
+    unfold eval_may_undef; 
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try rewrite OKv1; simpl; trivial;
+    try destruct v; try rewrite H;
+    try rewrite ltu_12_wordsize; try rewrite EQLO;
+    try rewrite Int.add_commut, Int.add_zero_l;
+    try rewrite Int64.add_commut, Int64.add_zero_l;
+    try rewrite Int64.add_commut;
+    try rewrite Int.add_zero_l; try rewrite Int64.add_zero_l;
+    auto; simpl;
+    try rewrite H in EQIMM;
+    try rewrite EQLO in EQIMM;
+    try rewrite Int.add_commut, Int.add_zero_l in EQIMM;
+    try rewrite Int64.add_commut, Int64.add_zero_l in EQIMM;
+    try rewrite EQIMM; simpl;
+    try destruct (Archi.ptr64); trivial.
+
+    1,2,3,4:
+    destruct c; inv H; simpl;
+    try erewrite !fsi_sreg_get_correct; eauto;
+    try (destruct (seval_smem ge sp (si_smem st) rs0 m0) eqn:OKmem; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r) rs0 m0) eqn:OKv1; try congruence);
+    try (destruct (seval_sval ge sp (si_sreg st r0) rs0 m0) eqn:OKv2; try congruence);
+    unfold zero32, zero64, Val.cmpf, Val.cmpfs;
+    destruct v, v0; simpl; trivial;
+    try rewrite Float.cmp_ne_eq;
+    try rewrite Float32.cmp_ne_eq;
+    try rewrite <- Float.cmp_swap; simpl;
+    try rewrite <- Float32.cmp_swap; simpl;
+    try destruct (Float.cmp _ _); simpl;
+    try destruct (Float32.cmp _ _); simpl;
+    try rewrite Int.eq_true; simpl;
+    try rewrite Int.eq_false; try apply Int.one_not_zero;
+    simpl; trivial.
+Qed.
+Global Opaque target_op_simplify.
+Global Opaque target_cbranch_expanse.
diff --git a/verilog/SelectLong.vp b/verilog/SelectLong.vp
index 4f9fb518..0ccc4725 100644
--- a/verilog/SelectLong.vp
+++ b/verilog/SelectLong.vp
@@ -3,11 +3,16 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*                 Xavier Leroy, INRIA Paris                           *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Instruction selection for 64-bit integer operations *)
@@ -56,101 +61,77 @@ Definition longofintu (e: expr) :=
   | None =>  Eop Ocast32unsigned (e ::: Enil)
   end.
 
-Nondetfunction notl (e: expr) :=
-  if Archi.splitlong then SplitLong.notl e else
-  match e with
-  | Eop (Olongconst n) Enil => longconst (Int64.not n)
-  | Eop Onotl (t1:::Enil) => t1
-  | _ => Eop Onotl (e:::Enil)
-  end.
+(** ** Integer addition and pointer addition *)
 
-Nondetfunction andlimm (n1: int64) (e2: expr) := 
-  if Int64.eq n1 Int64.zero then longconst Int64.zero else
-  if Int64.eq n1 Int64.mone then e2 else
-  match e2 with
-  | Eop (Olongconst n2) Enil =>
-      longconst (Int64.and n1 n2)
-  | Eop (Oandlimm n2) (t2:::Enil) =>
-      Eop (Oandlimm (Int64.and n1 n2)) (t2:::Enil)
-  | _ =>
-      Eop (Oandlimm n1) (e2:::Enil)
+Nondetfunction addlimm (n: int64) (e: expr) :=
+  if Int64.eq n Int64.zero then e else
+  match e with
+  | Eop (Olongconst m) Enil => longconst (Int64.add n m)
+  | Eop (Oaddrsymbol s m) Enil   => Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+  | Eop (Oaddrstack m) Enil      => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+  | Eop (Oaddlimm m) (t ::: Enil) => Eop (Oaddlimm(Int64.add n m)) (t ::: Enil)
+  | _ => Eop (Oaddlimm n) (e ::: Enil)
   end.
 
-Nondetfunction andl (e1: expr) (e2: expr) :=
-  if Archi.splitlong then SplitLong.andl e1 e2 else
+Nondetfunction addl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.addl e1 e2 else
   match e1, e2 with
-  | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2
-  | t1, Eop (Olongconst n2) Enil => andlimm n2 t1
-  | _, _ => Eop Oandl (e1:::e2:::Enil)
+  | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => addlimm n2 t1
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.add n1 n2) (Eop Oaddl (t1:::t2:::Enil))
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil =>
+      Eop Oaddl (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n1) n2)) Enil ::: t1 ::: Enil)
+  | Eop (Oaddrstack n1) Enil, Eop (Oaddlimm n2) (t2:::Enil) =>
+      Eop Oaddl (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int64 n2))) Enil ::: t2 ::: Enil)
+  | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+      addlimm n1 (Eop Oaddl (t1:::t2:::Enil))
+  | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm n2 (Eop Oaddl (t1:::t2:::Enil))
+  | _, _ => Eop Oaddl (e1:::e2:::Enil)
   end.
 
-Nondetfunction orlimm (n1: int64) (e2: expr) :=
-  if Int64.eq n1 Int64.zero then e2 else
-  if Int64.eq n1 Int64.mone then longconst Int64.mone else
-  match e2 with
-  | Eop (Olongconst n2) Enil => longconst (Int64.or n1 n2)
-  | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil)
-  | _ => Eop (Oorlimm n1) (e2:::Enil)
-  end.
+(** ** Integer and pointer subtraction *)
 
-Nondetfunction orl (e1: expr) (e2: expr) :=
-  if Archi.splitlong then SplitLong.orl e1 e2 else
+Nondetfunction subl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.subl e1 e2 else
   match e1, e2 with
-  | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2
-  | t1, Eop (Olongconst n2) Enil => orlimm n2 t1
-  | Eop (Oshllimm n1) (t1:::Enil), Eop (Oshrluimm n2) (t2:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int64.iwordsize' && same_expr_pure t1 t2
-      then Eop (Ororlimm n2) (t1:::Enil)
-      else Eop Oorl (e1:::e2:::Enil)
-  | Eop (Oshrluimm n2) (t2:::Enil), Eop (Oshllimm n1) (t1:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int64.iwordsize' && same_expr_pure t1 t2
-      then Eop (Ororlimm n2) (t1:::Enil)
-      else Eop Oorl (e1:::e2:::Enil)
-  | _, _ =>
-      Eop Oorl (e1:::e2:::Enil)
+  | t1, Eop (Olongconst n2) Enil =>
+      addlimm (Int64.neg n2) t1
+  | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.sub n1 n2) (Eop Osubl (t1:::t2:::Enil))
+  | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+      addlimm n1 (Eop Osubl (t1:::t2:::Enil))
+  | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+      addlimm (Int64.neg n2) (Eop Osubl (t1:::t2:::Enil))
+  | _, _ => Eop Osubl (e1:::e2:::Enil)
   end.
 
-Nondetfunction xorlimm (n1: int64) (e2: expr) :=
-  if Int64.eq n1 Int64.zero then e2 else
-  if Int64.eq n1 Int64.mone then notl e2 else
-  match e2 with
-  | Eop (Olongconst n2) Enil => longconst (Int64.xor n1 n2)
-  | Eop (Oxorlimm n2) (t2:::Enil) => Eop (Oxorlimm (Int64.xor n1 n2)) (t2:::Enil)
-  | Eop Onotl (t2:::Enil) => Eop (Oxorlimm (Int64.not n1)) (t2:::Enil)
-  | _ => Eop (Oxorlimm n1) (e2:::Enil)
+Definition negl (e: expr) :=
+  if Archi.splitlong then SplitLong.negl e else
+  match is_longconst e with
+  | Some n => longconst (Int64.neg n)
+  | None =>  Eop Onegl (e ::: Enil)
   end.
 
-Nondetfunction xorl (e1: expr) (e2: expr) :=
-  if Archi.splitlong then SplitLong.xorl e1 e2 else
-  match e1, e2 with
-  | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2
-  | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1
-  | _, _ => Eop Oxorl (e1:::e2:::Enil)
-  end.
+(** ** Immediate shifts *)
 
 Nondetfunction shllimm (e1: expr) (n: int) :=
   if Archi.splitlong then SplitLong.shllimm e1 n else
-  if Int.eq n Int.zero then e1 else
-  if negb (Int.ltu n Int64.iwordsize') then
-    Eop Oshll (e1:::Eop (Ointconst n) Enil:::Enil)
-  else
-    match e1 with
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int64.iwordsize') then
+    Eop Oshll (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
     | Eop (Olongconst n1) Enil =>
-        Eop (Olongconst(Int64.shl' n1 n)) Enil
+        longconst (Int64.shl' n1 n)
     | Eop (Oshllimm n1) (t1:::Enil) =>
         if Int.ltu (Int.add n n1) Int64.iwordsize'
         then Eop (Oshllimm (Int.add n n1)) (t1:::Enil)
         else Eop (Oshllimm n) (e1:::Enil)
-    | Eop (Oleal (Aindexed n1)) (t1:::Enil) =>
-        if shift_is_scale n
-        then Eop (Oleal (Ascaled (Int64.unsigned (Int64.shl' Int64.one n))
-                                 (Int64.unsigned (Int64.shl' (Int64.repr n1) n)))) (t1:::Enil)
-        else Eop (Oshllimm n) (e1:::Enil)
     | _ =>
-        if shift_is_scale n
-        then Eop (Oleal (Ascaled (Int64.unsigned (Int64.shl' Int64.one n)) 0)) (e1:::Enil)
-        else Eop (Oshllimm n) (e1:::Enil)
-    end.
+        Eop (Oshllimm n) (e1:::Enil)
+  end.
 
 Nondetfunction shrluimm (e1: expr) (n: int) :=
   if Archi.splitlong then SplitLong.shrluimm e1 n else
@@ -160,7 +141,7 @@ Nondetfunction shrluimm (e1: expr) (n: int) :=
   else
     match e1 with
     | Eop (Olongconst n1) Enil =>
-        Eop (Olongconst(Int64.shru' n1 n)) Enil
+        longconst (Int64.shru' n1 n)
     | Eop (Oshrluimm n1) (t1:::Enil) =>
         if Int.ltu (Int.add n n1) Int64.iwordsize'
         then Eop (Oshrluimm (Int.add n n1)) (t1:::Enil)
@@ -177,7 +158,7 @@ Nondetfunction shrlimm (e1: expr) (n: int) :=
   else
   match e1 with
   | Eop (Olongconst n1) Enil =>
-      Eop (Olongconst(Int64.shr' n1 n)) Enil
+      longconst (Int64.shr' n1 n)
   | Eop (Oshrlimm n1) (t1:::Enil) =>
       if Int.ltu (Int.add n n1) Int64.iwordsize'
       then Eop (Oshrlimm (Int.add n n1)) (t1:::Enil)
@@ -186,6 +167,8 @@ Nondetfunction shrlimm (e1: expr) (n: int) :=
       Eop (Oshrlimm n) (e1:::Enil)
   end.
 
+(** ** General shifts *)
+
 Definition shll (e1: expr) (e2: expr) :=
   if Archi.splitlong then SplitLong.shll e1 e2 else
   match is_intconst e2 with
@@ -207,57 +190,7 @@ Definition shrlu (e1: expr) (e2: expr) :=
   | _ => Eop Oshrlu (e1:::e2:::Enil)
   end.
 
-Nondetfunction addlimm (n: int64) (e: expr) :=
-  if Int64.eq n Int64.zero then e else
-  match e with
-  | Eop (Olongconst m) Enil => longconst (Int64.add n m)
-  | Eop (Oleal addr) args   => Eop (Oleal (offset_addressing_total addr (Int64.signed n))) args
-  | _                       => Eop (Oleal (Aindexed (Int64.signed n))) (e ::: Enil)
-  end.
-
-Nondetfunction addl (e1: expr) (e2: expr) :=
-  if Archi.splitlong then SplitLong.addl e1 e2 else
-  match e1, e2 with
-  | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2
-  | t1, Eop (Olongconst n2) Enil => addlimm n2 t1
-  | Eop (Oleal (Aindexed n1)) (t1:::Enil), Eop (Oleal (Aindexed n2)) (t2:::Enil) =>
-      Eop (Oleal (Aindexed2 (n1 + n2))) (t1:::t2:::Enil)
-  | Eop (Oleal (Aindexed n1)) (t1:::Enil), Eop (Oleal (Ascaled sc n2)) (t2:::Enil) =>
-      Eop (Oleal (Aindexed2scaled sc (n1 + n2))) (t1:::t2:::Enil)
-  | Eop (Oleal (Ascaled sc n1)) (t1:::Enil), Eop (Oleal (Aindexed n2)) (t2:::Enil) =>
-      Eop (Oleal (Aindexed2scaled sc (n1 + n2))) (t2:::t1:::Enil)
-  | Eop (Oleal (Ascaled sc n)) (t1:::Enil), t2 =>
-      Eop (Oleal (Aindexed2scaled sc n)) (t2:::t1:::Enil)
-  | t1, Eop (Oleal (Ascaled sc n)) (t2:::Enil) =>
-      Eop (Oleal (Aindexed2scaled sc n)) (t1:::t2:::Enil)
-  | Eop (Oleal (Aindexed n)) (t1:::Enil), t2 =>
-      Eop (Oleal (Aindexed2 n)) (t1:::t2:::Enil)
-  | t1, Eop (Oleal (Aindexed n)) (t2:::Enil) =>
-      Eop (Oleal (Aindexed2 n)) (t1:::t2:::Enil)
-  | _, _ =>
-      Eop (Oleal (Aindexed2 0)) (e1:::e2:::Enil)
-  end.
-
-Definition negl (e: expr) :=
-  if Archi.splitlong then SplitLong.negl e else
-  match is_longconst e with
-  | Some n => longconst (Int64.neg n)
-  | None =>  Eop Onegl (e ::: Enil)
-  end.
-
-Nondetfunction subl (e1: expr) (e2: expr) :=
-  if Archi.splitlong then SplitLong.subl e1 e2 else
-  match e1, e2 with
-  | t1, Eop (Olongconst n2) Enil => addlimm (Int64.neg n2) t1
-  | Eop (Oleal (Aindexed n1)) (t1:::Enil), Eop (Oleal (Aindexed n2)) (t2:::Enil) =>
-      addlimm (Int64.repr (n1 - n2)) (Eop Osubl (t1:::t2:::Enil))
-  | Eop (Oleal (Aindexed n1)) (t1:::Enil), t2 =>
-      addlimm (Int64.repr n1) (Eop Osubl (t1:::t2:::Enil))
-  | t1, Eop (Oleal (Aindexed n2)) (t2:::Enil) =>
-      addlimm (Int64.repr (- n2)) (Eop Osubl (t1:::t2:::Enil))
-  | _, _ =>
-      Eop Osubl (e1:::e2:::Enil)
-  end.
+(** ** Integer multiply *)
 
 Definition mullimm_base (n1: int64) (e2: expr) :=
   match Int64.one_bits' n1 with
@@ -266,7 +199,7 @@ Definition mullimm_base (n1: int64) (e2: expr) :=
   | i :: j :: nil =>
       Elet e2 (addl (shllimm (Eletvar 0) i) (shllimm (Eletvar 0) j))
   | _ =>
-      Eop (Omullimm n1) (e2:::Enil)
+      Eop Omull (e2 ::: longconst n1 ::: Enil)
   end.
 
 Nondetfunction mullimm (n1: int64) (e2: expr) :=
@@ -275,7 +208,7 @@ Nondetfunction mullimm (n1: int64) (e2: expr) :=
   else if Int64.eq n1 Int64.one then e2
   else match e2 with
   | Eop (Olongconst n2) Enil => longconst (Int64.mul n1 n2)
-  | Eop (Oleal (Aindexed n2)) (t2:::Enil) => addlimm (Int64.mul n1 (Int64.repr n2)) (mullimm_base n1 t2)
+  | Eop (Oaddlimm n2) (t2:::Enil) => addlimm (Int64.mul n1 n2) (mullimm_base n1 t2)
   | _ => mullimm_base n1 e2
   end.
 
@@ -295,9 +228,69 @@ Definition mullhs (e1: expr) (n2: int64) :=
   if Archi.splitlong then SplitLong.mullhs e1 n2 else
   Eop Omullhs (e1 ::: longconst n2 ::: Enil).
 
-Definition shrxlimm (e: expr) (n: int) :=
-  if Archi.splitlong then SplitLong.shrxlimm e n else
-  if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil).
+(** ** Bitwise and, or, xor *)
+
+Nondetfunction andlimm (n1: int64) (e2: expr) := 
+  if Int64.eq n1 Int64.zero then longconst Int64.zero else
+  if Int64.eq n1 Int64.mone then e2 else
+  match e2 with
+  | Eop (Olongconst n2) Enil =>
+      longconst (Int64.and n1 n2)
+  | Eop (Oandlimm n2) (t2:::Enil) =>
+      Eop (Oandlimm (Int64.and n1 n2)) (t2:::Enil)
+  | _ =>
+      Eop (Oandlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction andl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.andl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => andlimm n2 t1
+  | _, _ => Eop Oandl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction orlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2 else
+  if Int64.eq n1 Int64.mone then longconst Int64.mone else
+  match e2 with
+  | Eop (Olongconst n2) Enil => longconst (Int64.or n1 n2)
+  | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil)
+  | _ => Eop (Oorlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction orl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.orl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => orlimm n2 t1
+  | _, _ => Eop Oorl (e1:::e2:::Enil)
+  end.
+
+Nondetfunction xorlimm (n1: int64) (e2: expr) :=
+  if Int64.eq n1 Int64.zero then e2 else
+  match e2 with
+  | Eop (Olongconst n2) Enil => longconst (Int64.xor n1 n2)
+  | Eop (Oxorlimm n2) (t2:::Enil) =>
+      let n := Int64.xor n1 n2 in
+      if Int64.eq n Int64.zero then t2 else Eop (Oxorlimm n) (t2:::Enil)
+  | _ => Eop (Oxorlimm n1) (e2:::Enil)
+  end.
+
+Nondetfunction xorl (e1: expr) (e2: expr) :=
+  if Archi.splitlong then SplitLong.xorl e1 e2 else
+  match e1, e2 with
+  | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2
+  | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1
+  | _, _ => Eop Oxorl (e1:::e2:::Enil)
+  end.
+
+(** ** Integer logical negation *)
+
+Definition notl (e: expr) :=
+  if Archi.splitlong then SplitLong.notl e else xorlimm Int64.mone e.
+
+(** ** Integer division and modulus *)
 
 Definition divlu_base (e1: expr) (e2: expr) :=
   if Archi.splitlong then SplitLong.divlu_base e1 e2 else Eop Odivlu (e1:::e2:::Enil).
@@ -308,6 +301,12 @@ Definition divls_base (e1: expr) (e2: expr) :=
 Definition modls_base (e1: expr) (e2: expr) :=
   if Archi.splitlong then SplitLong.modls_base e1 e2 else Eop Omodl (e1:::e2:::Enil).
 
+Definition shrxlimm (e: expr) (n: int) :=
+  if Archi.splitlong then SplitLong.shrxlimm e n else
+  if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil).
+
+(** ** Comparisons *)
+
 Definition cmplu (c: comparison) (e1 e2: expr) :=
   if Archi.splitlong then SplitLong.cmplu c e1 e2 else
   match is_longconst e1, is_longconst e2 with
@@ -328,20 +327,38 @@ Definition cmpl (c: comparison) (e1 e2: expr) :=
   | None, None => Eop (Ocmp (Ccompl c)) (e1:::e2:::Enil)
   end.
 
+(** ** Floating-point conversions *)
+
 Definition longoffloat (e: expr) :=
   if Archi.splitlong then SplitLong.longoffloat e else 
   Eop Olongoffloat (e:::Enil).
 
+Definition longuoffloat (e: expr) :=
+  if Archi.splitlong then SplitLong.longuoffloat e else 
+  Eop Olonguoffloat (e:::Enil).
+
 Definition floatoflong (e: expr) := 
   if Archi.splitlong then SplitLong.floatoflong e else 
   Eop Ofloatoflong (e:::Enil).
 
+Definition floatoflongu (e: expr) := 
+  if Archi.splitlong then SplitLong.floatoflongu e else 
+  Eop Ofloatoflongu (e:::Enil).
+
 Definition longofsingle (e: expr) := 
   if Archi.splitlong then SplitLong.longofsingle e else 
   Eop Olongofsingle (e:::Enil).
 
+Definition longuofsingle (e: expr) := 
+  if Archi.splitlong then SplitLong.longuofsingle e else 
+  Eop Olonguofsingle (e:::Enil).
+
 Definition singleoflong (e: expr) := 
   if Archi.splitlong then SplitLong.singleoflong e else 
   Eop Osingleoflong (e:::Enil).
 
+Definition singleoflongu (e: expr) := 
+  if Archi.splitlong then SplitLong.singleoflongu e else 
+  Eop Osingleoflongu (e:::Enil).
+
 End SELECT.
diff --git a/verilog/SelectLongproof.v b/verilog/SelectLongproof.v
index f008f39e..0fc578bf 100644
--- a/verilog/SelectLongproof.v
+++ b/verilog/SelectLongproof.v
@@ -3,11 +3,16 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*                 Xavier Leroy, INRIA Paris                           *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Correctness of instruction selection for 64-bit integer operations *)
@@ -104,102 +109,101 @@ Proof.
 - TrivialExists.
 Qed.
 
-Theorem eval_notl: unary_constructor_sound notl Val.notl.
-Proof.
-  unfold notl; destruct Archi.splitlong. apply SplitLongproof.eval_notl.
-  red; intros. destruct (notl_match a).
-- InvEval. econstructor; split. apply eval_longconst. auto.
-- InvEval. subst. exists v1; split; auto. destruct v1; simpl; auto. rewrite Int64.not_involutive; auto.
-- TrivialExists.
-Qed.
-
-Theorem eval_andlimm: forall n, unary_constructor_sound (andlimm n) (fun v => Val.andl v (Vlong n)).
-Proof.
-  unfold andlimm; intros; red; intros.
-  predSpec Int64.eq Int64.eq_spec n Int64.zero.
-  exists (Vlong Int64.zero); split. apply eval_longconst.
-  subst. destruct x; simpl; auto. rewrite Int64.and_zero; auto.
-  predSpec Int64.eq Int64.eq_spec n Int64.mone.
-  exists x; split. assumption.
-  subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto.
-  destruct (andlimm_match a); InvEval; subst.
-- econstructor; split. apply eval_longconst. simpl. rewrite Int64.and_commut; auto.
-- TrivialExists. simpl. rewrite Val.andl_assoc. rewrite Int64.and_commut; auto.
-- TrivialExists.
-Qed.
-
-Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Theorem eval_negl: unary_constructor_sound negl Val.negl.
 Proof.
-  unfold andl; destruct Archi.splitlong. apply SplitLongproof.eval_andl.
-  red; intros. destruct (andl_match a b).
-- InvEval. rewrite Val.andl_commut. apply eval_andlimm; auto.
-- InvEval. apply eval_andlimm; auto.
+  unfold negl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_negl; auto.
+  red; intros. destruct (is_longconst a) as [n|] eqn:C.
+- exploit is_longconst_sound; eauto. intros EQ; subst x.
+  econstructor; split. apply eval_longconst. auto.
 - TrivialExists.
 Qed.
 
-Theorem eval_orlimm: forall n, unary_constructor_sound (orlimm n) (fun v => Val.orl v (Vlong n)).
+Theorem eval_addlimm: forall n, unary_constructor_sound (addlimm n) (fun v => Val.addl v (Vlong n)).
 Proof.
-  unfold orlimm; intros; red; intros.
+  unfold addlimm; intros; red; intros.
   predSpec Int64.eq Int64.eq_spec n Int64.zero.
-  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.or_zero; auto.
-  predSpec Int64.eq Int64.eq_spec n Int64.mone.
-  econstructor; split. apply eval_longconst. subst. destruct x; simpl; auto. rewrite Int64.or_mone; auto.
-  destruct (orlimm_match a); InvEval; subst.
-- econstructor; split. apply eval_longconst. simpl. rewrite Int64.or_commut; auto.
-- TrivialExists. simpl. rewrite Val.orl_assoc. rewrite Int64.or_commut; auto.
+  subst. exists x; split; auto.
+  destruct x; simpl; rewrite ?Int64.add_zero, ?Ptrofs.add_zero; auto.
+  destruct Archi.ptr64; auto.
+  destruct (addlimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. rewrite Int64.add_commut; auto.
+- econstructor; split. EvalOp. simpl; eauto. 
+  unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. 
+  destruct Archi.ptr64; auto. rewrite Ptrofs.add_commut; auto. 
+- econstructor; split. EvalOp. simpl; eauto. 
+  destruct sp; simpl; auto. destruct Archi.ptr64; auto. 
+  rewrite Ptrofs.add_assoc, (Ptrofs.add_commut m0). auto. 
+- subst x. rewrite Val.addl_assoc. rewrite Int64.add_commut. TrivialExists.
 - TrivialExists.
 Qed.
 
-Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Theorem eval_addl: binary_constructor_sound addl Val.addl.
 Proof.
-  unfold orl; destruct Archi.splitlong. apply SplitLongproof.eval_orl.
-  red; intros.
-  assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop Oorl (a:::b:::Enil)) v /\ Val.lessdef (Val.orl x y) v) by TrivialExists.
-  assert (ROR: forall v n1 n2,
-    Int.add n1 n2 = Int64.iwordsize' ->
-    Val.lessdef (Val.orl (Val.shll v (Vint n1)) (Val.shrlu v (Vint n2)))
-                (Val.rorl v (Vint n2))).
-  { intros. destruct v; simpl; auto.
-    destruct (Int.ltu n1 Int64.iwordsize') eqn:N1; auto.
-    destruct (Int.ltu n2 Int64.iwordsize') eqn:N2; auto.
-    simpl. rewrite <- Int64.or_ror'; auto. }
-  destruct (orl_match a b).
-- InvEval. rewrite Val.orl_commut. apply eval_orlimm; auto.
-- InvEval. apply eval_orlimm; auto.
-- predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int64.iwordsize'; auto.
-  destruct (same_expr_pure t1 t2) eqn:?; auto.
-  InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
-  exists (Val.rorl v0 (Vint n2)); split. EvalOp. apply ROR; auto.
-- predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int64.iwordsize'; auto.
-  destruct (same_expr_pure t1 t2) eqn:?; auto.
-  InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
-  exists (Val.rorl v1 (Vint n2)); split. EvalOp. rewrite Val.orl_commut. apply ROR; auto.
-- apply DEFAULT.
-Qed.
+  unfold addl. destruct Archi.splitlong eqn:SL. 
+  apply SplitLongproof.eval_addl. apply Archi.splitlong_ptr32; auto.
+(*
+  assert (SF: Archi.ptr64 = true).
+  { Local Transparent Archi.splitlong. unfold Archi.splitlong in SL. 
+    destruct Archi.ptr64; simpl in *; congruence. }  
+*)
+(*
+  assert (B: forall id ofs n,
+             Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) =
+             Val.addl (Genv.symbol_address ge id ofs) (Vlong (Int64.repr n))).
+  { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int64 (Int64.repr n)) by auto with ptrofs.
+    apply Genv.shift_symbol_address_64; auto. }
 
-Theorem eval_xorlimm: forall n, unary_constructor_sound (xorlimm n) (fun v => Val.xorl v (Vlong n)).
-Proof.
-  unfold xorlimm; intros; red; intros.
-  predSpec Int64.eq Int64.eq_spec n Int64.zero.
-  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.xor_zero; auto.
-  predSpec Int64.eq Int64.eq_spec n Int64.mone.
-  replace (Val.xorl x (Vlong n)) with (Val.notl x). apply eval_notl; auto.
-  subst n. destruct x; simpl; auto.
-  destruct (xorlimm_match a); InvEval; subst.
-- econstructor; split. apply eval_longconst. simpl. rewrite Int64.xor_commut; auto.
-- TrivialExists. simpl. rewrite Val.xorl_assoc. rewrite Int64.xor_commut; auto.
-- TrivialExists. simpl. destruct v1; simpl; auto. unfold Int64.not.
-  rewrite Int64.xor_assoc. apply f_equal. apply f_equal. apply f_equal.
-  apply Int64.xor_commut.
-- TrivialExists.
+*)
+  red; intros until y.
+  case (addl_match a b); intros; InvEval.
+  - rewrite Val.addl_commut. apply eval_addlimm; auto.
+  - apply eval_addlimm; auto.
+  - subst.
+    replace (Val.addl (Val.addl v1 (Vlong n1)) (Val.addl v0 (Vlong n2)))
+       with (Val.addl (Val.addl v1 v0) (Val.addl (Vlong n1) (Vlong n2))).
+    apply eval_addlimm. EvalOp.
+    repeat rewrite Val.addl_assoc. decEq. apply Val.addl_permut.
+  - subst. econstructor; split.
+    EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+    rewrite Val.addl_commut. destruct sp; simpl; auto.
+    destruct v1; simpl; auto.
+    destruct Archi.ptr64 eqn:SF; auto. 
+    apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. 
+    rewrite (Ptrofs.add_commut (Ptrofs.of_int64 n1)), Ptrofs.add_assoc. f_equal. auto with ptrofs.
+    destruct Archi.ptr64 eqn:SF; auto.
+  - subst. econstructor; split.
+    EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+    destruct sp; simpl; auto.
+    destruct v1; simpl; auto.
+    destruct Archi.ptr64 eqn:SF; auto. 
+    apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. f_equal.
+    rewrite Ptrofs.add_commut. auto with ptrofs.
+    destruct Archi.ptr64 eqn:SF; auto.
+  - subst.
+    replace (Val.addl (Val.addl v1 (Vlong n1)) y)
+       with (Val.addl (Val.addl v1 y) (Vlong n1)).
+    apply eval_addlimm. EvalOp.
+    repeat rewrite Val.addl_assoc. decEq. apply Val.addl_commut.
+  - subst.
+    replace (Val.addl x (Val.addl v1 (Vlong n2)))
+       with (Val.addl (Val.addl x v1) (Vlong n2)).
+    apply eval_addlimm. EvalOp.
+    repeat rewrite Val.addl_assoc. reflexivity.
+  - TrivialExists.
 Qed.
 
-Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Theorem eval_subl: binary_constructor_sound subl Val.subl.
 Proof.
-  unfold xorl; destruct Archi.splitlong. apply SplitLongproof.eval_xorl.
-  red; intros. destruct (xorl_match a b).
-- InvEval. rewrite Val.xorl_commut. apply eval_xorlimm; auto.
-- InvEval. apply eval_xorlimm; auto.
+  unfold subl. destruct Archi.splitlong eqn:SL.
+  apply SplitLongproof.eval_subl. apply Archi.splitlong_ptr32; auto.
+  red; intros; destruct (subl_match a b); InvEval.
+- rewrite Val.subl_addl_opp. apply eval_addlimm; auto.
+- subst. rewrite Val.subl_addl_l. rewrite Val.subl_addl_r.
+  rewrite Val.addl_assoc. simpl. rewrite Int64.add_commut. rewrite <- Int64.sub_add_opp.
+  apply eval_addlimm; EvalOp.
+- subst. rewrite Val.subl_addl_l. apply eval_addlimm; EvalOp.
+- subst. rewrite Val.subl_addl_r.
+  apply eval_addlimm; EvalOp.
 - TrivialExists.
 Qed.
 
@@ -215,20 +219,13 @@ Proof.
   assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshllimm n) (a:::Enil)) v
                          /\  Val.lessdef (Val.shll x (Vint n)) v) by TrivialExists.
   destruct (shllimm_match a); InvEval.
-- TrivialExists. simpl; rewrite LT; auto.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
 - destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
   subst. econstructor; split. EvalOp. simpl; eauto.
   destruct v1; simpl; auto. rewrite LT'.
   destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto.
   simpl; rewrite LT. rewrite Int.add_commut, Int64.shl'_shl'; auto. rewrite Int.add_commut; auto.
-- destruct (shift_is_scale n); auto.
-  TrivialExists. simpl. destruct v1; simpl; auto.
-  rewrite LT. rewrite ! Int64.repr_unsigned. rewrite Int64.shl'_one_two_p.
-  rewrite ! Int64.shl'_mul_two_p.  rewrite Int64.mul_add_distr_l. auto.
-- destruct (shift_is_scale n); auto.
-  TrivialExists. simpl. destruct x; simpl; auto.
-  rewrite LT. rewrite ! Int64.repr_unsigned. rewrite Int64.shl'_one_two_p.
-  rewrite ! Int64.shl'_mul_two_p. rewrite Int64.add_zero. auto.
+- apply DEFAULT.  
 - TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
 Qed.
 
@@ -244,7 +241,7 @@ Proof.
   assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrluimm n) (a:::Enil)) v
                          /\  Val.lessdef (Val.shrlu x (Vint n)) v) by TrivialExists.
   destruct (shrluimm_match a); InvEval.
-- TrivialExists. simpl; rewrite LT; auto.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
 - destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
   subst. econstructor; split. EvalOp. simpl; eauto.
   destruct v1; simpl; auto. rewrite LT'.
@@ -266,7 +263,7 @@ Proof.
   assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrlimm n) (a:::Enil)) v
                          /\  Val.lessdef (Val.shrl x (Vint n)) v) by TrivialExists.
   destruct (shrlimm_match a); InvEval.
-- TrivialExists. simpl; rewrite LT; auto.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
 - destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
   subst. econstructor; split. EvalOp. simpl; eauto.
   destruct v1; simpl; auto. rewrite LT'.
@@ -300,82 +297,17 @@ Proof.
 - TrivialExists.
 Qed.
 
-Theorem eval_negl: unary_constructor_sound negl Val.negl.
-Proof.
-  unfold negl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_negl; auto.
-  red; intros. destruct (is_longconst a) as [n|] eqn:C.
-- exploit is_longconst_sound; eauto. intros EQ; subst x.
-  econstructor; split. apply eval_longconst. auto.
-- TrivialExists.
-Qed.
-
-Theorem eval_addlimm: forall n, unary_constructor_sound (addlimm n) (fun v => Val.addl v (Vlong n)).
-Proof.
-  unfold addlimm; intros; red; intros.
-  predSpec Int64.eq Int64.eq_spec n Int64.zero.
-  subst. exists x; split; auto.
-  destruct x; simpl; rewrite ?Int64.add_zero, ?Ptrofs.add_zero; auto.
-  destruct (addlimm_match a); InvEval.
-- econstructor; split. apply eval_longconst. rewrite Int64.add_commut; auto.
-- inv H. simpl in H6. TrivialExists. simpl.
-  erewrite eval_offset_addressing_total_64 by eauto. rewrite Int64.repr_signed; auto.
-- TrivialExists. simpl. rewrite Int64.repr_signed; auto.
-Qed.
-
-Theorem eval_addl: binary_constructor_sound addl Val.addl.
-Proof.
-  assert (A: forall x y, Int64.repr (x + y) = Int64.add (Int64.repr x) (Int64.repr y)).
-  { intros; apply Int64.eqm_samerepr; auto with ints. }
-  assert (B: forall id ofs n, Archi.ptr64 = true ->
-             Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) =
-             Val.addl (Genv.symbol_address ge id ofs) (Vlong (Int64.repr n))).
-  { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int64 (Int64.repr n)) by auto with ptrofs.
-    apply Genv.shift_symbol_address_64; auto. }
-  unfold addl. destruct Archi.splitlong eqn:SL.
-  apply SplitLongproof.eval_addl. apply Archi.splitlong_ptr32; auto.
-  red; intros; destruct (addl_match a b); InvEval.
-- rewrite Val.addl_commut. apply eval_addlimm; auto.
-- apply eval_addlimm; auto.
-- subst. TrivialExists. simpl. rewrite A, Val.addl_permut_4. auto.
-- subst. TrivialExists. simpl. rewrite A, Val.addl_assoc. decEq; decEq. rewrite Val.addl_permut. auto.
-- subst. TrivialExists. simpl. rewrite A, Val.addl_permut_4. rewrite <- Val.addl_permut. rewrite <- Val.addl_assoc. auto.
-- subst. TrivialExists. simpl. rewrite Val.addl_commut; auto.
-- subst. TrivialExists.
-- subst. TrivialExists. simpl. rewrite ! Val.addl_assoc. rewrite (Val.addl_commut y). auto.
-- subst. TrivialExists. simpl. rewrite ! Val.addl_assoc. auto.
-- TrivialExists. simpl.
-  unfold Val.addl. destruct Archi.ptr64, x, y; auto.
-  + rewrite Int64.add_zero; auto.
-  + rewrite Ptrofs.add_assoc, Ptrofs.add_zero. auto.
-  + rewrite Ptrofs.add_assoc, Ptrofs.add_zero. auto.
-  + rewrite Int64.add_zero; auto.
-Qed.
-
-Theorem eval_subl: binary_constructor_sound subl Val.subl.
-Proof.
-  unfold subl. destruct Archi.splitlong eqn:SL.
-  apply SplitLongproof.eval_subl. apply Archi.splitlong_ptr32; auto.
-  red; intros; destruct (subl_match a b); InvEval.
-- rewrite Val.subl_addl_opp. apply eval_addlimm; auto.
-- subst. rewrite Val.subl_addl_l. rewrite Val.subl_addl_r.
-  rewrite Val.addl_assoc. simpl. rewrite Int64.add_commut. rewrite <- Int64.sub_add_opp.
-  replace (Int64.repr (n1 - n2)) with (Int64.sub (Int64.repr n1) (Int64.repr n2)).
-  apply eval_addlimm; EvalOp.
-  apply Int64.eqm_samerepr; auto with ints.
-- subst. rewrite Val.subl_addl_l. apply eval_addlimm; EvalOp.
-- subst. rewrite Val.subl_addl_r.
-  replace (Int64.repr (-n2)) with (Int64.neg (Int64.repr n2)).
-  apply eval_addlimm; EvalOp.
-  apply Int64.eqm_samerepr; auto with ints.
-- TrivialExists.
-Qed.
-
 Theorem eval_mullimm_base: forall n, unary_constructor_sound (mullimm_base n) (fun v => Val.mull v (Vlong n)).
 Proof.
   intros; unfold mullimm_base. red; intros.
+  assert (DEFAULT: exists v,
+                eval_expr ge sp e m le (Eop Omull (a ::: longconst n ::: Enil)) v
+             /\ Val.lessdef (Val.mull x (Vlong n)) v).
+  { econstructor; split. EvalOp. constructor. eauto. constructor. apply eval_longconst. constructor. simpl; eauto.
+    auto. }
   generalize (Int64.one_bits'_decomp n); intros D.
   destruct (Int64.one_bits' n) as [ | i [ | j [ | ? ? ]]] eqn:B.
-- TrivialExists.
+- apply DEFAULT.
 - replace (Val.mull x (Vlong n)) with (Val.shll x (Vint i)).
   apply eval_shllimm; auto.
   simpl in D. rewrite D, Int64.add_zero. destruct x; simpl; auto.
@@ -393,14 +325,14 @@ Proof.
   rewrite (Int64.one_bits'_range n) in B2 by (rewrite B; auto with coqlib).
   inv B1; inv B2. simpl in B3; inv B3.
   rewrite Int64.mul_add_distr_r. rewrite <- ! Int64.shl'_mul. auto.
-- TrivialExists.
+- apply DEFAULT.
 Qed.
 
 Theorem eval_mullimm: forall n, unary_constructor_sound (mullimm n) (fun v => Val.mull v (Vlong n)).
 Proof.
   unfold mullimm. intros; red; intros.
   destruct Archi.splitlong eqn:SL.
-  eapply SplitLongproof.eval_mullimm; eauto.
+  eapply SplitLongproof.eval_mullimm; eauto.  
   predSpec Int64.eq Int64.eq_spec n Int64.zero.
   exists (Vlong Int64.zero); split. apply eval_longconst.
   destruct x; simpl; auto. subst n; rewrite Int64.mul_zero; auto.
@@ -410,11 +342,12 @@ Proof.
   destruct (mullimm_match a); InvEval.
 - econstructor; split. apply eval_longconst. rewrite Int64.mul_commut; auto.
 - exploit (eval_mullimm_base n); eauto. intros (v2 & A2 & B2).
-  exploit (eval_addlimm (Int64.mul n (Int64.repr n2))). eexact A2. intros (v3 & A3 & B3).
+  exploit (eval_addlimm (Int64.mul n n2)). eexact A2. intros (v3 & A3 & B3).
   exists v3; split; auto.
-  destruct v1; simpl; auto.
+  subst x. destruct v1; simpl; auto.
   simpl in B2; inv B2. simpl in B3; inv B3. rewrite Int64.mul_add_distr_l.
   rewrite (Int64.mul_commut n). auto.
+  destruct Archi.ptr64; simpl; auto.
 - apply eval_mullimm_base; auto.
 Qed.
 
@@ -427,39 +360,105 @@ Proof.
 - TrivialExists.
 Qed.
 
-Theorem eval_mullhu:
+Theorem eval_mullhu: 
   forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
 Proof.
   unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto.
   red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
 Qed.
 
-Theorem eval_mullhs:
+Theorem eval_mullhs: 
   forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
 Proof.
   unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto.
   red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
 Qed.
 
-Theorem eval_shrxlimm:
-  forall le a n x z,
-  eval_expr ge sp e m le a x ->
-  Val.shrxl x (Vint n) = Some z ->
-  exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v.
+Theorem eval_andlimm: forall n, unary_constructor_sound (andlimm n) (fun v => Val.andl v (Vlong n)).
 Proof.
-  unfold shrxlimm; intros. destruct Archi.splitlong eqn:SL.
-+ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32.
-+ predSpec Int.eq Int.eq_spec n Int.zero.
-- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
-  change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
+  unfold andlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists (Vlong Int64.zero); split. apply eval_longconst.
+  subst. destruct x; simpl; auto. rewrite Int64.and_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  exists x; split. assumption.
+  subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto.
+  destruct (andlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.and_commut; auto.
+- TrivialExists. simpl. rewrite Val.andl_assoc. rewrite Int64.and_commut; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Proof.
+  unfold andl; destruct Archi.splitlong. apply SplitLongproof.eval_andl.
+  red; intros. destruct (andl_match a b).
+- InvEval. rewrite Val.andl_commut. apply eval_andlimm; auto.
+- InvEval. apply eval_andlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_orlimm: forall n, unary_constructor_sound (orlimm n) (fun v => Val.orl v (Vlong n)).
+Proof.
+  unfold orlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.or_zero; auto.
+  predSpec Int64.eq Int64.eq_spec n Int64.mone.
+  econstructor; split. apply eval_longconst. subst. destruct x; simpl; auto. rewrite Int64.or_mone; auto.
+  destruct (orlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.or_commut; auto.
+- TrivialExists. simpl. rewrite Val.orl_assoc. rewrite Int64.or_commut; auto.
 - TrivialExists.
 Qed.
 
+Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Proof.
+  unfold orl; destruct Archi.splitlong. apply SplitLongproof.eval_orl.
+  red; intros.
+  destruct (orl_match a b).
+- InvEval. rewrite Val.orl_commut. apply eval_orlimm; auto.
+- InvEval. apply eval_orlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_xorlimm: forall n, unary_constructor_sound (xorlimm n) (fun v => Val.xorl v (Vlong n)).
+Proof.
+  unfold xorlimm; intros; red; intros.
+  predSpec Int64.eq Int64.eq_spec n Int64.zero.
+  exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.xor_zero; auto.
+  destruct (xorlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.xor_commut; auto.
+- rewrite Val.xorl_assoc. simpl. rewrite (Int64.xor_commut n2).
+  predSpec Int64.eq Int64.eq_spec (Int64.xor n n2) Int64.zero.
++ rewrite H. exists v1; split; auto. destruct v1; simpl; auto. rewrite Int64.xor_zero; auto. 
++ TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Proof.
+  unfold xorl; destruct Archi.splitlong. apply SplitLongproof.eval_xorl.
+  red; intros. destruct (xorl_match a b).
+- InvEval. rewrite Val.xorl_commut. apply eval_xorlimm; auto.
+- InvEval. apply eval_xorlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_notl: unary_constructor_sound notl Val.notl.
+Proof.
+  unfold notl; destruct Archi.splitlong. apply SplitLongproof.eval_notl.
+  red; intros. rewrite Val.notl_xorl. apply eval_xorlimm; auto.
+Qed.
+
 Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
 Proof.
   unfold divls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
@@ -467,6 +466,10 @@ Proof.
   unfold modls_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modls_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
@@ -474,6 +477,10 @@ Proof.
   unfold divlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_divlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
@@ -481,6 +488,27 @@ Proof.
   unfold modlu_base; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_modlu_base; eauto.
   TrivialExists.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
+Qed.
+
+Theorem eval_shrxlimm:
+  forall le a n x z,
+  eval_expr ge sp e m le a x ->
+  Val.shrxl x (Vint n) = Some z ->
+  exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v.
+Proof.
+  unfold shrxlimm; intros. destruct Archi.splitlong eqn:SL.
++ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32.
++ predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
+  change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
+- TrivialExists.
+  cbn.
+  rewrite H0.
+  reflexivity.
 Qed.
 
 Theorem eval_cmplu:
@@ -530,6 +558,15 @@ Proof.
   unfold longoffloat; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longoffloat; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
+Qed.
+
+Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
+Proof.
+  unfold longuoffloat; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_longuoffloat; eauto.
+  TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
@@ -537,6 +574,15 @@ Proof.
   unfold floatoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_floatoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
+Qed.
+
+Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
+Proof.
+  unfold floatoflongu; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_floatoflongu; eauto.
+  TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
@@ -544,6 +590,15 @@ Proof.
   unfold longofsingle; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_longofsingle; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
+Qed.
+
+Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
+Proof.
+  unfold longuofsingle; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_longuofsingle; eauto.
+  TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
@@ -551,6 +606,15 @@ Proof.
   unfold singleoflong; red; intros. destruct Archi.splitlong eqn:SL.
   eapply SplitLongproof.eval_singleoflong; eauto.
   TrivialExists.
+  cbn; rewrite H0; reflexivity.
+Qed.
+
+Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
+Proof.
+  unfold singleoflongu; red; intros. destruct Archi.splitlong eqn:SL.
+  eapply SplitLongproof.eval_singleoflongu; eauto.
+  TrivialExists.
+  cbn; rewrite H0; reflexivity.
 Qed.
 
 End CMCONSTR.
diff --git a/verilog/SelectOp.vp b/verilog/SelectOp.vp
index 2a09207b..9932aaf8 100644
--- a/verilog/SelectOp.vp
+++ b/verilog/SelectOp.vp
@@ -2,12 +2,17 @@
 (*                                                                     *)
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
-(*                 Xavier Leroy, INRIA Paris                           *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Instruction selection for operators *)
@@ -36,146 +41,98 @@
   module [Selection] implements the actual instruction selection pass.
 *)
 
+Require Archi.
 Require Import Coqlib.
 Require Import Compopts.
 Require Import AST Integers Floats Builtins.
 Require Import Op CminorSel.
-Require Import OpHelpers.
-Require Archi.
 
 Local Open Scope cminorsel_scope.
 
 (** ** Constants **)
 
-(** External oracle to determine whether a symbol should be addressed
-  through [Oindirectsymbol] or can be addressed via [Oleal Aglobal].
-  This is to accommodate MacOS X's limitations on references to data
-  symbols imported from shared libraries.  It can also help with PIC
-  code under ELF. *)
-
-Parameter symbol_is_external: ident -> bool.
-
-Definition Olea_ptr (a: addressing) := if Archi.ptr64 then Oleal a else Olea a.
-
 Definition addrsymbol (id: ident) (ofs: ptrofs) :=
-  if symbol_is_external id then
-    if Ptrofs.eq ofs Ptrofs.zero
-    then Eop (Oindirectsymbol id) Enil
-    else Eop (Olea_ptr (Aindexed (Ptrofs.unsigned ofs))) (Eop (Oindirectsymbol id) Enil ::: Enil)
-  else
-    Eop (Olea_ptr (Aglobal id ofs)) Enil.
+  Eop (Oaddrsymbol id ofs) Enil.
 
 Definition addrstack (ofs: ptrofs) :=
-  Eop (Olea_ptr (Ainstack ofs)) Enil.
-
-(** ** Integer logical negation *)
-
-Nondetfunction notint (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.not n)) Enil
-  | Eop (Oxorimm n) (e1 ::: Enil) => Eop (Oxorimm (Int.not n)) (e1 ::: Enil)
-  | _ => Eop Onot (e ::: Enil)
-  end.
+  Eop (Oaddrstack ofs) Enil.
 
 (** ** Integer addition and pointer addition *)
 
 Nondetfunction addimm (n: int) (e: expr) :=
   if Int.eq n Int.zero then e else
   match e with
-  | Eop (Ointconst m) Enil => Eop (Ointconst(Int.add n m)) Enil
-  | Eop (Olea addr) args   => Eop (Olea (offset_addressing_total addr (Int.signed n))) args
-  | _                      => Eop (Olea (Aindexed (Int.signed n))) (e ::: Enil)
+  | Eop (Ointconst m) Enil => Eop (Ointconst (Int.add n m)) Enil
+  | Eop (Oaddrsymbol s m) Enil   => Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int n) m)) Enil
+  | Eop (Oaddrstack m) Enil      => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n) m)) Enil
+  | Eop (Oaddimm m) (t ::: Enil) => Eop (Oaddimm(Int.add n m)) (t ::: Enil)
+  | _ => Eop (Oaddimm n) (e ::: Enil)
   end.
 
 Nondetfunction add (e1: expr) (e2: expr) :=
   match e1, e2 with
   | Eop (Ointconst n1) Enil, t2 => addimm n1 t2
   | t1, Eop (Ointconst n2) Enil => addimm n2 t1
-  | Eop (Olea (Aindexed n1)) (t1:::Enil), Eop (Olea (Aindexed n2)) (t2:::Enil) =>
-      Eop (Olea (Aindexed2 (n1 + n2))) (t1:::t2:::Enil)
-  | Eop (Olea (Aindexed n1)) (t1:::Enil), Eop (Olea (Ascaled sc n2)) (t2:::Enil) =>
-      Eop (Olea (Aindexed2scaled sc (n1 + n2))) (t1:::t2:::Enil)
-  | Eop (Olea (Ascaled sc n1)) (t1:::Enil), Eop (Olea (Aindexed n2)) (t2:::Enil) =>
-      Eop (Olea (Aindexed2scaled sc (n1 + n2))) (t2:::t1:::Enil)
-  | Eop (Olea (Aindexed n1)) (t1:::Enil), Eop (Olea (Aglobal id ofs)) Enil =>
-      Eop (Olea (Abased id (Ptrofs.add ofs (Ptrofs.repr n1)))) (t1:::Enil)
-  | Eop (Olea (Aglobal id ofs)) Enil, Eop (Olea (Aindexed n2)) (t2:::Enil) =>
-      Eop (Olea (Abased id (Ptrofs.add ofs (Ptrofs.repr n2)))) (t2:::Enil)
-  | Eop (Olea (Ascaled sc n1)) (t1:::Enil), Eop (Olea (Aglobal id ofs)) Enil =>
-      Eop (Olea (Abasedscaled sc id (Ptrofs.add ofs (Ptrofs.repr n1)))) (t1:::Enil)
-  | Eop (Olea (Aglobal id ofs)) Enil, Eop (Olea (Ascaled sc n2)) (t2:::Enil) =>
-      Eop (Olea (Abasedscaled sc id (Ptrofs.add ofs (Ptrofs.repr n2)))) (t2:::Enil)
-  | Eop (Olea (Ascaled sc n)) (t1:::Enil), t2 =>
-      Eop (Olea (Aindexed2scaled sc n)) (t2:::t1:::Enil)
-  | t1, Eop (Olea (Ascaled sc n)) (t2:::Enil) =>
-      Eop (Olea (Aindexed2scaled sc n)) (t1:::t2:::Enil)
-  | Eop (Olea (Aindexed n)) (t1:::Enil), t2 =>
-      Eop (Olea (Aindexed2 n)) (t1:::t2:::Enil)
-  | t1, Eop (Olea (Aindexed n)) (t2:::Enil) =>
-      Eop (Olea (Aindexed2 n)) (t1:::t2:::Enil)
-  | _, _ =>
-      Eop (Olea (Aindexed2 0)) (e1:::e2:::Enil)
-  end.
-
-(** ** Opposite *)
-
-Nondetfunction negint (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.neg n)) Enil
-  | _ =>  Eop Oneg (e ::: Enil)
+  | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil))
+  | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil =>
+      Eop Oadd (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n1) n2)) Enil ::: t1 ::: Enil)
+  | Eop (Oaddrstack n1) Enil, Eop (Oaddimm n2) (t2:::Enil) =>
+      Eop Oadd (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int n2))) Enil ::: t2 ::: Enil)
+  | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+      addimm n1 (Eop Oadd (t1:::t2:::Enil))
+  | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm n2 (Eop Oadd (t1:::t2:::Enil))
+  | _, _ => Eop Oadd (e1:::e2:::Enil)
   end.
 
 (** ** Integer and pointer subtraction *)
 
 Nondetfunction sub (e1: expr) (e2: expr) :=
   match e1, e2 with
-  | t1, Eop (Ointconst n2) Enil => addimm (Int.neg n2) t1
-  | Eop (Olea (Aindexed n1)) (t1:::Enil), Eop (Olea (Aindexed n2)) (t2:::Enil) =>
-      addimm (Int.repr (n1 - n2)) (Eop Osub (t1:::t2:::Enil))
-  | Eop (Olea (Aindexed n1)) (t1:::Enil), t2 =>
-      addimm (Int.repr n1) (Eop Osub (t1:::t2:::Enil))
-  | t1, Eop (Olea (Aindexed n2)) (t2:::Enil) =>
-      addimm (Int.repr (-n2)) (Eop Osub (t1:::t2:::Enil))
-  | _, _ =>
-      Eop Osub (e1:::e2:::Enil)
+  | t1, Eop (Ointconst n2) Enil =>
+      addimm (Int.neg n2) t1
+  | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.sub n1 n2) (Eop Osub (t1:::t2:::Enil))
+  | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+      addimm n1 (Eop Osub (t1:::t2:::Enil))
+  | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+      addimm (Int.neg n2) (Eop Osub (t1:::t2:::Enil))
+  | _, _ => Eop Osub (e1:::e2:::Enil)
   end.
 
-(** ** Immediate shifts *)
+Nondetfunction negint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.neg n)) Enil
+  | _ => Eop Oneg (e ::: Enil)
+  end.
 
-Definition shift_is_scale (n: int) : bool :=
-  Int.eq n (Int.repr 1) || Int.eq n (Int.repr 2) || Int.eq n (Int.repr 3).
+(** ** Immediate shifts *)
 
 Nondetfunction shlimm (e1: expr) (n: int) :=
-  if Int.eq n Int.zero then e1 else
-  if negb (Int.ltu n Int.iwordsize) then
-    Eop Oshl (e1:::Eop (Ointconst n) Enil:::Enil)
-  else
-    match e1 with
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshl (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
     | Eop (Ointconst n1) Enil =>
-        Eop (Ointconst(Int.shl n1 n)) Enil
+        Eop (Ointconst (Int.shl n1 n)) Enil
     | Eop (Oshlimm n1) (t1:::Enil) =>
         if Int.ltu (Int.add n n1) Int.iwordsize
         then Eop (Oshlimm (Int.add n n1)) (t1:::Enil)
         else Eop (Oshlimm n) (e1:::Enil)
-    | Eop (Olea (Aindexed n1)) (t1:::Enil) =>
-        if shift_is_scale n
-        then Eop (Olea (Ascaled (Int.unsigned (Int.shl Int.one n))
-                                (Int.unsigned (Int.shl (Int.repr n1) n)))) (t1:::Enil)
-        else Eop (Oshlimm n) (e1:::Enil)
     | _ =>
-        if shift_is_scale n
-        then Eop (Olea (Ascaled (Int.unsigned (Int.shl Int.one n)) 0)) (e1:::Enil)
-        else Eop (Oshlimm n) (e1:::Enil)
-    end.
+        Eop (Oshlimm n) (e1:::Enil)
+  end.
 
 Nondetfunction shruimm (e1: expr) (n: int) :=
-  if Int.eq n Int.zero then e1 else
-  if negb (Int.ltu n Int.iwordsize) then
-    Eop Oshru (e1:::Eop (Ointconst n) Enil:::Enil)
-  else
-    match e1 with
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshru (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
     | Eop (Ointconst n1) Enil =>
-        Eop (Ointconst(Int.shru n1 n)) Enil
+        Eop (Ointconst (Int.shru n1 n)) Enil
     | Eop (Oshruimm n1) (t1:::Enil) =>
         if Int.ltu (Int.add n n1) Int.iwordsize
         then Eop (Oshruimm (Int.add n n1)) (t1:::Enil)
@@ -185,20 +142,20 @@ Nondetfunction shruimm (e1: expr) (n: int) :=
     end.
 
 Nondetfunction shrimm (e1: expr) (n: int) :=
-  if Int.eq n Int.zero then e1 else
-  if negb (Int.ltu n Int.iwordsize) then
-    Eop Oshr (e1:::Eop (Ointconst n) Enil:::Enil)
-  else
-  match e1 with
-  | Eop (Ointconst n1) Enil =>
-      Eop (Ointconst(Int.shr n1 n)) Enil
-  | Eop (Oshrimm n1) (t1:::Enil) =>
-      if Int.ltu (Int.add n n1) Int.iwordsize
-      then Eop (Oshrimm (Int.add n n1)) (t1:::Enil)
-      else Eop (Oshrimm n) (e1:::Enil)
-  | _ =>
-      Eop (Oshrimm n) (e1:::Enil)
-  end.
+  if Int.eq n Int.zero then
+    e1
+  else if negb (Int.ltu n Int.iwordsize) then
+    Eop Oshr (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+  else match e1 with
+    | Eop (Ointconst n1) Enil =>
+        Eop (Ointconst (Int.shr n1 n)) Enil
+    | Eop (Oshrimm n1) (t1:::Enil) =>
+        if Int.ltu (Int.add n n1) Int.iwordsize
+        then Eop (Oshrimm (Int.add n n1)) (t1:::Enil)
+        else Eop (Oshrimm n) (e1:::Enil)
+    | _ =>
+        Eop (Oshrimm n) (e1:::Enil)
+    end.
 
 (** ** Integer multiply *)
 
@@ -209,15 +166,15 @@ Definition mulimm_base (n1: int) (e2: expr) :=
   | i :: j :: nil =>
       Elet e2 (add (shlimm (Eletvar 0) i) (shlimm (Eletvar 0) j))
   | _ =>
-      Eop (Omulimm n1) (e2:::Enil)
+      Eop Omul (Eop (Ointconst n1) Enil ::: e2 ::: Enil)
   end.
 
 Nondetfunction mulimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil
   else if Int.eq n1 Int.one then e2
   else match e2 with
-  | Eop (Ointconst n2) Enil => Eop (Ointconst(Int.mul n1 n2)) Enil
-  | Eop (Olea (Aindexed n2)) (t2:::Enil) => addimm (Int.mul n1 (Int.repr n2)) (mulimm_base n1 t2)
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.mul n1 n2)) Enil
+  | Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.mul n1 n2) (mulimm_base n1 t2)
   | _ => mulimm_base n1 e2
   end.
 
@@ -228,8 +185,25 @@ Nondetfunction mul (e1: expr) (e2: expr) :=
   | _, _ => Eop Omul (e1:::e2:::Enil)
   end.
 
-Definition mulhs (e1: expr) (e2: expr) := Eop Omulhs (e1 ::: e2 ::: Enil).
-Definition mulhu (e1: expr) (e2: expr) := Eop Omulhu (e1 ::: e2 ::: Enil).
+Definition mulhs (e1: expr) (e2: expr) :=
+  if Archi.ptr64 then
+    Eop Olowlong
+      (Eop (Oshrlimm (Int.repr 32))
+         (Eop Omull (Eop Ocast32signed (e1 ::: Enil) :::
+                     Eop Ocast32signed (e2 ::: Enil) ::: Enil) ::: Enil)
+          ::: Enil)
+  else
+    Eop Omulhs (e1 ::: e2 ::: Enil).
+
+Definition mulhu (e1: expr) (e2: expr) :=
+  if Archi.ptr64 then
+    Eop Olowlong
+      (Eop (Oshrluimm (Int.repr 32))
+         (Eop Omull (Eop Ocast32unsigned (e1 ::: Enil) :::
+                     Eop Ocast32unsigned (e2 ::: Enil) ::: Enil) ::: Enil)
+          ::: Enil)
+  else
+    Eop Omulhu (e1 ::: e2 ::: Enil).
 
 (** ** Bitwise and, or, xor *)
 
@@ -237,16 +211,9 @@ Nondetfunction andimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil
   else if Int.eq n1 Int.mone then e2
   else match e2 with
-  | Eop (Ointconst n2) Enil =>
-      Eop (Ointconst (Int.and n1 n2)) Enil
-  | Eop (Oandimm n2) (t2:::Enil) =>
-     Eop (Oandimm (Int.and n1 n2)) (t2:::Enil)
-  | Eop Ocast8unsigned (t2:::Enil) =>
-     Eop (Oandimm (Int.and n1 (Int.repr 255))) (t2:::Enil)
-  | Eop Ocast16unsigned (t2:::Enil) =>
-     Eop (Oandimm (Int.and n1 (Int.repr 65535))) (t2:::Enil)
-  | _ =>
-     Eop (Oandimm n1) (e2:::Enil)
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil
+  | Eop (Oandimm n2) (t2:::Enil) => Eop (Oandimm (Int.and n1 n2)) (t2:::Enil)
+  | _ => Eop (Oandimm n1) (e2:::Enil)
   end.
 
 Nondetfunction and (e1: expr) (e2: expr) :=
@@ -260,51 +227,26 @@ Nondetfunction orimm (n1: int) (e2: expr) :=
   if Int.eq n1 Int.zero then e2
   else if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil
   else match e2 with
-  | Eop (Ointconst n2) Enil =>
-      Eop (Ointconst (Int.or n1 n2)) Enil
-  | Eop (Oorimm n2) (t2:::Enil) =>
-     Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
-  | _ =>
-     Eop (Oorimm n1) (e2:::Enil)
-  end.
-
-Definition same_expr_pure (e1 e2: expr) :=
-  match e1, e2 with
-  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
-  | _, _ => false
+       | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
+       | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
+       | _ => Eop (Oorimm n1) (e2:::Enil)
   end.
 
 Nondetfunction or (e1: expr) (e2: expr) :=
   match e1, e2 with
   | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
   | t1, Eop (Ointconst n2) Enil => orimm n2 t1
-  | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize then
-        if same_expr_pure t1 t2
-        then Eop (Ororimm n2) (t1:::Enil)
-        else Eop (Oshldimm n1) (t1:::t2:::Enil)
-      else Eop Oor (e1:::e2:::Enil)
-  | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize then
-        if same_expr_pure t1 t2
-        then Eop (Ororimm n2) (t1:::Enil)
-        else Eop (Oshldimm n1) (t1:::t2:::Enil)
-      else Eop Oor (e1:::e2:::Enil)
-  | _, _ =>
-      Eop Oor (e1:::e2:::Enil)
+  | _, _ => Eop Oor (e1:::e2:::Enil)
   end.
 
 Nondetfunction xorimm (n1: int) (e2: expr) :=
-  if Int.eq n1 Int.zero then e2
-  else match e2 with
-  | Eop (Ointconst n2) Enil =>
-      Eop (Ointconst (Int.xor n1 n2)) Enil
+  if Int.eq n1 Int.zero then e2 else
+  match e2 with
+  | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil
   | Eop (Oxorimm n2) (t2:::Enil) =>
-     Eop (Oxorimm (Int.xor n1 n2)) (t2:::Enil)
-  | Eop Onot (t2:::Enil) =>
-     Eop (Oxorimm (Int.not n1)) (t2:::Enil)
-  | _ =>
-     Eop (Oxorimm n1) (e2:::Enil)
+      let n := Int.xor n1 n2 in
+      if Int.eq n Int.zero then t2 else Eop (Oxorimm n) (t2:::Enil)
+  | _ => Eop (Oxorimm n1) (e2:::Enil)
   end.
 
 Nondetfunction xor (e1: expr) (e2: expr) :=
@@ -314,16 +256,37 @@ Nondetfunction xor (e1: expr) (e2: expr) :=
   | _, _ => Eop Oxor (e1:::e2:::Enil)
   end.
 
+(** ** Integer logical negation *)
+
+Definition notint (e: expr) := xorimm Int.mone e.
+
 (** ** Integer division and modulus *)
 
-Definition divu_base (e1: expr) (e2: expr) := Eop Odivu (e1:::e2:::Enil).
-Definition modu_base (e1: expr) (e2: expr) := Eop Omodu (e1:::e2:::Enil).
 Definition divs_base (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil).
 Definition mods_base (e1: expr) (e2: expr) := Eop Omod (e1:::e2:::Enil).
+Definition divu_base (e1: expr) (e2: expr) := Eop Odivu (e1:::e2:::Enil).
+Definition modu_base (e1: expr) (e2: expr) := Eop Omodu (e1:::e2:::Enil).
 
 Definition shrximm (e1: expr) (n2: int) :=
   if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil).
 
+(* Alternate definition, not convenient for strength reduction during constant propagation *)
+(*
+(* n2 will be less than 31. *)
+
+Definition shrximm_inner (e1: expr) (n2: int) :=
+  Eop (Oshruimm (Int.sub Int.iwordsize n2))
+      ((Eop (Oshrimm (Int.repr (Int.zwordsize - 1)))
+            (e1 ::: Enil))
+         ::: Enil).
+
+Definition shrximm (e1: expr) (n2: int) :=
+  if Int.eq n2 Int.zero then e1
+  else Eop (Oshrimm n2)
+           ((Eop Oadd (e1 ::: shrximm_inner e1 n2 ::: Enil))
+              ::: Enil).
+*)
+
 (** ** General shifts *)
 
 Nondetfunction shl (e1: expr) (e2: expr) :=
@@ -380,16 +343,6 @@ Nondetfunction compimm (default: comparison -> int -> condition)
         Eop (Ocmp (negate_condition c)) el
       else
         Eop (Ointconst Int.one) Enil
-  | Ceq, Eop (Oandimm n1) (t1 ::: Enil) =>
-      if Int.eq_dec n2 Int.zero then
-        Eop (Ocmp (Cmaskzero n1)) (t1 ::: Enil)
-      else
-        Eop (Ocmp (default c n2)) (e1 ::: Enil)
-  | Cne, Eop (Oandimm n1) (t1 ::: Enil) =>
-      if Int.eq_dec n2 Int.zero then
-        Eop (Ocmp (Cmasknotzero n1)) (t1 ::: Enil)
-      else
-        Eop (Ocmp (default c n2)) (e1 ::: Enil)
   | _, _ =>
        Eop (Ocmp (default c n2)) (e1 ::: Enil)
   end.
@@ -422,76 +375,32 @@ Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
 
 (** ** Integer conversions *)
 
-Nondetfunction cast8unsigned (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil =>
-      Eop (Ointconst (Int.zero_ext 8 n)) Enil
-  | Eop (Oandimm n) (t:::Enil) =>
-      andimm (Int.and (Int.repr 255) n) t
-  | _ =>
-      Eop Ocast8unsigned (e:::Enil)
-  end.
+Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e.
 
 Nondetfunction cast8signed (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil =>
-      Eop (Ointconst (Int.sign_ext 8 n)) Enil
-  | _ =>
-      Eop Ocast8signed (e ::: Enil)
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 8 n)) Enil
+  | _ =>  Eop Ocast8signed (e ::: Enil)
   end.
 
-Nondetfunction cast16unsigned (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil =>
-      Eop (Ointconst (Int.zero_ext 16 n)) Enil
-  | Eop (Oandimm n) (t:::Enil) =>
-      andimm (Int.and (Int.repr 65535) n) t
-  | _ =>
-      Eop Ocast16unsigned (e:::Enil)
-  end.
+Definition cast16unsigned (e: expr) := andimm (Int.repr 65535) e.
 
 Nondetfunction cast16signed (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil =>
-      Eop (Ointconst (Int.sign_ext 16 n)) Enil
-  | _ =>
-      Eop Ocast16signed (e ::: Enil)
-  end.
-
-(** ** Selection *)
-
-Definition select_supported (ty: typ) : bool :=
-  match ty with
-  | Tint => true
-  | Tlong => Archi.ptr64
-  | _ => false
+  | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 16 n)) Enil
+  | _ => Eop Ocast16signed (e ::: Enil)
   end.
 
-(** [Asmgen.mk_sel] cannot always handle the conditions that are
-    implemented as a "and" of two processor flags.  However it can
-    handle the negation of those conditions, which are implemented
-    as an "or".  So, for the risky conditions we just take their
-    negation and swap the two arguments of the [select]. *)
-
-Definition select_swap (cond: condition) :=
-  match cond with
-  | Ccompf Cne | Ccompfs Cne | Cnotcompf Ceq | Cnotcompfs Ceq => true
-  | _ => false
-  end.
-
-Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) :=
-  if select_supported ty then
-    if select_swap cond
-    then Some (Eop (Osel (negate_condition cond) ty) (e2 ::: e1 ::: args))
-    else Some (Eop (Osel cond ty) (e1 ::: e2 ::: args))
-  else None.
-
 (** ** Floating-point conversions *)
 
-Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
-Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+Definition intoffloat  (e: expr) := Eop Ointoffloat (e ::: Enil).
+Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
 
-Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
+Nondetfunction floatofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
+  | _ => Eop Ofloatofintu (e ::: Enil)
+  end.
 
 Nondetfunction floatofint (e: expr) :=
   match e with
@@ -499,81 +408,77 @@ Nondetfunction floatofint (e: expr) :=
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
-Definition intuoffloat (e: expr) :=
-  if Archi.splitlong then
-    Elet e
-      (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
-        (Econdition (CEcond (Ccompf Clt) None (Eletvar 1 ::: Eletvar 0 ::: Enil))
-          (intoffloat (Eletvar 1))
-          (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat
-  else
-    Eop Olowlong (Eop Olongoffloat (e ::: Enil) ::: Enil).
-
-Nondetfunction floatofintu (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
-  | _ =>
-    if Archi.splitlong then
-      let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
-      Elet e
-        (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) None (Eletvar O ::: Enil))
-          (floatofint (Eletvar O))
-          (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
-    else
-      Eop Ofloatoflong (Eop Ocast32unsigned (e ::: Enil) ::: Enil)
-  end.
-
 Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+Definition singleofint (e: expr) := Eop Osingleofint (e ::: Enil).
 
-Nondetfunction singleofint (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
-  | _ => Eop Osingleofint (e ::: Enil)
-  end.
+Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil).
+Definition singleofintu (e: expr) := Eop Osingleofintu (e ::: Enil).
 
-Definition intuofsingle (e: expr) := intuoffloat (floatofsingle e).
+Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
 
-Nondetfunction singleofintu (e: expr) :=
-  match e with
-  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
-  | _ => singleoffloat (floatofintu e)
+(** ** Selection *)
+
+Definition same_expr_pure (e1 e2: expr) :=
+  match e1, e2 with
+  | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
+  | _, _ => false
   end.
 
-(** ** Addressing modes *)
+Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr)
+  : option expr :=
+  if same_expr_pure e1 e2
+  then Some e1
+  else
+     if Archi.ptr64 then
+      match ty with
+      | Tlong => Some (Eop Oselectl
+                       ((Eop (Ocmp cond) args) ::: e1 ::: e2 ::: Enil))
+      | Tint => Some (Eop Olowlong ((Eop Oselectl
+                       ((Eop (Ocmp cond) args) :::
+		         (Eop Ocast32signed (e1 ::: Enil)) :::
+			 (Eop Ocast32signed (e2 ::: Enil)) ::: Enil)) ::: Enil))
+      | Tfloat => Some (Eop Ofloat_of_bits ((Eop Oselectl
+                       ((Eop (Ocmp cond) args) :::
+		         (Eop Obits_of_float (e1 ::: Enil)) :::
+			 (Eop Obits_of_float (e2 ::: Enil)) ::: Enil)) ::: Enil))
+      | Tsingle => Some
+         (Eop Osingle_of_bits
+           ((Eop Olowlong ((Eop Oselectl
+             ((Eop (Ocmp cond) args) :::
+         (Eop Ocast32signed ((Eop Obits_of_single (e1 ::: Enil)) ::: Enil)) :::
+	 (Eop Ocast32signed ((Eop Obits_of_single (e2 ::: Enil)) ::: Enil))
+	 ::: Enil)) ::: Enil)) ::: Enil))
+      | _ => None
+      end
+      else None.
+ 
+(** ** Recognition of addressing modes for load and store operations *)
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
   match e with
-  | Eop (Olea addr) args =>
-      if (negb Archi.ptr64) && addressing_valid addr then (addr, args) else (Aindexed 0, e:::Enil)
-  | Eop (Oleal addr) args =>
-      if Archi.ptr64 && addressing_valid addr then (addr, args) else (Aindexed 0, e:::Enil)
-  | _ => (Aindexed 0, e:::Enil)
+  | Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
+  | Eop (Oaddrsymbol id ofs) Enil => if Archi.pic_code tt then (Aindexed Ptrofs.zero, e:::Enil) else (Aglobal id ofs, Enil)
+  | Eop (Oaddimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int n), e1:::Enil)
+  | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int64 n), e1:::Enil)
+  | _ => (Aindexed Ptrofs.zero, e:::Enil)
   end.
 
 (** ** Arguments of builtins *)
 
-Nondetfunction builtin_arg_addr (addr: Op.addressing) (el: exprlist) :=
-  match addr, el with
-  | Aindexed n, e1 ::: Enil =>
-      BA_addptr (BA e1) (if Archi.ptr64 then BA_long (Int64.repr n) else BA_int (Int.repr n))
-  | Aglobal id ofs, Enil => BA_addrglobal id ofs
-  | Ainstack ofs, Enil => BA_addrstack ofs
-  | _, _ => BA (Eop (Olea_ptr addr) el)
-  end.
-
 Nondetfunction builtin_arg (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil => BA_int n
-  | Eop (Olongconst n) Enil => BA_long n
-  | Eop (Olea addr) el =>
-      if Archi.ptr64 then BA e else builtin_arg_addr addr el
-  | Eop (Oleal addr) el =>
-      if Archi.ptr64 then builtin_arg_addr addr el else BA e
+  | Eop (Oaddrsymbol id ofs) Enil => BA_addrglobal id ofs
+  | Eop (Oaddrstack ofs) Enil => BA_addrstack ofs
   | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) =>
         BA_long (Int64.ofwords h l)
   | Eop Omakelong (h ::: l ::: Enil) => BA_splitlong (BA h) (BA l)
-  | Eload chunk (Aglobal id ofs) Enil => BA_loadglobal chunk id ofs
   | Eload chunk (Ainstack ofs) Enil => BA_loadstack chunk ofs
+  | Eop (Oaddimm n) (e1:::Enil) =>
+      if Archi.ptr64 then BA e else BA_addptr (BA e1) (BA_int n)
+  | Eop (Oaddlimm n) (e1:::Enil) =>
+      if Archi.ptr64 then BA_addptr (BA e1) (BA_long n) else BA e
   | _ => BA e
   end.
 
@@ -587,4 +492,9 @@ Definition divfs_base (e1: expr) (e2: expr) :=
 (** Platform-specific known builtins *)
 
 Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
-  None.
+  match b with
+  | BI_bits_of_float => Some (Eop Obits_of_single args)
+  | BI_bits_of_double => Some (Eop Obits_of_float args)
+  | BI_float_of_bits => Some (Eop Osingle_of_bits args)
+  | BI_double_of_bits => Some (Eop Ofloat_of_bits args)
+  end.
diff --git a/verilog/SelectOpproof.v b/verilog/SelectOpproof.v
index c43beb56..f450fe6c 100644
--- a/verilog/SelectOpproof.v
+++ b/verilog/SelectOpproof.v
@@ -3,22 +3,28 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
 (** Correctness of instruction selection for operators *)
 
-Require Import Coqlib.
+Require Import Coqlib Zbits.
 Require Import AST Integers Floats.
 Require Import Values Memory Builtins Globalenvs.
 Require Import Cminor Op CminorSel.
 Require Import SelectOp.
 Require Import OpHelpers.
 Require Import OpHelpersproof.
+Require Import Lia.
 
 Local Open Scope cminorsel_scope.
 
@@ -49,7 +55,7 @@ Ltac InvEval1 :=
 Ltac InvEval2 :=
   match goal with
   | [ H: (eval_operation _ _ _ nil _ = Some _) |- _ ] =>
-      simpl in H; FuncInv
+      simpl in H; inv H
   | [ H: (eval_operation _ _ _ (_ :: nil) _ = Some _) |- _ ] =>
       simpl in H; FuncInv
   | [ H: (eval_operation _ _ _ (_ :: _ :: nil) _ = Some _) |- _ ] =>
@@ -60,7 +66,7 @@ Ltac InvEval2 :=
       idtac
   end.
 
-Ltac InvEval := InvEval1; InvEval2; InvEval2; subst.
+Ltac InvEval := InvEval1; InvEval2; InvEval2.
 
 Ltac TrivialExists :=
   match goal with
@@ -79,9 +85,9 @@ Variable e: env.
 Variable m: mem.
 
 (** We now show that the code generated by "smart constructor" functions
-  such as [SelectOp.notint] behaves as expected.  Continuing the
+  such as [Selection.notint] behaves as expected.  Continuing the
   [notint] example, we show that if the expression [e]
-  evaluates to some integer value [Vint n], then [SelectOp.notint e]
+  evaluates to some integer value [Vint n], then [Selection.notint e]
   evaluates to a value [Vint (Int.not n)] which is indeed the integer
   negation of the value of [e].
 
@@ -109,44 +115,22 @@ Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> va
   eval_expr ge sp e m le b y ->
   exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
 
-Lemma eval_Olea_ptr:
-  forall a el m,
-  eval_operation ge sp (Olea_ptr a) el m = eval_addressing ge sp a el.
-Proof.
-  unfold Olea_ptr, eval_addressing; intros. destruct Archi.ptr64; auto.
-Qed.
-
 Theorem eval_addrsymbol:
   forall le id ofs,
   exists v, eval_expr ge sp e m le (addrsymbol id ofs) v /\ Val.lessdef (Genv.symbol_address ge id ofs) v.
 Proof.
-  intros. unfold addrsymbol. exists (Genv.symbol_address ge id ofs); split; auto.
-  destruct (symbol_is_external id).
-  predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
-  subst. EvalOp.
-  EvalOp. econstructor. EvalOp. simpl; eauto. econstructor.
-  unfold Olea_ptr; destruct Archi.ptr64 eqn:SF; simpl;
-  [ rewrite <- Genv.shift_symbol_address_64 by auto | rewrite <- Genv.shift_symbol_address_32 by auto ];
-  f_equal; f_equal;
-  rewrite Ptrofs.add_zero_l;
-  [ apply Ptrofs.of_int64_to_int64 | apply Ptrofs.of_int_to_int ];
+  intros. unfold addrsymbol. econstructor; split.
+  EvalOp. simpl; eauto.
   auto.
-  EvalOp. (*rewrite eval_Olea_ptr. apply eval_addressing_Aglobal. *)
 Qed.
 
 Theorem eval_addrstack:
   forall le ofs,
   exists v, eval_expr ge sp e m le (addrstack ofs) v /\ Val.lessdef (Val.offset_ptr sp ofs) v.
 Proof.
-  intros. unfold addrstack. TrivialExists. (*rewrite eval_Olea_ptr. apply eval_addressing_Ainstack.*)
-Qed.
-
-Theorem eval_notint: unary_constructor_sound notint Val.notint.
-Proof.
-  unfold notint; red; intros until x. case (notint_match a); intros; InvEval.
-- TrivialExists.
-- rewrite Val.not_xor. rewrite Val.xor_assoc. TrivialExists.
-- TrivialExists.
+  intros. unfold addrstack. econstructor; split.
+  EvalOp. simpl; eauto.
+  auto.
 Qed.
 
 Theorem eval_addimm:
@@ -154,72 +138,80 @@ Theorem eval_addimm:
 Proof.
   red; unfold addimm; intros until x.
   predSpec Int.eq Int.eq_spec n Int.zero.
-- subst n. intros. exists x; split; auto.
-  destruct x; simpl; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
-- case (addimm_match a); intros; InvEval.
-+ TrivialExists; simpl. rewrite Int.add_commut. auto.
-+ inv H0. simpl in H6. TrivialExists. simpl.
-  erewrite eval_offset_addressing_total_32 by eauto. rewrite Int.repr_signed; auto.
-+ TrivialExists. simpl. rewrite Int.repr_signed; auto.
+  - subst n. intros. exists x; split; auto.
+    destruct x; simpl; auto.
+    rewrite Int.add_zero; auto.
+    destruct Archi.ptr64; auto. rewrite Ptrofs.add_zero; auto. 
+  - case (addimm_match a); intros; InvEval; simpl.
+    + TrivialExists; simpl. rewrite Int.add_commut. auto.
+    + econstructor; split. EvalOp. simpl; eauto. 
+      unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto.
+      destruct Archi.ptr64; auto. rewrite Ptrofs.add_commut; auto.
+    + econstructor; split. EvalOp. simpl; eauto. 
+      destruct sp; simpl; auto. destruct Archi.ptr64; auto. 
+      rewrite Ptrofs.add_assoc. rewrite (Ptrofs.add_commut m0). auto.
+    + TrivialExists; simpl. subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
+    + TrivialExists.
 Qed.
 
 Theorem eval_add: binary_constructor_sound add Val.add.
 Proof.
-  assert (A: forall x y, Int.repr (x + y) = Int.add (Int.repr x) (Int.repr y)).
-  { intros; apply Int.eqm_samerepr; auto with ints. }
-  assert (B: forall id ofs n, Archi.ptr64 = false ->
-             Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) =
-             Val.add (Genv.symbol_address ge id ofs) (Vint (Int.repr n))).
-  { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int (Int.repr n)) by auto with ptrofs.
-    apply Genv.shift_symbol_address_32; auto. }
   red; intros until y.
   unfold add; case (add_match a b); intros; InvEval.
-- rewrite Val.add_commut. apply eval_addimm; auto.
-- apply eval_addimm; auto.
-- TrivialExists. simpl. rewrite A, Val.add_permut_4. auto.
-- TrivialExists. simpl. rewrite A, Val.add_assoc. decEq; decEq. rewrite Val.add_permut. auto.
-- TrivialExists. simpl. rewrite A, Val.add_permut_4. rewrite <- Val.add_permut. rewrite <- Val.add_assoc. auto.
-- TrivialExists. simpl. rewrite Heqb0. rewrite B by auto. rewrite ! Val.add_assoc.
-  rewrite (Val.add_commut v1). rewrite Val.add_permut. rewrite Val.add_assoc. auto.
-- TrivialExists. simpl. rewrite Heqb0. rewrite B by auto. rewrite Val.add_assoc. do 2 f_equal. apply Val.add_commut.
-- TrivialExists. simpl. rewrite Heqb0. rewrite B by auto. rewrite !Val.add_assoc.
-  rewrite (Val.add_commut (Vint (Int.repr n1))). rewrite Val.add_permut. do 2 f_equal. apply Val.add_commut.
-- TrivialExists. simpl. rewrite Heqb0. rewrite B by auto. rewrite !Val.add_assoc.
-  rewrite (Val.add_commut (Vint (Int.repr n2))). rewrite Val.add_permut. auto.
-- TrivialExists. simpl. rewrite Val.add_permut. rewrite Val.add_assoc.
-    decEq; decEq. apply Val.add_commut.
-- TrivialExists.
-- TrivialExists. simpl. repeat rewrite Val.add_assoc. decEq; decEq. apply Val.add_commut.
-- TrivialExists. simpl. rewrite Val.add_assoc; auto.
-- TrivialExists. simpl.
-  unfold Val.add; destruct Archi.ptr64, x, y; auto.
-  + rewrite Int.add_zero; auto.
-  + rewrite Int.add_zero; auto.
-  + rewrite Ptrofs.add_assoc, Ptrofs.add_zero. auto.
-  + rewrite Ptrofs.add_assoc, Ptrofs.add_zero. auto.
+  - rewrite Val.add_commut. apply eval_addimm; auto.
+  - apply eval_addimm; auto.
+  - subst.
+    replace (Val.add (Val.add v1 (Vint n1)) (Val.add v0 (Vint n2)))
+       with (Val.add (Val.add v1 v0) (Val.add (Vint n1) (Vint n2))).
+    apply eval_addimm. EvalOp.
+    repeat rewrite Val.add_assoc. decEq. apply Val.add_permut.
+  - subst. econstructor; split.
+    EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+    rewrite Val.add_commut. destruct sp; simpl; auto.
+    destruct v1; simpl; auto.
+    destruct Archi.ptr64 eqn:SF; auto. 
+    apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. 
+    rewrite (Ptrofs.add_commut (Ptrofs.of_int n1)), Ptrofs.add_assoc. f_equal. auto with ptrofs.
+    destruct Archi.ptr64 eqn:SF; auto.
+  - subst. econstructor; split.
+    EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+    destruct sp; simpl; auto.
+    destruct v1; simpl; auto.
+    destruct Archi.ptr64 eqn:SF; auto. 
+    apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. f_equal.
+    rewrite Ptrofs.add_commut. auto with ptrofs.
+    destruct Archi.ptr64 eqn:SF; auto.
+  - subst.
+    replace (Val.add (Val.add v1 (Vint n1)) y)
+       with (Val.add (Val.add v1 y) (Vint n1)).
+    apply eval_addimm. EvalOp.
+    repeat rewrite Val.add_assoc. decEq. apply Val.add_commut.
+  - subst.
+    replace (Val.add x (Val.add v1 (Vint n2)))
+       with (Val.add (Val.add x v1) (Vint n2)).
+    apply eval_addimm. EvalOp.
+    repeat rewrite Val.add_assoc. reflexivity.
+  - TrivialExists.
 Qed.
 
 Theorem eval_sub: binary_constructor_sound sub Val.sub.
 Proof.
   red; intros until y.
   unfold sub; case (sub_match a b); intros; InvEval.
-- rewrite Val.sub_add_opp. apply eval_addimm; auto.
-- rewrite Val.sub_add_l. rewrite Val.sub_add_r.
+  - rewrite Val.sub_add_opp. apply eval_addimm; auto.
+  - subst. rewrite Val.sub_add_l. rewrite Val.sub_add_r.
     rewrite Val.add_assoc. simpl. rewrite Int.add_commut. rewrite <- Int.sub_add_opp.
-    replace (Int.repr (n1 - n2)) with (Int.sub (Int.repr n1) (Int.repr n2)).
     apply eval_addimm; EvalOp.
-    apply Int.eqm_samerepr; auto with ints.
-- rewrite Val.sub_add_l. apply eval_addimm; EvalOp.
-- rewrite Val.sub_add_r. replace (Int.repr (-n2)) with (Int.neg (Int.repr n2)). apply eval_addimm; EvalOp.
-    apply Int.eqm_samerepr; auto with ints.
-- TrivialExists.
+  - subst. rewrite Val.sub_add_l. apply eval_addimm; EvalOp.
+  - subst. rewrite Val.sub_add_r. apply eval_addimm; EvalOp.
+  - TrivialExists.
 Qed.
 
-Theorem eval_negint: unary_constructor_sound negint Val.neg.
+Theorem eval_negint: unary_constructor_sound negint (fun v => Val.sub Vzero v).
 Proof.
   red; intros until x. unfold negint. case (negint_match a); intros; InvEval.
-- TrivialExists.
-- TrivialExists.
+  TrivialExists.
+  TrivialExists.
 Qed.
 
 Theorem eval_shlimm:
@@ -227,318 +219,352 @@ Theorem eval_shlimm:
                                     (fun x => Val.shl x (Vint n)).
 Proof.
   red; intros until x.  unfold shlimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shl_zero; auto.
+
   destruct (Int.ltu n Int.iwordsize) eqn:LT; simpl.
   destruct (shlimm_match a); intros; InvEval.
-- exists (Vint (Int.shl n1 n)); split. EvalOp.
-  simpl. rewrite LT. auto.
-- destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
-+ exists (Val.shl v1 (Vint (Int.add n n1))); split. EvalOp.
-  destruct v1; simpl; auto.
-  rewrite Heqb.
-  destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
-  destruct (Int.ltu n Int.iwordsize) eqn:?; simpl; auto.
-  rewrite Int.add_commut. rewrite Int.shl_shl; auto. rewrite Int.add_commut; auto.
-+ TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
-  simpl. auto.
-- destruct (shift_is_scale n).
-+ econstructor; split. EvalOp. simpl. eauto.
-  rewrite ! Int.repr_unsigned.
-  destruct v1; simpl; auto. rewrite LT.
-  rewrite Int.shl_mul. rewrite Int.mul_add_distr_l. rewrite (Int.shl_mul (Int.repr n1)). auto.
-+ TrivialExists. econstructor. EvalOp. simpl; eauto. constructor. auto.
-- destruct (shift_is_scale n).
-+ econstructor; split. EvalOp. simpl. eauto.
-  destruct x; simpl; auto. rewrite LT.
-  rewrite Int.repr_unsigned. rewrite Int.add_zero. rewrite Int.shl_mul. auto.
-+ TrivialExists.
-- intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-  auto.
+  - exists (Vint (Int.shl n1 n)); split. EvalOp.
+    simpl. rewrite LT. auto.
+  - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+    + exists (Val.shl v1 (Vint (Int.add n n1))); split. EvalOp.
+      subst. destruct v1; simpl; auto.
+      rewrite Heqb.
+      destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+      destruct (Int.ltu n Int.iwordsize) eqn:?; simpl; auto.
+      rewrite Int.add_commut. rewrite Int.shl_shl; auto. rewrite Int.add_commut; auto.
+    + subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+      simpl. auto.
+  - TrivialExists.
+  - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+    auto.
 Qed.
 
 Theorem eval_shruimm:
   forall n, unary_constructor_sound (fun a => shruimm a n)
                                     (fun x => Val.shru x (Vint n)).
 Proof.
-  red; intros until x.  unfold shruimm.
+  red; intros until x. unfold shruimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shru_zero; auto.
+
   destruct (Int.ltu n Int.iwordsize) eqn:LT; simpl.
   destruct (shruimm_match a); intros; InvEval.
-- exists (Vint (Int.shru n1 n)); split. EvalOp.
-  simpl. rewrite LT; auto.
-- destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
-+ exists (Val.shru v1 (Vint (Int.add n n1))); split. EvalOp.
-  subst. destruct v1; simpl; auto.
-  rewrite Heqb.
-  destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
-  rewrite LT. rewrite Int.add_commut. rewrite Int.shru_shru; auto. rewrite Int.add_commut; auto.
-+ TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
-  simpl. auto.
-- TrivialExists.
-- intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-  auto.
+  - exists (Vint (Int.shru n1 n)); split. EvalOp.
+    simpl. rewrite LT; auto.
+  - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+    exists (Val.shru v1 (Vint (Int.add n n1))); split. EvalOp.
+    subst. destruct v1; simpl; auto.
+    rewrite Heqb.
+    destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+    rewrite LT. rewrite Int.add_commut. rewrite Int.shru_shru; auto. rewrite Int.add_commut; auto.
+    subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+    simpl. auto.
+  - TrivialExists.
+  - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+    auto.
 Qed.
 
 Theorem eval_shrimm:
   forall n, unary_constructor_sound (fun a => shrimm a n)
                                     (fun x => Val.shr x (Vint n)).
 Proof.
-  red; intros until x.  unfold shrimm.
+  red; intros until x. unfold shrimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shr_zero; auto.
+
   destruct (Int.ltu n Int.iwordsize) eqn:LT; simpl.
   destruct (shrimm_match a); intros; InvEval.
-- exists (Vint (Int.shr n1 n)); split. EvalOp.
-  simpl. rewrite LT; auto.
-- destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
-+ exists (Val.shr v1 (Vint (Int.add n n1))); split. EvalOp.
-  subst. destruct v1; simpl; auto.
-  rewrite Heqb.
-  destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
-  rewrite LT.
-  rewrite Int.add_commut. rewrite Int.shr_shr; auto. rewrite Int.add_commut; auto.
-+ TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
-  simpl. auto.
-- TrivialExists.
-- intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-  auto.
+  - exists (Vint (Int.shr n1 n)); split. EvalOp.
+    simpl. rewrite LT; auto.
+  - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+    exists (Val.shr v1 (Vint (Int.add n n1))); split. EvalOp.
+    subst. destruct v1; simpl; auto.
+    rewrite Heqb.
+    destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+    rewrite LT.
+    rewrite Int.add_commut. rewrite Int.shr_shr; auto. rewrite Int.add_commut; auto.
+    subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+    simpl. auto.
+  - TrivialExists.
+  - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+    auto.
 Qed.
 
 Lemma eval_mulimm_base:
   forall n, unary_constructor_sound (mulimm_base n) (fun x => Val.mul x (Vint n)).
 Proof.
   intros; red; intros; unfold mulimm_base.
-  generalize (Int.one_bits_decomp n) (Int.one_bits_range n); intros D R.
-  destruct (Int.one_bits n) as [ | i l].
-  TrivialExists.
-  destruct l as [ | j l ].
-  replace (Val.mul x (Vint n)) with (Val.shl x (Vint i)). apply eval_shlimm; auto.
-  destruct x; auto; simpl. rewrite D; simpl; rewrite Int.add_zero.
-  rewrite R by auto with coqlib. rewrite Int.shl_mul. auto.
-  destruct l as [ | k l ].
-  exploit (eval_shlimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]].
-  exploit (eval_shlimm j (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
-  exploit eval_add. eexact A1. eexact A2. intros [v3 [A3 B3]].
-  exists v3; split. econstructor; eauto.
-  rewrite D; simpl; rewrite Int.add_zero.
-  replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one j)))
-     with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint j))).
-  rewrite Val.mul_add_distr_r.
-  repeat rewrite Val.shl_mul.
-  apply Val.lessdef_trans with (Val.add v1 v2); auto. apply Val.add_lessdef; auto.
-  simpl. rewrite ! R by auto with coqlib. auto.
-  TrivialExists.
+
+  assert (DFL: exists v, eval_expr ge sp e m le (Eop Omul (Eop (Ointconst n) Enil ::: a ::: Enil)) v /\ Val.lessdef (Val.mul x (Vint n)) v).
+  TrivialExists. econstructor. EvalOp. simpl; eauto. econstructor. eauto. constructor.
+  rewrite Val.mul_commut. auto.
+
+  generalize (Int.one_bits_decomp n).
+  generalize (Int.one_bits_range n).
+  destruct (Int.one_bits n).
+  - intros. auto.
+  - destruct l.
+    + intros. rewrite H1. simpl.
+      rewrite Int.add_zero.
+      replace (Vint (Int.shl Int.one i)) with (Val.shl Vone (Vint i)). rewrite Val.shl_mul.
+      apply eval_shlimm. auto. simpl. rewrite H0; auto with coqlib.
+    + destruct l.
+      intros. rewrite H1. simpl.
+      exploit (eval_shlimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]].
+      exploit (eval_shlimm i0 (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
+      exploit (eval_add (x :: le)). eexact A1. eexact A2. intros [v [A B]].
+      exists v; split. econstructor; eauto.
+      rewrite Int.add_zero.
+      replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one i0)))
+         with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint i0))).
+      rewrite Val.mul_add_distr_r.
+      repeat rewrite Val.shl_mul. eapply Val.lessdef_trans. 2: eauto. apply Val.add_lessdef; auto.
+      simpl. repeat rewrite H0; auto with coqlib.
+      intros. auto.
 Qed.
 
 Theorem eval_mulimm:
   forall n, unary_constructor_sound (mulimm n) (fun x => Val.mul x (Vint n)).
 Proof.
   intros; red; intros until x; unfold mulimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros. exists (Vint Int.zero); split. EvalOp.
   destruct x; simpl; auto. subst n. rewrite Int.mul_zero. auto.
+
   predSpec Int.eq Int.eq_spec n Int.one.
   intros. exists x; split; auto.
   destruct x; simpl; auto. subst n. rewrite Int.mul_one. auto.
-- case (mulimm_match a); intros; InvEval.
-+ TrivialExists. simpl. rewrite Int.mul_commut; auto.
-+ rewrite Val.mul_add_distr_l.
-  exploit eval_mulimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]].
-  exploit (eval_addimm (Int.mul n (Int.repr n2)) le (mulimm_base n t2) v'). auto. intros [v'' [A2 B2]].
-  exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.add_lessdef; eauto.
-  rewrite Val.mul_commut; auto.
-+ apply eval_mulimm_base; auto.
+
+  case (mulimm_match a); intros; InvEval.
+  - TrivialExists. simpl. rewrite Int.mul_commut; auto.
+  - subst. rewrite Val.mul_add_distr_l.
+    exploit eval_mulimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]].
+    exploit (eval_addimm (Int.mul n n2) le (mulimm_base n t2) v'). auto. intros [v'' [A2 B2]].
+    exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.add_lessdef; eauto.
+    rewrite Val.mul_commut; auto.
+  - apply eval_mulimm_base; auto.
 Qed.
 
 Theorem eval_mul: binary_constructor_sound mul Val.mul.
 Proof.
   red; intros until y.
   unfold mul; case (mul_match a b); intros; InvEval.
-- rewrite Val.mul_commut. apply eval_mulimm. auto.
-- apply eval_mulimm. auto.
-- TrivialExists.
+  rewrite Val.mul_commut. apply eval_mulimm. auto.
+  apply eval_mulimm. auto.
+  TrivialExists.
 Qed.
 
 Theorem eval_mulhs: binary_constructor_sound mulhs Val.mulhs.
 Proof.
-  unfold mulhs; red; intros; TrivialExists.
+  red; intros. unfold mulhs; destruct Archi.ptr64 eqn:SF.
+- econstructor; split.
+  EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto. 
+  constructor. EvalOp. simpl; eauto. constructor. 
+  simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto.
+  destruct x; simpl; auto. destruct y; simpl; auto.
+  change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+  apply Val.lessdef_same. f_equal. 
+  transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)).
+  unfold Int.mulhs; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity.
+  apply Int.same_bits_eq; intros n N.
+  change Int.zwordsize with 32 in *.
+  assert (N1: 0 <= n < 64) by lia.
+  rewrite Int64.bits_loword by auto.
+  rewrite Int64.bits_shr' by auto.
+  change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+  rewrite zlt_true by lia.
+  rewrite Int.testbit_repr by auto. 
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
+  transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)).
+  rewrite Z.shiftr_spec by lia. auto.
+  apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
+  change Int64.zwordsize with 64; lia.
+- TrivialExists.
 Qed.
-  
+
 Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu.
 Proof.
-  unfold mulhu; red; intros; TrivialExists.
+  red; intros. unfold mulhu; destruct Archi.ptr64 eqn:SF.
+- econstructor; split.
+  EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto. 
+  constructor. EvalOp. simpl; eauto. constructor. 
+  simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto.
+  destruct x; simpl; auto. destruct y; simpl; auto.
+  change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+  apply Val.lessdef_same. f_equal. 
+  transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)).
+  unfold Int.mulhu; f_equal. rewrite Zshiftr_div_two_p by lia. reflexivity.
+  apply Int.same_bits_eq; intros n N.
+  change Int.zwordsize with 32 in *.
+  assert (N1: 0 <= n < 64) by lia.
+  rewrite Int64.bits_loword by auto.
+  rewrite Int64.bits_shru' by auto.
+  change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+  rewrite zlt_true by lia.
+  rewrite Int.testbit_repr by auto. 
+  unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
+  transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)).
+  rewrite Z.shiftr_spec by lia. auto.
+  apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. 
+  change Int64.zwordsize with 64; lia.
+- TrivialExists.
 Qed.
-  
+
 Theorem eval_andimm:
   forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)).
 Proof.
   intros; red; intros until x. unfold andimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros. exists (Vint Int.zero); split. EvalOp.
   destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto.
+
   predSpec Int.eq Int.eq_spec n Int.mone.
   intros. exists x; split; auto.
-  destruct x; simpl; auto. subst n. rewrite Int.and_mone. auto.
-  case (andimm_match a); intros; InvEval.
-- TrivialExists. simpl. rewrite Int.and_commut; auto.
-- TrivialExists. simpl. rewrite Val.and_assoc. rewrite Int.and_commut. auto.
-- rewrite Val.zero_ext_and. TrivialExists. rewrite Val.and_assoc.
-  rewrite Int.and_commut. auto. lia.
-- rewrite Val.zero_ext_and. TrivialExists. rewrite Val.and_assoc.
-  rewrite Int.and_commut. auto. lia.
-- TrivialExists.
+  subst. destruct x; simpl; auto. rewrite Int.and_mone; auto.
+
+  case (andimm_match a); intros.
+  - InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto.
+  - InvEval. subst. rewrite Val.and_assoc. simpl. rewrite Int.and_commut. TrivialExists.
+  - TrivialExists.
 Qed.
 
 Theorem eval_and: binary_constructor_sound and Val.and.
 Proof.
   red; intros until y; unfold and; case (and_match a b); intros; InvEval.
-- rewrite Val.and_commut. apply eval_andimm; auto.
-- apply eval_andimm; auto.
-- TrivialExists.
+  - rewrite Val.and_commut. apply eval_andimm; auto.
+  - apply eval_andimm; auto.
+  - TrivialExists.
 Qed.
 
 Theorem eval_orimm:
   forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)).
 Proof.
   intros; red; intros until x. unfold orimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
-  intros. exists x; split. auto.
-  destruct x; simpl; auto. subst n. rewrite Int.or_zero. auto.
+  intros. subst. exists x; split; auto.
+  destruct x; simpl; auto. rewrite Int.or_zero; auto.
+
   predSpec Int.eq Int.eq_spec n Int.mone.
   intros. exists (Vint Int.mone); split. EvalOp.
   destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto.
+
   destruct (orimm_match a); intros; InvEval.
-- TrivialExists. simpl. rewrite Int.or_commut; auto.
-- subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists.
-- TrivialExists.
+  - TrivialExists. simpl. rewrite Int.or_commut; auto.
+  - subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists.
+  - TrivialExists.
 Qed.
 
-Remark eval_same_expr:
-  forall a1 a2 le v1 v2,
-  same_expr_pure a1 a2 = true ->
-  eval_expr ge sp e m le a1 v1 ->
-  eval_expr ge sp e m le a2 v2 ->
-  a1 = a2 /\ v1 = v2.
-Proof.
-  intros until v2.
-  destruct a1; simpl; try (intros; discriminate).
-  destruct a2; simpl; try (intros; discriminate).
-  case (ident_eq i i0); intros.
-  subst i0. inversion H0. inversion H1. split. auto. congruence.
-  discriminate.
-Qed.
-
-Remark int_add_sub_eq:
-  forall x y z, Int.add x y = z -> Int.sub z x = y.
-Proof.
-  intros. subst z. rewrite Int.sub_add_l. rewrite Int.sub_idem. apply Int.add_zero_l.
-Qed.
-
-Lemma eval_or: binary_constructor_sound or Val.or.
-Proof.
-  red; intros until y; unfold or; case (or_match a b); intros.
-  (* intconst *)
-- InvEval. rewrite Val.or_commut. apply eval_orimm; auto.
-- InvEval. apply eval_orimm; auto.
-- (* shlimm - shruimm *)
-  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
-  destruct (same_expr_pure t1 t2) eqn:?.
-  InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
-  exists (Val.ror v0 (Vint n2)); split. EvalOp.
-  destruct v0; simpl; auto.
-  destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
-  destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
-  simpl. rewrite <- Int.or_ror; auto.
-  InvEval. econstructor; split; eauto. EvalOp.
-  simpl. erewrite int_add_sub_eq; eauto.
-  TrivialExists.
-- (* shruimm - shlimm *)
-  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
-  destruct (same_expr_pure t1 t2) eqn:?.
-  InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
-  exists (Val.ror v1 (Vint n2)); split. EvalOp.
-  destruct v1; simpl; auto.
-  destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
-  destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
-  simpl. rewrite Int.or_commut. rewrite <- Int.or_ror; auto.
-  InvEval. econstructor; split; eauto. EvalOp.
-  simpl. erewrite int_add_sub_eq; eauto.
-  rewrite Val.or_commut; auto.
-  TrivialExists.
-- (* default *)
-  TrivialExists.
+Theorem eval_or: binary_constructor_sound or Val.or.
+Proof.
+  red; intros until y; unfold or; case (or_match a b); intros; InvEval.
+  - rewrite Val.or_commut. apply eval_orimm; auto.
+  - apply eval_orimm; auto.
+  - TrivialExists.
 Qed.
 
 Theorem eval_xorimm:
   forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)).
 Proof.
   intros; red; intros until x. unfold xorimm.
+
   predSpec Int.eq Int.eq_spec n Int.zero.
   intros. exists x; split. auto.
   destruct x; simpl; auto. subst n. rewrite Int.xor_zero. auto.
-  destruct (xorimm_match a); intros; InvEval.
-- TrivialExists. simpl. rewrite Int.xor_commut; auto.
-- rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. TrivialExists.
-- rewrite Val.not_xor. rewrite Val.xor_assoc.
-  rewrite (Val.xor_commut (Vint Int.mone)). TrivialExists.
-- TrivialExists.
+
+  intros. destruct (xorimm_match a); intros; InvEval.
+  - TrivialExists. simpl. rewrite Int.xor_commut; auto.
+  - subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. 
+    predSpec Int.eq Int.eq_spec (Int.xor n2 n) Int.zero.
+    + exists v1; split; auto. destruct v1; simpl; auto. rewrite H0, Int.xor_zero; auto.  
+    + TrivialExists.
+  - TrivialExists.
 Qed.
 
 Theorem eval_xor: binary_constructor_sound xor Val.xor.
 Proof.
   red; intros until y; unfold xor; case (xor_match a b); intros; InvEval.
-- rewrite Val.xor_commut. apply eval_xorimm; auto.
-- apply eval_xorimm; auto.
-- TrivialExists.
+  - rewrite Val.xor_commut. apply eval_xorimm; auto.
+  - apply eval_xorimm; auto.
+  - TrivialExists.
+Qed.
+
+Theorem eval_notint: unary_constructor_sound notint Val.notint.
+Proof.
+  unfold notint; red; intros. rewrite Val.not_xor. apply eval_xorimm; auto.
 Qed.
 
 Theorem eval_divs_base:
   forall le a b x y z,
-  eval_expr ge sp e m le a x ->
-  eval_expr ge sp e m le b y ->
-  Val.divs x y = Some z ->
-  exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
+    eval_expr ge sp e m le a x ->
+    eval_expr ge sp e m le b y ->
+    Val.divs x y = Some z ->
+    exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divs_base. exists z; split. EvalOp. auto.
+  intros. unfold divs_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
-Theorem eval_divu_base:
+Theorem eval_mods_base:
   forall le a b x y z,
-  eval_expr ge sp e m le a x ->
-  eval_expr ge sp e m le b y ->
-  Val.divu x y = Some z ->
-  exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
+    eval_expr ge sp e m le a x ->
+    eval_expr ge sp e m le b y ->
+    Val.mods x y = Some z ->
+    exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divu_base. exists z; split. EvalOp. auto.
+  intros. unfold mods_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
-Theorem eval_mods_base:
+Theorem eval_divu_base:
   forall le a b x y z,
-  eval_expr ge sp e m le a x ->
-  eval_expr ge sp e m le b y ->
-  Val.mods x y = Some z ->
-  exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
+    eval_expr ge sp e m le a x ->
+    eval_expr ge sp e m le b y ->
+    Val.divu x y = Some z ->
+    exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold mods_base. exists z; split. EvalOp. auto.
+  intros. unfold divu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_modu_base:
   forall le a b x y z,
-  eval_expr ge sp e m le a x ->
-  eval_expr ge sp e m le b y ->
-  Val.modu x y = Some z ->
-  exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
+    eval_expr ge sp e m le a x ->
+    eval_expr ge sp e m le b y ->
+    Val.modu x y = Some z ->
+    exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold modu_base. exists z; split. EvalOp. auto.
+  intros. unfold modu_base. exists z; split. EvalOp.
+  2: apply Val.lessdef_refl.
+  cbn.
+  rewrite H1.
+  cbn.
+  trivial.
 Qed.
 
 Theorem eval_shrximm:
   forall le a n x z,
-  eval_expr ge sp e m le a x ->
-  Val.shrx x (Vint n) = Some z ->
-  exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v.
+    eval_expr ge sp e m le a x ->
+    Val.shrx x (Vint n) = Some z ->
+    exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v.
 Proof.
   intros. unfold shrximm.
   predSpec Int.eq Int.eq_spec n Int.zero.
@@ -548,28 +574,33 @@ Proof.
   replace (Int.shrx i Int.zero) with i. auto.
   unfold Int.shrx, Int.divs. rewrite Int.shl_zero.
   change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
-  econstructor; split. EvalOp. auto.
+  econstructor; split. EvalOp.
+  cbn.
+  rewrite H0.
+  cbn.
+  reflexivity.
+  apply Val.lessdef_refl.
 Qed.
 
 Theorem eval_shl: binary_constructor_sound shl Val.shl.
 Proof.
   red; intros until y; unfold shl; case (shl_match b); intros.
-- InvEval. apply eval_shlimm; auto.
-- TrivialExists.
+  InvEval. apply eval_shlimm; auto.
+  TrivialExists.
 Qed.
 
 Theorem eval_shr: binary_constructor_sound shr Val.shr.
 Proof.
   red; intros until y; unfold shr; case (shr_match b); intros.
-- InvEval. apply eval_shrimm; auto.
-- TrivialExists.
+  InvEval. apply eval_shrimm; auto.
+  TrivialExists.
 Qed.
 
 Theorem eval_shru: binary_constructor_sound shru Val.shru.
 Proof.
   red; intros until y; unfold shru; case (shru_match b); intros.
-- InvEval. apply eval_shruimm; auto.
-- TrivialExists.
+  InvEval. apply eval_shruimm; auto.
+  TrivialExists.
 Qed.
 
 Theorem eval_negf: unary_constructor_sound negf Val.negf.
@@ -642,51 +673,41 @@ Lemma eval_compimm:
 Proof.
   intros until x.
   unfold compimm; case (compimm_match c a); intros.
-- (* constant *)
-  InvEval. rewrite sem_int. TrivialExists. simpl. destruct (intsem c0 n1 n2); auto.
-- (* eq cmp *)
-  InvEval. inv H. simpl in H5. inv H5.
-  destruct (Int.eq_dec n2 Int.zero). subst n2. TrivialExists.
-  simpl. rewrite eval_negate_condition.
-  destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
-  rewrite sem_undef; auto.
-  destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
-  simpl. destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
-  rewrite sem_undef; auto.
-  exists (Vint Int.zero); split. EvalOp.
-  destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; rewrite sem_eq; rewrite Int.eq_false; auto.
-  rewrite sem_undef; auto.
-- (* ne cmp *)
-  InvEval. inv H. simpl in H5. inv H5.
-  destruct (Int.eq_dec n2 Int.zero). subst n2. TrivialExists.
-  simpl. destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
-  rewrite sem_undef; auto.
-  destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
-  simpl. rewrite eval_negate_condition. destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
-  rewrite sem_undef; auto.
-  exists (Vint Int.one); split. EvalOp.
-  destruct (eval_condition c0 vl m); simpl.
-  unfold Vtrue, Vfalse. destruct b; rewrite sem_ne; rewrite Int.eq_false; auto.
-  rewrite sem_undef; auto.
-- (* eq andimm *)
-  destruct (Int.eq_dec n2 Int.zero). InvEval; subst.
-  econstructor; split. EvalOp. simpl; eauto.
-  destruct v1; simpl; try (rewrite sem_undef; auto). rewrite sem_eq.
-  destruct (Int.eq (Int.and i n1) Int.zero); auto.
-  TrivialExists. simpl. rewrite sem_default. auto.
-- (* ne andimm *)
-  destruct (Int.eq_dec n2 Int.zero). InvEval; subst.
-  econstructor; split. EvalOp. simpl; eauto.
-  destruct v1; simpl; try (rewrite sem_undef; auto). rewrite sem_ne.
-  destruct (Int.eq (Int.and i n1) Int.zero); auto.
-  TrivialExists. simpl. rewrite sem_default. auto.
-- (* default *)
-  TrivialExists. simpl. rewrite sem_default. auto.
+(* constant *)
+  - InvEval. rewrite sem_int. TrivialExists. simpl. destruct (intsem c0 n1 n2); auto.
+(* eq cmp *)
+  - InvEval. inv H. simpl in H5. inv H5.
+    destruct (Int.eq_dec n2 Int.zero).
+    + subst n2. TrivialExists.
+      simpl. rewrite eval_negate_condition.
+      destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+      rewrite sem_undef; auto.
+    + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+      simpl. destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+      rewrite sem_undef; auto.
+      exists (Vint Int.zero); split. EvalOp.
+      destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; rewrite sem_eq; rewrite Int.eq_false; auto.
+      rewrite sem_undef; auto.
+(* ne cmp *)
+  - InvEval. inv H. simpl in H5. inv H5.
+    destruct (Int.eq_dec n2 Int.zero).
+    + subst n2. TrivialExists.
+      simpl. destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+      rewrite sem_undef; auto.
+    + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+      simpl. rewrite eval_negate_condition. destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+      rewrite sem_undef; auto.
+      exists (Vint Int.one); split. EvalOp.
+      destruct (eval_condition c0 vl m); simpl.
+      unfold Vtrue, Vfalse. destruct b; rewrite sem_ne; rewrite Int.eq_false; auto.
+      rewrite sem_undef; auto.
+(* default *)
+  - TrivialExists. simpl. rewrite sem_default. auto.
 Qed.
 
 Hypothesis sem_swap:
@@ -744,11 +765,8 @@ Qed.
 
 Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
 Proof.
-  red; intros until x. unfold cast8unsigned. destruct (cast8unsigned_match a); intros; InvEval.
-  TrivialExists.
-  subst. rewrite Val.zero_ext_and. rewrite Val.and_assoc.
-  rewrite Int.and_commut. apply eval_andimm; auto. lia.
-  TrivialExists.
+  red; intros until x. unfold cast8unsigned.
+  rewrite Val.zero_ext_and. apply eval_andimm. lia.
 Qed.
 
 Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
@@ -760,47 +778,8 @@ Qed.
 
 Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
 Proof.
-  red; intros until x. unfold cast16unsigned. destruct (cast16unsigned_match a); intros; InvEval.
-  TrivialExists.
-  subst. rewrite Val.zero_ext_and. rewrite Val.and_assoc.
-  rewrite Int.and_commut. apply eval_andimm; auto. lia.
-  TrivialExists.
-Qed.
-
-Theorem eval_select: 
-  forall le ty cond al vl a1 v1 a2 v2 a b,
-  select ty cond al a1 a2 = Some a ->
-  eval_exprlist ge sp e m le al vl ->
-  eval_expr ge sp e m le a1 v1 ->
-  eval_expr ge sp e m le a2 v2 ->
-  eval_condition cond vl m = Some b ->
-  exists v, 
-     eval_expr ge sp e m le a v
-  /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
-Proof.
-  unfold select; intros. 
-  destruct (select_supported ty); try discriminate.
-  destruct (select_swap cond); inv H.
-- exists (Val.select (Some (negb b)) v2 v1 ty); split.
-  apply eval_Eop with (v2 :: v1 :: vl).
-  constructor; auto. constructor; auto.
-  simpl. rewrite eval_negate_condition, H3; auto.
-  destruct b; auto.
-- exists (Val.select (Some b) v1 v2 ty); split.
-  apply eval_Eop with (v1 :: v2 :: vl).
-  constructor; auto. constructor; auto.
-  simpl. rewrite H3; auto.
-  auto.
-Qed.
-
-Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
-Proof.
-  red; intros. unfold singleoffloat. TrivialExists.
-Qed.
-
-Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
-Proof.
-  red; intros. unfold floatofsingle. TrivialExists.
+  red; intros until x. unfold cast8unsigned.
+  rewrite Val.zero_ext_and. apply eval_andimm. lia.
 Qed.
 
 Theorem eval_intoffloat:
@@ -810,17 +789,7 @@ Theorem eval_intoffloat:
   exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intoffloat. TrivialExists.
-Qed.
-
-Theorem eval_floatofint:
-  forall le a x y,
-  eval_expr ge sp e m le a x ->
-  Val.floatofint x = Some y ->
-  exists v, eval_expr ge sp e m le (floatofint a) v /\ Val.lessdef y v.
-Proof.
-  intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
-  TrivialExists.
-  TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuoffloat:
@@ -829,42 +798,8 @@ Theorem eval_intuoffloat:
   Val.intuoffloat x = Some y ->
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
-  intros. destruct x; simpl in H0; try discriminate.
-  destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
-  exists (Vint n); split; auto. unfold intuoffloat.
-  destruct Archi.splitlong.
-- set (im := Int.repr Int.half_modulus).
-  set (fm := Float.of_intu im).
-  assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
-    constructor. auto.
-  assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)).
-    constructor. auto.
-  econstructor. eauto.
-  econstructor. instantiate (1 := Vfloat fm). EvalOp.
-  eapply eval_Econdition with (va := Float.cmp Clt f fm).
-  eauto with evalexpr.
-  destruct (Float.cmp Clt f fm) eqn:?.
-  exploit Float.to_intu_to_int_1; eauto. intro EQ.
-  EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.to_intu_to_int_2; eauto.
-  change Float.ox8000_0000 with im. fold fm. intro EQ.
-  set (t2 := subf (Eletvar (S O)) (Eletvar O)).
-  set (t3 := intoffloat t2).
-  exploit (eval_subf (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f) (Eletvar O)); eauto.
-  fold t2. intros [v2 [A2 B2]]. simpl in B2. inv B2.
-  exploit (eval_addimm Float.ox8000_0000 (Vfloat fm :: Vfloat f :: le) t3).
-    unfold t3. unfold intoffloat. EvalOp. simpl. rewrite EQ. simpl. eauto.
-  intros [v4 [A4 B4]]. simpl in B4. inv B4.
-  rewrite Int.sub_add_opp in A4. rewrite Int.add_assoc in A4.
-  rewrite (Int.add_commut (Int.neg im)) in A4.
-  rewrite Int.add_neg_zero in A4.
-  rewrite Int.add_zero in A4.
-  auto.
-- apply Float.to_intu_to_long in Heqo. repeat econstructor. eauto.
-  simpl. rewrite Heqo; reflexivity.
-  simpl. unfold Int64.loword. rewrite Int64.unsigned_repr, Int.repr_unsigned; auto.
-  assert (Int.modulus < Int64.max_unsigned) by reflexivity. 
-  generalize (Int.unsigned_range n); lia.
+  intros; unfold intuoffloat. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_floatofintu:
@@ -874,27 +809,21 @@ Theorem eval_floatofintu:
   exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
-- InvEval. TrivialExists.
-- destruct x; simpl in H0; try discriminate. inv H0.
-  exists (Vfloat (Float.of_intu i)); split; auto.
-  destruct Archi.splitlong.
-+ econstructor. eauto.
-  set (fm := Float.of_intu Float.ox8000_0000).
-  assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
-    constructor. auto.
-  eapply eval_Econdition with (va := Int.ltu i Float.ox8000_0000).
-  eauto with evalexpr.
-  destruct (Int.ltu i Float.ox8000_0000) eqn:?.
-  rewrite Float.of_intu_of_int_1; auto.
-  unfold floatofint. EvalOp.
-  exploit (eval_addimm (Int.neg Float.ox8000_0000) (Vint i :: le) (Eletvar 0)); eauto.
-  simpl. intros [v [A B]]. inv B.
-  unfold addf. EvalOp.
-  constructor. unfold floatofint. EvalOp. simpl; eauto.
-  constructor. EvalOp. simpl; eauto. constructor. simpl; eauto.
-  fold fm. rewrite Float.of_intu_of_int_2; auto.
-  rewrite Int.sub_add_opp. auto.
-+ rewrite Float.of_intu_of_long. repeat econstructor. eauto. reflexivity. 
+  InvEval. simpl in H0. TrivialExists.
+  TrivialExists.
+  cbn. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_floatofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.floatofint x = Some y ->
+  exists v, eval_expr ge sp e m le (floatofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold floatofint. case (floatofint_match a); intros.
+  InvEval. simpl in H0. TrivialExists.
+  TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intofsingle:
@@ -904,6 +833,7 @@ Theorem eval_intofsingle:
   exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
 Proof.
   intros; unfold intofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofint:
@@ -912,9 +842,8 @@ Theorem eval_singleofint:
   Val.singleofint x = Some y ->
   exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
 Proof.
-  intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
-  TrivialExists.
-  TrivialExists.
+  intros; unfold singleofint; TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_intuofsingle:
@@ -923,12 +852,8 @@ Theorem eval_intuofsingle:
   Val.intuofsingle x = Some y ->
   exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
 Proof.
-  intros. destruct x; simpl in H0; try discriminate.
-  destruct (Float32.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
-  unfold intuofsingle. apply eval_intuoffloat with (Vfloat (Float.of_single f)).
-  unfold floatofsingle. EvalOp.
-  simpl. change (Float.of_single f) with (Float32.to_double f).
-  erewrite Float32.to_intu_double; eauto. auto.
+  intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
 Qed.
 
 Theorem eval_singleofintu:
@@ -937,14 +862,146 @@ Theorem eval_singleofintu:
   Val.singleofintu x = Some y ->
   exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
 Proof.
-  intros until y; unfold singleofintu. case (singleofintu_match a); intros.
-  InvEval. TrivialExists.
-  destruct x; simpl in H0; try discriminate. inv H0.
-  exploit eval_floatofintu. eauto. simpl. reflexivity.
-  intros (v & A & B).
-  exists (Val.singleoffloat v); split.
-  unfold singleoffloat; EvalOp.
-  inv B; simpl. rewrite Float32.of_intu_double. auto.
+  intros; unfold intuofsingle. TrivialExists.
+  cbn. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
+Proof.
+  red; intros. unfold singleoffloat. TrivialExists.
+Qed.
+
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
+Lemma mod_small_negative:
+  forall a modulus,
+    modulus > 0 -> -modulus < a < 0 -> a mod modulus = a + modulus.
+Proof.
+  intros.
+  replace (a mod modulus) with ((a + modulus) mod modulus).
+  apply Z.mod_small.
+  lia.
+  rewrite <- Zplus_mod_idemp_r.
+  rewrite Z.mod_same by lia.
+  rewrite Z.add_0_r.
+  reflexivity.
+Qed.
+
+Remark normalize_low_long: forall
+  (PTR64 : Archi.ptr64 = true) v1,
+    Val.loword (Val.normalize (Val.longofint v1) Tlong) = Val.normalize v1 Tint.
+Proof.
+  intros.
+  destruct v1; cbn; try rewrite PTR64; trivial.
+  f_equal.
+  unfold Int64.loword.
+  unfold Int.signed.
+  destruct zlt.
+  { rewrite Int64.int_unsigned_repr.
+    apply Int.repr_unsigned.
+  }
+  pose proof (Int.unsigned_range i).
+  rewrite Int64.unsigned_repr_eq.
+  replace ((Int.unsigned i - Int.modulus) mod Int64.modulus)
+    with (Int64.modulus + Int.unsigned i - Int.modulus).
+  {
+    rewrite <- (Int.repr_unsigned i) at 2.
+    apply Int.eqm_samerepr.
+    unfold Int.eqm, eqmod.
+    change Int.modulus with 4294967296 in *.
+    change Int64.modulus with 18446744073709551616 in *.
+    exists 4294967295.
+    lia.
+  }
+  { rewrite mod_small_negative.
+    lia.
+    constructor.
+    constructor.
+    change Int.modulus with 4294967296 in *.
+    change Int.half_modulus with 2147483648 in *.
+    change Int64.modulus with 18446744073709551616 in *.
+    lia.
+    lia.
+  }
+Qed.
+
+Lemma same_expr_pure_correct:
+  forall le a1 a2 v1 v2
+    (PURE : same_expr_pure a1 a2 = true)
+    (EVAL1 : eval_expr ge sp e m le a1 v1)
+    (EVAL2 : eval_expr ge sp e m le a2 v2),
+    v1 = v2.
+Proof.
+  intros.
+  destruct a1; destruct a2; cbn in *; try discriminate.
+  inv EVAL1. inv EVAL2.
+  destruct (ident_eq i i0); congruence.
+Qed.
+  
+Theorem eval_select:
+  forall le ty cond al vl a1 v1 a2 v2 a b,
+  select ty cond al a1 a2 = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  eval_expr ge sp e m le a1 v1 ->
+  eval_expr ge sp e m le a2 v2 ->
+  eval_condition cond vl m = Some b ->
+  exists v, 
+     eval_expr ge sp e m le a v
+  /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
+Proof.
+  unfold select; intros.
+  pose proof (same_expr_pure_correct le a1 a2 v1 v2) as PURE.
+  destruct (same_expr_pure a1 a2).
+  { rewrite <- PURE by auto.
+    inv H.
+    exists v1. split. assumption.
+    unfold Val.select.
+    destruct b; apply Val.lessdef_normalize.
+  }
+  clear PURE.
+  destruct Archi.ptr64 eqn:PTR64.
+  2: discriminate.
+  destruct ty; cbn in *; try discriminate.
+  - (* Tint *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true. apply normalize_low_long; assumption.
+      * rewrite ExtValues.select01_long_false. apply normalize_low_long; assumption.
+        
+  - (* Tfloat *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true.
+        apply ExtValues.float_bits_normalize.
+      * rewrite ExtValues.select01_long_false.
+        apply ExtValues.float_bits_normalize.
+
+  - (* Tlong *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true. reflexivity.
+      * rewrite ExtValues.select01_long_false. reflexivity.
+
+  - (* Tsingle *)
+    inv H. TrivialExists.
+    + cbn. repeat econstructor; eassumption.
+    + cbn. f_equal. rewrite ExtValues.normalize_select01.
+      rewrite H3. destruct b.
+      * rewrite ExtValues.select01_long_true.
+        rewrite normalize_low_long by assumption.
+        apply ExtValues.single_bits_normalize.
+      * rewrite ExtValues.select01_long_false.
+        rewrite normalize_low_long by assumption.
+        apply ExtValues.single_bits_normalize.
 Qed.
 
 Theorem eval_addressing:
@@ -957,62 +1014,43 @@ Theorem eval_addressing:
     eval_addressing ge sp mode vl = Some v
   end.
 Proof.
-  intros until ofs.
-  assert (A: v = Vptr b ofs -> eval_addressing ge sp (Aindexed 0) (v :: nil) = Some v).
-  { intros. subst v. unfold eval_addressing.
-    destruct Archi.ptr64 eqn:SF; simpl; rewrite SF; rewrite Ptrofs.add_zero; auto. }
-  assert (D: forall a,
-             eval_expr ge sp e m le a v ->
-             v = Vptr b ofs ->
-             exists vl, eval_exprlist ge sp e m le (a ::: Enil) vl
-                     /\ eval_addressing ge sp (Aindexed 0) vl = Some v).
-  { intros. exists (v :: nil); split. constructor; auto. constructor. auto. }
-  unfold addressing; case (addressing_match a); intros.
-- destruct (negb Archi.ptr64 && addressing_valid addr) eqn:E.
-+ inv H. InvBooleans. apply negb_true_iff in H. unfold eval_addressing; rewrite H.
-  exists vl; auto.
-+ apply D; auto.
-- destruct (Archi.ptr64 && addressing_valid addr) eqn:E.
-+ inv H. InvBooleans. unfold eval_addressing; rewrite H.
-  exists vl; auto.
-+ apply D; auto.
-- apply D; auto.
-Qed.
-
-Theorem eval_builtin_arg_addr:
-  forall addr al vl v,
-  eval_exprlist ge sp e m nil al vl ->
-  Op.eval_addressing ge sp addr vl = Some v ->
-  CminorSel.eval_builtin_arg ge sp e m (builtin_arg_addr addr al) v.
-Proof.
-  intros until v. unfold builtin_arg_addr; case (builtin_arg_addr_match addr al); intros; InvEval.
-- set (v2 := if Archi.ptr64 then Vlong (Int64.repr n) else Vint (Int.repr n)).
-  assert (EQ: v = if Archi.ptr64 then Val.addl v1 v2 else Val.add v1 v2).
-  { unfold Op.eval_addressing in H0; unfold v2; destruct Archi.ptr64; simpl in H0; inv H0; auto. }
-  rewrite EQ. constructor. constructor; auto. unfold v2; destruct Archi.ptr64; constructor.
-- rewrite eval_addressing_Aglobal in H0. inv H0. constructor.
-- rewrite eval_addressing_Ainstack in H0. inv H0. constructor.
-- constructor. econstructor. eauto. rewrite eval_Olea_ptr. auto. 
-Qed. 
+  intros until v. unfold addressing; case (addressing_match a); intros; InvEval.
+  - exists (@nil val);  split. eauto with evalexpr. simpl. auto.
+  - destruct (Archi.pic_code tt).
+  + exists (Vptr b ofs0 :: nil); split.
+    constructor. EvalOp. simpl. congruence. constructor. simpl. rewrite Ptrofs.add_zero. congruence.
+  + exists (@nil val); split. constructor. simpl; auto.
+  - exists (v1 :: nil); split. eauto with evalexpr. simpl.
+    destruct v1; simpl in H; try discriminate. destruct Archi.ptr64 eqn:SF; inv H. 
+    simpl. auto.
+  - exists (v1 :: nil); split. eauto with evalexpr. simpl.
+    destruct v1; simpl in H; try discriminate. destruct Archi.ptr64 eqn:SF; inv H. 
+    simpl. auto.
+  - exists (v :: nil);  split. eauto with evalexpr. subst. simpl. rewrite Ptrofs.add_zero; auto.
+Qed.
 
 Theorem eval_builtin_arg:
   forall a v,
   eval_expr ge sp e m nil a v ->
   CminorSel.eval_builtin_arg ge sp e m (builtin_arg a) v.
 Proof.
-  intros until v. unfold builtin_arg; case (builtin_arg_match a); intros; InvEval.
-- constructor.
-- constructor.
-- destruct Archi.ptr64 eqn:SF. 
+  intros until v. unfold builtin_arg; case (builtin_arg_match a); intros.
+- InvEval. constructor.
+- InvEval. constructor.
+- InvEval. constructor.
+- InvEval. simpl in H5. inv H5. constructor.
+- InvEval. subst v. constructor; auto.
+- inv H. InvEval. simpl in H6; inv H6. constructor; auto.
+- destruct Archi.ptr64 eqn:SF.
 + constructor; auto.
-+ inv H. eapply eval_builtin_arg_addr. eauto. unfold Op.eval_addressing; rewrite SF; assumption.
-- destruct Archi.ptr64 eqn:SF. 
-+ inv H. eapply eval_builtin_arg_addr. eauto. unfold Op.eval_addressing; rewrite SF; assumption.
++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vint n) else Val.add v1 (Vint n)).
+  repeat constructor; auto.
+  rewrite SF; auto.
+- destruct Archi.ptr64 eqn:SF.
++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vlong n) else Val.add v1 (Vlong n)).
+  repeat constructor; auto.
+  rewrite SF; auto.
 + constructor; auto.
-- simpl in H5. inv H5. constructor.
-- constructor; auto.
-- inv H. InvEval. rewrite eval_addressing_Aglobal in H6. inv H6. constructor; auto.
-- inv H. InvEval. rewrite eval_addressing_Ainstack in H6. inv H6. constructor; auto.
 - constructor; auto.
 Qed.
 
@@ -1046,7 +1084,10 @@ Theorem eval_platform_builtin:
   platform_builtin_sem bf vl = Some v ->
   exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
 Proof.
-  intros. discriminate.
+  destruct bf; intros until le; intro Heval.
+  all: try (inversion Heval; subst a; clear Heval;
+       exists v; split; trivial;
+       repeat (try econstructor; try eassumption)).
 Qed.
 
 End CMCONSTR.
diff --git a/verilog/Stacklayout.v b/verilog/Stacklayout.v
index 002b86bf..25f02aab 100644
--- a/verilog/Stacklayout.v
+++ b/verilog/Stacklayout.v
@@ -15,32 +15,31 @@
 Require Import Coqlib.
 Require Import AST Memory Separation.
 Require Import Bounds.
-Require Archi.
 
 Local Open Scope sep_scope.
 
 (** The general shape of activation records is as follows,
   from bottom (lowest offsets) to top:
-- For the Win64 ABI: 32 reserved bytes
 - Space for outgoing arguments to function calls.
 - Back link to parent frame
-- Saved values of integer callee-save registers used by the function.
-- Saved values of float callee-save registers used by the function.
+- Return address
+- Saved values of callee-save registers used by the function.
 - Local stack slots.
-- Space for the stack-allocated data declared in Cminor
-- Return address.
+- Space for the stack-allocated data declared in Cminor.
+
+The stack pointer is kept 16-aligned.
 *)
 
-Definition fe_ofs_arg := if Archi.win64 then 32 else 0.
+Definition fe_ofs_arg := 0.
 
 Definition make_env (b: bounds) : frame_env :=
   let w := if Archi.ptr64 then 8 else 4 in
-  let olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w in  (* back link *)
-  let ocs := olink + w in                           (* callee-saves *)
+  let olink := align (4 * b.(bound_outgoing)) w in  (* back link *)
+  let oretaddr := olink + w in                      (* return address *)
+  let ocs := oretaddr + w in                        (* callee-saves *)
   let ol :=  align (size_callee_save_area b ocs) 8 in (* locals *)
   let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
-  let oretaddr := align (ostkdata + b.(bound_stack_data)) w in (* return address *)
-  let sz := oretaddr + w in (* total size *)
+  let sz := align (ostkdata + b.(bound_stack_data)) 16 in
   {| fe_size := sz;
      fe_ofs_link := olink;
      fe_ofs_retaddr := oretaddr;
@@ -63,42 +62,39 @@ Proof.
 Local Opaque Z.add Z.mul sepconj range.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
-  set (ocs := olink + w).
+  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (oretaddr := olink + w).
+  set (ocs := oretaddr + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
-  set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
   replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
-  assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; lia).
   assert (0 <= 4 * b.(bound_outgoing)) by lia.
-  assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
-  assert (olink + w <= ocs) by (unfold ocs; lia).
-  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+  assert (olink + w <= oretaddr) by (unfold oretaddr; lia).
+  assert (oretaddr + w <= ocs) by (unfold ocs; lia).
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
   assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
-  assert (ostkdata + bound_stack_data b <= oretaddr) by (apply align_le; lia).
 (* Reorder as:
      outgoing
      back link
+     retaddr
      callee-save
-     local
-     retaddr *)
+     local *)
   rewrite sep_swap12.
   rewrite sep_swap23.
-  rewrite sep_swap45.
   rewrite sep_swap34.
+  rewrite sep_swap45.
 (* Apply range_split and range_split2 repeatedly *)
-  apply range_drop_left with 0. lia. 
-  apply range_split_2. fold olink. lia. lia.
+  unfold fe_ofs_arg.
+  apply range_split_2. fold olink; lia. lia.
+  apply range_split. lia.
   apply range_split. lia.
   apply range_split_2. fold ol. lia. lia.
   apply range_drop_right with ostkdata. lia.
-  rewrite sep_swap.
-  apply range_drop_left with (ostkdata + bound_stack_data b). lia.
-  rewrite sep_swap.
-  exact H.
+  eapply sep_drop2. eexact H.
 Qed.
 
 Lemma frame_env_range:
@@ -108,22 +104,21 @@ Lemma frame_env_range:
 Proof.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
-  set (ocs := olink + w).
+  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (oretaddr := olink + w).
+  set (ocs := oretaddr + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
-  set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
   generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
-  assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; lia).
   assert (0 <= 4 * b.(bound_outgoing)) by lia.
-  assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
-  assert (olink + w <= ocs) by (unfold ocs; lia).
-  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
+  assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+  assert (olink + w <= oretaddr) by (unfold oretaddr; lia).
+  assert (oretaddr + w <= ocs) by (unfold ocs; lia).
+  assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). 
   assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
   assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
-  assert (ostkdata + bound_stack_data b <= oretaddr) by (apply align_le; lia).
-  split. lia. lia.
+  split. lia. apply align_le. lia. 
 Qed.
 
 Lemma frame_env_aligned:
@@ -137,16 +132,16 @@ Lemma frame_env_aligned:
 Proof.
   intros; simpl.
   set (w := if Archi.ptr64 then 8 else 4).
-  set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
-  set (ocs := olink + w).
+  set (olink := align (4 * b.(bound_outgoing)) w).
+  set (oretaddr := olink + w).
+  set (ocs := oretaddr + w).
   set (ol :=  align (size_callee_save_area b ocs) 8).
   set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
-  set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
   assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
   replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto).
-  split. exists (fe_ofs_arg / 8). unfold fe_ofs_arg; destruct Archi.win64; reflexivity.
+  split. apply Z.divide_0_r.
   split. apply align_divides; lia.
   split. apply align_divides; lia.
   split. apply align_divides; lia.
-  apply align_divides; lia.
+  apply Z.divide_add_r. apply align_divides; lia. apply Z.divide_refl.
 Qed.
diff --git a/verilog/TargetPrinter.ml b/verilog/TargetPrinter.ml
index 00e70f65..aab6b9b8 100644
--- a/verilog/TargetPrinter.ml
+++ b/verilog/TargetPrinter.ml
@@ -3,14 +3,19 @@
 (*              The Compcert verified compiler                         *)
 (*                                                                     *)
 (*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*           Prashanth Mundkur, SRI International                      *)
 (*                                                                     *)
 (*  Copyright Institut National de Recherche en Informatique et en     *)
 (*  Automatique.  All rights reserved.  This file is distributed       *)
 (*  under the terms of the INRIA Non-Commercial License Agreement.     *)
 (*                                                                     *)
+(*  The contributions by Prashanth Mundkur are reused and adapted      *)
+(*  under the terms of a Contributor License Agreement between         *)
+(*  SRI International and INRIA.                                       *)
+(*                                                                     *)
 (* *********************************************************************)
 
-(* Printing x86-64 assembly code in asm syntax *)
+(* Printing RISC-V assembly code in asm syntax *)
 
 open Printf
 open Camlcoq
@@ -21,1003 +26,642 @@ open AisAnnot
 open PrintAsmaux
 open Fileinfo
 
-module StringSet = Set.Make(String)
-
-(* Basic printing functions used in definition of the systems *)
-
-let int64_reg_name = function
-  | RAX -> "%rax"  | RBX -> "%rbx"  | RCX -> "%rcx"  | RDX -> "%rdx"
-  | RSI -> "%rsi"  | RDI -> "%rdi"  | RBP -> "%rbp"  | RSP -> "%rsp"
-  | R8  -> "%r8"   | R9  -> "%r9"   | R10 -> "%r10"  | R11 -> "%r11"
-  | R12 -> "%r12"  | R13 -> "%r13"  | R14 -> "%r14"  | R15 -> "%r15"
-
-let int32_reg_name = function
-  | RAX -> "%eax"  | RBX -> "%ebx"  | RCX -> "%ecx"  | RDX -> "%edx"
-  | RSI -> "%esi"  | RDI -> "%edi"  | RBP -> "%ebp"  | RSP -> "%esp"
-  | R8  -> "%r8d"  | R9  -> "%r9d"  | R10 -> "%r10d" | R11 -> "%r11d"
-  | R12 -> "%r12d" | R13 -> "%r13d" | R14 -> "%r14d" | R15 -> "%r15d"
-
-let int8_reg_name = function
-  | RAX -> "%al"  | RBX -> "%bl"  | RCX -> "%cl"  | RDX -> "%dl"
-  | RSI -> "%sil" | RDI -> "%dil" | RBP -> "%bpl" | RSP -> "%spl"
-  | R8  -> "%r8b"  | R9  -> "%r9b"  | R10 -> "%r10b" | R11 -> "%r11b"
-  | R12 -> "%r12b" | R13 -> "%r13b" | R14 -> "%r14b" | R15 -> "%r15b"
-
-let int16_reg_name = function
-  | RAX -> "%ax"  | RBX -> "%bx"  | RCX -> "%cx"  | RDX -> "%dx"
-  | RSI -> "%si"  | RDI -> "%di"  | RBP -> "%bp"  | RSP -> "%sp"
-  | R8  -> "%r8w"  | R9  -> "%r9w"  | R10 -> "%r10w" | R11 -> "%r11w"
-  | R12 -> "%r12w" | R13 -> "%r13w" | R14 -> "%r14w" | R15 -> "%r15w"
-
-let float_reg_name = function
-  | XMM0 -> "%xmm0"  | XMM1 -> "%xmm1"  | XMM2 -> "%xmm2"  | XMM3 -> "%xmm3"
-  | XMM4 -> "%xmm4"  | XMM5 -> "%xmm5"  | XMM6 -> "%xmm6"  | XMM7 -> "%xmm7"
-  | XMM8 -> "%xmm8"  | XMM9 -> "%xmm9"  | XMM10 -> "%xmm10" | XMM11 -> "%xmm11"
-  | XMM12 -> "%xmm12" | XMM13 -> "%xmm13" | XMM14 -> "%xmm14" | XMM15 -> "%xmm15"
-
-let ireg8 oc r = output_string oc (int8_reg_name r)
-let ireg16 oc r = output_string oc (int16_reg_name r)
-let ireg32 oc r = output_string oc (int32_reg_name r)
-let ireg64 oc r = output_string oc (int64_reg_name r)
-let ireg = if Archi.ptr64 then ireg64 else ireg32
-let freg oc r = output_string oc (float_reg_name r)
-
-let preg_asm oc ty = function
-  | IR r -> if ty = Tlong then ireg64 oc r else ireg32 oc r
-  | FR r -> freg oc r
-  | _    -> assert false
-
-let preg_annot = function
-  | IR r -> if Archi.ptr64 then int64_reg_name r else int32_reg_name r
-  | FR r -> float_reg_name r
-  | _ -> assert false
-
-let ais_int64_reg_name = function
-  | RAX -> "rax"  | RBX -> "rbx"  | RCX -> "rcx"  | RDX -> "rdx"
-  | RSI -> "rsi"  | RDI -> "rdi"  | RBP -> "rbp"  | RSP -> "rsp"
-  | R8  -> "r8"   | R9  -> "r9"   | R10 -> "r10"  | R11 -> "r11"
-  | R12 -> "r12"  | R13 -> "r13"  | R14 -> "r14"  | R15 -> "r15"
-
-let ais_int32_reg_name = function
-  | RAX -> "eax"  | RBX -> "ebx"  | RCX -> "ecx"  | RDX -> "edx"
-  | RSI -> "esi"  | RDI -> "edi"  | RBP -> "ebp"  | RSP -> "esp"
-  | R8  -> "r8d"  | R9  -> "r9d"  | R10 -> "r10d" | R11 -> "r11d"
-  | R12 -> "r12d" | R13 -> "r13d" | R14 -> "r14d" | R15 -> "r15d"
-
-let preg_ais_annot = function
-  | IR r -> if Archi.ptr64 then ais_int64_reg_name r else ais_int32_reg_name r
-  | FR r -> float_reg_name r
-  | _ -> assert false
-
-let z oc n = output_string oc (Z.to_string n)
-
-(* 32/64 bit dependencies *)
-
-let data_pointer = if Archi.ptr64 then ".quad" else ".long"
-
-(* Base-2 log of a Caml integer *)
-let rec log2 n =
-  assert (n > 0);
-  if n = 1 then 0 else 1 + log2 (n lsr 1)
-
-(* System dependent printer functions *)
-module type SYSTEM =
-    sig
-      val comment: string
-      val raw_symbol: out_channel -> string -> unit
-      val symbol: out_channel -> P.t -> unit
-      val label: out_channel -> int -> unit
-      val name_of_section: section_name -> string
-      val stack_alignment: int
-      val print_align: out_channel -> int -> unit
-      val print_mov_rs: out_channel -> ireg -> ident -> unit
-      val print_fun_info:  out_channel -> P.t -> unit
-      val print_var_info: out_channel -> P.t -> unit
-      val print_epilogue: out_channel -> unit
-      val print_comm_decl: out_channel -> P.t -> Z.t -> int -> unit
-      val print_lcomm_decl: out_channel -> P.t -> Z.t -> int -> unit
-    end
-
-(* Printer functions for ELF *)
-module ELF_System : SYSTEM =
+(* Module containing the printing functions *)
+
+module Target : TARGET =
   struct
 
-    (* The comment delimiter *)
-    let comment = "#"
+(* Basic printing functions *)
 
-    let raw_symbol oc s =
-      fprintf oc "%s" s
+    let comment = "#"
 
-    let symbol = elf_symbol
+    let symbol        = elf_symbol
+    let symbol_offset = elf_symbol_offset
+    let label         = elf_label
+
+    let print_label oc lbl = label oc (transl_label lbl)
+
+    let use_abi_name = false
+
+    let int_reg_num_name = function
+                     | X1  -> "x1"  | X2  -> "x2"  | X3  -> "x3"
+      | X4  -> "x4"  | X5  -> "x5"  | X6  -> "x6"  | X7  -> "x7"
+      | X8  -> "x8"  | X9  -> "x9"  | X10 -> "x10" | X11 -> "x11"
+      | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15"
+      | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19"
+      | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23"
+      | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27"
+      | X28 -> "x28" | X29 -> "x29" | X30 -> "x30" | X31 -> "x31"
+
+    let int_reg_abi_name = function
+                     | X1  -> "ra"  | X2  -> "sp"  | X3  -> "gp"
+      | X4  -> "tp"  | X5  -> "t0"  | X6  -> "t1"  | X7  -> "t2"
+      | X8  -> "s0"  | X9  -> "s1"  | X10 -> "a0"  | X11 -> "a1"
+      | X12 -> "a2"  | X13 -> "a3"  | X14 -> "a4"  | X15 -> "a5"
+      | X16 -> "a6"  | X17 -> "a7"  | X18 -> "s2"  | X19 -> "s3"
+      | X20 -> "s4"  | X21 -> "s5"  | X22 -> "s6"  | X23 -> "s7"
+      | X24 -> "s8"  | X25 -> "s9"  | X26 -> "s10" | X27 -> "s11"
+      | X28 -> "t3"  | X29 -> "t4"  | X30 -> "t5"  | X31 -> "t6"
+
+    let float_reg_num_name = function
+      | F0  -> "f0"  | F1  -> "f1"  | F2  -> "f2"  | F3  -> "f3"
+      | F4  -> "f4"  | F5  -> "f5"  | F6  -> "f6"  | F7  -> "f7"
+      | F8  -> "f8"  | F9  -> "f9"  | F10 -> "f10" | F11 -> "f11"
+      | F12 -> "f12" | F13 -> "f13" | F14 -> "f14" | F15 -> "f15"
+      | F16 -> "f16" | F17 -> "f17" | F18 -> "f18" | F19 -> "f19"
+      | F20 -> "f20" | F21 -> "f21" | F22 -> "f22" | F23 -> "f23"
+      | F24 -> "f24" | F25 -> "f25" | F26 -> "f26" | F27 -> "f27"
+      | F28 -> "f28" | F29 -> "f29" | F30 -> "f30" | F31 -> "f31"
+
+    let float_reg_abi_name = function
+      | F0  -> "ft0" | F1  -> "ft1" | F2  -> "ft2" | F3  -> "ft3"
+      | F4  -> "ft4" | F5  -> "ft5" | F6  -> "ft6" | F7  -> "ft7"
+      | F8  -> "fs0" | F9  -> "fs1" | F10 -> "fa0" | F11 -> "fa1"
+      | F12 -> "fa2" | F13 -> "fa3" | F14 -> "fa4" | F15 -> "fa5"
+      | F16 -> "fa6" | F17 -> "fa7" | F18 -> "fs2" | F19 -> "fs3"
+      | F20 -> "fs4" | F21 -> "fs5" | F22 -> "fs6" | F23 -> "fs7"
+      | F24 -> "fs8" | F25 -> "fs9" | F26 ->"fs10" | F27 -> "fs11"
+      | F28 -> "ft3" | F29 -> "ft4" | F30 -> "ft5" | F31 -> "ft6"
+
+    let int_reg_name   = if use_abi_name then int_reg_abi_name   else int_reg_num_name
+    let float_reg_name = if use_abi_name then float_reg_abi_name else float_reg_num_name
+
+    let ireg oc r = output_string oc (int_reg_name r)
+    let freg oc r = output_string oc (float_reg_name r)
+
+    let ireg0 oc = function
+      | X0 -> output_string oc "x0"
+      | X r -> ireg oc r
+
+    let preg_asm oc ty = function
+      | IR r -> ireg oc r
+      | FR r -> freg oc r
+      | _    -> assert false
+
+    let preg_annot = function
+      | IR r -> int_reg_name r
+      | FR r -> float_reg_name r
+      | _ -> assert false
 
-    let label = elf_label
+(* Names of sections *)
 
     let name_of_section = function
-      | Section_text -> ".text"
+      | Section_text         -> ".text"
       | Section_data(i, true) ->
          failwith "_Thread_local unsupported on this platform"
       | Section_data(i, false) | Section_small_data i ->
           variable_section ~sec:".data" ~bss:".bss" i
       | Section_const i | Section_small_const i ->
           variable_section ~sec:".section	.rodata" i
-      | Section_string -> ".section	.rodata"
-      | Section_literal -> ".section	.rodata.cst8,\"aM\",@progbits,8"
-      | Section_jumptable -> ".text"
+      | Section_string       -> ".section	.rodata"
+      | Section_literal      -> ".section	.rodata"
+      | Section_jumptable    -> ".section	.rodata"
+      | Section_debug_info _ -> ".section	.debug_info,\"\",%progbits"
+      | Section_debug_loc    -> ".section	.debug_loc,\"\",%progbits"
+      | Section_debug_abbrev -> ".section	.debug_abbrev,\"\",%progbits"
+      | Section_debug_line _ -> ".section	.debug_line,\"\",%progbits"
+      | Section_debug_ranges -> ".section	.debug_ranges,\"\",%progbits"
+      | Section_debug_str    -> ".section	.debug_str,\"MS\",%progbits,1"
       | Section_user(s, wr, ex) ->
-          sprintf ".section	\"%s\",\"a%s%s\",@progbits"
+          sprintf ".section	\"%s\",\"a%s%s\",%%progbits"
             s (if wr then "w" else "") (if ex then "x" else "")
-      | Section_debug_info _ -> ".section	.debug_info,\"\",@progbits"
-      | Section_debug_loc -> ".section	.debug_loc,\"\",@progbits"
-      | Section_debug_line _ -> ".section	.debug_line,\"\",@progbits"
-      | Section_debug_abbrev -> ".section	.debug_abbrev,\"\",@progbits"
-      | Section_debug_ranges -> ".section	.debug_ranges,\"\",@progbits"
-      | Section_debug_str -> ".section	.debug_str,\"MS\",@progbits,1"
       | Section_ais_annotation -> sprintf ".section	\"__compcert_ais_annotations\",\"\",@note"
 
-    let stack_alignment = 16
-
-    let print_align oc n =
-      fprintf oc "	.align	%d\n" n
-
-    let print_mov_rs oc rd id =
-      if Archi.ptr64
-      then fprintf oc "	movq    %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
-      else fprintf oc "	movl	$%a, %a\n" symbol id ireg32 rd
-
-    let print_fun_info = elf_print_fun_info
-
-    let print_var_info = elf_print_var_info
-
-    let print_atexit oc to_be_called =
-      if Archi.ptr64
-      then
-        begin
-          fprintf oc "	leaq	%s(%%rip), %%rdi\n" to_be_called;
-	  fprintf oc "	jmp	atexit\n"
-        end
-      else
-        begin
-          fprintf oc "	pushl	$%s\n" to_be_called;
-          fprintf oc "	call	atexit\n";
-          fprintf oc "	addl	$4, %%esp\n";
-          fprintf oc "	ret\n"
-        end
-
-    let x86_profiling_stub oc nr_items
-          profiling_id_table_name
-          profiling_counter_table_name =
-      if Archi.ptr64
-      then
-        begin
-	  fprintf oc "  leaq	%s(%%rip), %%rdx\n" profiling_counter_table_name;
-	  fprintf oc "  leaq	%s(%%rip), %%rsi\n" profiling_id_table_name;
-          fprintf oc "  movl	$%d, %%edi\n" nr_items;
-          fprintf oc "  jmp	%s\n" profiling_write_table_helper
-        end
-      else
-        begin
-          fprintf oc "	pushl	$%s\n" profiling_counter_table_name;
-	  fprintf oc "	pushl	$%s\n" profiling_id_table_name;
-	  fprintf oc "	pushl	$%d\n" nr_items;
-          fprintf oc "	call	%s\n" profiling_write_table_helper ;
-          fprintf oc "	addl	$12, %%esp\n";
-          fprintf oc "	ret\n"
-        end;;
-    
-    let print_epilogue oc =
-      print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) x86_profiling_stub oc;;
-
-    let print_comm_decl oc name sz al =
-      fprintf oc "	.comm	%a, %s, %d\n" symbol name (Z.to_string sz) al
-
-    let print_lcomm_decl oc name sz al =
-      fprintf oc "	.local	%a\n" symbol name;
-      print_comm_decl oc name sz al
-
-  end
-
-(* Printer functions for MacOS *)
-module MacOS_System : SYSTEM =
-  struct
-
-    (* The comment delimiter.
-        `##` instead of `#` to please the Clang assembler. *)
-    let comment = "##"
-
-    let raw_symbol oc s =
-     fprintf oc "_%s" s
-
-    let symbol oc symb =
-      raw_symbol oc (extern_atom symb)
-
-    let label oc lbl =
-      fprintf oc "L%d" lbl
-
-    let name_of_section = function
-      | Section_text -> ".text"
-      | Section_data(i, true) ->
-         failwith "_Thread_local unsupported on this platform"
-      | Section_data(i, false) | Section_small_data i ->
-          variable_section ~sec:".data" i
-      | Section_const i  | Section_small_const i ->
-          variable_section ~sec:".const" ~reloc:".const_data" i
-      | Section_string -> ".const"
-      | Section_literal -> ".const"
-      | Section_jumptable -> ".text"
-      | Section_user(s, wr, ex) ->
-          sprintf ".section	\"%s\", %s, %s"
-            (if wr then "__DATA" else "__TEXT") s
-            (if ex then "regular, pure_instructions" else "regular")
-      | Section_debug_info _ ->	".section	__DWARF,__debug_info,regular,debug"
-      | Section_debug_loc  -> ".section	__DWARF,__debug_loc,regular,debug"
-      | Section_debug_line _ -> ".section	__DWARF,__debug_line,regular,debug"
-      | Section_debug_str -> ".section	__DWARF,__debug_str,regular,debug"
-      | Section_debug_ranges -> ".section	__DWARF,__debug_ranges,regular,debug"
-      | Section_debug_abbrev -> ".section	__DWARF,__debug_abbrev,regular,debug"
-      | Section_ais_annotation -> assert false (* Not supported under MacOS *)
-
-
-    let stack_alignment =  16 (* mandatory *)
-
-    let print_align oc n =
-      fprintf oc "	.align	%d\n" (log2 n)
-
-    let print_mov_rs oc rd id =
-      fprintf oc "	movq    %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
-
-    let print_fun_info _ _ = ()
-
-    let print_var_info _ _ = ()
-
-    let print_epilogue oc = ()
-
-    let print_comm_decl oc name sz al =
-      fprintf oc "	.comm	%a, %s, %d\n"
-                 symbol name (Z.to_string sz) (log2 al)
-
-    let print_lcomm_decl oc name sz al =
-      fprintf oc "	.lcomm	%a, %s, %d\n"
-                 symbol name (Z.to_string sz) (log2 al)
-
-  end
-
-(* Printer functions for Cygwin *)
-module Cygwin_System : SYSTEM =
-  struct
-
-    (* The comment delimiter *)
-    let comment = "#"
-
-    let symbol_prefix =
-      if Archi.ptr64 then "" else "_"
-
-    let raw_symbol oc s =
-       fprintf oc "%s%s" symbol_prefix s
-
-    let symbol oc symb =
-      raw_symbol oc (extern_atom symb)
-
-    let label oc lbl =
-       fprintf oc "L%d" lbl
-
-    let name_of_section = function
-      | Section_text -> ".text"
-      | Section_data(i, true) ->
-        failwith "_Thread_local unsupported on this platform"
-      | Section_data(i, false) | Section_small_data i ->
-          variable_section ~sec:".data" ~bss:".bss" i
-      | Section_const i | Section_small_const i ->
-          variable_section ~sec:".section	.rdata,\"dr\"" i
-      | Section_string -> ".section	.rdata,\"dr\""
-      | Section_literal -> ".section	.rdata,\"dr\""
-      | Section_jumptable -> ".text"
-      | Section_user(s, wr, ex) ->
-          sprintf ".section	%s, \"%s\"\n"
-            s (if ex then "xr" else if wr then "d" else "dr")
-      | Section_debug_info _ -> ".section	.debug_info,\"dr\""
-      | Section_debug_loc ->  ".section	.debug_loc,\"dr\""
-      | Section_debug_line _ -> ".section	.debug_line,\"dr\""
-      | Section_debug_abbrev -> ".section	.debug_abbrev,\"dr\""
-      | Section_debug_ranges -> ".section	.debug_ranges,\"dr\""
-      | Section_debug_str-> assert false (* Should not be used *)
-      | Section_ais_annotation -> assert false (* Not supported for coff binaries *)
-
-    let stack_alignment = 8
-      (* minimum is 4 for 32 bits, 8 for 64 bits; 8 is better for perfs *)
-
-    let print_align oc n =
-      fprintf oc "	.balign	%d\n" n
-
-    let indirect_symbols : StringSet.t ref = ref StringSet.empty
-
-    let print_mov_rs oc rd id =
-      if Archi.ptr64 then begin
-        let s = extern_atom id in
-        indirect_symbols := StringSet.add s !indirect_symbols;
-        fprintf oc "	movq	.refptr.%s(%%rip), %a\n" s ireg rd
-      end else begin
-        fprintf oc "	movl	$%a, %a\n" symbol id ireg rd
-      end
-
-    let print_fun_info _ _  = ()
-
-    let print_var_info _ _ = ()
+    let section oc sec =
+      fprintf oc "	%s\n" (name_of_section sec)
 
-    let declare_indirect_symbol oc s =
-      fprintf oc "	.section	.rdata$.refptr.%s, \"dr\"\n" s;
-      fprintf oc "	.globl	.refptr.%s\n" s;
-      fprintf oc "	.linkonce	discard\n";
-      fprintf oc ".refptr.%s:\n" s;
-      fprintf oc "	.quad	%s\n" s
+(* Associate labels to floating-point constants and to symbols. *)
 
-    let print_epilogue oc =
-      if Archi.ptr64 then begin
-        StringSet.iter (declare_indirect_symbol oc) !indirect_symbols;
-        indirect_symbols := StringSet.empty
+    let emit_constants oc lit =
+      if exists_constants () then begin
+         section oc lit;
+         if Hashtbl.length literal64_labels > 0 then
+           begin
+             fprintf oc "	.align 3\n";
+             Hashtbl.iter
+               (fun bf lbl -> fprintf oc "%a:	.quad	0x%Lx\n" label lbl bf)
+               literal64_labels
+           end;
+         if Hashtbl.length literal32_labels > 0 then
+           begin
+             fprintf oc "	.align	2\n";
+             Hashtbl.iter
+               (fun bf lbl ->
+                  fprintf oc "%a:	.long	0x%lx\n" label lbl bf)
+               literal32_labels
+           end;
+         reset_literals ()
       end
 
-    let print_comm_decl oc name sz al =
-      fprintf oc "	.comm   %a, %s, %d\n" 
-                 symbol name (Z.to_string sz) (log2 al)
-
-    let print_lcomm_decl oc name sz al =
-      fprintf oc "	.lcomm   %a, %s, %d\n" 
-                 symbol name (Z.to_string sz)
-                 (if Archi.ptr64 then al else log2 al)
-
-  end
-
+(* Generate code to load the address of id + ofs in register r *)
 
-module Target(System: SYSTEM):TARGET =
-  struct
-    open System
-    let symbol = symbol
-
-(*  Basic printing functions *)
-
-    let addressing_gen ireg oc (Addrmode(base, shift, cst)) =
-      begin match cst with
-      | Datatypes.Coq_inl n ->
-          fprintf oc "%s" (Z.to_string n)
-      | Datatypes.Coq_inr(id, ofs) ->
-          if Archi.ptr64 then begin
-            (* RIP-relative addressing *)
-            let ofs' = Z.to_int64 ofs in
-            if ofs' = 0L
-            then fprintf oc "%a(%%rip)" symbol id
-            else fprintf oc "(%a + %Ld)(%%rip)" symbol id ofs'
-          end else begin
-            (* Absolute addressing *)
-            let ofs' = Z.to_int32 ofs in
-            if ofs' = 0l
-            then fprintf oc "%a" symbol id
-            else fprintf oc "(%a + %ld)" symbol id ofs'
-          end
-      end;
-      begin match base, shift with
-      | None, None -> ()
-      | Some r1, None -> fprintf oc "(%a)" ireg r1
-      | None, Some(r2,sc) -> fprintf oc "(,%a,%a)" ireg r2 z sc
-      | Some r1, Some(r2,sc) -> fprintf oc "(%a,%a,%a)" ireg r1 ireg r2 z sc
+    let loadsymbol oc r id ofs =
+      if Archi.pic_code () then begin
+        assert (ofs = Integers.Ptrofs.zero);
+        fprintf oc "	la	%a, %s\n" ireg r (extern_atom id)
+      end else begin
+        fprintf oc "	lui	%a, %%hi(%a)\n"
+                                ireg r symbol_offset (id, ofs);
+        fprintf oc "	addi	%a, %a, %%lo(%a)\n"
+                                ireg r ireg r symbol_offset (id, ofs)
       end
 
-    let addressing32 = addressing_gen ireg32
-    let addressing64 = addressing_gen ireg64
-    let addressing = addressing_gen ireg
-
-    let name_of_condition = function
-      | Cond_e -> "e" | Cond_ne -> "ne"
-      | Cond_b -> "b" | Cond_be -> "be" | Cond_ae -> "ae" | Cond_a -> "a"
-      | Cond_l -> "l" | Cond_le -> "le" | Cond_ge -> "ge" | Cond_g -> "g"
-      | Cond_p -> "p" | Cond_np -> "np"
-
-    let name_of_neg_condition = function
-      | Cond_e -> "ne" | Cond_ne -> "e"
-      | Cond_b -> "ae" | Cond_be -> "a" | Cond_ae -> "b" | Cond_a -> "be"
-      | Cond_l -> "ge" | Cond_le -> "g" | Cond_ge -> "l" | Cond_g -> "le"
-      | Cond_p -> "np" | Cond_np -> "p"
-
-
-(* Names of sections *)
-
-    let section oc sec =
-      fprintf oc "	%s\n" (name_of_section sec)
-
-(* For "abs" and "neg" FP operations *)
-
-    let need_masks = ref false
-
 (* Emit .file / .loc debugging directives *)
 
     let print_file_line oc file line =
       print_file_line oc comment file line
 
-(* In 64-bit mode use RIP-relative addressing to designate labels *)
+(*
+    let print_location oc loc =
+      if loc <> Cutil.no_loc then print_file_line oc (fst loc) (snd loc)
+*)
 
-    let rip_rel =
-      if Archi.ptr64 then "(%rip)" else ""
+(* Add "w" suffix to 32-bit instructions if we are in 64-bit mode *)
+  
+    let w oc =
+      if Archi.ptr64 then output_string oc "w"
 
-(* Large 64-bit immediates (bigger than a 32-bit signed integer) are
-   not supported by the processor.  Turn them into memory operands. *)
+(* Offset part of a load or store *)
 
-    let intconst64 oc n =
-      let n1 = camlint64_of_coqint n in
-      let n2 = Int64.to_int32 n1 in
-      if n1 = Int64.of_int32 n2 then
-        (* fit in a 32-bit signed integer, can use as immediate *)
-        fprintf oc "$%ld" n2
-      else begin
-        (* put the constant in memory and use a PC-relative memory operand *)
-        let lbl = label_literal64 n1 in
-        fprintf oc "%a(%%rip)" label lbl
-      end
+    let offset oc = function
+    | Ofsimm n -> ptrofs oc n
+    | Ofslow(id, ofs) -> fprintf oc "%%lo(%a)" symbol_offset (id, ofs)
 
-    let print_profiling_logger oc id kind =
-      assert (kind >= 0);
-      assert (kind <= 1);
-      let ofs = profiling_offset id kind in
-      if Archi.ptr64
-      then
-        begin
-          fprintf oc "%s profiling %a %d: atomic increment\n" comment
-            Profilingaux.pp_id id kind;
-          fprintf oc "	lock addq	$1, %s+%d(%%rip)\n"
-            profiling_counter_table_name ofs
-        end
-      else
-        begin
-          fprintf oc "%s begin profiling %a %d: increment\n" comment
-            Profilingaux.pp_id id kind;
-          fprintf oc "	addl	$1, %s+%d\n" profiling_counter_table_name ofs;
-          fprintf oc "	adcl	$1, %s+%d\n" profiling_counter_table_name (ofs+4);
-          fprintf oc "%s end profiling %a %d: increment\n" comment
-            Profilingaux.pp_id id kind;
-        end
-      
 (* Printing of instructions *)
-
-(* Reminder on X86 assembly syntaxes:
-                              AT&T syntax         Intel syntax
-                              (used by GNU as)    (used in reference manuals)
-   dst <- op(src)             op src, dst         op dst, src
-   dst <- op(dst, src2)       op src2, dst        op dst, src2
-   dst <- op(dst, src2, src3) op src3, src2, dst  op dst, src2, src3
-*)
-
     let print_instruction oc = function
-        (* Moves *)
-      | Pmov_rr(rd, r1) ->
-          if Archi.ptr64
-          then fprintf oc "	movq	%a, %a\n" ireg64 r1 ireg64 rd
-          else fprintf oc "	movl	%a, %a\n" ireg32 r1 ireg32 rd
-      | Pmovl_ri(rd, n) ->
-          fprintf oc "	movl	$%ld, %a\n" (camlint_of_coqint n) ireg32 rd
-      | Pmovq_ri(rd, n) ->
-          let n1 = camlint64_of_coqint n in
-          let n2 = Int64.to_int32 n1 in
-          if n1 = Int64.of_int32 n2 then
-            fprintf oc "	movq	$%ld, %a\n" n2 ireg64 rd
-          else
-            fprintf oc "	movabsq	$%Ld, %a\n" n1 ireg64 rd
-      | Pmov_rs(rd, id) ->
-          print_mov_rs oc rd id
-      | Pmovl_rm(rd, a) ->
-          fprintf oc "	movl	%a, %a\n" addressing a ireg32 rd
-      | Pmovq_rm(rd, a) ->
-          fprintf oc "	movq	%a, %a\n" addressing a ireg64 rd
-      | Pmov_rm_a(rd, a) ->
-          if Archi.ptr64
-          then fprintf oc "	movq	%a, %a\n" addressing a ireg64 rd
-          else fprintf oc "	movl	%a, %a\n" addressing a ireg32 rd
-      | Pmovl_mr(a, r1) ->
-          fprintf oc "	movl	%a, %a\n" ireg32 r1 addressing a
-      | Pmovq_mr(a, r1) ->
-          fprintf oc "	movq	%a, %a\n" ireg64 r1 addressing a
-      | Pmov_mr_a(a, r1) ->
-          if Archi.ptr64
-          then fprintf oc "	movq	%a, %a\n" ireg64 r1 addressing a
-          else fprintf oc "	movl	%a, %a\n" ireg32 r1 addressing a
-      | Pmovsd_ff(rd, r1) ->
-          fprintf oc "	movapd	%a, %a\n" freg r1 freg rd
-      | Pmovsd_fi(rd, n) ->
-          let b = camlint64_of_coqint (Floats.Float.to_bits n) in
-          let lbl = label_literal64 b in
-          fprintf oc "	movsd	%a%s, %a %s %.18g\n"
-                     label lbl rip_rel
-                     freg rd comment (camlfloat_of_coqfloat n)
-      | Pmovsd_fm(rd, a) | Pmovsd_fm_a(rd, a) ->
-          fprintf oc "	movsd	%a, %a\n" addressing a freg rd
-      | Pmovsd_mf(a, r1) | Pmovsd_mf_a(a, r1) ->
-          fprintf oc "	movsd	%a, %a\n" freg r1 addressing a
-      | Pmovss_fi(rd, n) ->
-          let b = camlint_of_coqint (Floats.Float32.to_bits n) in
-          let lbl = label_literal32 b in
-          fprintf oc "	movss	%a%s, %a %s %.18g\n"
-                     label lbl rip_rel
-                     freg rd comment (camlfloat_of_coqfloat32 n)
-      | Pmovss_fm(rd, a) ->
-          fprintf oc "	movss	%a, %a\n" addressing a freg rd
-      | Pmovss_mf(a, r1) ->
-          fprintf oc "	movss	%a, %a\n" freg r1 addressing a
-      | Pfldl_m(a) ->
-          fprintf oc "	fldl	%a\n" addressing a
-      | Pfstpl_m(a) ->
-          fprintf oc "	fstpl	%a\n" addressing a
-      | Pflds_m(a) ->
-          fprintf oc "	flds	%a\n" addressing a
-      | Pfstps_m(a) ->
-          fprintf oc "	fstps	%a\n" addressing a
-            (* Moves with conversion *)
-      | Pmovb_mr(a, r1) ->
-          fprintf oc "	movb	%a, %a\n" ireg8 r1 addressing a
-      | Pmovw_mr(a, r1) ->
-          fprintf oc "	movw	%a, %a\n" ireg16 r1 addressing a
-      | Pmovzb_rr(rd, r1) ->
-          fprintf oc "	movzbl	%a, %a\n" ireg8 r1 ireg32 rd
-      | Pmovzb_rm(rd, a) ->
-          fprintf oc "	movzbl	%a, %a\n" addressing a ireg32 rd
-      | Pmovsb_rr(rd, r1) ->
-          fprintf oc "	movsbl	%a, %a\n" ireg8 r1 ireg32 rd
-      | Pmovsb_rm(rd, a) ->
-          fprintf oc "	movsbl	%a, %a\n" addressing a ireg32 rd
-      | Pmovzw_rr(rd, r1) ->
-          fprintf oc "	movzwl	%a, %a\n" ireg16 r1 ireg32 rd
-      | Pmovzw_rm(rd, a) ->
-          fprintf oc "	movzwl	%a, %a\n" addressing a ireg32 rd
-      | Pmovsw_rr(rd, r1) ->
-          fprintf oc "	movswl	%a, %a\n" ireg16 r1 ireg32 rd
-      | Pmovsw_rm(rd, a) ->
-          fprintf oc "	movswl	%a, %a\n" addressing a ireg32 rd
-      | Pmovzl_rr(rd, r1) ->
-          fprintf oc "	movl	%a, %a\n" ireg32 r1 ireg32 rd
-          (* movl sets the high 32 bits of the destination to zero *)
-      | Pmovsl_rr(rd, r1) ->
-          fprintf oc "	movslq	%a, %a\n" ireg32 r1 ireg64 rd
-      | Pmovls_rr(rd) ->
-          ()   (* nothing to do *)
-      | Pcvtsd2ss_ff(rd, r1) ->
-          fprintf oc "	cvtsd2ss %a, %a\n" freg r1 freg rd
-      | Pcvtss2sd_ff(rd, r1) ->
-          fprintf oc "	cvtss2sd %a, %a\n" freg r1 freg rd
-      | Pcvttsd2si_rf(rd, r1) ->
-          fprintf oc "	cvttsd2si %a, %a\n" freg r1 ireg32 rd
-      | Pcvtsi2sd_fr(rd, r1) ->
-          fprintf oc "	cvtsi2sd %a, %a\n" ireg32 r1 freg rd
-      | Pcvttss2si_rf(rd, r1) ->
-          fprintf oc "	cvttss2si %a, %a\n" freg r1 ireg32 rd
-      | Pcvtsi2ss_fr(rd, r1) ->
-          fprintf oc "	cvtsi2ss %a, %a\n" ireg32 r1 freg rd
-      | Pcvttsd2sl_rf(rd, r1) ->
-          fprintf oc "	cvttsd2si %a, %a\n" freg r1 ireg64 rd
-      | Pcvtsl2sd_fr(rd, r1) ->
-          fprintf oc "	cvtsi2sdq %a, %a\n" ireg64 r1 freg rd
-      | Pcvttss2sl_rf(rd, r1) ->
-          fprintf oc "	cvttss2si %a, %a\n" freg r1 ireg64 rd
-      | Pcvtsl2ss_fr(rd, r1) ->
-          fprintf oc "	cvtsi2ssq %a, %a\n" ireg64 r1 freg rd
-            (* Arithmetic and logical operations over integers *)
-      | Pleal(rd, a) ->
-          fprintf oc "	leal	%a, %a\n" addressing32 a ireg32 rd
-      | Pleaq(rd, a) ->
-          fprintf oc "	leaq	%a, %a\n" addressing64 a ireg64 rd
-      | Pnegl(rd) ->
-          fprintf oc "	negl	%a\n" ireg32 rd
-      | Pnegq(rd) ->
-          fprintf oc "	negq	%a\n" ireg64 rd
-      | Paddl_ri (res,n) ->
-	  fprintf oc "	addl	$%ld, %a\n" (camlint_of_coqint n) ireg32 res
-      | Paddq_ri (res,n) ->
-	  fprintf oc "	addq	%a, %a\n" intconst64 n ireg64 res
-      | Psubl_rr(rd, r1) ->
-          fprintf oc "	subl	%a, %a\n" ireg32 r1 ireg32 rd
-      | Psubq_rr(rd, r1) ->
-          fprintf oc "	subq	%a, %a\n" ireg64 r1 ireg64 rd
-      | Pimull_rr(rd, r1) ->
-          fprintf oc "	imull	%a, %a\n" ireg32 r1 ireg32 rd
-      | Pimulq_rr(rd, r1) ->
-          fprintf oc "	imulq	%a, %a\n" ireg64 r1 ireg64 rd
-      | Pimull_ri(rd, n) ->
-          fprintf oc "	imull	$%a, %a\n" coqint n ireg32 rd
-      | Pimulq_ri(rd, n) ->
-          fprintf oc "	imulq	%a, %a\n" intconst64 n ireg64 rd
-      | Pimull_r(r1) ->
-          fprintf oc "	imull	%a\n" ireg32 r1
-      | Pimulq_r(r1) ->
-          fprintf oc "	imulq	%a\n" ireg64 r1
-      | Pmull_r(r1) ->
-          fprintf oc "	mull	%a\n" ireg32 r1
-      | Pmulq_r(r1) ->
-          fprintf oc "	mulq	%a\n" ireg64 r1
-      | Pcltd ->
-          fprintf oc "	cltd\n"
-      | Pcqto ->
-          fprintf oc "	cqto\n";
-      | Pdivl(r1) ->
-          fprintf oc "	divl	%a\n" ireg32 r1
-      | Pdivq(r1) ->
-          fprintf oc "	divq	%a\n" ireg64 r1
-      | Pidivl(r1) ->
-          fprintf oc "	idivl	%a\n" ireg32 r1
-      | Pidivq(r1) ->
-          fprintf oc "	idivq	%a\n" ireg64 r1
-      | Pandl_rr(rd, r1) ->
-          fprintf oc "	andl	%a, %a\n" ireg32 r1 ireg32 rd
-      | Pandq_rr(rd, r1) ->
-          fprintf oc "	andq	%a, %a\n" ireg64 r1 ireg64 rd
-      | Pandl_ri(rd, n) ->
-          fprintf oc "	andl	$%a, %a\n" coqint n ireg32 rd
-      | Pandq_ri(rd, n) ->
-          fprintf oc "	andq	%a, %a\n" intconst64 n ireg64 rd
-      | Porl_rr(rd, r1) ->
-          fprintf oc "	orl	%a, %a\n" ireg32 r1 ireg32 rd
-      | Porq_rr(rd, r1) ->
-          fprintf oc "	orq	%a, %a\n" ireg64 r1 ireg64 rd
-      | Porl_ri(rd, n) ->
-          fprintf oc "	orl	$%a, %a\n" coqint n ireg32 rd
-      | Porq_ri(rd, n) ->
-          fprintf oc "	orq	%a, %a\n" intconst64 n ireg64 rd
-      | Pxorl_r(rd) ->
-          fprintf oc "	xorl	%a, %a\n" ireg32 rd ireg32 rd
-      | Pxorq_r(rd) ->
-          fprintf oc "	xorq	%a, %a\n" ireg64 rd ireg64 rd
-      | Pxorl_rr(rd, r1) ->
-          fprintf oc "	xorl	%a, %a\n" ireg32 r1 ireg32 rd
-      | Pxorq_rr(rd, r1) ->
-          fprintf oc "	xorq	%a, %a\n" ireg64 r1 ireg64 rd
-      | Pxorl_ri(rd, n) ->
-          fprintf oc "	xorl	$%a, %a\n" coqint n ireg32 rd
-      | Pxorq_ri(rd, n) ->
-          fprintf oc "	xorq	%a, %a\n" intconst64 n ireg64 rd
-      | Pnotl(rd) ->
-          fprintf oc "	notl	%a\n" ireg32 rd
-      | Pnotq(rd) ->
-          fprintf oc "	notq	%a\n" ireg64 rd
-      | Psall_rcl(rd) ->
-          fprintf oc "	sall	%%cl, %a\n" ireg32 rd
-      | Psalq_rcl(rd) ->
-          fprintf oc "	salq	%%cl, %a\n" ireg64 rd
-      | Psall_ri(rd, n) ->
-          fprintf oc "	sall	$%a, %a\n" coqint n ireg32 rd
-      | Psalq_ri(rd, n) ->
-          fprintf oc "	salq	$%a, %a\n" coqint n ireg64 rd
-      | Pshrl_rcl(rd) ->
-          fprintf oc "	shrl	%%cl, %a\n" ireg32 rd
-      | Pshrq_rcl(rd) ->
-          fprintf oc "	shrq	%%cl, %a\n" ireg64 rd
-      | Pshrl_ri(rd, n) ->
-          fprintf oc "	shrl	$%a, %a\n" coqint n ireg32 rd
-      | Pshrq_ri(rd, n) ->
-          fprintf oc "	shrq	$%a, %a\n" coqint n ireg64 rd
-      | Psarl_rcl(rd) ->
-          fprintf oc "	sarl	%%cl, %a\n" ireg32 rd
-      | Psarq_rcl(rd) ->
-          fprintf oc "	sarq	%%cl, %a\n" ireg64 rd
-      | Psarl_ri(rd, n) ->
-          fprintf oc "	sarl	$%a, %a\n" coqint n ireg32 rd
-      | Psarq_ri(rd, n) ->
-          fprintf oc "	sarq	$%a, %a\n" coqint n ireg64 rd
-      | Pshld_ri(rd, r1, n) ->
-          fprintf oc "	shldl	$%a, %a, %a\n" coqint n ireg32 r1 ireg32 rd
-      | Prorl_ri(rd, n) ->
-          fprintf oc "	rorl	$%a, %a\n" coqint n ireg32 rd
-      | Prorq_ri(rd, n) ->
-          fprintf oc "	rorq	$%a, %a\n" coqint n ireg64 rd
-      | Pcmpl_rr(r1, r2) ->
-          fprintf oc "	cmpl	%a, %a\n" ireg32 r2 ireg32 r1
-      | Pcmpq_rr(r1, r2) ->
-          fprintf oc "	cmpq	%a, %a\n" ireg64 r2 ireg64 r1
-      | Pcmpl_ri(r1, n) ->
-          fprintf oc "	cmpl	$%a, %a\n" coqint n ireg32 r1
-      | Pcmpq_ri(r1, n) ->
-          fprintf oc "	cmpq	%a, %a\n" intconst64 n ireg64 r1
-      | Ptestl_rr(r1, r2) ->
-          fprintf oc "	testl	%a, %a\n" ireg32 r2 ireg32 r1
-      | Ptestq_rr(r1, r2) ->
-          fprintf oc "	testq	%a, %a\n" ireg64 r2 ireg64 r1
-      | Ptestl_ri(r1, n) ->
-          fprintf oc "	testl	$%a, %a\n" coqint n ireg32 r1
-      | Ptestq_ri(r1, n) ->
-          fprintf oc "	testl	%a, %a\n" intconst64 n ireg64 r1
-      | Pcmov(c, rd, r1) ->
-          fprintf oc "	cmov%s	%a, %a\n" (name_of_condition c) ireg r1 ireg rd
-      | Psetcc(c, rd) ->
-          fprintf oc "	set%s	%a\n" (name_of_condition c) ireg8 rd;
-          fprintf oc "	movzbl	%a, %a\n" ireg8 rd ireg32 rd
-            (* Arithmetic operations over floats *)
-      | Paddd_ff(rd, r1) ->
-          fprintf oc "	addsd	%a, %a\n" freg r1 freg rd
-      | Psubd_ff(rd, r1) ->
-          fprintf oc "	subsd	%a, %a\n" freg r1 freg rd
-      | Pmuld_ff(rd, r1) ->
-          fprintf oc "	mulsd	%a, %a\n" freg r1 freg rd
-      | Pdivd_ff(rd, r1) ->
-          fprintf oc "	divsd	%a, %a\n" freg r1 freg rd
-      | Pnegd (rd) ->
-          need_masks := true;
-          fprintf oc "	xorpd	%a%s, %a\n"
-                     raw_symbol "__negd_mask" rip_rel freg rd
-      | Pabsd (rd) ->
-          need_masks := true;
-          fprintf oc "	andpd	%a%s, %a\n"
-                     raw_symbol "__absd_mask" rip_rel freg rd
-      | Pcomisd_ff(r1, r2) ->
-          fprintf oc "	comisd	%a, %a\n" freg r2 freg r1
-      | Pxorpd_f (rd) ->
-          fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
-      | Padds_ff(rd, r1) ->
-          fprintf oc "	addss	%a, %a\n" freg r1 freg rd
-      | Psubs_ff(rd, r1) ->
-          fprintf oc "	subss	%a, %a\n" freg r1 freg rd
-      | Pmuls_ff(rd, r1) ->
-          fprintf oc "	mulss	%a, %a\n" freg r1 freg rd
-      | Pdivs_ff(rd, r1) ->
-          fprintf oc "	divss	%a, %a\n" freg r1 freg rd
-      | Pnegs (rd) ->
-          need_masks := true;
-          fprintf oc "	xorpd	%a%s, %a\n"
-                     raw_symbol "__negs_mask" rip_rel freg rd
-      | Pabss (rd) ->
-          need_masks := true;
-          fprintf oc "	andpd	%a%s, %a\n"
-                     raw_symbol "__abss_mask" rip_rel freg rd
-      | Pcomiss_ff(r1, r2) ->
-          fprintf oc "	comiss	%a, %a\n" freg r2 freg r1
-      | Pxorps_f (rd) ->
-          fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
-            (* Branches and calls *)
-      | Pjmp_l(l) ->
-          fprintf oc "	jmp	%a\n" label (transl_label l)
-      | Pjmp_s(f, sg) ->
-          fprintf oc "	jmp	%a\n" symbol f
-      | Pjmp_r(r, sg) ->
-          fprintf oc "	jmp	*%a\n" ireg r
-      | Pjcc(c, l) ->
-          let l = transl_label l in
-          fprintf oc "	j%s	%a\n" (name_of_condition c) label l
-      | Pjcc2(c1, c2, l) ->
-          let l = transl_label l in
-          let l' = new_label() in
-          fprintf oc "	j%s	%a\n" (name_of_neg_condition c1) label l';
-          fprintf oc "	j%s	%a\n" (name_of_condition c2) label l;
-          fprintf oc "%a:\n" label l'
-      | Pjmptbl(r, tbl) ->
-          let l = new_label() in
-          jumptables := (l, tbl) :: !jumptables;
-          if Archi.ptr64 then begin
-            let (tmp1, tmp2) =
-              if r = RAX then (RDX, RAX) else (RAX, RDX) in
-            fprintf oc "	leaq	%a(%%rip), %a\n" label l ireg tmp1;
-            fprintf oc "	movslq	(%a, %a, 4), %a\n" ireg tmp1 ireg r ireg tmp2;
-            fprintf oc "	addq	%a, %a\n" ireg tmp2 ireg tmp1;
-            fprintf oc "	jmp	*%a\n" ireg tmp1
-          end else begin
-            fprintf oc "	jmp	*%a(, %a, 4)\n" label l ireg r
-          end
-      | Pcall_s(f, sg) ->
-          fprintf oc "	call	%a\n" symbol f;
-          if (not Archi.ptr64) && sg.sig_cc.cc_structret then
-            fprintf oc "	pushl	%%eax\n"
-      | Pcall_r(r, sg) ->
-          fprintf oc "	call	*%a\n" ireg r;
-          if (not Archi.ptr64) && sg.sig_cc.cc_structret then
-            fprintf oc "	pushl	%%eax\n"
-      | Pret ->
-          if (not Archi.ptr64)
-          && (!current_function_sig).sig_cc.cc_structret then begin
-            fprintf oc "	movl	4(%%esp), %%eax\n";
-            fprintf oc "	ret	$4\n"
-          end else begin
-            fprintf oc "	ret\n"
-          end
-      (* Instructions produced by Asmexpand *)
-      | Padcl_ri (res,n) ->
-	 fprintf oc "  	adcl	$%ld, %a\n" (camlint_of_coqint n) ireg32 res;
-      | Padcl_rr (res,a1) ->
-	 fprintf oc "  	adcl	%a, %a\n" ireg32 a1 ireg32 res;
-      | Paddl_rr (res,a1) ->
-	 fprintf oc "	addl	%a, %a\n" ireg32 a1 ireg32 res;
-      | Paddl_mi (addr,n) ->
-	 fprintf oc "	addl	$%ld, %a\n" (camlint_of_coqint n) addressing addr
-      | Pbsfl (res,a1) ->
-	 fprintf oc "	bsfl	%a, %a\n" ireg32 a1 ireg32 res
-      | Pbsfq (res,a1) ->
-	 fprintf oc "	bsfq	%a, %a\n" ireg64 a1 ireg64 res
-      | Pbsrl (res,a1) ->
-	 fprintf oc "	bsrl	%a, %a\n" ireg32 a1 ireg32 res
-      | Pbsrq (res,a1) ->
-	 fprintf oc "	bsrq	%a, %a\n" ireg64 a1 ireg64 res
-      | Pbswap64 res ->
-	 fprintf oc "	bswap	%a\n" ireg64 res
-      | Pbswap32 res ->
-	 fprintf oc "	bswap	%a\n" ireg32 res
-      | Pbswap16 res ->
-	 fprintf oc "	rolw	$8, %a\n" ireg16 res
-      | Pcfi_adjust sz ->
-	 cfi_adjust oc (camlint_of_coqint sz)
-      | Pfmadd132 (res,a1,a2) ->
-	 fprintf oc "	vfmadd132sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfmadd213 (res,a1,a2) ->
-	 fprintf oc "	vfmadd213sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfmadd231 (res,a1,a2) ->
-	 fprintf oc "	vfmadd231sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfmsub132 (res,a1,a2) ->
-	 fprintf oc "	vfmsub132sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfmsub213 (res,a1,a2) ->
-	 fprintf oc "	vfmsub213sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfmsub231 (res,a1,a2) ->
-	 fprintf oc "	vfmsub231sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmadd132 (res,a1,a2) ->
-	 fprintf oc "	vfnmadd132sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmadd213 (res,a1,a2) ->
-	 fprintf oc "	vfnmadd213sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmadd231 (res,a1,a2) ->
-	 fprintf oc "	vfnmadd231sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmsub132 (res,a1,a2) ->
-	 fprintf oc "	vfnmsub132sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmsub213 (res,a1,a2) ->
-	 fprintf oc "	vfnmsub213sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pfnmsub231 (res,a1,a2) ->
-	 fprintf oc "	vfnmsub231sd	%a, %a, %a\n" freg a2 freg a1 freg res
-      | Pmaxsd (res,a1) ->
-	 fprintf oc "	maxsd	%a, %a\n" freg a1 freg res
-      | Pminsd (res,a1) ->
-	 fprintf oc "	minsd	%a, %a\n" freg a1 freg res
-      | Pmovb_rm (rd,a) ->
-	 fprintf oc "	movb	%a, %a\n" addressing a ireg8 rd
-      | Pmovq_rf (rd, r1) ->
-         fprintf oc "	movq	%a, %a\n" freg r1 ireg64 rd
-      | Pmovsq_mr(a, rs) ->
-          fprintf oc "	movq	%a, %a\n" freg rs addressing a
-      | Pmovsq_rm(rd, a) ->
-          fprintf oc "	movq	%a, %a\n" addressing a freg rd
-      | Pmovsb ->
-	 fprintf oc "	movsb\n";
-      | Pmovsw ->
-	 fprintf oc "	movsw\n";
-      | Pmovw_rm (rd, a) ->
-          fprintf oc "	movw	%a, %a\n" addressing a ireg16 rd
+      | Pmv(rd, rs) ->
+         fprintf oc "	mv	%a, %a\n"     ireg rd ireg rs
+
+      (* 32-bit integer register-immediate instructions *)
+      | Paddiw (rd, rs, imm) ->
+         fprintf oc "	addi%t	%a, %a, %a\n" w ireg rd ireg0 rs coqint imm
+      | Psltiw (rd, rs, imm) ->
+         fprintf oc "	slti	%a, %a, %a\n" ireg rd ireg0 rs coqint imm
+      | Psltiuw (rd, rs, imm) ->
+         fprintf oc "	sltiu	%a, %a, %a\n" ireg rd ireg0 rs coqint imm
+      | Pandiw (rd, rs, imm) ->
+         fprintf oc "	andi	%a, %a, %a\n" ireg rd ireg0 rs coqint imm
+      | Poriw (rd, rs, imm) ->
+         fprintf oc "	ori	%a, %a, %a\n" ireg rd ireg0 rs coqint imm
+      | Pxoriw (rd, rs, imm) ->
+         fprintf oc "	xori	%a, %a, %a\n" ireg rd ireg0 rs coqint imm
+      | Pslliw (rd, rs, imm) ->
+         fprintf oc "	slli%t	%a, %a, %a\n" w ireg rd ireg0 rs coqint imm
+      | Psrliw (rd, rs, imm) ->
+         fprintf oc "	srli%t	%a, %a, %a\n" w ireg rd ireg0 rs coqint imm
+      | Psraiw (rd, rs, imm) ->
+         fprintf oc "	srai%t	%a, %a, %a\n" w ireg rd ireg0 rs coqint imm
+      | Pluiw (rd, imm) ->
+         fprintf oc "	lui	%a, %a\n"     ireg rd coqint imm
+
+      (* 32-bit integer register-register instructions *)
+      | Paddw(rd, rs1, rs2) ->
+         fprintf oc "	add%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Psubw(rd, rs1, rs2) ->
+         fprintf oc "	sub%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pmulw(rd, rs1, rs2) ->
+         fprintf oc "	mul%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Pmulhw(rd, rs1, rs2) ->  assert (not Archi.ptr64);
+         fprintf oc "	mulh	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pmulhuw(rd, rs1, rs2) ->  assert (not Archi.ptr64);
+         fprintf oc "	mulhu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pdivw(rd, rs1, rs2) ->
+         fprintf oc "	div%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Pdivuw(rd, rs1, rs2) ->
+         fprintf oc "	divu%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Premw(rd, rs1, rs2) ->
+         fprintf oc "	rem%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Premuw(rd, rs1, rs2) ->
+         fprintf oc "	remu%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+
+      | Psltw(rd, rs1, rs2) ->
+         fprintf oc "	slt	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psltuw(rd, rs1, rs2) ->
+         fprintf oc "	sltu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pandw(rd, rs1, rs2) ->
+         fprintf oc "	and	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Porw(rd, rs1, rs2) ->
+         fprintf oc "	or	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pxorw(rd, rs1, rs2) ->
+         fprintf oc "	xor	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psllw(rd, rs1, rs2) ->
+         fprintf oc "	sll%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Psrlw(rd, rs1, rs2) ->
+         fprintf oc "	srl%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+      | Psraw(rd, rs1, rs2) ->
+         fprintf oc "	sra%t	%a, %a, %a\n" w ireg rd ireg0 rs1 ireg0 rs2
+
+      (* 64-bit integer register-immediate instructions *)
+      | Paddil (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	addi	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Psltil (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	slti	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Psltiul (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	sltiu	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Pandil (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	andi	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Poril (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	ori	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Pxoril (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	xori	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Psllil (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	slli	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Psrlil (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	srli	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Psrail (rd, rs, imm) -> assert Archi.ptr64;
+         fprintf oc "	srai	%a, %a, %a\n" ireg rd ireg0 rs coqint64 imm
+      | Pluil (rd, imm) -> assert Archi.ptr64;
+         fprintf oc "	lui	%a, %a\n"     ireg rd coqint64 imm
+
+      (* 64-bit integer register-register instructions *)
+      | Paddl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	add	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psubl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	sub	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pmull(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	mul	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pmulhl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	mulh	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pmulhul(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	mulhu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pdivl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	div	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pdivul(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	divu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Preml(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	rem	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Premul(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	remu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Psltl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	slt	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psltul(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	sltu	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+
+      | Pandl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	and	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Porl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	or	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pxorl(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	xor	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Pslll(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	sll	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psrll(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	srl	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+      | Psral(rd, rs1, rs2) -> assert Archi.ptr64;
+         fprintf oc "	sra	%a, %a, %a\n" ireg rd ireg0 rs1 ireg0 rs2
+  
+      (* Unconditional jumps.  Links are always to X1/RA. *)
+      (* TODO: fix up arguments for calls to variadics, to move *)
+      (* floating point arguments to integer registers.  How? *)
+      | Pj_l(l) ->
+         fprintf oc "	j	%a\n" print_label l
+      | Pj_s(s, sg) ->
+         fprintf oc "	j	%a\n" symbol s
+      | Pj_r(r, sg) ->
+         fprintf oc "	jr	%a\n" ireg r
+      | Pjal_s(s, sg) ->
+         fprintf oc "	call	%a\n" symbol s
+      | Pjal_r(r, sg) ->
+         fprintf oc "	jalr	%a\n" ireg r
+
+      (* Conditional branches, 32-bit comparisons *)
+      | Pbeqw(rs1, rs2, l) ->
+         fprintf oc "	beq	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbnew(rs1, rs2, l) ->
+         fprintf oc "	bne	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbltw(rs1, rs2, l) ->
+         fprintf oc "	blt	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbltuw(rs1, rs2, l) ->
+         fprintf oc "	bltu	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbgew(rs1, rs2, l) ->
+         fprintf oc "	bge	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbgeuw(rs1, rs2, l) ->
+         fprintf oc "	bgeu	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+
+      (* Conditional branches, 64-bit comparisons *)
+      | Pbeql(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	beq	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbnel(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	bne	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbltl(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	blt	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbltul(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	bltu	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbgel(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	bge	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+      | Pbgeul(rs1, rs2, l) -> assert Archi.ptr64;
+         fprintf oc "	bgeu	%a, %a, %a\n" ireg0 rs1 ireg0 rs2 print_label l
+
+      (* Loads and stores *)
+      | Plb(rd, ra, ofs) ->
+         fprintf oc "	lb	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Plbu(rd, ra, ofs) ->
+         fprintf oc "	lbu	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Plh(rd, ra, ofs) ->
+         fprintf oc "	lh	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Plhu(rd, ra, ofs) ->
+         fprintf oc "	lhu	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Plw(rd, ra, ofs) | Plw_a(rd, ra, ofs) ->
+         fprintf oc "	lw	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Pld(rd, ra, ofs) | Pld_a(rd, ra, ofs) -> assert Archi.ptr64;
+         fprintf oc "	ld	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+
+      | Psb(rd, ra, ofs) ->
+         fprintf oc "	sb	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Psh(rd, ra, ofs) ->
+         fprintf oc "	sh	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Psw(rd, ra, ofs) | Psw_a(rd, ra, ofs) ->
+         fprintf oc "	sw	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+      | Psd(rd, ra, ofs) | Psd_a(rd, ra, ofs) -> assert Archi.ptr64;
+         fprintf oc "	sd	%a, %a(%a)\n" ireg rd offset ofs ireg ra
+
+
+      (* Synchronization *)
+      | Pfence ->
+         fprintf oc "	fence\n"
+
+      (* floating point register move.
+         fmv.d preserves single-precision register contents, and hence
+         is applicable to both single- and double-precision moves.
+       *)
+      | Pfmv (fd,fs) ->
+         fprintf oc "	fmv.d	%a, %a\n"     freg fd freg fs
+      | Pfmvxs (rd,fs) ->
+         fprintf oc "	fmv.x.s	%a, %a\n"     ireg rd freg fs
+      | Pfmvsx (fd,rs) ->
+         fprintf oc "	fmv.s.x	%a, %a\n"     freg fd ireg rs
+      | Pfmvxd (rd,fs) ->
+         fprintf oc "	fmv.x.d	%a, %a\n"     ireg rd freg fs
+      | Pfmvdx (fd,rs) ->
+         fprintf oc "	fmv.d.x	%a, %a\n"     freg fd ireg rs
+
+      (* 32-bit (single-precision) floating point *)
+      | Pfls (fd, ra, ofs) ->
+         fprintf oc "	flw	%a, %a(%a)\n" freg fd offset ofs ireg ra
+      | Pfss (fs, ra, ofs) ->
+         fprintf oc "	fsw	%a, %a(%a)\n" freg fs offset ofs ireg ra
+
+      | Pfnegs (fd, fs) ->
+         fprintf oc "	fneg.s	%a, %a\n"     freg fd freg fs
+      | Pfabss (fd, fs) ->
+         fprintf oc "	fabs.s	%a, %a\n"     freg fd freg fs
+
+      | Pfadds (fd, fs1, fs2) ->
+         fprintf oc "	fadd.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfsubs (fd, fs1, fs2) ->
+         fprintf oc "	fsub.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmuls (fd, fs1, fs2) ->
+         fprintf oc "	fmul.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfdivs (fd, fs1, fs2) ->
+         fprintf oc "	fdiv.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmins (fd, fs1, fs2) ->
+         fprintf oc "	fmin.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmaxs (fd, fs1, fs2) ->
+         fprintf oc "	fmax.s	%a, %a, %a\n" freg fd freg fs1 freg fs2
+
+      | Pfeqs (rd, fs1, fs2) ->
+         fprintf oc "	feq.s   %a, %a, %a\n" ireg rd freg fs1 freg fs2
+      | Pflts (rd, fs1, fs2) ->
+         fprintf oc "	flt.s   %a, %a, %a\n" ireg rd freg fs1 freg fs2
+      | Pfles (rd, fs1, fs2) ->
+         fprintf oc "	fle.s   %a, %a, %a\n" ireg rd freg fs1 freg fs2
+
+      | Pfsqrts (fd, fs) ->
+         fprintf oc "	fsqrt.s %a, %a\n"     freg fd freg fs
+
+      | Pfmadds (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fmadd.s	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfmsubs (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fmsub.s	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfnmadds (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fnmadd.s	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfnmsubs (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fnmsub.s	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+
+      | Pfcvtws (rd, fs) ->
+         fprintf oc "	fcvt.w.s	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtwus (rd, fs) ->
+         fprintf oc "	fcvt.wu.s	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtsw (fd, rs) ->
+         fprintf oc "	fcvt.s.w	%a, %a\n" freg fd ireg0 rs
+      | Pfcvtswu (fd, rs) ->
+         fprintf oc "	fcvt.s.wu	%a, %a\n" freg fd ireg0 rs
+
+      | Pfcvtls (rd, fs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.l.s	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtlus (rd, fs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.lu.s	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtsl (fd, rs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.s.l	%a, %a\n" freg fd ireg0 rs
+      | Pfcvtslu (fd, rs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.s.lu	%a, %a\n" freg fd ireg0 rs
+
+      (* 64-bit (double-precision) floating point *)
+      | Pfld (fd, ra, ofs) | Pfld_a (fd, ra, ofs) ->
+         fprintf oc "	fld	%a, %a(%a)\n" freg fd offset ofs ireg ra
+      | Pfsd (fs, ra, ofs) | Pfsd_a (fs, ra, ofs) ->
+         fprintf oc "	fsd	%a, %a(%a)\n" freg fs offset ofs ireg ra
+
+      | Pfnegd (fd, fs) ->
+         fprintf oc "	fneg.d	%a, %a\n"     freg fd freg fs
+      | Pfabsd (fd, fs) ->
+         fprintf oc "	fabs.d	%a, %a\n"     freg fd freg fs
+
+      | Pfaddd (fd, fs1, fs2) ->
+         fprintf oc "	fadd.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfsubd (fd, fs1, fs2) ->
+         fprintf oc "	fsub.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmuld (fd, fs1, fs2) ->
+         fprintf oc "	fmul.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfdivd (fd, fs1, fs2) ->
+         fprintf oc "	fdiv.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmind (fd, fs1, fs2) ->
+         fprintf oc "	fmin.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+      | Pfmaxd (fd, fs1, fs2) ->
+         fprintf oc "	fmax.d	%a, %a, %a\n" freg fd freg fs1 freg fs2
+
+      | Pfeqd (rd, fs1, fs2) ->
+         fprintf oc "	feq.d	%a, %a, %a\n" ireg rd freg fs1 freg fs2
+      | Pfltd (rd, fs1, fs2) ->
+         fprintf oc "	flt.d	%a, %a, %a\n" ireg rd freg fs1 freg fs2
+      | Pfled (rd, fs1, fs2) ->
+         fprintf oc "	fle.d	%a, %a, %a\n" ireg rd freg fs1 freg fs2
+
+      | Pfsqrtd (fd, fs) ->
+         fprintf oc "	fsqrt.d	%a, %a\n" freg fd freg fs
+
+      | Pfmaddd (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fmadd.d	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfmsubd (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fmsub.d	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfnmaddd (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fnmadd.d	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+      | Pfnmsubd (fd, fs1, fs2, fs3) ->
+         fprintf oc "	fnmsub.d	%a, %a, %a, %a\n" freg fd freg fs1 freg fs2 freg fs3
+
+      | Pfcvtwd (rd, fs) ->
+         fprintf oc "	fcvt.w.d	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtwud (rd, fs) ->
+         fprintf oc "	fcvt.wu.d	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtdw (fd, rs) ->
+         fprintf oc "	fcvt.d.w	%a, %a\n" freg fd ireg0 rs
+      | Pfcvtdwu (fd, rs) ->
+         fprintf oc "	fcvt.d.wu	%a, %a\n" freg fd ireg0 rs
+
+      | Pfcvtld (rd, fs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.l.d	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtlud (rd, fs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.lu.d	%a, %a, rtz\n" ireg rd freg fs
+      | Pfcvtdl (fd, rs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.d.l	%a, %a\n" freg fd ireg0 rs
+      | Pfcvtdlu (fd, rs) -> assert Archi.ptr64;
+         fprintf oc "	fcvt.d.lu	%a, %a\n" freg fd ireg0 rs
+
+      | Pfcvtds (fd, fs) ->
+         fprintf oc "	fcvt.d.s	%a, %a\n" freg fd freg fs
+      | Pfcvtsd (fd, fs) ->
+         fprintf oc "	fcvt.s.d	%a, %a\n" freg fd freg fs
+
+      (* Pseudo-instructions expanded in Asmexpand *)
+      | Pselectl(_, _, _, _) ->
+         assert false
+      | Pallocframe(sz, ofs) ->
+         assert false
+      | Pfreeframe(sz, ofs) ->
+         assert false
+      | Pseqw _ | Psnew _ | Pseql _ | Psnel _ | Pcvtl2w _ | Pcvtw2l _ ->
+         assert false
+
+      (* Pseudo-instructions that remain *)
+      | Plabel lbl ->
+         fprintf oc "%a:\n" print_label lbl
+      | Ploadsymbol(rd, id, ofs) ->
+         loadsymbol oc rd id ofs
+      | Ploadsymbol_high(rd, id, ofs) ->
+         fprintf oc "	lui	%a, %%hi(%a)\n" ireg rd symbol_offset (id, ofs)
+      | Ploadli(rd, n) ->
+         let d = camlint64_of_coqint n in
+         let lbl = label_literal64 d in
+         fprintf oc "	ld	%a, %a %s %Lx\n" ireg rd label lbl comment d
+      | Ploadfi(rd, f) ->
+         let d   = camlint64_of_coqint(Floats.Float.to_bits f) in
+         let lbl = label_literal64 d in
+         fprintf oc "	fld	%a, %a, x31 %s %.18g\n"
+                    freg rd label lbl comment (camlfloat_of_coqfloat f)
+      | Ploadsi(rd, f) ->
+         let s   = camlint_of_coqint(Floats.Float32.to_bits f) in
+         let lbl = label_literal32 s in
+         fprintf oc "	flw	%a, %a, x31 %s %.18g\n"
+                    freg rd label lbl comment (camlfloat_of_coqfloat32 f)
+      | Pbtbl(r, tbl) ->
+         let lbl = new_label() in
+         fprintf oc "%s jumptable [ " comment;
+         List.iter (fun l -> fprintf oc "%a " print_label l) tbl;
+         fprintf oc "]\n";
+         fprintf oc "	sll	x5, %a, 2\n" ireg r;
+         fprintf oc "	la	x31, %a\n" label lbl;
+         fprintf oc "	add	x5, x31, x5\n";
+         fprintf oc "	lw	x5, 0(x5)\n";
+         fprintf oc "	add	x5, x31, x5\n";
+         fprintf oc "	jr	x5\n";
+         jumptables := (lbl, tbl) :: !jumptables;
+         fprintf oc "%s end pseudoinstr btbl\n" comment
       | Pnop ->
-          fprintf oc "	nop\n"
-      | Prep_movsl ->
-	 fprintf oc "	rep	movsl\n"
-      | Psbbl_rr (res,a1) ->
-	 fprintf oc "	sbbl	%a, %a\n" ireg32 a1 ireg32 res
-      | Psqrtsd (res,a1) ->
-	 fprintf oc "	sqrtsd	%a, %a\n" freg a1 freg res
-      | Psubl_ri (res,n) ->
-	  fprintf oc "	subl	$%ld, %a\n" (camlint_of_coqint n) ireg32 res;
-      | Psubq_ri (res,n) ->
-	  fprintf oc "	subq	%a, %a\n" intconst64 n ireg64 res;
-      (* Pseudo-instructions *)
-      | Plabel(l) ->
-          fprintf oc "%a:\n" label (transl_label l)
-      | Pallocframe(sz, ofs_ra, ofs_link)
-      | Pfreeframe(sz, ofs_ra, ofs_link) ->
-	 assert false
+        fprintf oc "	nop\n"
       | Pbuiltin(ef, args, res) ->
-          begin match ef with
-            | EF_annot(kind,txt, targs) ->
-                begin match (P.to_int kind) with
-                  | 1 ->  let annot = annot_text preg_annot "esp" (camlstring_of_coqstring txt) args in
-                    fprintf oc "%s annotation: %S\n" comment annot
-                  | 2 -> let lbl = new_label () in
-                    fprintf oc "%a:\n" label lbl;
-                    let sp = if Archi.ptr64 then "rsp" else "esp" in
-                    add_ais_annot lbl preg_ais_annot sp (camlstring_of_coqstring txt) args
-                  | _ -> assert false
-                end
+         begin match ef with
+           | EF_annot(kind,txt, targs) ->
+             begin match (P.to_int kind) with
+               | 1 -> let annot = annot_text preg_annot "x2" (camlstring_of_coqstring txt) args  in
+                 fprintf oc "%s annotation: %S\n" comment annot
+               | 2 -> let lbl = new_label () in
+                 fprintf oc "%a:\n" label lbl;
+                 add_ais_annot lbl preg_annot "x2" (camlstring_of_coqstring txt) args
+               | _ -> assert false
+             end
           | EF_debug(kind, txt, targs) ->
-              print_debug_info comment print_file_line preg_annot "%esp" oc
+              print_debug_info comment print_file_line preg_annot "sp" oc
                                (P.to_int kind) (extern_atom txt) args
           | EF_inline_asm(txt, sg, clob) ->
               fprintf oc "%s begin inline assembly\n\t" comment;
               print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
               fprintf oc "%s end inline assembly\n" comment
-          | EF_profiling(id, coq_kind) ->
-            print_profiling_logger oc id (Z.to_int coq_kind)          
           | _ ->
               assert false
-          end
+         end
 
-    let print_literal64 oc n lbl =
-      fprintf oc "%a:	.quad	0x%Lx\n" label lbl n
-    let print_literal32 oc n lbl =
-      fprintf oc "%a:	.long	0x%lx\n" label lbl n
+    let get_section_names name =
+      let (text, lit) =
+        match C2C.atom_sections name with
+        | t :: l :: _ -> (t, l)
+        | _    -> (Section_text, Section_literal) in
+      text,lit,Section_jumptable
 
-    let print_jumptable oc jmptbl =
-      let print_jumptable (lbl, tbl) =
-        let print_entry l =
-          if Archi.ptr64 then
-            fprintf oc "	.long	%a - %a\n" label (transl_label l) label lbl
-          else
-            fprintf oc "	.long	%a\n" label (transl_label l)
-        in
-        fprintf oc "%a:" label lbl;
-        List.iter print_entry tbl
-      in
-      if !jumptables <> [] then begin
-        section oc jmptbl;
-        print_align oc 4;
-        List.iter print_jumptable !jumptables;
-        jumptables := []
-      end
+    let print_align oc alignment =
+      fprintf oc "	.balign %d\n" alignment
 
-    let print_align = print_align
+    let print_jumptable oc jmptbl =
+      let print_tbl oc (lbl, tbl) =
+        fprintf oc "%a:\n" label lbl;
+        List.iter
+          (fun l -> fprintf oc "	.long	%a - %a\n"
+                               print_label l label lbl)
+          tbl in
+      if !jumptables <> [] then
+        begin
+          section oc jmptbl;
+          fprintf oc "	.balign 4\n";
+          List.iter (print_tbl oc) !jumptables;
+          jumptables := []
+        end
 
-    let print_comm_symb oc sz name align =
-      if C2C.atom_is_static name
-      then System.print_lcomm_decl oc name sz align
-      else System.print_comm_decl oc name sz align
+    let print_fun_info = elf_print_fun_info
 
-    let name_of_section = name_of_section
+    let print_optional_fun_info _ = ()
 
-    let emit_constants oc lit =
-       if exists_constants () then begin
-         section oc lit;
-         print_align oc 8;
-         Hashtbl.iter (print_literal64 oc) literal64_labels;
-         Hashtbl.iter (print_literal32 oc) literal32_labels;
-         reset_literals ()
-       end
+    let print_var_info = elf_print_var_info
 
-    let cfi_startproc = cfi_startproc
-    let cfi_endproc = cfi_endproc
+    let print_comm_symb oc sz name align =
+      if C2C.atom_is_static name then
+        fprintf oc "	.local	%a\n" symbol name;
+        fprintf oc "	.comm	%a, %s, %d\n"
+        symbol name
+        (Z.to_string sz)
+        align
 
     let print_instructions oc fn =
       current_function_sig := fn.fn_sig;
       List.iter (print_instruction oc) fn.fn_code
 
-    let print_optional_fun_info _ = ()
 
-    let get_section_names name =
-      match C2C.atom_sections name with
-      | [t;l;j] -> (t, l, j)
-      |    _    -> (Section_text, Section_literal, Section_jumptable)
-
-    let print_fun_info = print_fun_info
+(* Data *)
 
-    let print_var_info = print_var_info
+    let address = if Archi.ptr64 then ".quad" else ".long"
 
     let print_prologue oc =
-      need_masks := false;
+      fprintf oc "	.option %s\n" (if Archi.pic_code() then "pic" else "nopic");
       if !Clflags.option_g then begin
         section oc Section_text;
-        if Configuration.system <> "bsd" then cfi_section oc
       end
 
     let print_epilogue oc =
-      if !need_masks then begin
-        section oc Section_literal;
-        print_align oc 16;
-        fprintf oc "%a:	.quad   0x8000000000000000, 0\n"
-          raw_symbol "__negd_mask";
-        fprintf oc "%a:	.quad   0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n"
-          raw_symbol "__absd_mask";
-        fprintf oc "%a:	.long   0x80000000, 0, 0, 0\n"
-          raw_symbol "__negs_mask";
-        fprintf oc "%a:	.long   0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
-          raw_symbol "__abss_mask"
-      end;
-      System.print_epilogue oc;
       if !Clflags.option_g then begin
         Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
         section oc Section_text;
       end
 
-    let comment = comment
-
-    let default_falignment = 16
+    let default_falignment = 2
 
-    let label = label
+    let cfi_startproc oc = ()
+    let cfi_endproc oc = ()
 
-    let address = if Archi.ptr64 then ".quad" else ".long"
-
-end
+  end
 
 let sel_target () =
- let module S = (val (match Configuration.system with
-  | "linux" | "bsd" -> (module ELF_System:SYSTEM)
-  | "macos" -> (module MacOS_System:SYSTEM)
-  | "cygwin" -> (module Cygwin_System:SYSTEM)
-  | _ -> invalid_arg ("System " ^ Configuration.system ^ " not supported")  ):SYSTEM) in
- (module Target(S):TARGET)
+  (module Target:TARGET)
diff --git a/verilog/ValueAOp.v b/verilog/ValueAOp.v
index e5584b6a..e0314c6a 100644
--- a/verilog/ValueAOp.v
+++ b/verilog/ValueAOp.v
@@ -13,8 +13,45 @@
 Require Import Coqlib Compopts.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op RTL ValueDomain.
+Require Import Zbits Lia.
 
-(** Value analysis for x86_64 operators *)
+(** Value analysis for RISC V operators *)
+
+Definition zero32 := (I Int.zero).
+Definition zero64 := (L Int64.zero).
+
+(** Functions to select a special register (see Op.v) *)
+
+Definition apply_bin_oreg {B} (optR: option oreg) (sem: aval -> aval -> B) (v1 v2 vz: aval): B :=
+  match optR with
+  | None => sem v1 v2
+  | Some X0_L => sem vz v1
+  | Some X0_R => sem v1 vz
+  end.
+
+Definition eval_may_undef (mu: mayundef) (v1 v2: aval): aval :=
+  match mu with
+  | MUint => match v1, v2 with
+             | I _, I _ => v2
+             | _, _ => Ifptr Ptop
+             end
+  | MUlong => match v1, v2 with
+              | L _, I _ => v2
+              | _, _ => Ifptr Ptop
+              end
+  | MUshrx i =>
+      match v1, v2 with
+      | I _, I _ =>
+          if Int.ltu i (Int.repr 31) then v2 else Ifptr Ptop
+      | _, _ => Ifptr Ptop
+      end
+  | MUshrxl i =>
+      match v1, v2 with
+      | L _, L _ =>
+          if Int.ltu i (Int.repr 63) then v2 else Ifptr Ptop
+      | _, _ => Ifptr Ptop
+      end
+  end.
 
 Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   match cond, vl with
@@ -30,41 +67,65 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
   | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
   | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
-  | Cmaskzero n, v1 :: nil => maskzero v1 n
-  | Cmasknotzero n, v1 :: nil => cnot (maskzero v1 n)
+  | CEbeqw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Ceq) v1 v2 zero32
+  | CEbnew optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Cne) v1 v2 zero32
+  | CEbequw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Ceq) v1 v2 zero32
+  | CEbneuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Cne) v1 v2 zero32
+  | CEbltw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Clt) v1 v2 zero32
+  | CEbltuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Clt) v1 v2 zero32
+  | CEbgew optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmp_bool Cge) v1 v2 zero32
+  | CEbgeuw optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpu_bool Cge) v1 v2 zero32
+  | CEbeql optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Ceq) v1 v2 zero64
+  | CEbnel optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Cne) v1 v2 zero64
+  | CEbequl optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Ceq) v1 v2 zero64
+  | CEbneul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Cne) v1 v2 zero64
+  | CEbltl optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Clt) v1 v2 zero64
+  | CEbltul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Clt) v1 v2 zero64
+  | CEbgel optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmpl_bool Cge) v1 v2 zero64
+  | CEbgeul optR, v1 :: v2 :: nil => apply_bin_oreg optR (cmplu_bool Cge) v1 v2 zero64
   | _, _ => Bnone
   end.
 
-Definition eval_static_addressing_32 (addr: addressing) (vl: list aval): aval :=
+Definition eval_static_addressing (addr: addressing) (vl: list aval): aval :=
   match addr, vl with
-  | Aindexed n, v1::nil => add v1 (I (Int.repr n))
-  | Aindexed2 n, v1::v2::nil => add (add v1 v2) (I (Int.repr n))
-  | Ascaled sc ofs, v1::nil => add (mul v1 (I (Int.repr sc))) (I (Int.repr ofs))
-  | Aindexed2scaled sc ofs, v1::v2::nil => add v1 (add (mul v2 (I (Int.repr sc))) (I (Int.repr ofs)))
+  | Aindexed n, v1::nil => offset_ptr v1 n
   | Aglobal s ofs, nil => Ptr (Gl s ofs)
-  | Abased s ofs, v1::nil => add (Ptr (Gl s ofs)) v1
-  | Abasedscaled sc s ofs, v1::nil => add (Ptr (Gl s ofs)) (mul v1 (I (Int.repr sc)))
-  | Ainstack ofs, nil => Ptr(Stk ofs)
+  | Ainstack ofs, nil => Ptr (Stk ofs)
   | _, _ => Vbot
   end.
 
-Definition eval_static_addressing_64 (addr: addressing) (vl: list aval): aval :=
-  match addr, vl with
-  | Aindexed n, v1::nil => addl v1 (L (Int64.repr n))
-  | Aindexed2 n, v1::v2::nil => addl (addl v1 v2) (L (Int64.repr n))
-  | Ascaled sc ofs, v1::nil => addl (mull v1 (L (Int64.repr sc))) (L (Int64.repr ofs))
-  | Aindexed2scaled sc ofs, v1::v2::nil => addl v1 (addl (mull v2 (L (Int64.repr sc))) (L (Int64.repr ofs)))
-  | Aglobal s ofs, nil => Ptr (Gl s ofs)
-  | Abased s ofs, v1::nil => addl (Ptr (Gl s ofs)) v1
-  | Abasedscaled sc s ofs, v1::nil => addl (Ptr (Gl s ofs)) (mull v1 (L (Int64.repr sc)))
-  | Ainstack ofs, nil => Ptr(Stk ofs)
-  | _, _ => Vbot
+Definition bits_of_single (v : aval) : aval :=
+  match v with
+  | FS f => I (Float32.to_bits f)
+  | _ => ntop1 v
   end.
 
-Definition eval_static_addressing (addr: addressing) (vl: list aval): aval :=
-  if Archi.ptr64
-  then eval_static_addressing_64 addr vl
-  else eval_static_addressing_32 addr vl.
+Definition bits_of_float (v : aval) : aval :=
+  match v with
+  | F f => L (Float.to_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition single_of_bits (v : aval) : aval :=
+  match v with
+  | I f => FS (Float32.of_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition float_of_bits (v : aval) : aval :=
+  match v with
+  | L f => F (Float.of_bits f)
+  | _ => ntop1 v
+  end.
+
+Definition select01_long (vb : aval) (vt : aval) (vf : aval) :=
+  match vb with
+  | I b =>
+    if Int.eq b Int.one then add_undef vt
+    else if Int.eq b Int.zero then add_undef vf
+         else add_undef (vlub vt vf)
+  | _ => add_undef (vlub vt vf)
+  end.
 
 Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   match op, vl with
@@ -73,70 +134,63 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Olongconst n, nil => L n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ntop
   | Osingleconst n, nil => if propagate_float_constants tt then FS n else ntop
-  | Oindirectsymbol id, nil => Ifptr (Gl id Ptrofs.zero)
+  | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
+  | Oaddrstack ofs, nil => Ptr (Stk ofs)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
-  | Ocast8unsigned, v1 :: nil => zero_ext 8 v1
   | Ocast16signed, v1 :: nil => sign_ext 16 v1
-  | Ocast16unsigned, v1 :: nil => zero_ext 16 v1
+  | Oadd, v1::v2::nil => add v1 v2
+  | Oaddimm n, v1::nil => add v1 (I n)
   | Oneg, v1::nil => neg v1
   | Osub, v1::v2::nil => sub v1 v2
   | Omul, v1::v2::nil => mul v1 v2
-  | Omulimm n, v1::nil => mul v1 (I n)
   | Omulhs, v1::v2::nil => mulhs v1 v2
   | Omulhu, v1::v2::nil => mulhu v1 v2
-  | Odiv, v1::v2::nil => divs v1 v2
-  | Odivu, v1::v2::nil => divu v1 v2
-  | Omod, v1::v2::nil => mods v1 v2
-  | Omodu, v1::v2::nil => modu v1 v2
+  | Odiv, v1::v2::nil => divs_total v1 v2
+  | Odivu, v1::v2::nil => divu_total v1 v2
+  | Omod, v1::v2::nil => mods_total v1 v2
+  | Omodu, v1::v2::nil => modu_total v1 v2
   | Oand, v1::v2::nil => and v1 v2
   | Oandimm n, v1::nil => and v1 (I n)
   | Oor, v1::v2::nil => or v1 v2
   | Oorimm n, v1::nil => or v1 (I n)
   | Oxor, v1::v2::nil => xor v1 v2
   | Oxorimm n, v1::nil => xor v1 (I n)
-  | Onot, v1::nil => notint v1
   | Oshl, v1::v2::nil => shl v1 v2
   | Oshlimm n, v1::nil => shl v1 (I n)
   | Oshr, v1::v2::nil => shr v1 v2
   | Oshrimm n, v1::nil => shr v1 (I n)
-  | Oshrximm n, v1::nil => shrx v1 (I n)
   | Oshru, v1::v2::nil => shru v1 v2
   | Oshruimm n, v1::nil => shru v1 (I n)
-  | Ororimm n, v1::nil => ror v1 (I n)
-  | Oshldimm n, v1::v2::nil => or (shl v1 (I n)) (shru v2 (I (Int.sub Int.iwordsize n)))
-  | Olea addr, _ => eval_static_addressing_32 addr vl
+  | Oshrximm n, v1::nil => shrx v1 (I n)
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
   | Ohighlong, v1::nil => hiword v1
   | Ocast32signed, v1::nil => longofint v1
   | Ocast32unsigned, v1::nil => longofintu v1
-  | Onegl, v1::nil => negl v1
+  | Oaddl, v1::v2::nil => addl v1 v2
   | Oaddlimm n, v1::nil => addl v1 (L n)
+  | Onegl, v1::nil => negl v1
   | Osubl, v1::v2::nil => subl v1 v2
   | Omull, v1::v2::nil => mull v1 v2
-  | Omullimm n, v1::nil => mull v1 (L n)
   | Omullhs, v1::v2::nil => mullhs v1 v2
   | Omullhu, v1::v2::nil => mullhu v1 v2
-  | Odivl, v1::v2::nil => divls v1 v2
-  | Odivlu, v1::v2::nil => divlu v1 v2
-  | Omodl, v1::v2::nil => modls v1 v2
-  | Omodlu, v1::v2::nil => modlu v1 v2
+  | Odivl, v1::v2::nil => divls_total v1 v2
+  | Odivlu, v1::v2::nil => divlu_total v1 v2
+  | Omodl, v1::v2::nil => modls_total v1 v2
+  | Omodlu, v1::v2::nil => modlu_total v1 v2
   | Oandl, v1::v2::nil => andl v1 v2
   | Oandlimm n, v1::nil => andl v1 (L n)
   | Oorl, v1::v2::nil => orl v1 v2
   | Oorlimm n, v1::nil => orl v1 (L n)
   | Oxorl, v1::v2::nil => xorl v1 v2
   | Oxorlimm n, v1::nil => xorl v1 (L n)
-  | Onotl, v1::nil => notl v1
   | Oshll, v1::v2::nil => shll v1 v2
   | Oshllimm n, v1::nil => shll v1 (I n)
   | Oshrl, v1::v2::nil => shrl v1 v2
   | Oshrlimm n, v1::nil => shrl v1 (I n)
-  | Oshrxlimm n, v1::nil => shrxl v1 (I n)
   | Oshrlu, v1::v2::nil => shrlu v1 v2
   | Oshrluimm n, v1::nil => shrlu v1 (I n)
-  | Ororlimm n, v1::nil => rorl v1 (I n)
-  | Oleal addr, _ => eval_static_addressing_64 addr vl
+  | Oshrxlimm n, v1::nil => shrxl v1 (I n)
   | Onegf, v1::nil => negf v1
   | Oabsf, v1::nil => absf v1
   | Oaddf, v1::v2::nil => addf v1 v2
@@ -151,16 +205,64 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
   | Ofloatofsingle, v1::nil => floatofsingle v1
-  | Ointoffloat, v1::nil => intoffloat v1
+  | Ointoffloat, v1::nil => intoffloat_total v1
+  | Ointuoffloat, v1::nil => intuoffloat_total v1
   | Ofloatofint, v1::nil => floatofint v1
-  | Ointofsingle, v1::nil => intofsingle v1
+  | Ofloatofintu, v1::nil => floatofintu v1
+  | Ointofsingle, v1::nil => intofsingle_total v1
+  | Ointuofsingle, v1::nil => intuofsingle_total v1
   | Osingleofint, v1::nil => singleofint v1
-  | Olongoffloat, v1::nil => longoffloat v1
+  | Osingleofintu, v1::nil => singleofintu v1
+  | Olongoffloat, v1::nil => longoffloat_total v1
+  | Olonguoffloat, v1::nil => longuoffloat_total v1
   | Ofloatoflong, v1::nil => floatoflong v1
-  | Olongofsingle, v1::nil => longofsingle v1
+  | Ofloatoflongu, v1::nil => floatoflongu v1
+  | Olongofsingle, v1::nil => longofsingle_total v1
+  | Olonguofsingle, v1::nil => longuofsingle_total v1
   | Osingleoflong, v1::nil => singleoflong v1
+  | Osingleoflongu, v1::nil => singleoflongu v1
   | Ocmp c, _ => of_optbool (eval_static_condition c vl)
-  | Osel c ty, v1::v2::vl => select (eval_static_condition c vl) v1 v2
+  | OEseqw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Ceq) v1 v2 zero32)
+  | OEsnew optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Cne) v1 v2 zero32)
+  | OEsequw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Ceq) v1 v2 zero32)
+  | OEsneuw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Cne) v1 v2 zero32)
+  | OEsltw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmp_bool Clt) v1 v2 zero32)
+  | OEsltuw optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpu_bool Clt) v1 v2 zero32)
+  | OEsltiw n, v1::nil => of_optbool (cmp_bool Clt v1 (I n))
+  | OEsltiuw n, v1::nil => of_optbool (cmpu_bool Clt v1 (I n))
+  | OExoriw n, v1::nil => xor v1 (I n)
+  | OEluiw n, nil => shl (I n) (I (Int.repr 12))
+  | OEaddiw optR n, nil => apply_bin_oreg optR add (I n) (Ifptr Ptop) zero32
+  | OEaddiw optR n, v1::nil => apply_bin_oreg optR add v1 (I n) (Ifptr Ptop)
+  | OEandiw n, v1::nil => and (I n) v1
+  | OEoriw n, v1::nil => or (I n) v1
+  | OEseql optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Ceq) v1 v2 zero64)
+  | OEsnel optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Cne) v1 v2 zero64)
+  | OEsequl optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Ceq) v1 v2 zero64)
+  | OEsneul optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Cne) v1 v2 zero64)
+  | OEsltl optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmpl_bool Clt) v1 v2 zero64)
+  | OEsltul optR, v1::v2::nil => of_optbool (apply_bin_oreg optR (cmplu_bool Clt) v1 v2 zero64)
+  | OEsltil n, v1::nil => of_optbool (cmpl_bool Clt v1 (L n))
+  | OEsltiul n, v1::nil => of_optbool (cmplu_bool Clt v1 (L n))
+  | OEandil n, v1::nil => andl (L n) v1
+  | OEoril n, v1::nil => orl (L n) v1
+  | OExoril n, v1::nil => xorl v1 (L n)
+  | OEluil n, nil => sign_ext 32 (shll (L n) (L (Int64.repr 12)))
+  | OEaddil optR n, nil => apply_bin_oreg optR addl (L n) (Ifptr Ptop) zero64
+  | OEaddil optR n, v1::nil => apply_bin_oreg optR addl v1 (L n) (Ifptr Ptop)
+  | OEloadli n, nil => L (n)
+  | OEmayundef mu, v1 :: v2 :: nil => eval_may_undef mu v1 v2
+  | OEfeqd, v1::v2::nil => of_optbool (cmpf_bool Ceq v1 v2)
+  | OEfltd, v1::v2::nil => of_optbool (cmpf_bool Clt v1 v2)
+  | OEfled, v1::v2::nil => of_optbool (cmpf_bool Cle v1 v2)
+  | OEfeqs, v1::v2::nil => of_optbool (cmpfs_bool Ceq v1 v2)
+  | OEflts, v1::v2::nil => of_optbool (cmpfs_bool Clt v1 v2)
+  | OEfles, v1::v2::nil => of_optbool (cmpfs_bool Cle v1 v2)
+  | Obits_of_single, v1::nil => bits_of_single v1
+  | Obits_of_float, v1::nil => bits_of_float v1
+  | Osingle_of_bits, v1::nil => single_of_bits v1
+  | Ofloat_of_bits, v1::nil => float_of_bits v1
+  | Oselectl, vb::vt::vf::nil => select01_long vb vt vf
   | _, _ => Vbot
   end.
 
@@ -172,6 +274,75 @@ Hypothesis GENV: genv_match bc ge.
 Variable sp: block.
 Hypothesis STACK: bc sp = BCstack.
 
+Lemma bits_of_single_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.bits_of_single v) (bits_of_single x).
+Proof.
+  unfold ExtValues.bits_of_single; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma bits_of_float_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.bits_of_float v) (bits_of_float x).
+Proof.
+  unfold ExtValues.bits_of_float; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma single_of_bits_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.single_of_bits v) (single_of_bits x).
+Proof.
+  unfold ExtValues.bits_of_single; intros. inv H; cbn; constructor.
+Qed.
+
+Lemma float_of_bits_sound:
+  forall v x, vmatch bc v x -> vmatch bc (ExtValues.float_of_bits v) (float_of_bits x).
+Proof.
+  unfold ExtValues.bits_of_float; intros. inv H; cbn; constructor.
+Qed.
+
+  
+Lemma select01_long_sound:
+  forall vb xb vt xt vf xf
+         (MATCH_b : vmatch bc vb xb)
+         (MATCH_t : vmatch bc vt xt)
+         (MATCH_f : vmatch bc vf xf),
+    vmatch bc (Val.normalize (ExtValues.select01_long vb vt vf) Tlong)
+              (select01_long xb xt xf).
+Proof.
+  intros.
+  inv MATCH_b; cbn; try apply add_undef_undef.
+  - destruct (Int.eq i Int.one). { apply add_undef_normalize; trivial. }
+    destruct (Int.eq i Int.zero). { apply add_undef_normalize; trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+  - destruct (Int.eq i Int.one).
+    { apply add_undef_normalize.
+      apply vmatch_lub_l.
+      trivial. }
+    destruct (Int.eq i Int.zero).
+    { apply add_undef_normalize.
+      apply vmatch_lub_r.
+      trivial. }
+    cbn. apply add_undef_undef.
+Qed.
+
+Hint Resolve bits_of_single_sound bits_of_float_sound single_of_bits_sound float_of_bits_sound select01_long_sound : va.
+
 Theorem eval_static_condition_sound:
   forall cond vargs m aargs,
   list_forall2 (vmatch bc) vargs aargs ->
@@ -183,7 +354,9 @@ Proof.
   destruct cond; simpl; eauto with va.
   inv H2.
   destruct cond; simpl; eauto with va.
-  destruct cond; auto with va.
+  17: destruct cond; simpl; eauto with va.
+  all: destruct optR as [[]|]; unfold apply_bin_oreg, Op.apply_bin_oreg;
+       unfold zero32, Op.zero32, zero64, Op.zero64; eauto with va.
 Qed.
 
 Lemma symbol_address_sound:
@@ -214,36 +387,79 @@ Ltac InvHyps :=
   | _ => idtac
   end.
 
-Theorem eval_static_addressing_32_sound:
+Theorem eval_static_addressing_sound:
   forall addr vargs vres aargs,
-  eval_addressing32 ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
+  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
   list_forall2 (vmatch bc) vargs aargs ->
-  vmatch bc vres (eval_static_addressing_32 addr aargs).
+  vmatch bc vres (eval_static_addressing addr aargs).
 Proof.
-  unfold eval_addressing32, eval_static_addressing_32; intros;
+  unfold eval_addressing, eval_static_addressing; intros;
   destruct addr; InvHyps; eauto with va.
   rewrite Ptrofs.add_zero_l; eauto with va.
 Qed.
 
-Theorem eval_static_addressing_64_sound:
-  forall addr vargs vres aargs,
-  eval_addressing64 ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
-  list_forall2 (vmatch bc) vargs aargs ->
-  vmatch bc vres (eval_static_addressing_64 addr aargs).
+Lemma of_optbool_maketotal_sound:
+  forall ob ab, cmatch ob ab -> vmatch bc (Val.maketotal (option_map Val.of_bool ob)) (of_optbool ab).
 Proof.
-  unfold eval_addressing64, eval_static_addressing_64; intros;
-  destruct addr; InvHyps; eauto with va.
-  rewrite Ptrofs.add_zero_l; eauto with va.
+  intros.
+  assert (DEFAULT: vmatch bc (Val.maketotal (option_map Val.of_bool ob)) (Uns Pbot 1)).
+  {
+    destruct ob; simpl; auto with va.
+    destruct b; constructor; try lia.
+    change 1 with (usize Int.one). apply is_uns_usize.
+    red; intros. apply Int.bits_zero.
+  }
+  inv H; auto. simpl. destruct b; constructor.
 Qed.
 
-Theorem eval_static_addressing_sound:
-  forall addr vargs vres aargs,
-  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
-  list_forall2 (vmatch bc) vargs aargs ->
-  vmatch bc vres (eval_static_addressing addr aargs).
+Lemma eval_cmpu_sound c: forall a1 b1 a0 b0 optR m,
+  c = Ceq \/ c = Cne \/ c = Clt->
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc (Op.apply_bin_oreg optR (Val.cmpu (Mem.valid_pointer m) c) a1 a0 Op.zero32)
+  (of_optbool (apply_bin_oreg optR (cmpu_bool c) b1 b0 zero32)).
 Proof.
-  unfold eval_addressing, eval_static_addressing; intros.
-  destruct Archi.ptr64; eauto using eval_static_addressing_32_sound, eval_static_addressing_64_sound.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_sound; unfold Op.zero32, zero32; eauto with va.
+Qed.
+
+Lemma eval_cmplu_sound c: forall a1 b1 a0 b0 optR m,
+  c = Ceq \/ c = Cne \/ c = Clt->
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc
+    (Val.maketotal
+       (Op.apply_bin_oreg optR (Val.cmplu (Mem.valid_pointer m) c) a1 a0
+          Op.zero64))
+    (of_optbool (apply_bin_oreg optR (cmplu_bool c) b1 b0 zero64)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_maketotal_sound; unfold Op.zero64, zero64; eauto with va.
+Qed.
+
+Lemma eval_cmp_sound: forall a1 b1 a0 b0 optR cmp,
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc (Op.apply_bin_oreg optR (Val.cmp cmp) a1 a0 Op.zero32)
+  (of_optbool (apply_bin_oreg optR (cmp_bool cmp) b1 b0 zero32)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_sound; unfold Op.zero32, zero32; eauto with va.
+Qed.
+
+Lemma eval_cmpl_sound: forall a1 b1 a0 b0 optR cmp,
+  vmatch bc a1 b1 ->
+  vmatch bc a0 b0 ->
+  vmatch bc
+    (Val.maketotal (Op.apply_bin_oreg optR (Val.cmpl cmp) a1 a0 Op.zero64))
+    (of_optbool (apply_bin_oreg optR (cmpl_bool cmp) b1 b0 zero64)).
+Proof.
+  intros.
+  destruct optR as [[]|]; unfold Op.apply_bin_oreg, apply_bin_oreg;
+  apply of_optbool_maketotal_sound; unfold Op.zero64, zero64; eauto with va.
 Qed.
 
 Theorem eval_static_operation_sound:
@@ -256,30 +472,42 @@ Proof.
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
   destruct (propagate_float_constants tt); constructor.
-  eapply eval_static_addressing_32_sound; eauto.
-  eapply eval_static_addressing_64_sound; eauto.
+  rewrite Ptrofs.add_zero_l; eauto with va.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
-  apply select_sound; auto. eapply eval_static_condition_sound; eauto.
-Qed.
-(*
-Theorem eval_static_addressing_sound_none:
-  forall addr vargs aargs,
-  eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = None ->
-  list_forall2 (vmatch bc) vargs aargs ->
-  (eval_static_addressing addr aargs) = Vbot.
-Proof.
-  unfold eval_addressing, eval_static_addressing.
-  intros until aargs. intros Heval_none Hlist.
-  destruct (Archi.ptr64).
-  inv Hlist.
-  destruct addr; trivial; discriminate.
-  inv H0.
-  destruct addr; trivial; try discriminate. simpl in *.
-  inv H2.
-  destruct addr; trivial; discriminate.
-  inv H3;
-  destruct addr; trivial; discriminate.
+
+  3,4,6: apply eval_cmpu_sound; auto.
+  1,2,3: apply eval_cmp_sound; auto.
+  unfold Val.cmp; apply of_optbool_sound; eauto with va.
+  unfold Val.cmpu; apply of_optbool_sound; eauto with va.
+  
+  { destruct optR as [[]|]; simpl; eauto with va. }
+  { destruct optR as [[]|];
+    unfold apply_bin_oreg, Op.apply_bin_oreg; eauto with va. }
+  { fold (Val.and (Vint n) a1); eauto with va. }
+  { fold (Val.or (Vint n) a1); eauto with va. }
+  { simpl; try destruct (Int.ltu _ _); eauto with va; unfold ntop1;
+  try apply vmatch_ifptr_undef. }
+  9: { destruct optR as [[]|]; simpl; eauto with va. }
+  9: { destruct optR as [[]|];
+    unfold apply_bin_oreg, Op.apply_bin_oreg; eauto with va. }
+  9: { fold (Val.andl (Vlong n) a1); eauto with va. }
+  9: { fold (Val.orl (Vlong n) a1); eauto with va. }
+  9: { simpl; unfold ntop1, sign_ext, Int64.sign_ext, sgn; simpl;
+  apply vmatch_ifptr_l. }
+  
+  1,10: simpl; eauto with va.
+  10:
+    unfold Op.eval_may_undef, eval_may_undef; destruct mu;
+    inv H1; inv H0; eauto with va;
+    try destruct (Int.ltu _ _); simpl;
+    try eapply vmatch_ifptr_p, pmatch_top'; eauto with va.
+  
+  4,5,7: apply eval_cmplu_sound; auto.
+  1,3,4: apply eval_cmpl_sound; auto.
+  2: { unfold Val.cmpl; apply of_optbool_maketotal_sound; eauto with va. }
+  2: { unfold Val.cmplu; apply of_optbool_maketotal_sound; eauto with va. }
+  all: unfold Val.cmpf; apply of_optbool_sound; eauto with va.
 Qed.
-*)
+
 End SOUNDNESS.
 
diff --git a/verilog/extractionMachdep.v b/verilog/extractionMachdep.v
index 26a3f0a7..890735ba 100644
--- a/verilog/extractionMachdep.v
+++ b/verilog/extractionMachdep.v
@@ -14,21 +14,15 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(* Additional extraction directives specific to the x86-64 port *)
+(* Additional extraction directives specific to the RISC-V port *)
 
-Require Archi SelectOp.
+Require Archi Asm.
 
 (* Archi *)
 
-Extract Constant Archi.win64 =>
-  "match Configuration.system with
-    | ""cygwin"" when ptr64 -> true
-    | _ -> false".
+Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ".
+Extract Constant Archi.pic_code => "fun () -> false".  (* for the time being *)
 
-(* SelectOp *)
-
-Extract Constant SelectOp.symbol_is_external =>
-  "match Configuration.system with
-    | ""macos"" -> C2C.atom_is_extern
-    | ""cygwin"" when Archi.ptr64 -> C2C.atom_is_extern
-    | _ -> (fun _ -> false)".
+(* Asm *)
+Extract Constant Asm.low_half => "fun _ _ _ -> assert false".
+Extract Constant Asm.high_half => "fun _ _ _ -> assert false".
author	Yann Herklotz <git@yannherklotz.com>	2022-03-14 00:13:04 +0000
committer	Yann Herklotz <git@yannherklotz.com>	2022-03-14 00:13:04 +0000
commit	20096af8d044ccea01360822834c748e17acd572 (patch)
tree	8cd763523cf298bde2ee014d14ec3a7ee2db09e4
parent	9c72ffe762dc2f90109d5991f74ee0ee4e9a8ec3 (diff)
download	compcert-kvx-20096af8d044ccea01360822834c748e17acd572.tar.gz compcert-kvx-20096af8d044ccea01360822834c748e17acd572.zip