aboutsummaryrefslogtreecommitdiffstats
path: root/kvx
diff options
context:
space:
mode:
Diffstat (limited to 'kvx')
-rw-r--r--kvx/Archi.v80
-rw-r--r--kvx/Asm.v758
-rw-r--r--kvx/AsmToJSON.ml23
-rw-r--r--kvx/Asmaux.v19
-rw-r--r--kvx/Asmblock.v394
-rw-r--r--kvx/Asmblockdeps.v1845
-rw-r--r--kvx/Asmblockgen.v1211
-rw-r--r--kvx/Asmblockgenproof.v1808
-rw-r--r--kvx/Asmblockgenproof0.v982
-rw-r--r--kvx/Asmblockgenproof1.v2500
-rw-r--r--kvx/Asmblockprops.v357
-rw-r--r--kvx/Asmexpand.ml642
-rw-r--r--kvx/Asmgen.v41
-rw-r--r--kvx/Asmgenproof.v96
-rw-r--r--kvx/Asmvliw.v1729
l---------kvx/BTL_SEsimplify.v1
-rw-r--r--kvx/Builtins1.v61
-rw-r--r--kvx/CBuiltins.ml145
-rw-r--r--kvx/CSE2deps.v35
-rw-r--r--kvx/CSE2depsproof.v146
-rw-r--r--kvx/Chunks.v36
-rw-r--r--kvx/CombineOp.v141
-rw-r--r--kvx/CombineOpproof.v176
-rw-r--r--kvx/ConstpropOp.vp312
-rw-r--r--kvx/ConstpropOpproof.v749
-rw-r--r--kvx/Conventions1.v431
-rw-r--r--kvx/DecBoolOps.v30
-rw-r--r--kvx/DuplicateOpcodeHeuristic.ml41
l---------kvx/ExpansionOracle.ml1
-rw-r--r--kvx/ExtFloats.v54
-rw-r--r--kvx/ExtValues.v756
-rw-r--r--kvx/Machregs.v245
-rw-r--r--kvx/Machregsaux.ml35
-rw-r--r--kvx/Machregsaux.mli20
-rw-r--r--kvx/NeedOp.v415
-rw-r--r--kvx/Op.v2014
-rw-r--r--kvx/OpWeights.ml115
-rw-r--r--kvx/Peephole.v158
-rw-r--r--kvx/PostpassScheduling.v526
-rw-r--r--kvx/PostpassSchedulingOracle.ml1036
-rw-r--r--kvx/PostpassSchedulingproof.v690
l---------kvx/PrepassSchedulingOracle.ml1
l---------kvx/PrepassSchedulingOracleDeps.ml1
-rw-r--r--kvx/PrintOp.ml229
-rw-r--r--kvx/SelectLong.vp463
-rw-r--r--kvx/SelectLongproof.v951
-rw-r--r--kvx/SelectOp.vp758
-rw-r--r--kvx/SelectOpproof.v1901
-rw-r--r--kvx/Stacklayout.v151
-rw-r--r--kvx/TargetPrinter.ml892
-rw-r--r--kvx/ValueAOp.v599
-rwxr-xr-xkvx/bitmasks.py12
-rw-r--r--kvx/extractionMachdep.v32
-rw-r--r--kvx/unittest/Makefile13
-rw-r--r--kvx/unittest/postpass_test.ml12
55 files changed, 26869 insertions, 0 deletions
diff --git a/kvx/Archi.v b/kvx/Archi.v
new file mode 100644
index 00000000..6d59a3d1
--- /dev/null
+++ b/kvx/Archi.v
@@ -0,0 +1,80 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Architecture-dependent parameters for MPPA KVX. Mostly copied from the Risc-V backend *)
+
+Require Import ZArith List.
+Require Import Binary Bits.
+
+Definition ptr64 := true.
+
+Definition big_endian := false.
+
+Definition align_int64 := 8%Z.
+Definition align_float64 := 8%Z.
+
+Definition splitlong := false.
+
+Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
+Proof.
+ unfold splitlong. congruence.
+Qed.
+
+(** FIXME - Check the properties below *)
+
+(** Section 7.3: "Except when otherwise stated, if the result of a
+ floating-point operation is NaN, it is the canonical NaN. The
+ canonical NaN has a positive sign and all significand bits clear
+ except the MSB, a.k.a. the quiet bit."
+ We need to extend the [choose_binop_pl] functions to account for
+ this case. *)
+
+Definition default_nan_64 := (false, iter_nat 51 _ xO xH).
+Definition default_nan_32 := (false, iter_nat 22 _ xO xH).
+
+(* Always choose the first NaN argument, if any *)
+
+Definition choose_nan_64 (l: list (bool * positive)) : bool * positive :=
+ match l with nil => default_nan_64 | n :: _ => n end.
+
+Definition choose_nan_32 (l: list (bool * positive)) : bool * positive :=
+ match l with nil => default_nan_32 | n :: _ => n end.
+
+Definition fpu_returns_default_qNaN := false.
+
+Lemma choose_nan_64_idem: forall n,
+ choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil).
+Proof. auto. Qed.
+
+Lemma choose_nan_32_idem: forall n,
+ choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
+Proof. auto. Qed.
+
+Definition fma_order {A: Type} (x y z: A) := (x, z, y).
+
+Definition fma_invalid_mul_is_nan := false.
+Definition float_of_single_preserves_sNaN := false.
+
+Global Opaque ptr64 big_endian splitlong
+ default_nan_64 choose_nan_64
+ default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
+ float_of_single_preserves_sNaN.
+
+(** Whether to generate position-independent code or not *)
+
+Parameter pic_code: unit -> bool.
+
+Definition has_notrap_loads := true.
diff --git a/kvx/Asm.v b/kvx/Asm.v
new file mode 100644
index 00000000..fd20316c
--- /dev/null
+++ b/kvx/Asm.v
@@ -0,0 +1,758 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Abstract syntax for KVX textual assembly language.
+
+ Each emittable instruction is defined here. ';;' is also defined as an instruction.
+ The goal of this representation is to stay compatible with the rest of the generic backend of CompCert
+ We define [unfold : list bblock -> list instruction]
+ An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code]
+ [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import ExtValues.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require Export Asmvliw.
+Require Import Linking.
+Require Import Errors.
+
+(** Definitions for OCaml code *)
+Definition label := positive.
+
+(* Necessary definition for Asmexpandaux.mli *)
+Definition preg := preg.
+
+Inductive addressing : Type :=
+ | AOff (ofs: offset)
+ | AReg (ro: ireg)
+ | ARegXS (ro: ireg)
+.
+
+(** * Syntax *)
+Inductive instruction : Type :=
+ (** pseudo instructions *)
+ | Pallocframe (sz: Z) (pos: ptrofs) (**r allocate new stack frame *)
+ | Pfreeframe (sz: Z) (pos: ptrofs) (**r deallocate stack frame and restore previous frame *)
+ | Plabel (lbl: label) (**r define a code label *)
+ | Ploadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the address of a symbol *)
+ | Pbuiltin: external_function -> list (builtin_arg preg)
+ -> builtin_res preg -> instruction (**r built-in function (pseudo) *)
+ | Psemi (**r semi colon separating bundles *)
+ | Pnop (**r instruction that does nothing *)
+
+ (** Control flow instructions *)
+ | Pget (rd: ireg) (rs: preg) (**r get system register *)
+ | Pset (rd: preg) (rs: ireg) (**r set system register *)
+ | Pret (**r return *)
+ | Pcall (l: label) (**r function call *)
+ | Picall (rs: ireg) (**r function call on register *)
+ (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *)
+ | Pgoto (l: label) (**r goto *)
+ | Pigoto (rs: ireg) (**r goto from register *)
+ | Pj_l (l: label) (**r jump to label *)
+ | Pcb (bt: btest) (r: ireg) (l: label) (**r branch based on btest *)
+ | Pcbu (bt: btest) (r: ireg) (l: label) (**r branch based on btest with unsigned semantics *)
+ | Pjumptable (r: ireg) (labels: list label)
+
+ (* For builtins *)
+ | Ploopdo (count: ireg) (loopend: label)
+ | Pgetn (n: int) (dst: ireg)
+ | Psetn (n: int) (src: ireg)
+ | Pwfxl (n: int) (src: ireg)
+ | Pwfxm (n: int) (src: ireg)
+ | Pldu (dst: ireg) (addr: ireg)
+ | Plbzu (dst: ireg) (addr: ireg)
+ | Plhzu (dst: ireg) (addr: ireg)
+ | Plwzu (dst: ireg) (addr: ireg)
+ | Pawait
+ | Psleep
+ | Pstop
+ | Pbarrier
+ | Pfence
+ | Pdinval
+ | Pdinvall (addr: ireg)
+ | Pdtouchl (addr: ireg)
+ | Piinval
+ | Piinvals (addr: ireg)
+ | Pitouchl (addr: ireg)
+ | Pdzerol (addr: ireg)
+(*| Pafaddd (addr: ireg) (incr_res: ireg)
+ | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *)
+ | Palclrd (dst: ireg) (addr: ireg)
+ | Palclrw (dst: ireg) (addr: ireg)
+ | Pclzll (rd rs: ireg)
+ | Pclzw (rd rs: ireg)
+ | Pctzll (rd rs: ireg)
+ | Pctzw (rd rs: ireg)
+ | Pstsud (rd rs1 rs2: ireg)
+
+ (** Loads *)
+ | Plb (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load byte *)
+ | Plbu (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load byte unsigned *)
+ | Plh (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load half word *)
+ | Plhu (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load half word unsigned *)
+ | Plw (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load int32 *)
+ | Plw_a (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load any32 *)
+ | Pld (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load int64 *)
+ | Pld_a (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load any64 *)
+ | Pfls (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing) (**r load float *)
+ | Pfld (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing) (**r load 64-bit float *)
+ | Plq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r load 2*64-bit *)
+ | Plo (rs: gpreg_o) (ra: ireg) (ofs: addressing) (**r load 4*64-bit *)
+
+ (** Stores *)
+ | Psb (rs: ireg) (ra: ireg) (ofs: addressing) (**r store byte *)
+ | Psh (rs: ireg) (ra: ireg) (ofs: addressing) (**r store half byte *)
+ | Psw (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int32 *)
+ | Psw_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any32 *)
+ | Psd (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int64 *)
+ | Psd_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any64 *)
+ | Pfss (rs: freg) (ra: ireg) (ofs: addressing) (**r store float *)
+ | Pfsd (rs: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
+
+ | Psq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r store 2*64-bit *)
+ | Pso (rs: gpreg_o) (ra: ireg) (ofs: addressing) (**r store 2*64-bit *)
+
+ (** Arith RR *)
+ | Pmv (rd rs: ireg) (**r register move *)
+ | Pnegw (rd rs: ireg) (**r negate word *)
+ | Pnegl (rd rs: ireg) (**r negate long *)
+ | Pcvtl2w (rd rs: ireg) (**r Convert Long to Word *)
+ | Psxwd (rd rs: ireg) (**r Sign Extend Word to Double Word *)
+ | Pzxwd (rd rs: ireg) (**r Zero Extend Word to Double Word *)
+
+ | Pextfz (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
+ | Pextfs (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
+
+ | Pextfzl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *)
+ | Pextfsl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *)
+
+ | Pinsf (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
+ | Pinsfl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *)
+
+ | Pfabsd (rd rs: ireg) (**r float absolute double *)
+ | Pfabsw (rd rs: ireg) (**r float absolute word *)
+ | Pfnegd (rd rs: ireg) (**r float negate double *)
+ | Pfnegw (rd rs: ireg) (**r float negate word *)
+ | Pfnarrowdw (rd rs: ireg) (**r float narrow 64 -> 32 bits *)
+ | Pfwidenlwd (rd rs: ireg) (**r float widen 32 -> 64 bits *)
+ | Pfloatwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (32 -> 32) *)
+ | Pfloatuwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (u32 -> 32) *)
+ | Pfloatudrnsz (rd rs: ireg) (**r Floating Point Conversion from unsigned integer (64 bits) *)
+ | Pfloatdrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (64 bits) *)
+ | Pfixedwrzz (rd rs: ireg) (**r Integer conversion from floating point *)
+ | Pfixeduwrzz (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *)
+ | Pfixeddrzz (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *)
+ | Pfixeddrzz_i32 (rd rs: ireg) (**r Integer conversion from floating point (i32 -> f64) *)
+ | Pfixedudrzz (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *)
+ | Pfixedudrzz_i32 (rd rs: ireg) (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *)
+
+ (** Arith RI32 *)
+ | Pmake (rd: ireg) (imm: int) (**r load immediate *)
+
+ (** Arith RI64 *)
+ | Pmakel (rd: ireg) (imm: int64) (**r load immediate long *)
+
+ (** Arith RF32 *)
+ | Pmakefs (rd: ireg) (imm: float32)
+
+ (** Arith RF64 *)
+ | Pmakef (rd: ireg) (imm: float)
+
+ (** Arith RRR *)
+ | Pcompw (it: itest) (rd rs1 rs2: ireg) (**r comparison word *)
+ | Pcompl (it: itest) (rd rs1 rs2: ireg) (**r comparison long *)
+ | Pfcompw (ft: ftest) (rd rs1 rs2: ireg) (**r comparison float *)
+ | Pfcompl (ft: ftest) (rd rs1 rs2: ireg) (**r comparison float64 *)
+
+ | Paddw (rd rs1 rs2: ireg) (**r add word *)
+ | Paddxw (shift : shift1_4) (rd rs1 rs2: ireg) (**r add word *)
+ | Psubw (rd rs1 rs2: ireg) (**r sub word *)
+ | Prevsubxw (shift : shift1_4) (rd rs1 rs2: ireg) (**r add word *)
+ | Pmulw (rd rs1 rs2: ireg) (**r mul word *)
+ | Pandw (rd rs1 rs2: ireg) (**r and word *)
+ | Pnandw (rd rs1 rs2: ireg) (**r nand word *)
+ | Porw (rd rs1 rs2: ireg) (**r or word *)
+ | Pnorw (rd rs1 rs2: ireg) (**r nor word *)
+ | Pxorw (rd rs1 rs2: ireg) (**r xor word *)
+ | Pnxorw (rd rs1 rs2: ireg) (**r xor word *)
+ | Pandnw (rd rs1 rs2: ireg) (**r andn word *)
+ | Pornw (rd rs1 rs2: ireg) (**r orn word *)
+ | Psraw (rd rs1 rs2: ireg) (**r shift right arithmetic word *)
+ | Psrxw (rd rs1 rs2: ireg) (**r shift right arithmetic word round to 0*)
+ | Psrlw (rd rs1 rs2: ireg) (**r shift right logical word *)
+ | Psllw (rd rs1 rs2: ireg) (**r shift left logical word *)
+ | Pmaddw (rd rs1 rs2: ireg) (**r multiply-add words *)
+ | Pmsubw (rd rs1 rs2: ireg) (**r multiply-add words *)
+ | Pfmaddfw (rd rs1 rs2: ireg) (**r float fused multiply-add words *)
+ | Pfmsubfw (rd rs1 rs2: ireg) (**r float fused multiply-subtract words *)
+ | Pfmaddfl (rd rs1 rs2: ireg) (**r float fused multiply-add longs *)
+ | Pfmsubfl (rd rs1 rs2: ireg) (**r float fused multiply-subtract longs *)
+
+ | Paddl (rd rs1 rs2: ireg) (**r add long *)
+ | Paddxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r add long shift *)
+ | Psubl (rd rs1 rs2: ireg) (**r sub long *)
+ | Prevsubxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r sub long shift *)
+ | Pandl (rd rs1 rs2: ireg) (**r and long *)
+ | Pnandl (rd rs1 rs2: ireg) (**r nand long *)
+ | Porl (rd rs1 rs2: ireg) (**r or long *)
+ | Pnorl (rd rs1 rs2: ireg) (**r nor long *)
+ | Pxorl (rd rs1 rs2: ireg) (**r xor long *)
+ | Pnxorl (rd rs1 rs2: ireg) (**r nxor long *)
+ | Pandnl (rd rs1 rs2: ireg) (**r andn long *)
+ | Pornl (rd rs1 rs2: ireg) (**r orn long *)
+ | Pmull (rd rs1 rs2: ireg) (**r mul long (low part) *)
+ | Pslll (rd rs1 rs2: ireg) (**r shift left logical long *)
+ | Psrll (rd rs1 rs2: ireg) (**r shift right logical long *)
+ | Psral (rd rs1 rs2: ireg) (**r shift right arithmetic long *)
+ | Psrxl (rd rs1 rs2: ireg) (**r shift right arithmetic long round to 0*)
+ | Pmaddl (rd rs1 rs2: ireg) (**r multiply-add long *)
+ | Pmsubl (rd rs1 rs2: ireg) (**r multiply-add long *)
+
+ | Pfaddd (rd rs1 rs2: ireg) (**r Float addition double *)
+ | Pfaddw (rd rs1 rs2: ireg) (**r Float addition word *)
+ | Pfsbfd (rd rs1 rs2: ireg) (**r Float sub double *)
+ | Pfsbfw (rd rs1 rs2: ireg) (**r Float sub word *)
+ | Pfmuld (rd rs1 rs2: ireg) (**r Float mul double *)
+ | Pfmulw (rd rs1 rs2: ireg) (**r Float mul word *)
+ | Pfmind (rd rs1 rs2: ireg) (**r Float min double *)
+ | Pfminw (rd rs1 rs2: ireg) (**r Float min word *)
+ | Pfmaxd (rd rs1 rs2: ireg) (**r Float max double *)
+ | Pfmaxw (rd rs1 rs2: ireg) (**r Float max word *)
+ | Pfinvw (rd rs1: ireg) (**r Float invert word *)
+
+ (** Arith RRI32 *)
+ | Pcompiw (it: itest) (rd rs: ireg) (imm: int) (**r comparison imm word *)
+
+ | Paddiw (rd rs: ireg) (imm: int) (**r add imm word *)
+ | Paddxiw (shift : shift1_4) (rd rs: ireg) (imm: int) (**r add imm word *)
+ | Prevsubiw (rd rs: ireg) (imm: int) (**r subtract imm word *)
+ | Prevsubxiw (shift : shift1_4) (rd rs: ireg) (imm: int) (**r subtract imm word *)
+ | Pmuliw (rd rs: ireg) (imm: int) (**r mul imm word *)
+ | Pandiw (rd rs: ireg) (imm: int) (**r and imm word *)
+ | Pnandiw (rd rs: ireg) (imm: int) (**r nand imm word *)
+ | Poriw (rd rs: ireg) (imm: int) (**r or imm word *)
+ | Pnoriw (rd rs: ireg) (imm: int) (**r nor imm word *)
+ | Pxoriw (rd rs: ireg) (imm: int) (**r xor imm word *)
+ | Pnxoriw (rd rs: ireg) (imm: int) (**r nxor imm word *)
+ | Pandniw (rd rs: ireg) (imm: int) (**r andn imm word *)
+ | Porniw (rd rs: ireg) (imm: int) (**r orn imm word *)
+ | Psraiw (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word *)
+ | Psrxiw (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word round to 0*)
+ | Psrliw (rd rs: ireg) (imm: int) (**r shift right logical imm word *)
+ | Pslliw (rd rs: ireg) (imm: int) (**r shift left logical imm word *)
+ | Proriw (rd rs: ireg) (imm: int) (**r rotate right imm word *)
+ | Pmaddiw (rd rs: ireg) (imm: int) (**r multiply add imm word *)
+ | Psllil (rd rs: ireg) (imm: int) (**r shift left logical immediate long *)
+ | Psrxil (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word round to 0*)
+ | Psrlil (rd rs: ireg) (imm: int) (**r shift right logical immediate long *)
+ | Psrail (rd rs: ireg) (imm: int) (**r shift right arithmetic immediate long *)
+
+ (** Arith RRI64 *)
+ | Pcompil (it: itest) (rd rs: ireg) (imm: int64) (**r comparison imm long *)
+ | Paddil (rd rs: ireg) (imm: int64) (**r add immediate long *)
+ | Paddxil (shift : shift1_4) (rd rs: ireg) (imm: int64) (**r add immediate long *)
+ | Prevsubil (rd rs: ireg) (imm: int64) (**r subtract imm long *)
+ | Prevsubxil (shift : shift1_4) (rd rs: ireg) (imm: int64) (**r subtract imm long *)
+ | Pmulil (rd rs: ireg) (imm: int64) (**r add immediate long *)
+ | Pandil (rd rs: ireg) (imm: int64) (**r and immediate long *)
+ | Pnandil (rd rs: ireg) (imm: int64) (**r and immediate long *)
+ | Poril (rd rs: ireg) (imm: int64) (**r or immediate long *)
+ | Pnoril (rd rs: ireg) (imm: int64) (**r and immediate long *)
+ | Pxoril (rd rs: ireg) (imm: int64) (**r xor immediate long *)
+ | Pnxoril (rd rs: ireg) (imm: int64) (**r xor immediate long *)
+ | Pandnil (rd rs: ireg) (imm: int64) (**r andn long *)
+ | Pornil (rd rs: ireg) (imm: int64) (**r orn long *)
+ | Pmaddil (rd rs: ireg) (imm: int64) (**r multiply add imm long *)
+ | Pcmove (bt: btest) (rcond rd rs : ireg) (** conditional move *)
+ | Pcmoveu (bt: btest) (rcond rd rs : ireg) (** conditional move, unsigned semantics *)
+ | Pcmoveiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move *)
+ | Pcmoveuiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move, unsigned semantics *)
+ | Pcmoveil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move *)
+ | Pcmoveuil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move, unsigned semantics *)
+.
+
+(** Correspondance between Asmblock and Asm *)
+
+Definition control_to_instruction (c: control) :=
+ match c with
+ | PExpand (Asmvliw.Pbuiltin ef args res) => Pbuiltin ef args res
+ | PCtlFlow Asmvliw.Pret => Pret
+ | PCtlFlow (Asmvliw.Pcall l) => Pcall l
+ | PCtlFlow (Asmvliw.Picall r) => Picall r
+ | PCtlFlow (Asmvliw.Pgoto l) => Pgoto l
+ | PCtlFlow (Asmvliw.Pigoto l) => Pigoto l
+ | PCtlFlow (Asmvliw.Pj_l l) => Pj_l l
+ | PCtlFlow (Asmvliw.Pcb bt r l) => Pcb bt r l
+ | PCtlFlow (Asmvliw.Pcbu bt r l) => Pcbu bt r l
+ | PCtlFlow (Asmvliw.Pjumptable r label) => Pjumptable r label
+ end.
+
+Definition basic_to_instruction (b: basic) :=
+ match b with
+ (** Special basics *)
+ | Asmvliw.Pget rd rs => Pget rd rs
+ | Asmvliw.Pset rd rs => Pset rd rs
+ | Asmvliw.Pnop => Pnop
+ | Asmvliw.Pallocframe sz pos => Pallocframe sz pos
+ | Asmvliw.Pfreeframe sz pos => Pfreeframe sz pos
+
+ (** PArith basics *)
+ (* R *)
+ | PArithR (Asmvliw.Ploadsymbol id ofs) r => Ploadsymbol r id ofs
+
+ (* RR *)
+ | PArithRR Asmvliw.Pmv rd rs => Pmv rd rs
+ | PArithRR Asmvliw.Pnegw rd rs => Pnegw rd rs
+ | PArithRR Asmvliw.Pnegl rd rs => Pnegl rd rs
+ | PArithRR Asmvliw.Pcvtl2w rd rs => Pcvtl2w rd rs
+ | PArithRR Asmvliw.Psxwd rd rs => Psxwd rd rs
+ | PArithRR Asmvliw.Pzxwd rd rs => Pzxwd rd rs
+ | PArithRR (Asmvliw.Pextfz stop start) rd rs => Pextfz rd rs stop start
+ | PArithRR (Asmvliw.Pextfs stop start) rd rs => Pextfs rd rs stop start
+ | PArithRR (Asmvliw.Pextfzl stop start) rd rs => Pextfzl rd rs stop start
+ | PArithRR (Asmvliw.Pextfsl stop start) rd rs => Pextfsl rd rs stop start
+ | PArithRR Asmvliw.Pfabsd rd rs => Pfabsd rd rs
+ | PArithRR Asmvliw.Pfabsw rd rs => Pfabsw rd rs
+ | PArithRR Asmvliw.Pfnegd rd rs => Pfnegd rd rs
+ | PArithRR Asmvliw.Pfnegw rd rs => Pfnegw rd rs
+ | PArithRR Asmvliw.Pfinvw rd rs => Pfinvw rd rs
+ | PArithRR Asmvliw.Pfnarrowdw rd rs => Pfnarrowdw rd rs
+ | PArithRR Asmvliw.Pfwidenlwd rd rs => Pfwidenlwd rd rs
+ | PArithRR Asmvliw.Pfloatuwrnsz rd rs => Pfloatuwrnsz rd rs
+ | PArithRR Asmvliw.Pfloatwrnsz rd rs => Pfloatwrnsz rd rs
+ | PArithRR Asmvliw.Pfloatudrnsz rd rs => Pfloatudrnsz rd rs
+ | PArithRR Asmvliw.Pfloatdrnsz rd rs => Pfloatdrnsz rd rs
+ | PArithRR Asmvliw.Pfixedwrzz rd rs => Pfixedwrzz rd rs
+ | PArithRR Asmvliw.Pfixeduwrzz rd rs => Pfixeduwrzz rd rs
+ | PArithRR Asmvliw.Pfixeddrzz rd rs => Pfixeddrzz rd rs
+ | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs
+ | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs
+ | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs
+
+ (* RI32 *)
+ | PArithRI32 Asmvliw.Pmake rd imm => Pmake rd imm
+
+ (* RI64 *)
+ | PArithRI64 Asmvliw.Pmakel rd imm => Pmakel rd imm
+
+ (* RF32 *)
+ | PArithRF32 Asmvliw.Pmakefs rd imm => Pmakefs rd imm
+
+ (* RF64 *)
+ | PArithRF64 Asmvliw.Pmakef rd imm => Pmakef rd imm
+
+ (* RRR *)
+ | PArithRRR (Asmvliw.Pcompw it) rd rs1 rs2 => Pcompw it rd rs1 rs2
+ | PArithRRR (Asmvliw.Pcompl it) rd rs1 rs2 => Pcompl it rd rs1 rs2
+ | PArithRRR (Asmvliw.Pfcompw ft) rd rs1 rs2 => Pfcompw ft rd rs1 rs2
+ | PArithRRR (Asmvliw.Pfcompl ft) rd rs1 rs2 => Pfcompl ft rd rs1 rs2
+ | PArithRRR Asmvliw.Paddw rd rs1 rs2 => Paddw rd rs1 rs2
+ | PArithRRR (Asmvliw.Paddxw shift) rd rs1 rs2 => Paddxw shift rd rs1 rs2
+ | PArithRRR Asmvliw.Psubw rd rs1 rs2 => Psubw rd rs1 rs2
+ | PArithRRR (Asmvliw.Prevsubxw shift) rd rs1 rs2 => Prevsubxw shift rd rs1 rs2
+ | PArithRRR Asmvliw.Pmulw rd rs1 rs2 => Pmulw rd rs1 rs2
+ | PArithRRR Asmvliw.Pandw rd rs1 rs2 => Pandw rd rs1 rs2
+ | PArithRRR Asmvliw.Pnandw rd rs1 rs2 => Pnandw rd rs1 rs2
+ | PArithRRR Asmvliw.Porw rd rs1 rs2 => Porw rd rs1 rs2
+ | PArithRRR Asmvliw.Pnorw rd rs1 rs2 => Pnorw rd rs1 rs2
+ | PArithRRR Asmvliw.Pxorw rd rs1 rs2 => Pxorw rd rs1 rs2
+ | PArithRRR Asmvliw.Pnxorw rd rs1 rs2 => Pnxorw rd rs1 rs2
+ | PArithRRR Asmvliw.Pandnw rd rs1 rs2 => Pandnw rd rs1 rs2
+ | PArithRRR Asmvliw.Pornw rd rs1 rs2 => Pornw rd rs1 rs2
+ | PArithRRR Asmvliw.Psraw rd rs1 rs2 => Psraw rd rs1 rs2
+ | PArithRRR Asmvliw.Psrxw rd rs1 rs2 => Psrxw rd rs1 rs2
+ | PArithRRR Asmvliw.Psrlw rd rs1 rs2 => Psrlw rd rs1 rs2
+ | PArithRRR Asmvliw.Psllw rd rs1 rs2 => Psllw rd rs1 rs2
+
+ | PArithRRR Asmvliw.Paddl rd rs1 rs2 => Paddl rd rs1 rs2
+ | PArithRRR (Asmvliw.Paddxl shift) rd rs1 rs2 => Paddxl shift rd rs1 rs2
+ | PArithRRR Asmvliw.Psubl rd rs1 rs2 => Psubl rd rs1 rs2
+ | PArithRRR (Asmvliw.Prevsubxl shift) rd rs1 rs2 => Prevsubxl shift rd rs1 rs2
+ | PArithRRR Asmvliw.Pandl rd rs1 rs2 => Pandl rd rs1 rs2
+ | PArithRRR Asmvliw.Pnandl rd rs1 rs2 => Pnandl rd rs1 rs2
+ | PArithRRR Asmvliw.Porl rd rs1 rs2 => Porl rd rs1 rs2
+ | PArithRRR Asmvliw.Pnorl rd rs1 rs2 => Pnorl rd rs1 rs2
+ | PArithRRR Asmvliw.Pxorl rd rs1 rs2 => Pxorl rd rs1 rs2
+ | PArithRRR Asmvliw.Pnxorl rd rs1 rs2 => Pnxorl rd rs1 rs2
+ | PArithRRR Asmvliw.Pandnl rd rs1 rs2 => Pandnl rd rs1 rs2
+ | PArithRRR Asmvliw.Pornl rd rs1 rs2 => Pornl rd rs1 rs2
+ | PArithRRR Asmvliw.Pmull rd rs1 rs2 => Pmull rd rs1 rs2
+ | PArithRRR Asmvliw.Pslll rd rs1 rs2 => Pslll rd rs1 rs2
+ | PArithRRR Asmvliw.Psrll rd rs1 rs2 => Psrll rd rs1 rs2
+ | PArithRRR Asmvliw.Psral rd rs1 rs2 => Psral rd rs1 rs2
+ | PArithRRR Asmvliw.Psrxl rd rs1 rs2 => Psrxl rd rs1 rs2
+
+ | PArithRRR Asmvliw.Pfaddd rd rs1 rs2 => Pfaddd rd rs1 rs2
+ | PArithRRR Asmvliw.Pfaddw rd rs1 rs2 => Pfaddw rd rs1 rs2
+ | PArithRRR Asmvliw.Pfsbfd rd rs1 rs2 => Pfsbfd rd rs1 rs2
+ | PArithRRR Asmvliw.Pfsbfw rd rs1 rs2 => Pfsbfw rd rs1 rs2
+ | PArithRRR Asmvliw.Pfmuld rd rs1 rs2 => Pfmuld rd rs1 rs2
+ | PArithRRR Asmvliw.Pfmulw rd rs1 rs2 => Pfmulw rd rs1 rs2
+ | PArithRRR Asmvliw.Pfmind rd rs1 rs2 => Pfmind rd rs1 rs2
+ | PArithRRR Asmvliw.Pfminw rd rs1 rs2 => Pfminw rd rs1 rs2
+ | PArithRRR Asmvliw.Pfmaxd rd rs1 rs2 => Pfmaxd rd rs1 rs2
+ | PArithRRR Asmvliw.Pfmaxw rd rs1 rs2 => Pfmaxw rd rs1 rs2
+
+ (* RRI32 *)
+ | PArithRRI32 (Asmvliw.Pcompiw it) rd rs imm => Pcompiw it rd rs imm
+ | PArithRRI32 Asmvliw.Paddiw rd rs imm => Paddiw rd rs imm
+ | PArithRRI32 (Asmvliw.Paddxiw shift) rd rs imm => Paddxiw shift rd rs imm
+ | PArithRRI32 Asmvliw.Prevsubiw rd rs imm => Prevsubiw rd rs imm
+ | PArithRRI32 (Asmvliw.Prevsubxiw shift) rd rs imm => Prevsubxiw shift rd rs imm
+ | PArithRRI32 Asmvliw.Pmuliw rd rs imm => Pmuliw rd rs imm
+ | PArithRRI32 Asmvliw.Pandiw rd rs imm => Pandiw rd rs imm
+ | PArithRRI32 Asmvliw.Pnandiw rd rs imm => Pnandiw rd rs imm
+ | PArithRRI32 Asmvliw.Poriw rd rs imm => Poriw rd rs imm
+ | PArithRRI32 Asmvliw.Pnoriw rd rs imm => Pnoriw rd rs imm
+ | PArithRRI32 Asmvliw.Pxoriw rd rs imm => Pxoriw rd rs imm
+ | PArithRRI32 Asmvliw.Pnxoriw rd rs imm => Pnxoriw rd rs imm
+ | PArithRRI32 Asmvliw.Pandniw rd rs imm => Pandniw rd rs imm
+ | PArithRRI32 Asmvliw.Porniw rd rs imm => Porniw rd rs imm
+ | PArithRRI32 Asmvliw.Psraiw rd rs imm => Psraiw rd rs imm
+ | PArithRRI32 Asmvliw.Psrxiw rd rs imm => Psrxiw rd rs imm
+ | PArithRRI32 Asmvliw.Psrliw rd rs imm => Psrliw rd rs imm
+ | PArithRRI32 Asmvliw.Pslliw rd rs imm => Pslliw rd rs imm
+ | PArithRRI32 Asmvliw.Proriw rd rs imm => Proriw rd rs imm
+ | PArithRRI32 Asmvliw.Psllil rd rs imm => Psllil rd rs imm
+ | PArithRRI32 Asmvliw.Psrlil rd rs imm => Psrlil rd rs imm
+ | PArithRRI32 Asmvliw.Psrxil rd rs imm => Psrxil rd rs imm
+ | PArithRRI32 Asmvliw.Psrail rd rs imm => Psrail rd rs imm
+
+ (* RRI64 *)
+ | PArithRRI64 (Asmvliw.Pcompil it) rd rs imm => Pcompil it rd rs imm
+ | PArithRRI64 Asmvliw.Paddil rd rs imm => Paddil rd rs imm
+ | PArithRRI64 (Asmvliw.Paddxil shift) rd rs imm => Paddxil shift rd rs imm
+ | PArithRRI64 Asmvliw.Prevsubil rd rs imm => Prevsubil rd rs imm
+ | PArithRRI64 (Asmvliw.Prevsubxil shift) rd rs imm => Prevsubxil shift rd rs imm
+ | PArithRRI64 Asmvliw.Pmulil rd rs imm => Pmulil rd rs imm
+ | PArithRRI64 Asmvliw.Pandil rd rs imm => Pandil rd rs imm
+ | PArithRRI64 Asmvliw.Pnandil rd rs imm => Pnandil rd rs imm
+ | PArithRRI64 Asmvliw.Poril rd rs imm => Poril rd rs imm
+ | PArithRRI64 Asmvliw.Pnoril rd rs imm => Pnoril rd rs imm
+ | PArithRRI64 Asmvliw.Pxoril rd rs imm => Pxoril rd rs imm
+ | PArithRRI64 Asmvliw.Pnxoril rd rs imm => Pnxoril rd rs imm
+ | PArithRRI64 Asmvliw.Pandnil rd rs imm => Pandnil rd rs imm
+ | PArithRRI64 Asmvliw.Pornil rd rs imm => Pornil rd rs imm
+
+ (** ARRR *)
+ | PArithARRR Asmvliw.Pmaddw rd rs1 rs2 => Pmaddw rd rs1 rs2
+ | PArithARRR Asmvliw.Pmaddl rd rs1 rs2 => Pmaddl rd rs1 rs2
+ | PArithARRR Asmvliw.Pmsubw rd rs1 rs2 => Pmsubw rd rs1 rs2
+ | PArithARRR Asmvliw.Pmsubl rd rs1 rs2 => Pmsubl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2 => Pfmaddfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2 => Pfmaddfl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2 => Pfmsubfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2 => Pfmsubfl rd rs1 rs2
+ | PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
+ | PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
+
+ (** ARR *)
+ | PArithARR (Asmvliw.Pinsf stop start) rd rs => Pinsf rd rs stop start
+ | PArithARR (Asmvliw.Pinsfl stop start) rd rs => Pinsfl rd rs stop start
+
+ (** ARRI32 *)
+ | PArithARRI32 Asmvliw.Pmaddiw rd rs1 imm => Pmaddiw rd rs1 imm
+ | PArithARRI32 (Asmvliw.Pcmoveiw cond) rd rs1 imm => Pcmoveiw cond rd rs1 imm
+ | PArithARRI32 (Asmvliw.Pcmoveuiw cond) rd rs1 imm => Pcmoveuiw cond rd rs1 imm
+
+ (** ARRI64 *)
+ | PArithARRI64 Asmvliw.Pmaddil rd rs1 imm => Pmaddil rd rs1 imm
+ | PArithARRI64 (Asmvliw.Pcmoveil cond) rd rs1 imm => Pcmoveil cond rd rs1 imm
+ | PArithARRI64 (Asmvliw.Pcmoveuil cond) rd rs1 imm => Pcmoveuil cond rd rs1 imm
+ (** Load *)
+ | PLoadRRO trap Asmvliw.Plb rd ra ofs => Plb trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Plbu rd ra ofs => Plbu trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Plh rd ra ofs => Plh trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Plhu rd ra ofs => Plhu trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Plw rd ra ofs => Plw trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Plw_a rd ra ofs => Plw_a trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Pld rd ra ofs => Pld trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Pld_a rd ra ofs => Pld_a trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Pfls rd ra ofs => Pfls trap rd ra (AOff ofs)
+ | PLoadRRO trap Asmvliw.Pfld rd ra ofs => Pfld trap rd ra (AOff ofs)
+
+ | PLoadQRRO qrs ra ofs => Plq qrs ra (AOff ofs)
+ | PLoadORRO qrs ra ofs => Plo qrs ra (AOff ofs)
+
+ | PLoadRRR trap Asmvliw.Plb rd ra ro => Plb trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Plbu rd ra ro => Plbu trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Plh rd ra ro => Plh trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Plhu rd ra ro => Plhu trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Plw rd ra ro => Plw trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Pld rd ra ro => Pld trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Pfls rd ra ro => Pfls trap rd ra (AReg ro)
+ | PLoadRRR trap Asmvliw.Pfld rd ra ro => Pfld trap rd ra (AReg ro)
+
+ | PLoadRRRXS trap Asmvliw.Plb rd ra ro => Plb trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Plbu rd ra ro => Plbu trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Plh rd ra ro => Plh trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Plhu rd ra ro => Plhu trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Plw rd ra ro => Plw trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Pld rd ra ro => Pld trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Pfls rd ra ro => Pfls trap rd ra (ARegXS ro)
+ | PLoadRRRXS trap Asmvliw.Pfld rd ra ro => Pfld trap rd ra (ARegXS ro)
+
+ (** Store *)
+ | PStoreRRO Asmvliw.Psb rd ra ofs => Psb rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Psh rd ra ofs => Psh rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Psw rd ra ofs => Psw rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Psw_a rd ra ofs => Psw_a rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Psd rd ra ofs => Psd rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Psd_a rd ra ofs => Psd_a rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Pfss rd ra ofs => Pfss rd ra (AOff ofs)
+ | PStoreRRO Asmvliw.Pfsd rd ra ofs => Pfsd rd ra (AOff ofs)
+
+ | PStoreRRR Asmvliw.Psb rd ra ro => Psb rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Psh rd ra ro => Psh rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Psw rd ra ro => Psw rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Psw_a rd ra ro => Psw_a rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Psd rd ra ro => Psd rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Psd_a rd ra ro => Psd_a rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Pfss rd ra ro => Pfss rd ra (AReg ro)
+ | PStoreRRR Asmvliw.Pfsd rd ra ro => Pfsd rd ra (AReg ro)
+
+ | PStoreRRRXS Asmvliw.Psb rd ra ro => Psb rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Psh rd ra ro => Psh rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Psw rd ra ro => Psw rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Psw_a rd ra ro => Psw_a rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Psd rd ra ro => Psd rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Psd_a rd ra ro => Psd_a rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro)
+ | PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro)
+
+ | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs)
+ | PStoreORRO qrs ra ofs => Pso qrs ra (AOff ofs)
+ end.
+
+(** * Semantics (given through the existence of well-formed VLIW program) *)
+
+Section RELSEM.
+
+Definition code := list instruction.
+
+Fixpoint unfold_label (ll: list label) :=
+ match ll with
+ | nil => nil
+ | l :: ll => Plabel l :: unfold_label ll
+ end.
+
+Fixpoint unfold_body (lb: list basic) :=
+ match lb with
+ | nil => nil
+ | b :: lb => basic_to_instruction b :: unfold_body lb
+ end.
+
+Definition unfold_exit (oc: option control) :=
+ match oc with
+ | None => nil
+ | Some c => control_to_instruction c :: nil
+ end.
+
+Definition unfold_bblock (b: bblock) := unfold_label (header b) ++
+ (match (body b), (exit b) with
+ | (((Asmvliw.Pfreeframe _ _ | Asmvliw.Pallocframe _ _)::nil) as bo), None =>
+ unfold_body bo
+ | bo, ex => unfold_body bo ++ unfold_exit ex ++ Psemi :: nil
+ end).
+
+Fixpoint unfold (lb: bblocks) :=
+ match lb with
+ | nil => nil
+ | b :: lb => (unfold_bblock b) ++ unfold lb
+ end.
+
+Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks; fn_code: code;
+ correct: unfold fn_blocks = fn_code }.
+
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f).
+
+Definition fundef_proj (fu: fundef) : Asmvliw.fundef :=
+ match fu with
+ | Internal f => Internal (function_proj f)
+ | External ef => External ef
+ end.
+
+Definition globdef_proj (gd: globdef fundef unit) : globdef Asmvliw.fundef unit :=
+ match gd with
+ | Gfun f => Gfun (fundef_proj f)
+ | Gvar gu => Gvar gu
+ end.
+
+Program Definition genv_trans (ge: genv) : Asmvliw.genv :=
+ {| Genv.genv_public := Genv.genv_public ge;
+ Genv.genv_symb := Genv.genv_symb ge;
+ Genv.genv_defs := PTree.map1 globdef_proj (Genv.genv_defs ge);
+ Genv.genv_next := Genv.genv_next ge |}.
+Next Obligation.
+ destruct ge. cbn in *. eauto.
+Qed. Next Obligation.
+ destruct ge; cbn in *.
+ rewrite PTree.gmap1 in H.
+ destruct (genv_defs ! b) eqn:GEN.
+ - eauto.
+ - discriminate.
+Qed. Next Obligation.
+ destruct ge; cbn in *.
+ eauto.
+Qed.
+
+Fixpoint prog_defs_proj (l: list (ident * globdef fundef unit))
+ : list (ident * globdef Asmvliw.fundef unit) :=
+ match l with
+ | nil => nil
+ | (i, gd) :: l => (i, globdef_proj gd) :: prog_defs_proj l
+ end.
+
+Definition program_proj (p: program) : Asmvliw.program :=
+ {| prog_defs := prog_defs_proj (prog_defs p);
+ prog_public := prog_public p;
+ prog_main := prog_main p
+ |}.
+
+End RELSEM.
+
+Definition semantics (p: program) := Asmvliw.semantics (program_proj p).
+
+(** Determinacy of the [Asm] semantics. *)
+
+Lemma semantics_determinate: forall p, determinate (semantics p).
+Proof.
+ intros. apply semantics_determinate.
+Qed.
+
+(** transf_program *)
+
+Program Definition transf_function (f: Asmvliw.function) : function :=
+ {| fn_sig := Asmvliw.fn_sig f; fn_blocks := Asmvliw.fn_blocks f;
+ fn_code := unfold (Asmvliw.fn_blocks f) |}.
+
+Lemma transf_function_proj: forall f, function_proj (transf_function f) = f.
+Proof.
+ intros f. destruct f as [sig blks]. unfold function_proj. cbn. auto.
+Qed.
+
+Definition transf_fundef : Asmvliw.fundef -> fundef := AST.transf_fundef transf_function.
+
+Lemma transf_fundef_proj: forall f, fundef_proj (transf_fundef f) = f.
+Proof.
+ intros f. destruct f as [f|e]; cbn; auto.
+ rewrite transf_function_proj. auto.
+Qed.
+
+Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef.
+
+Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
+ prog_defs p1 = prog_defs p2 ->
+ prog_public p1 = prog_public p2 ->
+ prog_main p1 = prog_main p2 ->
+ p1 = p2.
+Proof.
+ intros. destruct p1. destruct p2. cbn in *. subst. auto.
+Qed.
+
+Lemma transf_program_proj: forall p, program_proj (transf_program p) = p.
+Proof.
+ intros p. destruct p as [defs pub main]. unfold program_proj. cbn.
+ apply program_equals; cbn; auto.
+ induction defs.
+ - cbn; auto.
+ - cbn. rewrite IHdefs.
+ destruct a as [id gd]; cbn.
+ destruct gd as [f|v]; cbn; auto.
+ rewrite transf_fundef_proj. auto.
+Qed.
+
+Definition match_prog (p: Asmvliw.program) (tp: program) :=
+ match_program (fun _ f tf => tf = transf_fundef f) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = tp -> match_prog p tp.
+Proof.
+ intros. rewrite <- H. eapply match_transform_program; eauto.
+Qed.
+
+Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l.
+Proof.
+ intros. congruence.
+Qed.
+
+Lemma match_program_transf:
+ forall p tp, match_prog p tp -> transf_program p = tp.
+Proof.
+ intros p tp H. inversion_clear H. inv H1.
+ destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. cbn in *.
+ subst. unfold transf_program. unfold transform_program. cbn.
+ apply program_equals; cbn; auto.
+ induction H0; cbn; auto.
+ rewrite IHlist_forall2. apply cons_extract.
+ destruct a1 as [ida gda]. destruct b1 as [idb gdb].
+ cbn in *.
+ inv H. inv H2.
+ - cbn in *. subst. auto.
+ - cbn in *. subst. inv H. auto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Asmvliw.program.
+Variable tprog: program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Definition match_states (s1 s2: state) := s1 = s2.
+
+Lemma symbols_preserved:
+ forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+
+Theorem transf_program_correct:
+ forward_simulation (Asmvliw.semantics prog) (semantics tprog).
+Proof.
+ pose proof (match_program_transf prog tprog TRANSF) as TR.
+ subst. unfold semantics. rewrite transf_program_proj.
+
+ eapply forward_simulation_step with (match_states := match_states); cbn; auto.
+ - intros. exists s1. split; auto. congruence.
+ - intros. inv H. auto.
+ - intros. exists s1'. inv H0. split; auto. congruence.
+Qed.
+
+End PRESERVATION.
diff --git a/kvx/AsmToJSON.ml b/kvx/AsmToJSON.ml
new file mode 100644
index 00000000..8a6a97a7
--- /dev/null
+++ b/kvx/AsmToJSON.ml
@@ -0,0 +1,23 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *)
+(* *)
+(* AbsInt Angewandte Informatik GmbH. All rights reserved. This file *)
+(* is distributed under the terms of the INRIA Non-Commercial *)
+(* License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Simple functions to serialize RISC-V Asm to JSON *)
+
+(* Dummy function *)
+let destination: string option ref = ref None
+
+let sdump_folder = ref ""
+
+let print_if prog sourcename =
+ ()
+
+let pp_mnemonics pp = ()
diff --git a/kvx/Asmaux.v b/kvx/Asmaux.v
new file mode 100644
index 00000000..2abd445e
--- /dev/null
+++ b/kvx/Asmaux.v
@@ -0,0 +1,19 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Asm.
+Require Import AST.
+
+(** Constant only needed by Asmexpandaux.ml *)
+Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}.
diff --git a/kvx/Asmblock.v b/kvx/Asmblock.v
new file mode 100644
index 00000000..17ebac32
--- /dev/null
+++ b/kvx/Asmblock.v
@@ -0,0 +1,394 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Sequential block semantics for KVX assembly. The syntax is given in AsmVLIW *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require Import Errors.
+Require Export Asmvliw.
+Require Import Lia.
+
+(* Notations necessary to hook Asmvliw definitions *)
+Notation undef_caller_save_regs := Asmvliw.undef_caller_save_regs.
+Notation regset := Asmvliw.regset.
+Notation extcall_arg := Asmvliw.extcall_arg.
+Notation extcall_arg_pair := Asmvliw.extcall_arg_pair.
+Notation extcall_arguments := Asmvliw.extcall_arguments.
+Notation set_res := Asmvliw.set_res.
+Notation function := Asmvliw.function.
+Notation bblocks := Asmvliw.bblocks.
+Notation header := Asmvliw.header.
+Notation body := Asmvliw.body.
+Notation exit := Asmvliw.exit.
+Notation correct := Asmvliw.correct.
+
+(** * Auxiliary utilies on basic blocks *)
+
+(** ** A unified view of Kalray instructions *)
+
+Inductive instruction : Type :=
+ | PBasic (i: basic)
+ | PControl (i: control)
+.
+
+Coercion PBasic: basic >-> instruction.
+Coercion PControl: control >-> instruction.
+
+Definition code := list instruction.
+Definition bcode := list basic.
+
+Fixpoint basics_to_code (l: list basic) :=
+ match l with
+ | nil => nil
+ | bi::l => (PBasic bi)::(basics_to_code l)
+ end.
+
+Fixpoint code_to_basics (c: code) :=
+ match c with
+ | (PBasic i)::c =>
+ match code_to_basics c with
+ | None => None
+ | Some l => Some (i::l)
+ end
+ | _::c => None
+ | nil => Some nil
+ end.
+
+Lemma code_to_basics_id: forall c, code_to_basics (basics_to_code c) = Some c.
+Proof.
+ intros. induction c as [|i c]; cbn; auto.
+ rewrite IHc. auto.
+Qed.
+
+Lemma code_to_basics_dist:
+ forall c c' l l',
+ code_to_basics c = Some l ->
+ code_to_basics c' = Some l' ->
+ code_to_basics (c ++ c') = Some (l ++ l').
+Proof.
+ induction c as [|i c]; cbn; auto.
+ - intros. inv H. cbn. auto.
+ - intros. destruct i; try discriminate. destruct (code_to_basics c) eqn:CTB; try discriminate.
+ inv H. erewrite IHc; eauto. auto.
+Qed.
+
+(**
+ Asmblockgen will have to translate a Mach control into a list of instructions of the form
+ i1 :: i2 :: i3 :: ctl :: nil ; where i1..i3 are basic instructions, ctl is a control instruction
+ These functions provide way to extract the basic / control instructions
+*)
+
+Fixpoint extract_basic (c: code) :=
+ match c with
+ | nil => nil
+ | PBasic i :: c => i :: (extract_basic c)
+ | PControl i :: c => nil
+ end.
+
+Fixpoint extract_ctl (c: code) :=
+ match c with
+ | nil => None
+ | PBasic i :: c => extract_ctl c
+ | PControl i :: nil => Some i
+ | PControl i :: _ => None (* if the first found control instruction isn't the last *)
+ end.
+
+(** ** Wellformness of basic blocks *)
+
+Ltac exploreInst :=
+ repeat match goal with
+ | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
+ | [ H : OK _ = OK _ |- _ ] => monadInv H
+ | [ |- context[if ?b then _ else _] ] => destruct b
+ | [ |- context[match ?m with | _ => _ end] ] => destruct m
+ | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
+ | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+ | [ H : Error _ = OK _ |- _ ] => inversion H
+ end.
+
+Definition non_empty_bblock (body: list basic) (exit: option control): Prop
+ := body <> nil \/ exit <> None.
+
+Lemma non_empty_bblock_refl:
+ forall body exit,
+ non_empty_bblock body exit <->
+ Is_true (non_empty_bblockb body exit).
+Proof.
+ intros. split.
+ - destruct body; destruct exit.
+ all: cbn; auto. intros. inversion H; contradiction.
+ - destruct body; destruct exit.
+ all: cbn; auto.
+ all: intros; try (right; discriminate); try (left; discriminate).
+ contradiction.
+Qed.
+
+Definition builtin_alone (body: list basic) (exit: option control) := forall ef args res,
+ exit = Some (PExpand (Pbuiltin ef args res)) -> body = nil.
+
+
+Lemma builtin_alone_refl:
+ forall body exit,
+ builtin_alone body exit <-> Is_true (builtin_aloneb body exit).
+Proof.
+ intros. split.
+ - destruct body; destruct exit.
+ all: cbn; auto.
+ all: exploreInst; cbn; auto.
+ unfold builtin_alone. intros. assert (Some (Pbuiltin e l b0) = Some (Pbuiltin e l b0)); auto.
+ assert (b :: body = nil). eapply H; eauto. discriminate.
+ - destruct body; destruct exit.
+ all: cbn; auto; try constructor.
+ + exploreInst; try discriminate.
+ cbn. contradiction.
+ + intros. discriminate.
+Qed.
+
+Definition wf_bblock (body: list basic) (exit: option control) :=
+ non_empty_bblock body exit /\ builtin_alone body exit.
+
+Lemma wf_bblock_refl:
+ forall body exit,
+ wf_bblock body exit <-> Is_true (wf_bblockb body exit).
+Proof.
+ intros. split.
+ - intros. inv H. apply non_empty_bblock_refl in H0. apply builtin_alone_refl in H1.
+ apply andb_prop_intro. auto.
+ - intros. apply andb_prop_elim in H. inv H.
+ apply non_empty_bblock_refl in H0. apply builtin_alone_refl in H1.
+ unfold wf_bblock. split; auto.
+Qed.
+
+Ltac bblock_auto_correct := (apply non_empty_bblock_refl; try discriminate; try (left; discriminate); try (right; discriminate)).
+
+Lemma Istrue_proof_irrelevant (b: bool): forall (p1 p2:Is_true b), p1=p2.
+Proof.
+ destruct b; cbn; auto.
+ - destruct p1, p2; auto.
+ - destruct p1.
+Qed.
+
+Lemma bblock_equality bb1 bb2: header bb1=header bb2 -> body bb1 = body bb2 -> exit bb1 = exit bb2 -> bb1 = bb2.
+Proof.
+ destruct bb1 as [h1 b1 e1 c1], bb2 as [h2 b2 e2 c2]; cbn.
+ intros; subst.
+ rewrite (Istrue_proof_irrelevant _ c1 c2).
+ auto.
+Qed.
+
+Program Definition bblock_single_inst (i: instruction) :=
+ match i with
+ | PBasic b => {| header:=nil; body:=(b::nil); exit:=None |}
+ | PControl ctl => {| header:=nil; body:=nil; exit:=(Some ctl) |}
+ end.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ right. discriminate.
+ constructor.
+Qed.
+
+Lemma length_nonil {A: Type} : forall l:(list A), l <> nil -> (length l > 0)%nat.
+Proof.
+ intros. destruct l; try (contradict H; auto; fail).
+ cbn. lia.
+Qed.
+
+Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0.
+Proof.
+ intros. destruct z; auto.
+ - contradict H. cbn. apply gt_irrefl.
+ - apply Zgt_pos_0.
+ - contradict H. cbn. apply gt_irrefl.
+Qed.
+
+Lemma size_positive (b:bblock): size b > 0.
+Proof.
+ unfold size. destruct b as [hd bdy ex cor]. cbn.
+ destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; lia).
+ inversion cor; contradict H; cbn; auto.
+Qed.
+
+
+Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}.
+Next Obligation.
+ destruct bb; cbn. assumption.
+Defined.
+
+Lemma no_header_size:
+ forall bb, size (no_header bb) = size bb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]. unfold no_header. cbn. reflexivity.
+Qed.
+
+Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}.
+Next Obligation.
+ destruct bb; cbn. assumption.
+Defined.
+
+Lemma stick_header_size:
+ forall h bb, size (stick_header h bb) = size bb.
+Proof.
+ intros. destruct bb. unfold stick_header. cbn. reflexivity.
+Qed.
+
+Lemma stick_header_no_header:
+ forall bb, stick_header (header bb) (no_header bb) = bb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]. cbn. unfold no_header; unfold stick_header; cbn. reflexivity.
+Qed.
+
+(** * Sequential Semantics of basic blocks *)
+Section RELSEM.
+
+(** Execution of arith instructions *)
+
+Variable ge: genv.
+
+Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset := parexec_arith_instr ge ai rs rs.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a: ireg) (ofs: offset) := parexec_load_offset trap chunk rs rs m m d a ofs.
+
+Definition exec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_reg trap chunk rs rs m m d a ro.
+
+Definition exec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_regxs trap chunk rs rs m m d a ro.
+
+Definition exec_load_q_offset (rs: regset) (m: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := parexec_load_q_offset rs rs m m d a ofs.
+
+Definition exec_load_o_offset (rs: regset) (m: mem) (d : gpreg_o) (a: ireg) (ofs: offset) := parexec_load_o_offset rs rs m m d a ofs.
+
+Definition exec_store_offset (chunk: memory_chunk) (rs: regset) (m: mem) (s a: ireg) (ofs: offset) := parexec_store_offset chunk rs rs m m s a ofs.
+
+Definition exec_store_q_offset (rs: regset) (m: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := parexec_store_q_offset rs rs m m s a ofs.
+
+Definition exec_store_o_offset (rs: regset) (m: mem) (s : gpreg_o) (a: ireg) (ofs: offset) := parexec_store_o_offset rs rs m m s a ofs.
+
+Definition exec_store_reg (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_reg chunk rs rs m m s a ro.
+
+Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_regxs chunk rs rs m m s a ro.
+
+(** * basic instructions *)
+
+Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := bstep ge bi rs rs m m.
+
+Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
+ match body with
+ | nil => Next rs m
+ | bi::body' =>
+ match exec_basic_instr bi rs m with
+ | Next rs' m' => exec_body body' rs' m'
+ | Stuck => Stuck
+ end
+ end.
+
+
+Theorem builtin_body_nil:
+ forall bb ef args res, exit bb = Some (PExpand (Pbuiltin ef args res)) -> body bb = nil.
+Proof.
+ intros. destruct bb as [hd bdy ex WF]. cbn in *.
+ apply wf_bblock_refl in WF. inv WF. unfold builtin_alone in H1.
+ eapply H1; eauto.
+Qed.
+
+Theorem exec_body_app:
+ forall l l' rs m rs'' m'',
+ exec_body (l ++ l') rs m = Next rs'' m'' ->
+ exists rs' m',
+ exec_body l rs m = Next rs' m'
+ /\ exec_body l' rs' m' = Next rs'' m''.
+Proof.
+ induction l.
+ - intros. cbn in H. repeat eexists. auto.
+ - intros. rewrite <- app_comm_cons in H. cbn in H.
+ destruct (exec_basic_instr a rs m) eqn:EXEBI.
+ + apply IHl in H. destruct H as (rs1 & m1 & EXEB1 & EXEB2).
+ repeat eexists. cbn. rewrite EXEBI. eauto. auto.
+ + discriminate.
+Qed.
+
+(** Position corresponding to a label *)
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome := par_goto_label f lbl rs rs m.
+
+Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome := par_eval_branch f l rs rs m res.
+
+Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome := parexec_control ge f oc rs rs m.
+
+Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome :=
+ match exec_body (body b) rs0 m with
+ | Next rs' m' =>
+ let rs1 := nextblock b rs' in exec_control f (exit b) rs1 m'
+ | Stuck => Stuck
+ end.
+
+
+(** Execution of the instruction at [rs PC]. *)
+
+Inductive step: state -> trace -> state -> Prop :=
+ | exec_step_internal:
+ forall b ofs f bi rs m rs' m',
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi ->
+ exec_bblock f bi rs m = Next rs' m' ->
+ step (State rs m) E0 (State rs' m')
+ | exec_step_builtin:
+ forall b ofs f ef args res rs m vargs t vres rs' m' bi,
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) f.(fn_blocks) = Some bi ->
+ exit bi = Some (PExpand (Pbuiltin ef args res)) ->
+ eval_builtin_args ge rs (rs SP) m args vargs ->
+ external_call ef ge vargs m t vres m' ->
+ rs' = nextblock bi
+ (set_res res vres
+ (undef_regs (map preg_of (destroyed_by_builtin ef))
+ (rs#RTMP <- Vundef))) ->
+ step (State rs m) t (State rs' m')
+ | exec_step_external:
+ forall b ef args res rs m t rs' m',
+ rs PC = Vptr b Ptrofs.zero ->
+ Genv.find_funct_ptr ge b = Some (External ef) ->
+ external_call ef ge args m t res m' ->
+ extcall_arguments rs m (ef_sig ef) args ->
+ rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
+ step (State rs m) t (State rs' m')
+ .
+
+End RELSEM.
+
+Definition semantics (p: program) :=
+ Semantics step (initial_state p) final_state (Genv.globalenv p).
+
+Definition data_preg (r: preg) : bool :=
+ match r with
+ | RA => false
+ | IR GPRA => false
+ | IR RTMP => false
+ | IR _ => true
+ | PC => false
+ end.
diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v
new file mode 100644
index 00000000..a9786e0a
--- /dev/null
+++ b/kvx/Asmblockdeps.v
@@ -0,0 +1,1845 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Translation from [Asmvliw] to [AbstractBB] *)
+
+(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmvliw] into [L].
+ [AbstractBB] will then define two semantics for [L]: a sequential and a parallel one.
+ We prove a bisimulation between the parallel semantics of [L] and [AsmVLIW].
+ We also prove a bisimulation between the sequential semantics of [L] and [Asmblock].
+ Then, the checkers on [Asmblock] and [Asmvliw] are deduced from those of [L].
+ *)
+
+Require Import AST.
+Require Import Asmblock.
+Require Import Asmblockgenproof0 Asmblockprops.
+Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import ZArith.
+Require Import Coqlib.
+Require Import ImpSimuTest.
+Require Import Axioms.
+Require Import Parallelizability.
+Require Import Asmvliw Permutation.
+Require Import Chunks.
+
+Require Import Lia.
+
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+Open Scope impure.
+
+(** Definition of [L] *)
+
+Module P<: ImpParam.
+Module R := Pos.
+
+Section IMPPARAM.
+
+Definition env := Genv.t fundef unit.
+
+Inductive genv_wrap := Genv (ge: env) (fn: function).
+Definition genv := genv_wrap.
+
+Variable Ge: genv.
+
+Inductive value_wrap :=
+ | Val (v: val)
+ | Memstate (m: mem)
+.
+
+Definition value := value_wrap.
+
+Inductive control_op :=
+ | Oj_l (l: label)
+ | Ocb (bt: btest) (l: label)
+ | Ocbu (bt: btest) (l: label)
+ | Odiv
+ | Odivu
+ | OError
+ | OIncremPC (sz: Z)
+ | Ojumptable (l: list label)
+.
+
+Inductive arith_op :=
+ | OArithR (n: arith_name_r)
+ | OArithRR (n: arith_name_rr)
+ | OArithRI32 (n: arith_name_ri32) (imm: int)
+ | OArithRI64 (n: arith_name_ri64) (imm: int64)
+ | OArithRF32 (n: arith_name_rf32) (imm: float32)
+ | OArithRF64 (n: arith_name_rf64) (imm: float)
+ | OArithRRR (n: arith_name_rrr)
+ | OArithRRI32 (n: arith_name_rri32) (imm: int)
+ | OArithRRI64 (n: arith_name_rri64) (imm: int64)
+ | OArithARRR (n: arith_name_arrr)
+ | OArithARR (n: arith_name_arr)
+ | OArithARRI32 (n: arith_name_arri32) (imm: int)
+ | OArithARRI64 (n: arith_name_arri64) (imm: int64)
+.
+
+Coercion OArithR: arith_name_r >-> arith_op.
+Coercion OArithRR: arith_name_rr >-> arith_op.
+Coercion OArithRI32: arith_name_ri32 >-> Funclass.
+Coercion OArithRI64: arith_name_ri64 >-> Funclass.
+Coercion OArithRF32: arith_name_rf32 >-> Funclass.
+Coercion OArithRF64: arith_name_rf64 >-> Funclass.
+Coercion OArithRRR: arith_name_rrr >-> arith_op.
+Coercion OArithRRI32: arith_name_rri32 >-> Funclass.
+Coercion OArithRRI64: arith_name_rri64 >-> Funclass.
+
+Inductive load_op :=
+ | OLoadRRO (n: load_name) (trap: trapping_mode) (ofs: offset)
+ | OLoadRRR (n: load_name) (trap: trapping_mode)
+ | OLoadRRRXS (n: load_name) (trap: trapping_mode)
+.
+
+Coercion OLoadRRO: load_name >-> Funclass.
+
+Inductive store_op :=
+ | OStoreRRO (n: store_name) (ofs: offset)
+ | OStoreRRR (n: store_name)
+ | OStoreRRRXS (n: store_name)
+.
+
+Coercion OStoreRRO: store_name >-> Funclass.
+
+Inductive op_wrap :=
+ | Arith (ao: arith_op)
+ | Load (lo: load_op)
+ | Store (so: store_op)
+ | Control (co: control_op)
+ | Allocframe (sz: Z) (pos: ptrofs)
+ | Allocframe2 (sz: Z) (pos: ptrofs)
+ | Freeframe (sz: Z) (pos: ptrofs)
+ | Freeframe2 (sz: Z) (pos: ptrofs)
+ | Constant (v: val)
+ | Fail
+.
+
+Coercion Arith: arith_op >-> op_wrap.
+Coercion Load: load_op >-> op_wrap.
+Coercion Store: store_op >-> op_wrap.
+Coercion Control: control_op >-> op_wrap.
+
+Definition op := op_wrap.
+
+Definition arith_eval (ao: arith_op) (l: list value) :=
+ let (ge, fn) := Ge in
+ match ao, l with
+ | OArithR n, [] => Some (Val (arith_eval_r ge n))
+
+ | OArithRR n, [Val v] => Some (Val (arith_eval_rr n v))
+
+ | OArithRI32 n i, [] => Some (Val (arith_eval_ri32 n i))
+ | OArithRI64 n i, [] => Some (Val (arith_eval_ri64 n i))
+ | OArithRF32 n i, [] => Some (Val (arith_eval_rf32 n i))
+ | OArithRF64 n i, [] => Some (Val (arith_eval_rf64 n i))
+
+ | OArithRRR n, [Val v1; Val v2] => Some (Val (arith_eval_rrr n v1 v2))
+ | OArithRRI32 n i, [Val v] => Some (Val (arith_eval_rri32 n v i))
+ | OArithRRI64 n i, [Val v] => Some (Val (arith_eval_rri64 n v i))
+
+ | OArithARR n, [Val v1; Val v2] => Some (Val (arith_eval_arr n v1 v2))
+ | OArithARRR n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrr n v1 v2 v3))
+ | OArithARRI32 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri32 n v1 v2 i))
+ | OArithARRI64 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri64 n v1 v2 i))
+
+ | _, _ => None
+ end.
+
+Definition exec_incorrect_load trap :=
+ match trap with
+ | TRAP => None
+ | NOTRAP => Some (Val Vundef)
+ end.
+
+Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) :=
+ let (ge, fn) := Ge in
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with
+ | None => exec_incorrect_load trap
+ | Some vl => Some (Val vl)
+ end
+ | _ => None
+ end.
+
+Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
+ match Mem.loadv chunk m (Val.addl v vo) with
+ | None => exec_incorrect_load trap
+ | Some vl => Some (Val vl)
+ end.
+
+Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
+ match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with
+ | None => exec_incorrect_load trap
+ | Some vl => Some (Val vl)
+ end.
+
+Definition load_eval (lo: load_op) (l: list value) :=
+ match lo, l with
+ | OLoadRRO n trap ofs, [Val v; Memstate m] => exec_load_deps_offset trap (load_chunk n) m v ofs
+ | OLoadRRR n trap, [Val v; Val vo; Memstate m] => exec_load_deps_reg trap (load_chunk n) m v vo
+ | OLoadRRRXS n trap, [Val v; Val vo; Memstate m] => exec_load_deps_regxs trap (load_chunk n) m v vo
+ | _, _ => None
+ end.
+
+Definition exec_store_deps_offset (chunk: memory_chunk) (m: mem) (vs va: val) (ofs: offset) :=
+ let (ge, fn) := Ge in
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.storev chunk m (Val.offset_ptr va ptr) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end
+ | _ => None
+ end.
+
+Definition exec_store_deps_reg (chunk: memory_chunk) (m: mem) (vs va vo: val) :=
+ match Mem.storev chunk m (Val.addl va vo) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end.
+
+Definition exec_store_deps_regxs (chunk: memory_chunk) (m: mem) (vs va vo: val) :=
+ match Mem.storev chunk m (Val.addl va (Val.shll vo (scale_of_chunk chunk))) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end.
+
+Definition store_eval (so: store_op) (l: list value) :=
+ match so, l with
+ | OStoreRRO n ofs, [Val vs; Val va; Memstate m] => exec_store_deps_offset (store_chunk n) m vs va ofs
+ | OStoreRRR n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_reg (store_chunk n) m vs va vo
+ | OStoreRRRXS n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_regxs (store_chunk n) m vs va vo
+ | _, _ => None
+ end.
+
+Local Open Scope Z.
+
+Remark size_chunk_positive: forall chunk,
+ (size_chunk chunk) > 0.
+Proof.
+ destruct chunk; simpl; lia.
+Qed.
+
+Remark size_chunk_small: forall chunk,
+ (size_chunk chunk) <= 8.
+Proof.
+ destruct chunk; simpl; lia.
+Qed.
+
+Definition disjoint_chunks
+ (ofs1 : offset) (chunk1 : memory_chunk)
+ (ofs2 : offset) (chunk2 : memory_chunk) :=
+ Intv.disjoint ((Ptrofs.unsigned ofs1),
+ ((Ptrofs.unsigned ofs1) + (size_chunk chunk1)))
+ ((Ptrofs.unsigned ofs2),
+ ((Ptrofs.unsigned ofs2) + (size_chunk chunk2))).
+
+Definition small_offset_threshold := 18446744073709551608.
+
+Lemma store_store_disjoint_offsets :
+ forall n1 n2 ofs1 ofs2 vs1 vs2 va m0 m1 m2 m1' m2',
+ (disjoint_chunks ofs1 (store_chunk n1) ofs2 (store_chunk n2)) ->
+ (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+ (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+ store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m0] = Some (Memstate m1) ->
+ store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m1] = Some (Memstate m2) ->
+ store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m0] = Some (Memstate m1') ->
+ store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m1'] = Some (Memstate m2') ->
+ m2 = m2'.
+Proof.
+ intros until m2'.
+ intros DISJOINT SMALL1 SMALL2 STORE0 STORE1 STORE0' STORE1'.
+ unfold disjoint_chunks in DISJOINT.
+ destruct vs1 as [v1 | ]; simpl in STORE0, STORE1'; try congruence.
+ destruct vs2 as [v2 | ]; simpl in STORE1, STORE0'; try congruence.
+ destruct va as [base | ]; try congruence.
+ unfold exec_store_deps_offset in *.
+ destruct Ge.
+ unfold eval_offset in *; simpl in *.
+ unfold Mem.storev in *.
+ unfold Val.offset_ptr in *.
+ destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+ destruct (Mem.store _ _ _ _ _) eqn:E0; try congruence.
+ inv STORE0.
+ destruct (Mem.store (store_chunk n2) _ _ _ _) eqn:E1; try congruence.
+ inv STORE1.
+ destruct (Mem.store (store_chunk n2) m0 _ _ _) eqn:E0'; try congruence.
+ inv STORE0'.
+ destruct (Mem.store _ m1' _ _ _) eqn:E1'; try congruence.
+ inv STORE1'.
+ assert (Some m2 = Some m2').
+ 2: congruence.
+ rewrite <- E1.
+ rewrite <- E1'.
+ eapply Mem.store_store_other.
+ 2, 3: eassumption.
+
+ right.
+ pose proof (size_chunk_positive (store_chunk n1)).
+ pose proof (size_chunk_positive (store_chunk n2)).
+ pose proof (size_chunk_small (store_chunk n1)).
+ pose proof (size_chunk_small (store_chunk n2)).
+ destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+ unfold Intv.empty in DIS; simpl in DIS.
+ 1, 2: lia.
+ pose proof (Ptrofs.unsigned_range ofs1).
+ pose proof (Ptrofs.unsigned_range ofs2).
+ unfold small_offset_threshold in *.
+ destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+ destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+ change Ptrofs.modulus with 18446744073709551616 in *;
+ lia.
+Qed.
+
+Lemma load_store_disjoint_offsets :
+ forall n1 n2 tm ofs1 ofs2 vs va m0 m1,
+ (disjoint_chunks ofs1 (store_chunk n1) ofs2 (load_chunk n2)) ->
+ (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+ (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+ store_eval (OStoreRRO n1 ofs1) [vs; va; Memstate m0] = Some (Memstate m1) ->
+ load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m1] =
+ load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m0].
+Proof.
+ intros until m1.
+ intros DISJOINT SMALL1 SMALL2 STORE0.
+ destruct vs as [v | ]; simpl in STORE0; try congruence.
+ destruct va as [base | ]; try congruence.
+ unfold exec_store_deps_offset in *.
+ unfold eval_offset in *; simpl in *.
+ unfold exec_load_deps_offset.
+ unfold Mem.storev, Mem.loadv in *.
+ destruct Ge in *.
+ unfold eval_offset in *.
+ unfold Val.offset_ptr in *.
+ destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+ destruct (Mem.store _ _ _ _) eqn:E0; try congruence.
+ inv STORE0.
+ assert (
+ (Mem.load (load_chunk n2) m1 wblock
+ (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) =
+ (Mem.load (load_chunk n2) m0 wblock
+ (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) ) as LOADS.
+ {
+ eapply Mem.load_store_other.
+ eassumption.
+ right.
+ pose proof (size_chunk_positive (store_chunk n1)).
+ pose proof (size_chunk_positive (load_chunk n2)).
+ pose proof (size_chunk_small (store_chunk n1)).
+ pose proof (size_chunk_small (load_chunk n2)).
+ destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+ unfold Intv.empty in DIS; simpl in DIS.
+ 1,2: lia.
+
+ pose proof (Ptrofs.unsigned_range ofs1).
+ pose proof (Ptrofs.unsigned_range ofs2).
+ unfold small_offset_threshold in *.
+ destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+ destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+ change Ptrofs.modulus with 18446744073709551616 in *;
+ lia.
+ }
+ destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence.
+Qed.
+
+Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
+ match label_pos lbl 0 (fn_blocks f) with
+ | None => None
+ | Some pos =>
+ match vpc with
+ | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos)))
+ | _ => None
+ end
+ end.
+
+Definition eval_branch_deps (f: function) (l: label) (vpc: val) (res: option bool) :=
+ match res with
+ | Some true => goto_label_deps f l vpc
+ | Some false => Some (Val vpc)
+ | None => None
+ end.
+
+Definition control_eval (o: control_op) (l: list value) :=
+ let (ge, fn) := Ge in
+ match o, l with
+ | (Ojumptable tbl), [Val index; Val vpc] =>
+ match index with
+ | Vint n =>
+ match list_nth_z tbl (Int.unsigned n) with
+ | None => None
+ | Some lbl => goto_label_deps fn lbl vpc
+ end
+ | _ => None
+ end
+ | Oj_l l, [Val vpc] => goto_label_deps fn l vpc
+ | Ocb bt l, [Val v; Val vpc] =>
+ match cmp_for_btest bt with
+ | (Some c, Int) => eval_branch_deps fn l vpc (Val.cmp_bool c v (Vint (Int.repr 0)))
+ | (Some c, Long) => eval_branch_deps fn l vpc (Val.cmpl_bool c v (Vlong (Int64.repr 0)))
+ | (None, _) => None
+ end
+ | Ocbu bt l, [Val v; Val vpc] =>
+ match cmpu_for_btest bt with
+ | (Some c, Int) => eval_branch_deps fn l vpc (Val.mxcmpu_bool c v (Vint (Int.repr 0)))
+ | (Some c, Long) => eval_branch_deps fn l vpc (Val.mxcmplu_bool c v (Vlong (Int64.repr 0)))
+ | (None, _) => None
+ end
+ | Odiv, [Val v1; Val v2] =>
+ match Val.divs v1 v2 with
+ | Some v => Some (Val v)
+ | None => None
+ end
+ | Odivu, [Val v1; Val v2] =>
+ match Val.divu v1 v2 with
+ | Some v => Some (Val v)
+ | None => None
+ end
+ | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz)))
+ | OError, _ => None
+ | _, _ => None
+ end.
+
+Definition op_eval (o: op) (l: list value) :=
+ match o, l with
+ | Arith o, l => arith_eval o l
+ | Load o, l => load_eval o l
+ | Store o, l => store_eval o l
+ | Control o, l => control_eval o l
+ | Allocframe sz pos, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with
+ | None => None
+ | Some m => Some (Memstate m)
+ end
+ | Allocframe2 sz pos, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with
+ | None => None
+ | Some m => Some (Val sp)
+ end
+ | Freeframe sz pos, [Val spv; Memstate m] =>
+ match Mem.loadv Mptr m (Val.offset_ptr spv pos) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end
+ | _ => None
+ end
+ end
+ | Freeframe2 sz pos, [Val spv; Memstate m] =>
+ match Mem.loadv Mptr m (Val.offset_ptr spv pos) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (Val v)
+ end
+ | _ => None
+ end
+ end
+ | Constant v, [] => Some (Val v)
+ | Fail, _ => None
+ | _, _ => None
+ end.
+
+
+Definition arith_op_eq (o1 o2: arith_op): ?? bool :=
+ match o1 with
+ | OArithR n1 =>
+ match o2 with OArithR n2 => struct_eq n1 n2 | _ => RET false end
+ | OArithRR n1 =>
+ match o2 with OArithRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRI32 n1 i1 =>
+ match o2 with OArithRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRI64 n1 i1 =>
+ match o2 with OArithRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRF32 n1 i1 =>
+ match o2 with OArithRF32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRF64 n1 i1 =>
+ match o2 with OArithRF64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRRR n1 =>
+ match o2 with OArithRRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRRI32 n1 i1 =>
+ match o2 with OArithRRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRRI64 n1 i1 =>
+ match o2 with OArithRRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithARRR n1 =>
+ match o2 with OArithARRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithARR n1 =>
+ match o2 with OArithARR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithARRI32 n1 i1 =>
+ match o2 with OArithARRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithARRI64 n1 i1 =>
+ match o2 with OArithARRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ end.
+
+Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp).
+
+Lemma arith_op_eq_correct o1 o2:
+ WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; my_wlp_simplify; try congruence.
+Qed.
+Hint Resolve arith_op_eq_correct: wlp.
+Opaque arith_op_eq_correct.
+
+Definition offset_eq (ofs1 ofs2 : offset): ?? bool :=
+ RET (Ptrofs.eq ofs1 ofs2).
+
+Lemma offset_eq_correct ofs1 ofs2:
+ WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2.
+Proof.
+ wlp_simplify.
+ pose (Ptrofs.eq_spec ofs1 ofs2).
+ rewrite H in *.
+ trivial.
+Qed.
+Hint Resolve offset_eq_correct: wlp.
+
+Definition trapping_mode_eq trap1 trap2 :=
+ RET (match trap1, trap2 with
+ | TRAP, TRAP | NOTRAP, NOTRAP => true
+ | TRAP, NOTRAP | NOTRAP, TRAP => false
+ end).
+Lemma trapping_mode_eq_correct t1 t2:
+ WHEN trapping_mode_eq t1 t2 ~> b THEN b = true -> t1 = t2.
+Proof.
+ wlp_simplify.
+ destruct t1; destruct t2; trivial; discriminate.
+Qed.
+Hint Resolve trapping_mode_eq_correct: wlp.
+
+Definition load_op_eq (o1 o2: load_op): ?? bool :=
+ match o1 with
+ | OLoadRRO n1 trap ofs1 =>
+ match o2 with
+ | OLoadRRO n2 trap2 ofs2 => iandb (phys_eq n1 n2) (iandb (offset_eq ofs1 ofs2) (trapping_mode_eq trap trap2))
+ | _ => RET false
+ end
+ | OLoadRRR n1 trap =>
+ match o2 with
+ | OLoadRRR n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2)
+ | _ => RET false
+ end
+ | OLoadRRRXS n1 trap =>
+ match o2 with
+ | OLoadRRRXS n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2)
+ | _ => RET false
+ end
+ end.
+
+Lemma load_op_eq_correct o1 o2:
+ WHEN load_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try discriminate.
+ { f_equal.
+ destruct trap, trap0; simpl in *; trivial; discriminate.
+ pose (Ptrofs.eq_spec ofs ofs0).
+ rewrite H in *. trivial. }
+ all: destruct trap, trap0; simpl in *; trivial; discriminate.
+Qed.
+Hint Resolve load_op_eq_correct: wlp.
+Opaque load_op_eq_correct.
+
+Definition store_op_eq (o1 o2: store_op): ?? bool :=
+ match o1 with
+ | OStoreRRO n1 ofs1 =>
+ match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end
+ | OStoreRRR n1 =>
+ match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OStoreRRRXS n1 =>
+ match o2 with OStoreRRRXS n2 => phys_eq n1 n2 | _ => RET false end
+ end.
+
+Lemma store_op_eq_correct o1 o2:
+ WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try discriminate.
+ - f_equal. pose (Ptrofs.eq_spec ofs ofs0).
+ rewrite H in *. trivial.
+ - congruence.
+ - congruence.
+Qed.
+Hint Resolve store_op_eq_correct: wlp.
+Opaque store_op_eq_correct.
+
+Definition control_op_eq (c1 c2: control_op): ?? bool :=
+ match c1 with
+ | Oj_l l1 =>
+ match c2 with Oj_l l2 => phys_eq l1 l2 | _ => RET false end
+ | Ocb bt1 l1 =>
+ match c2 with Ocb bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end
+ | Ocbu bt1 l1 =>
+ match c2 with Ocbu bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end
+ | Ojumptable tbl1 =>
+ match c2 with Ojumptable tbl2 => phys_eq tbl1 tbl2 | _ => RET false end
+ | Odiv =>
+ match c2 with Odiv => RET true | _ => RET false end
+ | Odivu =>
+ match c2 with Odivu => RET true | _ => RET false end
+ | OIncremPC sz1 =>
+ match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end
+ | OError =>
+ match c2 with OError => RET true | _ => RET false end
+ end.
+
+Lemma control_op_eq_correct c1 c2:
+ WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2.
+Proof.
+ destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence.
+Qed.
+Hint Resolve control_op_eq_correct: wlp.
+Opaque control_op_eq_correct.
+
+Definition op_eq (o1 o2: op): ?? bool :=
+ match o1 with
+ | Arith i1 =>
+ match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end
+ | Load i1 =>
+ match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end
+ | Store i1 =>
+ match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end
+ | Control i1 =>
+ match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end
+ | Allocframe sz1 pos1 =>
+ match o2 with Allocframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Allocframe2 sz1 pos1 =>
+ match o2 with Allocframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Freeframe sz1 pos1 =>
+ match o2 with Freeframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Freeframe2 sz1 pos1 =>
+ match o2 with Freeframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Constant c1 =>
+ match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end
+ | Fail =>
+ match o2 with Fail => RET true | _ => RET false end
+ end.
+
+Theorem op_eq_correct o1 o2:
+ WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try rewrite Z.eqb_eq in * |- ; try congruence.
+Qed.
+Hint Resolve op_eq_correct: wlp.
+Global Opaque op_eq_correct.
+
+End IMPPARAM.
+
+End P.
+
+Module L <: ISeqLanguage with Module LP:=P.
+
+Module LP:=P.
+
+Include MkSeqLanguage P.
+
+End L.
+
+Module IST := ImpSimu L ImpPosDict.
+
+Import L.
+Import P.
+
+(** Compilation from [Asmvliw] to [L] *)
+
+Local Open Scope positive_scope.
+
+Definition pmem : R.t := 1.
+
+Definition ireg_to_pos (ir: ireg) : R.t :=
+ match ir with
+ | GPR0 => 1 | GPR1 => 2 | GPR2 => 3 | GPR3 => 4 | GPR4 => 5 | GPR5 => 6 | GPR6 => 7 | GPR7 => 8 | GPR8 => 9 | GPR9 => 10
+ | GPR10 => 11 | GPR11 => 12 | GPR12 => 13 | GPR13 => 14 | GPR14 => 15 | GPR15 => 16 | GPR16 => 17 | GPR17 => 18 | GPR18 => 19 | GPR19 => 20
+ | GPR20 => 21 | GPR21 => 22 | GPR22 => 23 | GPR23 => 24 | GPR24 => 25 | GPR25 => 26 | GPR26 => 27 | GPR27 => 28 | GPR28 => 29 | GPR29 => 30
+ | GPR30 => 31 | GPR31 => 32 | GPR32 => 33 | GPR33 => 34 | GPR34 => 35 | GPR35 => 36 | GPR36 => 37 | GPR37 => 38 | GPR38 => 39 | GPR39 => 40
+ | GPR40 => 41 | GPR41 => 42 | GPR42 => 43 | GPR43 => 44 | GPR44 => 45 | GPR45 => 46 | GPR46 => 47 | GPR47 => 48 | GPR48 => 49 | GPR49 => 50
+ | GPR50 => 51 | GPR51 => 52 | GPR52 => 53 | GPR53 => 54 | GPR54 => 55 | GPR55 => 56 | GPR56 => 57 | GPR57 => 58 | GPR58 => 59 | GPR59 => 60
+ | GPR60 => 61 | GPR61 => 62 | GPR62 => 63 | GPR63 => 64
+ end
+.
+
+Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'.
+Proof.
+ destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Definition ppos (r: preg) : R.t :=
+ match r with
+ | RA => 2
+ | PC => 3
+ | IR ir => 3 + ireg_to_pos ir
+ end
+.
+
+Notation "# r" := (ppos r) (at level 100, right associativity).
+
+Lemma not_eq_add:
+ forall k n n', n <> n' -> k + n <> k + n'.
+Proof.
+ intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto.
+Qed.
+
+Lemma ppos_discr: forall r r', r <> r' -> ppos r <> ppos r'.
+Proof.
+ destruct r; destruct r'.
+ all: try discriminate; try contradiction.
+ - intros. apply not_eq_add. apply ireg_to_pos_discr. congruence.
+ - intros. unfold ppos. replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2).
+ apply Pos.add_no_neutral.
+ rewrite Pos.add_comm, Pos.add_assoc. reflexivity.
+ - intros. unfold ppos. rewrite Pos.add_comm. apply Pos.add_no_neutral.
+ - intros. unfold ppos. apply not_eq_sym.
+ replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
+ rewrite Pos.add_comm, Pos.add_assoc. reflexivity.
+ - intros. unfold ppos. apply not_eq_sym. rewrite Pos.add_comm. apply Pos.add_no_neutral.
+Qed.
+
+Lemma ppos_pmem_discr: forall r, pmem <> ppos r.
+Proof.
+ intros. destruct r.
+ - unfold ppos. unfold pmem. apply not_eq_sym. rewrite Pos.add_comm. replace 3 with (2 + 1). rewrite Pos.add_assoc. apply Pos.add_no_neutral.
+ reflexivity.
+ - unfold ppos. unfold pmem. discriminate.
+ - unfold ppos. unfold pmem. discriminate.
+Qed.
+
+(** Inversion functions, used for debug traces *)
+
+Definition pos_to_ireg (p: R.t) : option gpreg :=
+ match p with
+ | 1 => Some GPR0 | 2 => Some GPR1 | 3 => Some GPR2 | 4 => Some GPR3 | 5 => Some GPR4 | 6 => Some GPR5 | 7 => Some GPR6 | 8 => Some GPR7 | 9 => Some GPR8 | 10 => Some GPR9
+ | 11 => Some GPR10 | 12 => Some GPR11 | 13 => Some GPR12 | 14 => Some GPR13 | 15 => Some GPR14 | 16 => Some GPR15 | 17 => Some GPR16 | 18 => Some GPR17 | 19 => Some GPR18 | 20 => Some GPR19
+ | 21 => Some GPR20 | 22 => Some GPR21 | 23 => Some GPR22 | 24 => Some GPR23 | 25 => Some GPR24 | 26 => Some GPR25 | 27 => Some GPR26 | 28 => Some GPR27 | 29 => Some GPR28 | 30 => Some GPR29
+ | 31 => Some GPR30 | 32 => Some GPR31 | 33 => Some GPR32 | 34 => Some GPR33 | 35 => Some GPR34 | 36 => Some GPR35 | 37 => Some GPR36 | 38 => Some GPR37 | 39 => Some GPR38 | 40 => Some GPR39
+ | 41 => Some GPR40 | 42 => Some GPR41 | 43 => Some GPR42 | 44 => Some GPR43 | 45 => Some GPR44 | 46 => Some GPR45 | 47 => Some GPR46 | 48 => Some GPR47 | 49 => Some GPR48 | 50 => Some GPR49
+ | 51 => Some GPR50 | 52 => Some GPR51 | 53 => Some GPR52 | 54 => Some GPR53 | 55 => Some GPR54 | 56 => Some GPR55 | 57 => Some GPR56 | 58 => Some GPR57 | 59 => Some GPR58 | 60 => Some GPR59
+ | 61 => Some GPR60 | 62 => Some GPR61 | 63 => Some GPR62 | 64 => Some GPR63
+ | _ => None
+ end.
+
+Definition inv_ppos (p: R.t) : option preg :=
+ match p with
+ | 1 => None
+ | 2 => Some RA | 3 => Some PC
+ | n => match pos_to_ireg (n-3) with
+ | None => None
+ | Some gpr => Some (IR gpr)
+ end
+ end.
+
+Notation "a @ b" := (Econs a b) (at level 102, right associativity).
+
+(** Translations of instructions *)
+
+Definition trans_control (ctl: control) : inst :=
+ match ctl with
+ | Pret => [(#PC, PReg(#RA))]
+ | Pcall s => [(#RA, PReg(#PC)); (#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)]
+ | Picall r => [(#RA, PReg(#PC)); (#PC, PReg(#r))]
+ | Pgoto s => [(#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)]
+ | Pigoto r => [(#PC, PReg(#r))]
+ | Pj_l l => [(#PC, Op (Control (Oj_l l)) (PReg(#PC) @ Enil))]
+ | Pcb bt r l => [(#PC, Op (Control (Ocb bt l)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pcbu bt r l => [(#PC, Op (Control (Ocbu bt l)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pjumptable r labels => [(#PC, Op (Control (Ojumptable labels)) (PReg(#r) @ PReg(#PC) @ Enil));
+ (#GPR62, Op (Constant Vundef) Enil);
+ (#GPR63, Op (Constant Vundef) Enil) ]
+ | Pbuiltin ef args res => [(#PC, Op (Control (OError)) Enil)]
+ end.
+
+Definition trans_exit (ex: option control) : L.inst :=
+ match ex with
+ | None => []
+ | Some ctl => trans_control ctl
+ end
+.
+
+Definition trans_arith (ai: ar_instruction) : inst :=
+ match ai with
+ | PArithR n d => [(#d, Op (Arith (OArithR n)) Enil)]
+ | PArithRR n d s => [(#d, Op (Arith (OArithRR n)) (PReg(#s) @ Enil))]
+ | PArithRI32 n d i => [(#d, Op (Arith (OArithRI32 n i)) Enil)]
+ | PArithRI64 n d i => [(#d, Op (Arith (OArithRI64 n i)) Enil)]
+ | PArithRF32 n d i => [(#d, Op (Arith (OArithRF32 n i)) Enil)]
+ | PArithRF64 n d i => [(#d, Op (Arith (OArithRF64 n i)) Enil)]
+ | PArithRRR n d s1 s2 => [(#d, Op (Arith (OArithRRR n)) (PReg(#s1) @ PReg(#s2) @ Enil))]
+ | PArithRRI32 n d s i => [(#d, Op (Arith (OArithRRI32 n i)) (PReg(#s) @ Enil))]
+ | PArithRRI64 n d s i => [(#d, Op (Arith (OArithRRI64 n i)) (PReg(#s) @ Enil))]
+ | PArithARRR n d s1 s2 => [(#d, Op (Arith (OArithARRR n)) (PReg(#d) @ PReg(#s1) @ PReg(#s2) @ Enil))]
+ | PArithARR n d s => [(#d, Op (Arith (OArithARR n)) (PReg(#d) @ PReg(#s) @ Enil))]
+ | PArithARRI32 n d s i => [(#d, Op (Arith (OArithARRI32 n i)) (PReg(#d) @ PReg(#s) @ Enil))]
+ | PArithARRI64 n d s i => [(#d, Op (Arith (OArithARRI64 n i)) (PReg(#d) @ PReg(#s) @ Enil))]
+ end.
+
+
+Definition trans_basic (b: basic) : inst :=
+ match b with
+ | PArith ai => trans_arith ai
+ | PLoadRRO trap n d a ofs => [(#d, Op (Load (OLoadRRO n trap ofs)) (PReg (#a) @ PReg pmem @ Enil))]
+ | PLoadRRR trap n d a ro => [(#d, Op (Load (OLoadRRR n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PLoadRRRXS trap n d a ro => [(#d, Op (Load (OLoadRRRXS n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))]
+ | PLoadQRRO qd a ofs =>
+ let (d0, d1) := gpreg_q_expand qd in
+ [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+ (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+ | PLoadORRO od a ofs =>
+ match gpreg_o_expand od with
+ | (d0, d1, d2, d3) =>
+ [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+ (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+ (#d2, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 16)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+ (#d3, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 24)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+ end
+ | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreQRRO qs a ofs =>
+ let (s0, s1) := gpreg_q_expand qs in
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))]
+ | PStoreORRO os a ofs =>
+ match gpreg_o_expand os with
+ | (s0, s1, s2, s3) =>
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 16)))) (PReg (#s2) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 24)))) (PReg (#s3) @ PReg (#a) @ PReg pmem @ Enil))]
+ end
+ | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil);
+ (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))]
+ | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil));
+ (#SP, Op (Freeframe2 sz pos) (PReg (#SP) @ Old (PReg pmem) @ Enil));
+ (#RTMP, Op (Constant Vundef) Enil)]
+ | Pget rd ra => match ra with
+ | RA => [(#rd, PReg(#ra))]
+ | _ => [(#rd, Op Fail Enil)]
+ end
+ | Pset ra rd => match ra with
+ | RA => [(#ra, PReg(#rd))]
+ | _ => [(#rd, Op Fail Enil)]
+ end
+ | Pnop => []
+ end.
+
+Fixpoint trans_body (b: list basic) : list L.inst :=
+ match b with
+ | nil => nil
+ | b :: lb => (trans_basic b) :: (trans_body lb)
+ end.
+
+Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k.
+
+Definition trans_block (b: Asmvliw.bblock) : L.bblock :=
+ trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil).
+
+Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.
+Qed.
+
+Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb.
+Proof.
+ intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.
+Qed.
+
+(** Lemmas on the translation *)
+
+Definition state := L.mem.
+Definition exec := L.run.
+
+Definition match_states (s: Asmvliw.state) (s': state) :=
+ let (rs, m) := s in
+ s' pmem = Memstate m
+ /\ forall r, s' (#r) = Val (rs r).
+
+Definition match_outcome (o:outcome) (s: option state) :=
+ match o with
+ | Next rs m => exists s', s=Some s' /\ match_states (State rs m) s'
+ | Stuck => s=None
+ end.
+
+Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity).
+
+Definition trans_state (s: Asmvliw.state) : state :=
+ let (rs, m) := s in
+ fun x => if (Pos.eq_dec x pmem) then Memstate m
+ else match (inv_ppos x) with
+ | Some r => Val (rs r)
+ | None => Val Vundef
+ end.
+
+Lemma not_eq_IR:
+ forall r r', r <> r' -> IR r <> IR r'.
+Proof.
+ intros. congruence.
+Qed.
+
+(** Parallelizability test of a bblock (bundle), and bisimulation of the Asmblock and L parallel semantics *)
+
+Module PChk := ParallelChecks L PosPseudoRegSet.
+
+Definition bblock_para_check (p: Asmvliw.bblock) : bool :=
+ PChk.is_parallelizable (trans_block p).
+
+Section SECT_PAR.
+
+Import PChk.
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ || (rewrite assign_diff by (auto; try discriminate; try (apply ppos_discr; try discriminate; congruence); try (apply ppos_pmem_discr);
+ try (apply not_eq_sym; apply ppos_discr; try discriminate; congruence); try (apply not_eq_sym; apply ppos_pmem_discr); auto))
+ || (rewrite assign_eq)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Arguments Pos.add: simpl never.
+Arguments ppos: simpl never.
+
+Variable Ge: genv.
+
+Lemma trans_arith_par_correct ge fn rsr mr sr rsw mw sw rsw' i:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_arith_instr ge i rsr rsw = rsw' ->
+ exists sw',
+ inst_prun Ge (trans_arith i) sw sr sr = Some sw'
+ /\ match_states (State rsw' mw) sw'.
+Proof.
+ intros GENV MSR MSW PARARITH. subst. inv MSR. inv MSW.
+ unfold parexec_arith_instr. destruct i.
+(* Ploadsymbol *)
+ - destruct i. eexists; split; [| split].
+ * simpl. reflexivity.
+ * Simpl.
+ * simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRR *)
+ - eexists; split; [| split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+Qed.
+
+
+
+Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (bstep ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
+Proof.
+
+(* a little tactic to automate reasoning on preg_eq *)
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+Local Ltac preg_eq_discr r rd :=
+ destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
+ rewrite (assign_diff _ (#rd) (#r) _); auto;
+ rewrite Pregmap.gso; auto.
+
+ intros GENV MSR MSW; inversion MSR as (H & H0); inversion MSW as (H1 & H2).
+ destruct bi; simpl.
+(* Arith *)
+ - exploit trans_arith_par_correct. 5: eauto. all: eauto.
+(* Load *)
+ - destruct i.
+ (* Load Offset *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_offset; simpl; unfold exec_load_deps_offset; erewrite GENV, H, H0;
+ unfold eval_offset;
+ simpl; auto;
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Reg *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_reg; simpl; unfold exec_load_deps_reg; rewrite H, H0; rewrite (H0 rofs);
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Reg XS *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_regxs; simpl; unfold exec_load_deps_regxs; rewrite H, H0; rewrite (H0 rofs);
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Quad word *)
+ + unfold parexec_load_q_offset.
+ destruct (gpreg_q_expand rd) as [rd0 rd1]; destruct Ge; simpl.
+ rewrite H0, H.
+ destruct (Mem.loadv Many64 mr _) as [load0 | ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto.
+ eexists; intuition eauto.
+ { rewrite !(assign_diff _ _ pmem); auto. }
+ { preg_eq_discr r rd1.
+ preg_eq_discr r rd0. }
+
+ (* Load Octuple word *)
+ + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+ unfold parexec_load_o_offset.
+ destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl.
+ rewrite H0, H.
+ destruct (Mem.loadv Many64 mr (Val.offset_ptr (rsr ra) ofs)) as [load0 | ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 16)))) as [load2| ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 24)))) as [load3| ]; simpl; auto.
+ eexists; intuition eauto.
+ { rewrite !(assign_diff _ _ pmem); auto. }
+ { preg_eq_discr r rd3.
+ preg_eq_discr r rd2.
+ preg_eq_discr r rd1.
+ preg_eq_discr r rd0. }
+
+(* Store *)
+ - destruct i.
+ (* Store Offset *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_offset; simpl; unfold exec_store_deps_offset; erewrite GENV, H, H0; rewrite (H0 ra);
+ unfold eval_offset; simpl; auto;
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Reg *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_reg; simpl; unfold exec_store_deps_reg; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs);
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Reg XS *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_regxs; simpl; unfold exec_store_deps_regxs; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs);
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Quad Word *)
+ + unfold parexec_store_q_offset.
+ destruct (gpreg_q_expand rs) as [s0 s1]; destruct Ge; simpl.
+ rewrite !H0, !H.
+ destruct (Mem.storev _ _ _ (rsr s0)) as [mem0 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s1)) as [mem1 | ]; simpl; auto.
+ eexists; intuition eauto.
+ rewrite !assign_diff; auto.
+
+ (* Store Ocuple Word *)
+ + unfold parexec_store_o_offset.
+ destruct (gpreg_o_expand rs) as [[[s0 s1] s2] s3]; destruct Ge; simpl.
+ rewrite !H0, !H.
+ destruct (Mem.storev _ _ _ (rsr s0)) as [store0 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s1)) as [store1 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s2)) as [store2 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s3)) as [store3 | ]; simpl; auto.
+ eexists; intuition eauto.
+ rewrite !assign_diff; auto.
+
+ (* Allocframe *)
+ - destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
+ * eexists; repeat split.
+ { Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl.
+ rewrite H, MEMAL. rewrite MEMS. reflexivity. }
+ { Simpl. }
+ { intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl. }
+ * simpl; Simpl; erewrite !H0, H, MEMAL, MEMS; auto.
+ (* Freeframe *)
+ - erewrite !H0, H.
+ destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto.
+ destruct (rsr GPR12) eqn:SPeq; simpl; auto.
+ destruct (Mem.free _ _ _ _) eqn:MFREE; simpl; auto.
+ eexists; repeat split.
+ * simpl. Simpl. erewrite H0, SPeq, MLOAD, MFREE. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl. destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl.
+(* Pget *)
+ - destruct rs eqn:rseq; simpl; auto.
+ eexists. repeat split. Simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* Pset *)
+ - destruct rd eqn:rdeq; simpl; auto.
+ eexists. repeat split. Simpl. intros rr; destruct rr; Simpl.
+(* Pnop *)
+ - eexists. repeat split; assumption.
+Qed.
+
+
+Theorem bisimu_par_body:
+ forall bdy ge fn rsr mr sr rsw mw sw,
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (parexec_wio_body ge bdy rsr rsw mr mw) (prun_iw Ge (trans_body bdy) sw sr).
+Proof.
+ induction bdy as [|i bdy]; simpl; eauto.
+ intros.
+ exploit (bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw i); eauto.
+ destruct (bstep _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2). rewrite X1; simpl; eauto.
+ - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_par_control ex sz aux ge fn rsr rsw mr mw sr sw:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (parexec_control ge fn ex (incrPC (Ptrofs.repr sz) rsr) (rsw#PC <- aux) mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+Proof.
+ intros GENV MSR MSW; unfold estep.
+ simpl in *. inv MSR. inv MSW.
+ destruct ex.
+ - destruct c; destruct i; try discriminate; simpl.
+ all: try (rewrite (H0 PC); eexists; split; try split; Simpl; intros rr; destruct rr; unfold incrPC; Simpl).
+
+ (* Pjumptable *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). unfold incrPC. Simpl.
+ destruct (rsr r); simpl; auto. destruct (list_nth_z _ _); simpl; auto.
+ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto.
+ eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl.
+ destruct (preg_eq g GPR62). rewrite e. Simpl.
+ destruct (preg_eq g GPR63). rewrite e. Simpl. Simpl.
+
+ (* Pj_l *)
+ + rewrite (H0 PC). Simpl. unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto.
+ unfold incrPC. Simpl. destruct (Val.offset_ptr _ _); simpl; auto.
+ eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl.
+
+ (* Pcb *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmp_for_btest _); simpl; auto. destruct o; simpl; auto.
+ unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i.
+ ++ destruct (Val.cmp_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+ ++ destruct (Val.cmpl_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+
+ (* Pcbu *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmpu_for_btest _); simpl; auto. destruct o; simpl; auto.
+ unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i.
+ ++ destruct (Val.mxcmpu_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+ ++ destruct (Val.mxcmplu_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+
+ - simpl in *. rewrite (H0 PC). eexists; split; try split; Simpl.
+ intros rr; destruct rr; unfold incrPC; Simpl.
+Qed.
+
+Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (estep ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+Proof.
+ intros; unfold estep.
+ exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto.
+ replace (rsw # PC <- (rsw PC)) with rsw; auto.
+ apply extensionality. intros; destruct x; simpl; auto.
+Qed.
+
+Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil).
+
+Theorem bisimu_par_wio ge fn rsr mr sr bdy ex sz:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_outcome (parexec_wio ge fn bdy ex (Ptrofs.repr sz) rsr mr) (prun_iw Ge (trans_block_aux bdy sz ex) sr sr).
+Proof.
+ intros GENV MSR. unfold parexec_wio, trans_block_aux.
+ exploit (bisimu_par_body bdy ge fn rsr mr sr rsr mr sr); eauto.
+ destruct (parexec_wio_body _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite prun_iw_app_Some; eauto.
+ exploit (bisimu_par_exit ex sz ge fn rsr rs mr m sr s'); eauto.
+ subst Ge; simpl. destruct MSR as (Y1 & Y2). erewrite Y2; simpl.
+ destruct (inst_prun _ _ _ _ _); simpl; auto.
+ - intros X; erewrite prun_iw_app_None; eauto.
+Qed.
+
+Theorem bisimu_par_wio_bblock ge fn rsr mr sr bdy1 bdy2 ex sz:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_outcome
+ match parexec_wio ge fn bdy1 ex (Ptrofs.repr sz) rsr mr with
+ | Next rs' m' => parexec_wio_body ge bdy2 rsr rs' mr m'
+ | Stuck => Stuck
+ end
+ (prun_iw Ge ((trans_block_aux bdy1 sz ex)++(trans_body bdy2)) sr sr).
+Proof.
+ intros.
+ exploit (bisimu_par_wio ge fn rsr mr sr bdy1 ex sz); eauto.
+ destruct (parexec_wio _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite prun_iw_app_Some; eauto.
+ eapply bisimu_par_body; eauto.
+ - intros; erewrite prun_iw_app_None; eauto.
+Qed.
+
+Lemma trans_body_perserves_permutation bdy1 bdy2:
+ Permutation bdy1 bdy2 ->
+ Permutation (trans_body bdy1) (trans_body bdy2).
+Proof.
+ induction 1; simpl; econstructor; eauto.
+Qed.
+
+Lemma trans_body_app bdy1: forall bdy2,
+ trans_body (bdy1++bdy2) = (trans_body bdy1) ++ (trans_body bdy2).
+Proof.
+ induction bdy1; simpl; congruence.
+Qed.
+
+Theorem trans_block_perserves_permutation bdy1 bdy2 b:
+ Permutation (bdy1 ++ bdy2) (body b) ->
+ Permutation (trans_block b) ((trans_block_aux bdy1 (size b) (exit b))++(trans_body bdy2)).
+Proof.
+ intro H; unfold trans_block, trans_block_aux.
+ eapply perm_trans.
+ - eapply Permutation_app_tail.
+ apply trans_body_perserves_permutation.
+ apply Permutation_sym; eapply H.
+ - rewrite trans_body_app. rewrite <-! app_assoc.
+ apply Permutation_app_head.
+ apply Permutation_app_comm.
+Qed.
+
+Theorem bisimu_par rs1 m1 s1' b ge fn o2:
+ Ge = Genv ge fn ->
+ match_states (State rs1 m1) s1' ->
+ parexec_bblock ge fn b rs1 m1 o2 ->
+ exists o2',
+ prun Ge (trans_block b) s1' o2'
+ /\ match_outcome o2 o2'.
+Proof.
+ intros GENV MS PAREXEC.
+ inversion PAREXEC as (bdy1 & bdy2 & PERM & WIO).
+ exploit trans_block_perserves_permutation; eauto.
+ intros Perm.
+ exploit (bisimu_par_wio_bblock ge fn rs1 m1 s1' bdy1 bdy2 (exit b) (size b)); eauto.
+ rewrite <- WIO. clear WIO.
+ intros H; eexists; split. 2: eapply H.
+ unfold prun; eexists; split; eauto.
+ destruct (prun_iw _ _ _ _); simpl; eauto.
+Qed.
+
+(** sequential execution *)
+Theorem bisimu_basic ge fn bi rs m s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_basic_instr ge bi rs m) (inst_run Ge (trans_basic bi) s s).
+Proof.
+ intros; unfold exec_basic_instr. rewrite inst_run_prun.
+ eapply bisimu_par_wio_basic; eauto.
+Qed.
+
+Lemma bisimu_body:
+ forall bdy ge fn rs m s,
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_body ge bdy rs m) (exec Ge (trans_body bdy) s).
+Proof.
+ induction bdy as [|i bdy]; simpl; eauto.
+ intros.
+ exploit (bisimu_basic ge fn i rs m s); eauto.
+ destruct (exec_basic_instr _ _ _ _); simpl.
+ - intros (s' & X1 & X2). rewrite X1; simpl; eauto.
+ - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_exit ge fn b rs m s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_control ge fn (exit b) (nextblock b rs) m) (inst_run Ge (trans_pcincr (size b) (trans_exit (exit b))) s s).
+Proof.
+ intros; unfold exec_control, nextblock. rewrite inst_run_prun.
+ apply (bisimu_par_control (exit b) (size b) (Val.offset_ptr (rs PC) (Ptrofs.repr (size b))) ge fn rs rs m m s s); auto.
+Qed.
+
+Theorem bisimu rs m b ge fn s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_bblock ge fn b rs m) (exec Ge (trans_block b) s).
+Proof.
+ intros GENV MS. unfold exec_bblock.
+ exploit (bisimu_body (body b) ge fn rs m s); eauto.
+ unfold exec, trans_block; simpl.
+ destruct (exec_body _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite run_app_Some; eauto.
+ exploit (bisimu_exit ge fn b rs0 m0 s'); eauto.
+ subst Ge; simpl. destruct X2 as (Y1 & Y2). erewrite Y2; simpl.
+ destruct (inst_run _ _ _); simpl; auto.
+ - intros X; erewrite run_app_None; eauto.
+Qed.
+
+
+Theorem trans_state_match: forall S, match_states S (trans_state S).
+Proof.
+ intros. destruct S as (rs & m). simpl.
+ split. reflexivity.
+ intro. destruct r; try reflexivity.
+ destruct g; reflexivity.
+Qed.
+
+
+Lemma state_eq_decomp:
+ forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2.
+Proof.
+ intros. congruence.
+Qed.
+
+Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2.
+Proof.
+ unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1.
+ apply state_eq_decomp.
+ - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence.
+ - congruence.
+Qed.
+
+Lemma bblock_para_check_correct ge fn bb rs m rs' m':
+ Ge = Genv ge fn ->
+ exec_bblock ge fn bb rs m = Next rs' m' ->
+ bblock_para_check bb = true ->
+ det_parexec ge fn bb rs m rs' m'.
+Proof.
+ intros H H0 H1 o H2. unfold bblock_para_check in H1.
+ exploit (bisimu rs m bb ge fn); eauto. eapply trans_state_match.
+ rewrite H0; simpl.
+ intros (s2' & EXEC & MS).
+ exploit bisimu_par. 2: apply (trans_state_match (State rs m)). all: eauto.
+ intros (o2' & PRUN & MO).
+ exploit parallelizable_correct. apply is_para_correct_aux. eassumption.
+ intro. eapply H3 in PRUN. clear H3. destruct o2'.
+ - inv PRUN. inv H3. unfold exec in EXEC; unfold trans_state in H.
+ assert (x = s2') by congruence. subst. clear H.
+ assert (m0 = s2') by (apply functional_extensionality; auto). subst. clear H4.
+ destruct o; try discriminate. inv MO. inv H. assert (s2' = x) by congruence. subst.
+ exploit (state_equiv (State rs' m') (State rs0 m0)).
+ 2: eapply H4. eapply MS. intro H. inv H. reflexivity.
+ - unfold match_outcome in MO. destruct o.
+ + inv MO. inv H3. discriminate.
+ + clear MO. unfold exec in EXEC.
+ unfold trans_state in PRUN; rewrite EXEC in PRUN. discriminate.
+Qed.
+
+End SECT_PAR.
+
+Section SECT_BBLOCK_EQUIV.
+
+Variable Ge: genv.
+
+Local Hint Resolve trans_state_match: core.
+
+Lemma bblock_simu_reduce:
+ forall p1 p2 ge fn,
+ Ge = Genv ge fn ->
+ L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+ Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ unfold bblock_simu, res_eq; intros p1 p2 ge fn H1 H2 rs m DONTSTUCK.
+ generalize (H2 (trans_state (State rs m))); clear H2.
+ intro H2.
+ exploit (bisimu Ge rs m p1 ge fn (trans_state (State rs m))); eauto.
+ exploit (bisimu Ge rs m p2 ge fn (trans_state (State rs m))); eauto.
+ destruct (exec_bblock ge fn p1 rs m); try congruence.
+ intros H3 (s2' & exp2 & MS'). unfold exec in exp2, H3. rewrite exp2 in H2.
+ destruct H2 as (m2' & H2 & H4). discriminate. rewrite H2 in H3.
+ destruct (exec_bblock ge fn p2 rs m); simpl in H3.
+ * destruct H3 as (s' & H3 & H5 & H6). inv H3. inv MS'.
+ replace rs0 with rs1.
+ - replace m0 with m1; auto. congruence.
+ - apply functional_extensionality. intros r.
+ generalize (H0 r). intros Hr. congruence.
+ * discriminate.
+Qed.
+
+(** Used for debug traces *)
+
+Definition gpreg_name (gpr: gpreg) :=
+ match gpr with
+ | GPR0 => Str ("GPR0") | GPR1 => Str ("GPR1") | GPR2 => Str ("GPR2") | GPR3 => Str ("GPR3") | GPR4 => Str ("GPR4")
+ | GPR5 => Str ("GPR5") | GPR6 => Str ("GPR6") | GPR7 => Str ("GPR7") | GPR8 => Str ("GPR8") | GPR9 => Str ("GPR9")
+ | GPR10 => Str ("GPR10") | GPR11 => Str ("GPR11") | GPR12 => Str ("GPR12") | GPR13 => Str ("GPR13") | GPR14 => Str ("GPR14")
+ | GPR15 => Str ("GPR15") | GPR16 => Str ("GPR16") | GPR17 => Str ("GPR17") | GPR18 => Str ("GPR18") | GPR19 => Str ("GPR19")
+ | GPR20 => Str ("GPR20") | GPR21 => Str ("GPR21") | GPR22 => Str ("GPR22") | GPR23 => Str ("GPR23") | GPR24 => Str ("GPR24")
+ | GPR25 => Str ("GPR25") | GPR26 => Str ("GPR26") | GPR27 => Str ("GPR27") | GPR28 => Str ("GPR28") | GPR29 => Str ("GPR29")
+ | GPR30 => Str ("GPR30") | GPR31 => Str ("GPR31") | GPR32 => Str ("GPR32") | GPR33 => Str ("GPR33") | GPR34 => Str ("GPR34")
+ | GPR35 => Str ("GPR35") | GPR36 => Str ("GPR36") | GPR37 => Str ("GPR37") | GPR38 => Str ("GPR38") | GPR39 => Str ("GPR39")
+ | GPR40 => Str ("GPR40") | GPR41 => Str ("GPR41") | GPR42 => Str ("GPR42") | GPR43 => Str ("GPR43") | GPR44 => Str ("GPR44")
+ | GPR45 => Str ("GPR45") | GPR46 => Str ("GPR46") | GPR47 => Str ("GPR47") | GPR48 => Str ("GPR48") | GPR49 => Str ("GPR49")
+ | GPR50 => Str ("GPR50") | GPR51 => Str ("GPR51") | GPR52 => Str ("GPR52") | GPR53 => Str ("GPR53") | GPR54 => Str ("GPR54")
+ | GPR55 => Str ("GPR55") | GPR56 => Str ("GPR56") | GPR57 => Str ("GPR57") | GPR58 => Str ("GPR58") | GPR59 => Str ("GPR59")
+ | GPR60 => Str ("GPR60") | GPR61 => Str ("GPR61") | GPR62 => Str ("GPR62") | GPR63 => Str ("GPR63")
+ end.
+
+Definition string_of_name (x: P.R.t): ?? pstring :=
+ if (Pos.eqb x pmem) then
+ RET (Str "MEM")
+ else
+ match inv_ppos x with
+ | Some RA => RET (Str ("RA"))
+ | Some PC => RET (Str ("PC"))
+ | Some (IR gpr) => RET (gpreg_name gpr)
+ | _ => RET (Str ("UNDEFINED"))
+ end.
+
+Definition string_of_name_r (n: arith_name_r): pstring :=
+ match n with
+ | Ploadsymbol _ _ => "Ploadsymbol"
+ end.
+
+Definition string_of_name_rr (n: arith_name_rr): pstring :=
+ match n with
+ Pmv => "Pmv"
+ | Pnegw => "Pnegw"
+ | Pnegl => "Pnegl"
+ | Pcvtl2w => "Pcvtl2w"
+ | Psxwd => "Psxwd"
+ | Pzxwd => "Pzxwd"
+ | Pextfz _ _ => "Pextfz"
+ | Pextfs _ _ => "Pextfs"
+ | Pextfzl _ _ => "Pextfzl"
+ | Pextfsl _ _ => "Pextfsl"
+ | Pfabsd => "Pfabsd"
+ | Pfabsw => "Pfabsw"
+ | Pfnegd => "Pfnegd"
+ | Pfnegw => "Pfnegw"
+ | Pfinvw => "Pfinvw"
+ | Pfnarrowdw => "Pfnarrowdw"
+ | Pfwidenlwd => "Pfwidenlwd"
+ | Pfloatwrnsz => "Pfloatwrnsz"
+ | Pfloatuwrnsz => "Pfloatuwrnsz"
+ | Pfloatudrnsz => "Pfloatudrnsz"
+ | Pfloatdrnsz => "Pfloatdrnsz"
+ | Pfixedwrzz => "Pfixedwrzz"
+ | Pfixeduwrzz => "Pfixeduwrzz"
+ | Pfixeddrzz => "Pfixeddrzz"
+ | Pfixedudrzz => "Pfixedudrzz"
+ | Pfixeddrzz_i32 => "Pfixeddrzz_i32"
+ | Pfixedudrzz_i32 => "Pfixedudrzz_i32"
+ end.
+
+Definition string_of_name_ri32 (n: arith_name_ri32): pstring :=
+ match n with
+ | Pmake => "Pmake"
+ end.
+
+Definition string_of_name_ri64 (n: arith_name_ri64): pstring :=
+ match n with
+ | Pmakel => "Pmakel"
+ end.
+
+Definition string_of_name_rf32 (n: arith_name_rf32): pstring :=
+ match n with
+ | Pmakefs => "Pmakefs"
+ end.
+
+Definition string_of_name_rf64 (n: arith_name_rf64): pstring :=
+ match n with
+ | Pmakef => "Pmakef"
+ end.
+
+Definition string_of_name_rrr (n: arith_name_rrr): pstring :=
+ match n with
+ | Pcompw _ => "Pcompw"
+ | Pcompl _ => "Pcompl"
+ | Pfcompw _ => "Pfcompw"
+ | Pfcompl _ => "Pfcompl"
+ | Paddw => "Paddw"
+ | Paddxw _ => "Paddxw"
+ | Psubw => "Psubw"
+ | Prevsubxw _ => "Prevsubxw"
+ | Pmulw => "Pmulw"
+ | Pandw => "Pandw"
+ | Pnandw => "Pnandw"
+ | Porw => "Porw"
+ | Pnorw => "Pnorw"
+ | Pxorw => "Pxorw"
+ | Pnxorw => "Pnxorw"
+ | Pandnw => "Pandnw"
+ | Pornw => "Pornw"
+ | Psraw => "Psraw"
+ | Psrlw => "Psrlw"
+ | Psrxw => "Psrxw"
+ | Psllw => "Psllw"
+ | Paddl => "Paddl"
+ | Paddxl _ => "Paddxl"
+ | Psubl => "Psubl"
+ | Prevsubxl _ => "Prevsubxl"
+ | Pandl => "Pandl"
+ | Pnandl => "Pnandl"
+ | Porl => "Porl"
+ | Pnorl => "Pnorl"
+ | Pxorl => "Pxorl"
+ | Pnxorl => "Pnxorl"
+ | Pandnl => "Pandnl"
+ | Pornl => "Pornl"
+ | Pmull => "Pmull"
+ | Pslll => "Pslll"
+ | Psrll => "Psrll"
+ | Psrxl => "Psrxl"
+ | Psral => "Psral"
+ | Pfaddd => "Pfaddd"
+ | Pfaddw => "Pfaddw"
+ | Pfsbfd => "Pfsbfd"
+ | Pfsbfw => "Pfsbfw"
+ | Pfmuld => "Pfmuld"
+ | Pfmulw => "Pfmulw"
+ | Pfmind => "Pfmind"
+ | Pfminw => "Pfminw"
+ | Pfmaxd => "Pfmaxd"
+ | Pfmaxw => "Pfmaxw"
+ end.
+
+Definition string_of_name_rri32 (n: arith_name_rri32): pstring :=
+ match n with
+ Pcompiw _ => "Pcompiw"
+ | Paddiw => "Paddiw"
+ | Paddxiw _ => "Paddxiw"
+ | Prevsubiw => "Prevsubiw"
+ | Prevsubxiw _ => "Prevsubxiw"
+ | Pmuliw => "Pmuliw"
+ | Pandiw => "Pandiw"
+ | Pnandiw => "Pnandiw"
+ | Poriw => "Poriw"
+ | Pnoriw => "Pnoriw"
+ | Pxoriw => "Pxoriw"
+ | Pnxoriw => "Pnxoriw"
+ | Pandniw => "Pandniw"
+ | Porniw => "Porniw"
+ | Psraiw => "Psraiw"
+ | Psrliw => "Psrliw"
+ | Psrxiw => "Psrxiw"
+ | Pslliw => "Pslliw"
+ | Proriw => "Proriw"
+ | Psllil => "Psllil"
+ | Psrlil => "Psrlil"
+ | Psrail => "Psrail"
+ | Psrxil => "Psrxil"
+ end.
+
+Definition string_of_name_rri64 (n: arith_name_rri64): pstring :=
+ match n with
+ Pcompil _ => "Pcompil"
+ | Paddil => "Paddil"
+ | Prevsubil => "Prevsubil"
+ | Paddxil _ => "Paddxil"
+ | Prevsubxil _ => "Prevsubxil"
+ | Pmulil => "Pmulil"
+ | Pandil => "Pandil"
+ | Pnandil => "Pnandil"
+ | Poril => "Poril"
+ | Pnoril => "Pnoril"
+ | Pxoril => "Pxoril"
+ | Pnxoril => "Pnxoril"
+ | Pandnil => "Pandnil"
+ | Pornil => "Pornil"
+ end.
+
+Definition string_of_name_arrr (n: arith_name_arrr): pstring :=
+ match n with
+ | Pmaddw => "Pmaddw"
+ | Pmaddl => "Pmaddl"
+ | Pmsubw => "Pmsubw"
+ | Pmsubl => "Pmsubl"
+ | Pcmove _ => "Pcmove"
+ | Pcmoveu _ => "Pcmoveu"
+ | Pfmaddfw => "Pfmaddfw"
+ | Pfmaddfl => "Pfmaddfl"
+ | Pfmsubfw => "Pfmsubfw"
+ | Pfmsubfl => "Pfmsubfl"
+ end.
+
+Definition string_of_name_arr (n: arith_name_arr): pstring :=
+ match n with
+ | Pinsf _ _ => "Pinsf"
+ | Pinsfl _ _ => "Pinsfl"
+ end.
+
+Definition string_of_name_arri32 (n: arith_name_arri32): pstring :=
+ match n with
+ | Pmaddiw => "Pmaddw"
+ | Pcmoveiw _ => "Pcmoveiw"
+ | Pcmoveuiw _ => "Pcmoveuiw"
+ end.
+
+Definition string_of_name_arri64 (n: arith_name_arri64): pstring :=
+ match n with
+ | Pmaddil => "Pmaddl"
+ | Pcmoveil _ => "Pcmoveil"
+ | Pcmoveuil _ => "Pcmoveuil"
+ end.
+
+Definition string_of_arith (op: arith_op): pstring :=
+ match op with
+ | OArithR n => string_of_name_r n
+ | OArithRR n => string_of_name_rr n
+ | OArithRI32 n _ => string_of_name_ri32 n
+ | OArithRI64 n _ => string_of_name_ri64 n
+ | OArithRF32 n _ => string_of_name_rf32 n
+ | OArithRF64 n _ => string_of_name_rf64 n
+ | OArithRRR n => string_of_name_rrr n
+ | OArithRRI32 n _ => string_of_name_rri32 n
+ | OArithRRI64 n _ => string_of_name_rri64 n
+ | OArithARRR n => string_of_name_arrr n
+ | OArithARR n => string_of_name_arr n
+ | OArithARRI32 n _ => string_of_name_arri32 n
+ | OArithARRI64 n _ => string_of_name_arri64 n
+ end.
+
+Definition string_of_load_name (n: load_name) : pstring :=
+ match n with
+ Plb => "Plb"
+ | Plbu => "Plbu"
+ | Plh => "Plh"
+ | Plhu => "Plhu"
+ | Plw => "Plw"
+ | Plw_a => "Plw_a"
+ | Pld => "Pld"
+ | Pld_a => "Pld_a"
+ | Pfls => "Pfls"
+ | Pfld => "Pfld"
+ end.
+
+Definition string_of_load (op: load_op): pstring :=
+ match op with
+ | OLoadRRO n _ _ => string_of_load_name n
+ | OLoadRRR n _ => string_of_load_name n
+ | OLoadRRRXS n _ => string_of_load_name n
+ end.
+
+Definition string_of_store_name (n: store_name) : pstring :=
+ match n with
+ Psb => "Psb"
+ | Psh => "Psh"
+ | Psw => "Psw"
+ | Psw_a => "Psw_a"
+ | Psd => "Psd"
+ | Psd_a => "Psd_a"
+ | Pfss => "Pfss"
+ | Pfsd => "Pfsd"
+ end.
+
+Definition string_of_store (op: store_op) : pstring :=
+ match op with
+ | OStoreRRO n _ => string_of_store_name n
+ | OStoreRRR n => string_of_store_name n
+ | OStoreRRRXS n => string_of_store_name n
+ end.
+
+Definition string_of_control (op: control_op) : pstring :=
+ match op with
+ | Oj_l _ => "Oj_l"
+ | Ocb _ _ => "Ocb"
+ | Ocbu _ _ => "Ocbu"
+ | Odiv => "Odiv"
+ | Odivu => "Odivu"
+ | Ojumptable _ => "Ojumptable"
+ | OError => "OError"
+ | OIncremPC _ => "OIncremPC"
+ end.
+
+Definition string_of_op (op: P.op): ?? pstring :=
+ match op with
+ | Arith op => RET (string_of_arith op)
+ | Load op => RET (string_of_load op)
+ | Store op => RET (string_of_store op)
+ | Control op => RET (string_of_control op)
+ | Allocframe _ _ => RET (Str "Allocframe")
+ | Allocframe2 _ _ => RET (Str "Allocframe2")
+ | Freeframe _ _ => RET (Str "Freeframe")
+ | Freeframe2 _ _ => RET (Str "Freeframe2")
+ | Constant _ => RET (Str "Constant")
+ | Fail => RET (Str "Fail")
+ end.
+
+End SECT_BBLOCK_EQUIV.
+
+(** REWRITE RULES *)
+
+Definition is_constant (o: op): bool :=
+ match o with
+ | Constant _ | OArithR _ | OArithRI32 _ _ | OArithRI64 _ _ | OArithRF32 _ _ | OArithRF64 _ _ => true
+ | _ => false
+ end.
+
+Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None.
+Proof.
+ destruct o; simpl in * |- *; try congruence.
+ destruct ao; simpl in * |- *; try congruence;
+ destruct n; simpl in * |- *; try congruence;
+ unfold arith_eval; destruct ge; simpl in * |- *; try congruence.
+Qed.
+
+Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t).
+
+Local Hint Resolve is_constant_correct: wlp.
+
+Lemma main_reduce_correct t:
+ WHEN main_reduce t ~> pt THEN Terms.match_pt t pt.
+Proof.
+ wlp_simplify.
+Qed.
+
+Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}.
+
+Definition bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock) : ?? bool :=
+ if verb then
+ IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2)
+ else
+ IST.bblock_simu_test reduce (trans_block p1) (trans_block p2).
+
+Local Hint Resolve IST.bblock_simu_test_correct bblock_simu_reduce IST.verb_bblock_simu_test_correct: wlp.
+
+(** Main simulation (Impure) theorem *)
+Theorem bblock_simu_test_correct verb p1 p2 :
+ WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn, Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ wlp_simplify.
+Qed.
+Hint Resolve bblock_simu_test_correct: wlp.
+
+(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock): bool :=
+ match unsafe_coerce (bblock_simu_test verb p1 p2) with
+ | Some b => b
+ | None => false
+ end.
+
+Theorem pure_bblock_simu_test_correct verb p1 p2 ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ unfold pure_bblock_simu_test.
+ destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate.
+ intros; subst. eapply bblock_simu_test_correct; eauto.
+ apply unsafe_coerce_not_really_correct; eauto.
+Qed.
+
+Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock_simu_test true.
+
+Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ eapply (pure_bblock_simu_test_correct true).
+Qed.
diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v
new file mode 100644
index 00000000..ab827b1c
--- /dev/null
+++ b/kvx/Asmblockgen.v
@@ -0,0 +1,1211 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Translation from Machblock to KVX assembly language (Asmblock)
+ Inspired from the Mach->Asm pass of other backends, but adapted to the block structure *)
+
+Require Archi.
+Require Import Coqlib Errors.
+Require Import AST Integers Floats Memdata.
+Require Import Op Locations Machblock Asmvliw Asmblock.
+Require ExtValues.
+Require Import Chunks.
+
+Local Open Scope string_scope.
+Local Open Scope error_monad_scope.
+
+Import PArithCoercions.
+
+(** The code generation functions take advantage of several
+ characteristics of the [Mach] code generated by earlier passes of the
+ compiler, mostly that argument and result registers are of the correct
+ types. These properties are true by construction, but it's easier to
+ recheck them during code generation and fail if they do not hold. *)
+
+(** Extracting integer or float registers. *)
+
+Inductive immed32 : Type :=
+ | Imm32_single (imm: int).
+
+Definition make_immed32 (val: int) := Imm32_single val.
+
+Inductive immed64 : Type :=
+ | Imm64_single (imm: int64)
+.
+
+Definition make_immed64 (val: int64) := Imm64_single val.
+
+Notation "a ::g b" := (cons (A:=instruction) a b) (at level 49, right associativity).
+Notation "a ::i b" := (cons (A:=basic) a b) (at level 49, right associativity).
+Notation "a ::b lb" := ((bblock_single_inst a) :: lb) (at level 49, right associativity).
+Notation "a ++g b" := (app (A:=instruction) a b) (at level 49, right associativity).
+Notation "a @@ b" := (app a b) (at level 49, right associativity).
+
+Definition loadimm32 (r: ireg) (n: int) :=
+ match make_immed32 n with
+ | Imm32_single imm => Pmake r imm
+ end.
+
+Definition opimm32 (op: arith_name_rrr)
+ (opimm: arith_name_rri32)
+ (rd rs: ireg) (n: int) :=
+ match make_immed32 n with
+ | Imm32_single imm => opimm rd rs imm
+ end.
+
+Definition addimm32 := opimm32 Paddw Paddiw.
+Definition mulimm32 := opimm32 Pmulw Pmuliw.
+Definition andimm32 := opimm32 Pandw Pandiw.
+Definition nandimm32 := opimm32 Pnandw Pnandiw.
+Definition orimm32 := opimm32 Porw Poriw.
+Definition norimm32 := opimm32 Pnorw Pnoriw.
+Definition xorimm32 := opimm32 Pxorw Pxoriw.
+Definition nxorimm32 := opimm32 Pnxorw Pnxoriw.
+
+Definition loadimm64 (r: ireg) (n: int64) :=
+ match make_immed64 n with
+ | Imm64_single imm => Pmakel r imm
+ end.
+
+Definition opimm64 (op: arith_name_rrr)
+ (opimm: arith_name_rri64)
+ (rd rs: ireg) (n: int64) :=
+ match make_immed64 n with
+ | Imm64_single imm => opimm rd rs imm
+end.
+
+Definition addimm64 := opimm64 Paddl Paddil.
+Definition mulimm64 := opimm64 Pmull Pmulil.
+Definition orimm64 := opimm64 Porl Poril.
+Definition andimm64 := opimm64 Pandl Pandil.
+Definition xorimm64 := opimm64 Pxorl Pxoril.
+Definition norimm64 := opimm64 Pnorl Pnoril.
+Definition nandimm64 := opimm64 Pnandl Pnandil.
+Definition nxorimm64 := opimm64 Pnxorl Pnxoril.
+
+Definition addptrofs (rd rs: ireg) (n: ptrofs) :=
+ if Ptrofs.eq_dec n Ptrofs.zero then
+ Pmv rd rs
+ else
+ addimm64 rd rs (Ptrofs.to_int64 n).
+
+(** Translation of conditional branches. *)
+
+Definition transl_comp
+ (c: comparison) (s: signedness) (r1 r2: ireg) (lbl: label) (k: code) : list instruction :=
+ Pcompw (itest_for_cmp c s) RTMP r1 r2 ::g Pcb BTwnez RTMP lbl ::g k.
+
+Definition transl_compi
+ (c: comparison) (s: signedness) (r: ireg) (imm: int) (lbl: label) (k: code) : list instruction :=
+ Pcompiw (itest_for_cmp c s) RTMP r imm ::g Pcb BTwnez RTMP lbl ::g k.
+
+Definition transl_compl
+ (c: comparison) (s: signedness) (r1 r2: ireg) (lbl: label) (k: code) : list instruction :=
+ Pcompl (itest_for_cmp c s) RTMP r1 r2 ::g Pcb BTwnez RTMP lbl ::g k.
+
+Definition transl_compil
+ (c: comparison) (s: signedness) (r: ireg) (imm: int64) (lbl: label) (k: code) : list instruction :=
+ Pcompil (itest_for_cmp c s) RTMP r imm ::g Pcb BTwnez RTMP lbl ::g k.
+
+Definition select_comp (n: int) (c: comparison) : option comparison :=
+ if Int.eq n Int.zero then
+ match c with
+ | Ceq => Some Ceq
+ | Cne => Some Cne
+ | _ => None
+ end
+ else
+ None
+ .
+
+Definition transl_opt_compuimm
+ (n: int) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction :=
+ if Int.eq n Int.zero then
+ match c with
+ | Ceq => Pcbu BTweqz r1 lbl ::g k
+ | Cne => Pcbu BTwnez r1 lbl ::g k
+ | _ => transl_compi c Unsigned r1 n lbl k
+ end
+ else
+ transl_compi c Unsigned r1 n lbl k
+ .
+
+Definition select_compl (n: int64) (c: comparison) : option comparison :=
+ if Int64.eq n Int64.zero then
+ match c with
+ | Ceq => Some Ceq
+ | Cne => Some Cne
+ | _ => None
+ end
+ else
+ None
+ .
+
+Definition transl_opt_compluimm
+ (n: int64) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction :=
+ if Int64.eq n Int64.zero then
+ match c with
+ | Ceq => Pcbu BTdeqz r1 lbl ::g k
+ | Cne => Pcbu BTdnez r1 lbl ::g k
+ | _ => transl_compil c Unsigned r1 n lbl k
+ end
+ else
+ transl_compil c Unsigned r1 n lbl k
+ .
+
+Definition transl_comp_float32 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) :=
+ match ftest_for_cmp cmp with
+ | Normal ft => Pfcompw ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k
+ | Reversed ft => Pfcompw ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k
+ end.
+
+Definition transl_comp_notfloat32 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) :=
+ match notftest_for_cmp cmp with
+ | Normal ft => Pfcompw ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k
+ | Reversed ft => Pfcompw ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k
+ end.
+
+Definition transl_comp_float64 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) :=
+ match ftest_for_cmp cmp with
+ | Normal ft => Pfcompl ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k
+ | Reversed ft => Pfcompl ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k
+ end.
+
+Definition transl_comp_notfloat64 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) :=
+ match notftest_for_cmp cmp with
+ | Normal ft => Pfcompl ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k
+ | Reversed ft => Pfcompl ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k
+ end.
+
+Definition transl_cbranch
+ (cond: condition) (args: list mreg) (lbl: label) (k: code) : res (list instruction ) :=
+ match cond, args with
+ | Ccompuimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_opt_compuimm n c r1 lbl k)
+ | Ccomp c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp c Signed r1 r2 lbl k)
+ | Ccompu c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp c Unsigned r1 r2 lbl k)
+ | Ccompimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if Int.eq n Int.zero then
+ Pcb (btest_for_cmpswz c) r1 lbl ::g k
+ else
+ transl_compi c Signed r1 n lbl k
+ )
+ | Ccompluimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_opt_compluimm n c r1 lbl k)
+ | Ccompl c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_compl c Signed r1 r2 lbl k)
+ | Ccomplu c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_compl c Unsigned r1 r2 lbl k)
+ | Ccomplimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if Int64.eq n Int64.zero then
+ Pcb (btest_for_cmpsdz c) r1 lbl ::g k
+ else
+ transl_compil c Signed r1 n lbl k
+ )
+ | Ccompf c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp_float64 c r1 r2 lbl k)
+ | Cnotcompf c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp_notfloat64 c r1 r2 lbl k)
+ | Ccompfs c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp_float32 c r1 r2 lbl k)
+ | Cnotcompfs c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_comp_notfloat32 c r1 r2 lbl k)
+ | _, _ =>
+ Error(msg "Asmgenblock.transl_cbranch")
+ end.
+
+(** Translation of a condition operator. The generated code sets the
+ [rd] target register to 0 or 1 depending on the truth value of the
+ condition. *)
+
+Definition transl_cond_int32s (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ Pcompw (itest_for_cmp cmp Signed) rd r1 r2 ::i k.
+
+Definition transl_cond_int32u (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ Pcompw (itest_for_cmp cmp Unsigned) rd r1 r2 ::i k.
+
+Definition transl_cond_int64s (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ Pcompl (itest_for_cmp cmp Signed) rd r1 r2 ::i k.
+
+Definition transl_cond_int64u (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ Pcompl (itest_for_cmp cmp Unsigned) rd r1 r2 ::i k.
+
+Definition transl_condimm_int32s (cmp: comparison) (rd r1: ireg) (n: int) (k: bcode) :=
+ Pcompiw (itest_for_cmp cmp Signed) rd r1 n ::i k.
+
+Definition transl_condimm_int32u (cmp: comparison) (rd r1: ireg) (n: int) (k: bcode) :=
+ Pcompiw (itest_for_cmp cmp Unsigned) rd r1 n ::i k.
+
+Definition transl_condimm_int64s (cmp: comparison) (rd r1: ireg) (n: int64) (k: bcode) :=
+ Pcompil (itest_for_cmp cmp Signed) rd r1 n ::i k.
+
+Definition transl_condimm_int64u (cmp: comparison) (rd r1: ireg) (n: int64) (k: bcode) :=
+ Pcompil (itest_for_cmp cmp Unsigned) rd r1 n ::i k.
+
+
+Definition transl_cond_float32 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ match ftest_for_cmp cmp with
+ | Normal ft => Pfcompw ft rd r1 r2 ::i k
+ | Reversed ft => Pfcompw ft rd r2 r1 ::i k
+ end.
+
+Definition transl_cond_notfloat32 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ match notftest_for_cmp cmp with
+ | Normal ft => Pfcompw ft rd r1 r2 ::i k
+ | Reversed ft => Pfcompw ft rd r2 r1 ::i k
+ end.
+
+Definition transl_cond_float64 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ match ftest_for_cmp cmp with
+ | Normal ft => Pfcompl ft rd r1 r2 ::i k
+ | Reversed ft => Pfcompl ft rd r2 r1 ::i k
+ end.
+
+Definition transl_cond_notfloat64 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) :=
+ match notftest_for_cmp cmp with
+ | Normal ft => Pfcompl ft rd r1 r2 ::i k
+ | Reversed ft => Pfcompl ft rd r2 r1 ::i k
+ end.
+
+
+(* CoMPare Unsigned Words to Zero *)
+Definition btest_for_cmpuwz (c: comparison) :=
+ match c with
+ | Cne => OK BTwnez
+ | Ceq => OK BTweqz
+ | Clt => Error (msg "btest_for_compuwz: Clt") (* TODO reachable *)
+ | Cge => Error (msg "btest_for_compuwz: Cge")
+ | Cle => OK BTweqz
+ | Cgt => OK BTwnez
+ end.
+
+(* CoMPare Unsigned Words to Zero *)
+Definition btest_for_cmpudz (c: comparison) :=
+ match c with
+ | Cne => OK BTdnez
+ | Ceq => OK BTdeqz
+ | Clt => Error (msg "btest_for_compudz: Clt")
+ | Cge => Error (msg "btest_for_compudz: Cge")
+ | Cle => OK BTdeqz
+ | Cgt => OK BTdnez
+ end.
+
+Definition conditional_move (cond0 : condition0) (rc rd rs : ireg) :
+ res basic :=
+ if ireg_eq rd rs
+ then OK Pnop
+ else
+ (match cond0 with
+ | Ccomp0 cmp =>
+ OK (PArith (Pcmove (btest_for_cmpswz cmp) rd rc rs))
+ | Ccompu0 cmp =>
+ do bt <- btest_for_cmpuwz cmp;
+ OK (PArith (Pcmoveu bt rd rc rs))
+ | Ccompl0 cmp =>
+ OK (PArith (Pcmove (btest_for_cmpsdz cmp) rd rc rs))
+ | Ccomplu0 cmp =>
+ do bt <- btest_for_cmpudz cmp;
+ OK (PArith (Pcmoveu bt rd rc rs))
+ end).
+
+Definition conditional_move_imm32 (cond0 : condition0) (rc rd : ireg) (imm : int) : res basic :=
+ match cond0 with
+ | Ccomp0 cmp =>
+ OK (PArith (Pcmoveiw (btest_for_cmpswz cmp) rd rc imm))
+ | Ccompu0 cmp =>
+ do bt <- btest_for_cmpuwz cmp;
+ OK (PArith (Pcmoveuiw bt rd rc imm))
+ | Ccompl0 cmp =>
+ OK (PArith (Pcmoveiw (btest_for_cmpsdz cmp) rd rc imm))
+ | Ccomplu0 cmp =>
+ do bt <- btest_for_cmpudz cmp;
+ OK (PArith (Pcmoveuiw bt rd rc imm))
+ end.
+
+Definition conditional_move_imm64 (cond0 : condition0) (rc rd : ireg) (imm : int64) : res basic :=
+ match cond0 with
+ | Ccomp0 cmp =>
+ OK (PArith (Pcmoveil (btest_for_cmpswz cmp) rd rc imm))
+ | Ccompu0 cmp =>
+ do bt <- btest_for_cmpuwz cmp;
+ OK (PArith (Pcmoveuil bt rd rc imm))
+ | Ccompl0 cmp =>
+ OK (PArith (Pcmoveil (btest_for_cmpsdz cmp) rd rc imm))
+ | Ccomplu0 cmp =>
+ do bt <- btest_for_cmpudz cmp;
+ OK (PArith (Pcmoveuil bt rd rc imm))
+ end.
+
+Definition transl_cond_op
+ (cond: condition) (rd: ireg) (args: list mreg) (k: bcode) :=
+ match cond, args with
+ | Ccomp c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_int32s c rd r1 r2 k)
+ | Ccompu c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_int32u c rd r1 r2 k)
+ | Ccompimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_condimm_int32s c rd r1 n k)
+ | Ccompuimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_condimm_int32u c rd r1 n k)
+ | Ccompl c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_int64s c rd r1 r2 k)
+ | Ccomplu c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_int64u c rd r1 r2 k)
+ | Ccomplimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_condimm_int64s c rd r1 n k)
+ | Ccompluimm c n, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (transl_condimm_int64u c rd r1 n k)
+ | Ccompfs c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_float32 c rd r1 r2 k)
+ | Cnotcompfs c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_notfloat32 c rd r1 r2 k)
+ | Ccompf c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_float64 c rd r1 r2 k)
+ | Cnotcompf c, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (transl_cond_notfloat64 c rd r1 r2 k)
+ | _, _ =>
+ Error(msg "Asmblockgen.transl_cond_op")
+end.
+
+(** Translation of the arithmetic operation [r <- op(args)].
+ The corresponding instructions are prepended to [k]. *)
+
+Definition transl_op
+ (op: operation) (args: list mreg) (res: mreg) (k: bcode) :=
+ match op, args with
+ | Omove, a1 :: nil =>
+ match preg_of res, preg_of a1 with
+ | IR r, IR a => OK (Pmv r a ::i k)
+ | _ , _ => Error(msg "Asmgenblock.transl_op: Omove")
+ end
+ | Ointconst n, nil =>
+ do rd <- ireg_of res;
+ OK (loadimm32 rd n ::i k)
+ | Olongconst n, nil =>
+ do rd <- ireg_of res;
+ OK (loadimm64 rd n ::i k)
+ | Ofloatconst f, nil =>
+ do rd <- freg_of res;
+ OK (Pmakef rd f ::i k)
+ | Osingleconst f, nil =>
+ do rd <- freg_of res;
+ OK (Pmakefs rd f ::i k)
+ | Oaddrsymbol s ofs, nil =>
+ do rd <- ireg_of res;
+ OK (if Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)
+ then Ploadsymbol s Ptrofs.zero rd ::i addptrofs rd rd ofs ::i k
+ else Ploadsymbol s ofs rd ::i k)
+ | Oaddrstack n, nil =>
+ do rd <- ireg_of res;
+ OK (addptrofs rd SP n ::i k)
+
+ | Ocast8signed, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pslliw rd rs (Int.repr 24) ::i Psraiw rd rd (Int.repr 24) ::i k)
+ | Ocast16signed, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pslliw rd rs (Int.repr 16) ::i Psraiw rd rd (Int.repr 16) ::i k)
+ | Oadd, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Paddw rd rs1 rs2 ::i k)
+ | Oaddimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (addimm32 rd rs n ::i k)
+ | Oaddx shift, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Paddxw shift rd rs1 rs2 ::i k)
+ | Oaddximm shift n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Paddxiw shift rd rs n ::i k)
+ | Oaddxl shift, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Paddxl shift rd rs1 rs2 ::i k)
+ | Oaddxlimm shift n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Paddxil shift rd rs n ::i k)
+ | Oneg, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pnegw rd rs ::i k)
+ | Osub, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psubw rd rs1 rs2 ::i k)
+ | Orevsubimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Prevsubiw rd rs n ::i k)
+ | Orevsubx shift, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Prevsubxw shift rd rs1 rs2 ::i k)
+ | Orevsubximm shift n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Prevsubxiw shift rd rs n ::i k)
+ | Omul, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pmulw rd rs1 rs2 ::i k)
+ | Omulimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1;
+ OK (mulimm32 rd rs1 n ::i k)
+ | Omulhs, _ => Error(msg "Asmblockgen.transl_op: Omulhs") (* Normalement pas émis *)
+ | Omulhu, _ => Error(msg "Asmblockgen.transl_op: Omulhu") (* Normalement pas émis *)
+ | Oand, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pandw rd rs1 rs2 ::i k)
+ | Oandimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (andimm32 rd rs n ::i k)
+ | Onand, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnandw rd rs1 rs2 ::i k)
+ | Onandimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (nandimm32 rd rs n ::i k)
+ | Oor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Porw rd rs1 rs2 ::i k)
+ | Onor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnorw rd rs1 rs2 ::i k)
+ | Oorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (orimm32 rd rs n ::i k)
+ | Onorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (norimm32 rd rs n ::i k)
+ | Oxor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pxorw rd rs1 rs2 ::i k)
+ | Oxorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (xorimm32 rd rs n ::i k)
+ | Onxor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnxorw rd rs1 rs2 ::i k)
+ | Onxorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (nxorimm32 rd rs n ::i k)
+ | Onot, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (xorimm32 rd rs Int.mone ::i k)
+ | Oandn, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pandnw rd rs1 rs2 ::i k)
+ | Oandnimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pandniw rd rs n ::i k)
+ | Oorn, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pornw rd rs1 rs2 ::i k)
+ | Oornimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Porniw rd rs n ::i k)
+ | Oshl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psllw rd rs1 rs2 ::i k)
+ | Oshlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pslliw rd rs n ::i k)
+ | Oshr, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psraw rd rs1 rs2 ::i k)
+ | Oshrimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psraiw rd rs n ::i k)
+ | Oshru, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psrlw rd rs1 rs2 ::i k)
+ | Oshruimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psrliw rd rs n ::i k)
+ | Oshrximm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psrxiw rd rs n ::i k)
+ | Ororimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Proriw rd rs n ::i k)
+ | Omadd, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ do r3 <- ireg_of a3;
+ OK (Pmaddw r1 r2 r3 ::i k)
+ | Omaddimm n, a1 :: a2 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ OK (Pmaddiw r1 r2 n ::i k)
+ | Omsub, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ do r3 <- ireg_of a3;
+ OK (Pmsubw r1 r2 r3 ::i k)
+ (* [Omakelong], [Ohighlong] should not occur *)
+ | Olowlong, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pcvtl2w rd rs ::i k)
+ | Ocast32signed, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psxwd rd rs ::i k)
+ | Ocast32unsigned, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pzxwd rd rs ::i k)
+(* assertion (ireg_eq rd rs);
+ OK (Pcvtw2l rd ::i Psllil rd rd (Int.repr 32) ::i Psrlil rd rd (Int.repr 32) ::i k) *)
+ | Oaddl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Paddl rd rs1 rs2 ::i k)
+ | Oaddlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (addimm64 rd rs n ::i k)
+ | Onegl, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pnegl rd rs ::i k)
+ | Osubl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psubl rd rs1 rs2 ::i k)
+ | Orevsubxl shift, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Prevsubxl shift rd rs1 rs2 ::i k)
+ | Orevsublimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Prevsubil rd rs n ::i k)
+ | Orevsubxlimm shift n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Prevsubxil shift rd rs n ::i k)
+ | Omull, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pmull rd rs1 rs2 ::i k)
+ | Omullimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1;
+ OK (mulimm64 rd rs1 n ::i k)
+ | Omullhs, _ => Error (msg "Asmblockgen.transl_op: Omullhs") (* Normalement pas émis *)
+ | Omullhu, _ => Error (msg "Asmblockgen.transl_op: Omullhu") (* Normalement pas émis *)
+ | Odivl, _ => Error (msg "Asmblockgen.transl_op: Odivl") (* Géré par fonction externe *)
+ | Odivlu, _ => Error (msg "Asmblockgen.transl_op: Odivlu") (* Géré par fonction externe *)
+ | Omodl, _ => Error (msg "Asmblockgen.transl_op: Omodl") (* Géré par fonction externe *)
+ | Omodlu, _ => Error (msg "Asmblockgen.transl_op: Omodlu") (* Géré par fonction externe *)
+ | Onotl, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (xorimm64 rd rs Int64.mone ::i k)
+ | Oandl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pandl rd rs1 rs2 ::i k)
+ | Oandlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (andimm64 rd rs n ::i k)
+ | Onandl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnandl rd rs1 rs2 ::i k)
+ | Onandlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (nandimm64 rd rs n ::i k)
+ | Oorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Porl rd rs1 rs2 ::i k)
+ | Oorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (orimm64 rd rs n ::i k)
+ | Onorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnorl rd rs1 rs2 ::i k)
+ | Onorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (norimm64 rd rs n ::i k)
+ | Oxorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pxorl rd rs1 rs2 ::i k)
+ | Oxorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (xorimm64 rd rs n ::i k)
+ | Onxorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pnxorl rd rs1 rs2 ::i k)
+ | Onxorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (nxorimm64 rd rs n ::i k)
+ | Oandnl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pandnl rd rs1 rs2 ::i k)
+ | Oandnlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pandnil rd rs n ::i k)
+ | Oornl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pornl rd rs1 rs2 ::i k)
+ | Oornlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pornil rd rs n ::i k)
+ | Oshll, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pslll rd rs1 rs2 ::i k)
+ | Oshllimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psllil rd rs n ::i k)
+ | Oshrl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psral rd rs1 rs2 ::i k)
+ | Oshrlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psrail rd rs n ::i k)
+ | Oshrlu, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Psrll rd rs1 rs2 ::i k)
+ | Oshrluimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psrlil rd rs n ::i k)
+ | Oshrxlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Psrxil rd rs n ::i k)
+ | Omaddl, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ do r3 <- ireg_of a3;
+ OK (Pmaddl r1 r2 r3 ::i k)
+ | Omaddlimm n, a1 :: a2 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ OK (Pmaddil r1 r2 n ::i k)
+ | Omsubl, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do r1 <- ireg_of a1;
+ do r2 <- ireg_of a2;
+ do r3 <- ireg_of a3;
+ OK (Pmsubl r1 r2 r3 ::i k)
+ | Oabsf, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfabsd rd rs ::i k)
+ | Oabsfs, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfabsw rd rs ::i k)
+ | Oaddf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfaddd rd rs1 rs2 ::i k)
+ | Oaddfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfaddw rd rs1 rs2 ::i k)
+ | Osubf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfsbfd rd rs1 rs2 ::i k)
+ | Osubfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfsbfw rd rs1 rs2 ::i k)
+ | Omulf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmuld rd rs1 rs2 ::i k)
+ | Omulfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmulw rd rs1 rs2 ::i k)
+ | Ominf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmind rd rs1 rs2 ::i k)
+ | Ominfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfminw rd rs1 rs2 ::i k)
+ | Omaxf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmaxd rd rs1 rs2 ::i k)
+ | Omaxfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmaxw rd rs1 rs2 ::i k)
+ | Onegf, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfnegd rd rs ::i k)
+ | Onegfs, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfnegw rd rs ::i k)
+ | Oinvfs, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfinvw rd rs ::i k)
+
+ | Ofmaddf, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmaddfl rs1 rs2 rs3 ::i k)
+ | Ofmaddfs, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmaddfw rs1 rs2 rs3 ::i k)
+ | Ofmsubf, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmsubfl rs1 rs2 rs3 ::i k)
+ | Ofmsubfs, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmsubfw rs1 rs2 rs3 ::i k)
+
+ | Osingleofint, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pfloatwrnsz rd rs ::i k)
+ | Osingleofintu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pfloatuwrnsz rd rs ::i k)
+ | Ofloatoflong, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pfloatdrnsz rd rs ::i k)
+ | Ofloatoflongu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pfloatudrnsz rd rs ::i k)
+ | Ointofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixedwrzz rd rs ::i k)
+ | Ointuofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixeduwrzz rd rs ::i k)
+ | Olongoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixeddrzz rd rs ::i k)
+ | Ointoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixeddrzz_i32 rd rs ::i k)
+ | Ointuoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixedudrzz_i32 rd rs ::i k)
+ | Olonguoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfixedudrzz rd rs ::i k)
+
+ | Ofloatofsingle, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfwidenlwd rd rs ::i k)
+ | Osingleoffloat, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfnarrowdw rd rs ::i k)
+
+
+ | Odivf , _ => Error (msg "Asmblockgen.transl_op: Odivf")
+ | Odivfs, _ => Error (msg "Asmblockgen.transl_op: Odivfs")
+
+ (* We use the Splitlong instead for these four conversions *)
+ | Osingleoflong , _ => Error (msg "Asmblockgen.transl_op: Osingleoflong")
+ | Osingleoflongu , _ => Error (msg "Asmblockgen.transl_op: Osingleoflongu")
+ | Olongofsingle , _ => Error (msg "Asmblockgen.transl_op: Olongofsingle")
+ | Olonguofsingle , _ => Error (msg "Asmblockgen.transl_op: Olonguofsingle")
+
+
+ | Ocmp cmp, _ =>
+ do rd <- ireg_of res;
+ transl_cond_op cmp rd args k
+
+
+ | Oextfz stop start, a1 :: nil =>
+ assertion (ExtValues.is_bitfield stop start);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pextfz stop start rd rs ::i k)
+
+ | Oextfs stop start, a1 :: nil =>
+ assertion (ExtValues.is_bitfield stop start);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pextfs stop start rd rs ::i k)
+
+ | Oextfzl stop start, a1 :: nil =>
+ assertion (ExtValues.is_bitfieldl stop start);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pextfzl stop start rd rs ::i k)
+
+ | Oextfsl stop start, a1 :: nil =>
+ assertion (ExtValues.is_bitfieldl stop start);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pextfsl stop start rd rs ::i k)
+
+ | Oinsf stop start, a0 :: a1 :: nil =>
+ assertion (ExtValues.is_bitfield stop start);
+ assertion (mreg_eq a0 res);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pinsf stop start rd rs ::i k)
+
+ | Oinsfl stop start, a0 :: a1 :: nil =>
+ assertion (ExtValues.is_bitfieldl stop start);
+ assertion (mreg_eq a0 res);
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (Pinsfl stop start rd rs ::i k)
+
+ | Osel cond0 ty, aT :: aF :: aC :: nil =>
+ assertion (mreg_eq aT res);
+ do rT <- ireg_of aT;
+ do rF <- ireg_of aF;
+ do rC <- ireg_of aC;
+ do op <- conditional_move (negate_condition0 cond0) rC rT rF;
+ OK (op ::i k)
+
+ | Oselimm cond0 imm, aT :: aC :: nil =>
+ assertion (mreg_eq aT res);
+ do rT <- ireg_of aT;
+ do rC <- ireg_of aC;
+ do op <- conditional_move_imm32 (negate_condition0 cond0) rC rT imm;
+ OK (op ::i k)
+
+
+ | Osellimm cond0 imm, aT :: aC :: nil =>
+ assertion (mreg_eq aT res);
+ do rT <- ireg_of aT;
+ do rC <- ireg_of aC;
+ do op <- conditional_move_imm64 (negate_condition0 cond0) rC rT imm;
+ OK (op ::i k)
+
+ | _, _ =>
+ Error(msg "Asmgenblock.transl_op")
+ end.
+
+(** Accessing data in the stack frame. *)
+
+Definition indexed_memory_access
+ (mk_instr: ireg -> offset -> basic)
+ (base: ireg) (ofs: ptrofs) :=
+ match make_immed64 (Ptrofs.to_int64 ofs) with
+ | Imm64_single imm =>
+ mk_instr base (Ptrofs.of_int64 imm)
+end.
+
+Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
+ match ty, preg_of dst with
+ | Tint, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw rd) base ofs ::i k)
+ | Tlong, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld rd) base ofs ::i k)
+ | Tsingle, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfls rd) base ofs ::i k)
+ | Tfloat, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfld rd) base ofs ::i k)
+ | Tany32, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw_a rd) base ofs ::i k)
+ | Tany64, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld_a rd) base ofs ::i k)
+ | _, _ => Error (msg "Asmblockgen.loadind")
+ end.
+
+Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: bcode) :=
+ match ty, preg_of src with
+ | Tint, IR rd => OK (indexed_memory_access (PStoreRRO Psw rd) base ofs ::i k)
+ | Tlong, IR rd => OK (indexed_memory_access (PStoreRRO Psd rd) base ofs ::i k)
+ | Tsingle, IR rd => OK (indexed_memory_access (PStoreRRO Pfss rd) base ofs ::i k)
+ | Tfloat, IR rd => OK (indexed_memory_access (PStoreRRO Pfsd rd) base ofs ::i k)
+ | Tany32, IR rd => OK (indexed_memory_access (PStoreRRO Psw_a rd) base ofs ::i k)
+ | Tany64, IR rd => OK (indexed_memory_access (PStoreRRO Psd_a rd) base ofs ::i k)
+ | _, _ => Error (msg "Asmblockgen.storeind")
+ end.
+
+Definition loadind_ptr (base: ireg) (ofs: ptrofs) (dst: ireg) :=
+ indexed_memory_access (PLoadRRO TRAP Pld dst) base ofs.
+
+Definition storeind_ptr (src: ireg) (base: ireg) (ofs: ptrofs) :=
+ indexed_memory_access (PStoreRRO Psd src) base ofs.
+
+(** Translation of memory accesses: loads, and stores. *)
+
+Definition transl_memory_access2
+ (mk_instr: ireg -> ireg -> basic)
+ (addr: addressing) (args: list mreg) (k: bcode) : res bcode :=
+ match addr, args with
+ | Aindexed2, a1 :: a2 :: nil =>
+ do rs1 <- ireg_of a1;
+ do rs2 <- ireg_of a2;
+ OK (mk_instr rs1 rs2 ::i k)
+ | _, _ => Error (msg "Asmblockgen.transl_memory_access2")
+ end.
+
+Definition transl_memory_access2XS
+ (chunk: memory_chunk)
+ (mk_instr: ireg -> ireg -> basic)
+ scale (args: list mreg) (k: bcode) : res bcode :=
+ match args with
+ | (a1 :: a2 :: nil) =>
+ assertion (Z.eqb (zscale_of_chunk chunk) scale);
+ do rs1 <- ireg_of a1;
+ do rs2 <- ireg_of a2;
+ OK (mk_instr rs1 rs2 ::i k)
+ | _ => Error (msg "Asmblockgen.transl_memory_access2XS")
+ end.
+
+Definition transl_memory_access
+ (mk_instr: ireg -> offset -> basic)
+ (addr: addressing) (args: list mreg) (k: bcode) : res bcode :=
+ match addr, args with
+ | Aindexed ofs, a1 :: nil =>
+ do rs <- ireg_of a1;
+ OK (indexed_memory_access mk_instr rs ofs ::i k)
+ | Aglobal id ofs, nil =>
+ OK (Ploadsymbol id ofs RTMP ::i (mk_instr RTMP Ptrofs.zero ::i k))
+ | Ainstack ofs, nil =>
+ OK (indexed_memory_access mk_instr SP ofs ::i k)
+ | _, _ =>
+ Error(msg "Asmblockgen.transl_memory_access")
+ end.
+
+Definition chunk2load (chunk: memory_chunk) :=
+ match chunk with
+ | Mint8signed => Plb
+ | Mint8unsigned => Plbu
+ | Mint16signed => Plh
+ | Mint16unsigned => Plhu
+ | Mint32 => Plw
+ | Mint64 => Pld
+ | Mfloat32 => Pfls
+ | Mfloat64 => Pfld
+ | Many32 => Plw_a
+ | Many64 => Pld_a
+ end.
+
+Definition transl_load_rro (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of dst;
+ transl_memory_access (PLoadRRO trap (chunk2load chunk) r) addr args k.
+
+Definition transl_load_rrr (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of dst;
+ transl_memory_access2 (PLoadRRR trap (chunk2load chunk) r) addr args k.
+
+Definition transl_load_rrrXS (trap: trapping_mode) (chunk: memory_chunk) (scale : Z)
+ (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of dst;
+ transl_memory_access2XS chunk (PLoadRRRXS trap (chunk2load chunk) r) scale args k.
+
+Definition transl_load (trap : trapping_mode)
+ (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+ match addr with
+ | Aindexed2XS scale => transl_load_rrrXS trap chunk scale args dst k
+ | Aindexed2 => transl_load_rrr trap chunk addr args dst k
+ | _ => transl_load_rro trap chunk addr args dst k
+ end.
+
+Definition chunk2store (chunk: memory_chunk) :=
+ match chunk with
+ | Mint8signed | Mint8unsigned => Psb
+ | Mint16signed | Mint16unsigned => Psh
+ | Mint32 => Psw
+ | Mint64 => Psd
+ | Mfloat32 => Pfss
+ | Mfloat64 => Pfsd
+ | Many32 => Psw_a
+ | Many64 => Psd_a
+ end.
+
+Definition transl_store_rro (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of src;
+ transl_memory_access (PStoreRRO (chunk2store chunk) r) addr args k.
+
+Definition transl_store_rrr (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of src;
+ transl_memory_access2 (PStoreRRR (chunk2store chunk) r) addr args k.
+
+Definition transl_store_rrrxs (chunk: memory_chunk) (scale: Z)
+ (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+ do r <- ireg_of src;
+ transl_memory_access2XS chunk (PStoreRRRXS (chunk2store chunk) r) scale args k.
+
+Definition transl_store (chunk: memory_chunk) (addr: addressing)
+ (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+ match addr with
+ | Aindexed2 => transl_store_rrr chunk addr args src k
+ | Aindexed2XS scale => transl_store_rrrxs chunk scale args src k
+ | _ => transl_store_rro chunk addr args src k
+ end.
+
+(** Function epilogue *)
+
+Definition make_epilogue (f: Machblock.function) (k: code) :=
+ (loadind_ptr SP f.(fn_retaddr_ofs) GPRA)
+ ::g Pset RA GPRA ::g Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) ::g k.
+
+(** Translation of a Machblock instruction. *)
+
+Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
+ (ep: bool) (k: bcode) :=
+ match i with
+ | MBgetstack ofs ty dst =>
+ loadind SP ofs ty dst k
+ | MBsetstack src ofs ty =>
+ storeind src SP ofs ty k
+ | MBgetparam ofs ty dst =>
+ (* load via the frame pointer if it is valid *)
+ do c <- loadind FP ofs ty dst k;
+ OK (if ep then c
+ else (loadind_ptr SP f.(fn_link_ofs) FP) ::i c)
+ | MBop op args res =>
+ transl_op op args res k
+ | MBload trap chunk addr args dst =>
+ transl_load trap chunk addr args dst k
+ | MBstore chunk addr args src =>
+ transl_store chunk addr args src k
+ end.
+
+Definition transl_instr_control (f: Machblock.function) (oi: option Machblock.control_flow_inst)
+ : res code :=
+ match oi with
+ | None => OK nil
+ | Some i =>
+ match i with
+ | MBcall sig (inl r) =>
+ do r1 <- ireg_of r; OK ((Picall r1) ::g nil)
+ | MBcall sig (inr symb) =>
+ OK ((Pcall symb) ::g nil)
+ | MBtailcall sig (inr symb) =>
+ OK (make_epilogue f ((Pgoto symb) ::g nil))
+ | MBtailcall sig (inl r) =>
+ do r1 <- ireg_of r; OK (make_epilogue f ((Pigoto r1) ::g nil))
+ | MBbuiltin ef args res =>
+ OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) ::g nil)
+ | MBgoto lbl =>
+ OK (Pj_l lbl ::g nil)
+ | MBcond cond args lbl =>
+ transl_cbranch cond args lbl nil
+ | MBreturn =>
+ OK (make_epilogue f (Pret ::g nil))
+ | MBjumptable arg tbl =>
+ do r <- ireg_of arg;
+ OK (Pjumptable r tbl ::g nil)
+ end
+ end.
+
+(** Translation of a code sequence *)
+
+Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
+ match i with
+ | MBgetstack ofs ty dst => before && negb (mreg_eq dst MFP)
+ | MBsetstack src ofs ty => before
+ | MBgetparam ofs ty dst => negb (mreg_eq dst MFP)
+ | MBop op args res => before && negb (mreg_eq res MFP)
+ | MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst MFP)
+ | MBstore chunk addr args res => before
+ end.
+
+(** This is the naive definition, which is not tail-recursive unlike the other backends *)
+
+Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
+ match il with
+ | nil => OK nil
+ | i1 :: il' =>
+ do k <- transl_basic_code f il' (fp_is_parent it1p i1);
+ transl_instr_basic f i1 it1p k
+ end.
+
+(* (** This is an equivalent definition in continuation-passing style
+ that runs in constant stack space. *)
+
+Fixpoint transl_basic_rec (f: Machblock.function) (il: list Machblock.basic_inst)
+ (it1p: bool) (k: bcode -> res bcode) :=
+ match il with
+ | nil => k nil
+ | i1 :: il' =>
+ transl_basic_rec f il' (fp_is_parent it1p i1)
+ (fun c1 => do c2 <- transl_instr_basic f i1 it1p c1; k c2)
+ end.
+
+Definition transl_basic_code' (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
+ transl_basic_rec f il it1p (fun c => OK c). *)
+
+(** Translation of a whole function. Note that we must check
+ that the generated code contains less than [2^64] instructions,
+ otherwise the offset part of the [PC] code pointer could wrap
+ around, leading to incorrect executions. *)
+
+(* gen_bblocks can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *)
+Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instruction) :=
+ match (extract_ctl ctl) with
+ | None =>
+ match c with
+ | nil => {| header := hd; body := Pnop::nil; exit := None |} :: nil
+ | i::c => {| header := hd; body := ((i::c) ++ extract_basic ctl); exit := None |} :: nil
+ end
+ | Some (PExpand (Pbuiltin ef args res)) =>
+ match c with
+ | nil => {| header := hd; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil
+ | _ => {| header := hd; body := c; exit := None |}
+ :: {| header := nil; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil
+ end
+ | Some ex => {| header := hd; body := (c ++ extract_basic ctl); exit := Some ex |} :: nil
+ end
+.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ left. auto.
+ discriminate.
+Qed. Next Obligation.
+ apply wf_bblock_refl. constructor.
+ right. discriminate.
+ unfold builtin_alone. intros. pose (H ef args res). rewrite H0 in n. contradiction.
+Qed.
+
+Definition transl_block (f: Machblock.function) (fb: Machblock.bblock) (ep: bool) : res (list bblock) :=
+ do c <- transl_basic_code f fb.(Machblock.body) ep;
+ do ctl <- transl_instr_control f fb.(Machblock.exit);
+ OK (gen_bblocks fb.(Machblock.header) c ctl)
+.
+
+Fixpoint transl_blocks (f: Machblock.function) (lmb: list Machblock.bblock) (ep: bool) :=
+ match lmb with
+ | nil => OK nil
+ | mb :: lmb =>
+ do lb <- transl_block f mb (if Machblock.header mb then ep else false);
+ do lb' <- transl_blocks f lmb false;
+ OK (lb @@ lb')
+ end
+.
+
+Program Definition make_prologue (f: Machblock.function) lb :=
+ ({| header := nil; body := Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::i
+ Pget GPRA RA ::i
+ storeind_ptr GPRA SP f.(fn_retaddr_ofs) ::i nil;
+ exit := None |} :: lb).
+
+Definition transl_function (f: Machblock.function) :=
+ do lb <- transl_blocks f f.(Machblock.fn_code) true;
+ OK (mkfunction f.(Machblock.fn_sig)
+ (make_prologue f lb)).
+
+Definition transf_function (f: Machblock.function) : res Asmvliw.function :=
+ do tf <- transl_function f;
+ if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+ then Error (msg "code size exceeded")
+ else OK tf.
+
+Definition transf_fundef (f: Machblock.fundef) : res Asmvliw.fundef :=
+ transf_partial_fundef transf_function f.
+
+Definition transf_program (p: Machblock.program) : res Asmvliw.program :=
+ transform_partial_program transf_fundef p.
diff --git a/kvx/Asmblockgenproof.v b/kvx/Asmblockgenproof.v
new file mode 100644
index 00000000..6e3029d8
--- /dev/null
+++ b/kvx/Asmblockgenproof.v
@@ -0,0 +1,1808 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Correctness proof for kvx/Asmblock generation: main proof. *)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops.
+Require Import Axioms.
+Require Import Lia.
+
+Module MB := Machblock.
+Module AB := Asmvliw.
+
+Definition match_prog (p: Machblock.program) (tp: Asmvliw.program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Machblock.program.
+Variable tprog: Asmvliw.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+ forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+Lemma functions_translated:
+ forall b f,
+ Genv.find_funct_ptr ge b = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSF).
+
+Lemma functions_transl:
+ forall fb f tf,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transf_function f = OK tf ->
+ Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+ intros. exploit functions_translated; eauto. intros [tf' [A B]].
+ monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+Lemma transf_function_no_overflow:
+ forall f tf,
+ transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+ lia.
+Qed.
+
+Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *)
+
+Lemma gen_bblocks_label:
+ forall hd bdy ex tbb tc,
+ gen_bblocks hd bdy ex = tbb::tc ->
+ header tbb = hd.
+Proof.
+ intros until tc. intros GENB. unfold gen_bblocks in GENB.
+ destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
+ all: inv GENB; simpl; auto.
+Qed.
+
+Lemma gen_bblocks_label2:
+ forall hd bdy ex tbb1 tbb2,
+ gen_bblocks hd bdy ex = tbb1::tbb2::nil ->
+ header tbb2 = nil.
+Proof.
+ intros until tbb2. intros GENB. unfold gen_bblocks in GENB.
+ destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy.
+ all: inv GENB; simpl; auto.
+Qed.
+
+Remark in_dec_transl:
+ forall lbl hd,
+ (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
+Proof.
+ intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
+Qed.
+
+Lemma transl_is_label:
+ forall lbl bb tbb f ep tc,
+ transl_block f bb ep = OK (tbb::tc) ->
+ is_label lbl tbb = MB.is_label lbl bb.
+Proof.
+ intros until tc. intros TLB.
+ destruct tbb as [thd tbdy tex]; simpl in *.
+ monadInv TLB.
+ unfold is_label. simpl.
+ apply gen_bblocks_label in H0. simpl in H0. subst.
+ rewrite in_dec_transl. auto.
+Qed.
+
+Lemma transl_is_label_false2:
+ forall lbl bb f ep tbb1 tbb2,
+ transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+ is_label lbl tbb2 = false.
+Proof.
+ intros until tbb2. intros TLB.
+ destruct tbb2 as [thd tbdy tex]; simpl in *.
+ monadInv TLB. apply gen_bblocks_label2 in H0. simpl in H0. subst.
+ apply is_label_correct_false. simpl. auto.
+Qed.
+
+Lemma transl_is_label2:
+ forall f bb ep tbb1 tbb2 lbl,
+ transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+ is_label lbl tbb1 = MB.is_label lbl bb
+ /\ is_label lbl tbb2 = false.
+Proof.
+ intros. split. eapply transl_is_label; eauto. eapply transl_is_label_false2; eauto.
+Qed.
+
+Lemma transl_block_nonil:
+ forall f c ep tc,
+ transl_block f c ep = OK tc ->
+ tc <> nil.
+Proof.
+ intros. monadInv H. unfold gen_bblocks.
+ destruct (extract_ctl x0); try destruct c0; try destruct x; try destruct i.
+ all: discriminate.
+Qed.
+
+Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc,
+ ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc).
+Proof.
+ intros. intro. monadInv H.
+ unfold gen_bblocks in H0.
+ destruct (extract_ctl x0); try destruct x; try destruct c; try destruct i.
+ all: discriminate.
+Qed.
+
+Lemma find_label_transl_false:
+ forall x f lbl bb ep x',
+ transl_block f bb ep = OK x ->
+ MB.is_label lbl bb = false ->
+ find_label lbl (x++x') = find_label lbl x'.
+Proof.
+ intros until x'. intros TLB MBis; simpl; auto.
+ destruct x as [|x0 x1]; simpl; auto.
+ destruct x1 as [|x1 x2]; simpl; auto.
+ - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
+ - destruct x2 as [|x2 x3]; simpl; auto.
+ + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
+ erewrite transl_is_label_false2; eauto.
+ + apply transl_block_limit in TLB. destruct TLB.
+Qed.
+
+Lemma transl_blocks_label:
+ forall lbl f c tc ep,
+ transl_blocks f c ep = OK tc ->
+ match MB.find_label lbl c with
+ | None => find_label lbl tc = None
+ | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
+ end.
+Proof.
+ induction c; simpl; intros.
+ inv H. auto.
+ monadInv H.
+ destruct (MB.is_label lbl a) eqn:MBis.
+ - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. }
+ simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis.
+ rewrite ABis.
+ eexists. eexists. split; eauto. simpl transl_blocks.
+ assert (MB.header a <> nil).
+ { apply MB.is_label_correct_true in MBis.
+ destruct (MB.header a). contradiction. discriminate. }
+ destruct (MB.header a); try contradiction.
+ rewrite EQ. simpl. rewrite EQ1. simpl. auto.
+ - apply IHc in EQ1. destruct (MB.find_label lbl c).
+ + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto.
+ erewrite find_label_transl_false; eauto.
+ + erewrite find_label_transl_false; eauto.
+Qed.
+
+Lemma find_label_nil:
+ forall bb lbl c,
+ header bb = nil ->
+ find_label lbl (bb::c) = find_label lbl c.
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *. subst.
+ assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+ { erewrite <- is_label_correct_false. simpl. auto. }
+ rewrite H. auto.
+Qed.
+
+Theorem transl_find_label:
+ forall lbl f tf,
+ transf_function f = OK tf ->
+ match MB.find_label lbl f.(MB.fn_code) with
+ | None => find_label lbl tf.(fn_blocks) = None
+ | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
+ end.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
+ monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
+ eapply transl_blocks_label; eauto.
+Qed.
+
+End TRANSL_LABEL.
+
+(** A valid branch in a piece of Machblock code translates to a valid ``go to''
+ transition in the generated Asmblock code. *)
+
+Lemma find_label_goto_label:
+ forall f tf lbl rs m c' b ofs,
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ transf_function f = OK tf ->
+ rs PC = Vptr b ofs ->
+ MB.find_label lbl f.(MB.fn_code) = Some c' ->
+ exists tc', exists rs',
+ goto_label tf lbl rs m = Next rs' m
+ /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+ intros (tc & A & B).
+ exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+ intros [pos' [P [Q R]]].
+ exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+ split. unfold goto_label. unfold par_goto_label. rewrite P. rewrite H1. auto.
+ split. rewrite Pregmap.gss. constructor; auto.
+ rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+ auto. lia.
+ generalize (transf_function_no_overflow _ _ H0). lia.
+ intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+
+Lemma return_address_exists:
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros. eapply Asmblockgenproof0.return_address_exists; eauto.
+
+- intros. monadInv H0.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
+ exists x; exists true; split; auto.
+ repeat constructor.
+- exact transf_function_no_overflow.
+Qed.
+
+(** * Proof of semantic preservation *)
+
+(** Semantic preservation is proved using a complex simulation diagram
+ of the following form.
+<<
+ MB.step
+ ---------------------------------------->
+ header body exit
+ st1 -----> st2 -----> st3 ------------------> st4
+ | | | |
+ | (A) | (B) | (C) |
+ match_codestate | | | |
+ | header | body1 | body2 | match_states
+ cs1 -----> cs2 -----> cs3 ------> cs4 |
+ | / \ exit |
+ match_asmstate | --------------- --->--- |
+ | / match_asmstate \ |
+ st'1 ---------------------------------------> st'2
+ AB.step *
+>>
+ The invariant between each MB.step/AB.step is the [match_states] predicate below.
+ However, we also need to introduce an intermediary state [Codestate] which allows
+ us to reason on a finer grain, executing header, body and exit separately.
+
+ This [Codestate] consists in a state like [Asmblock.State], except that the
+ code is directly stored in the state, much like [Machblock.State]. It also features
+ additional useful elements to keep track of while executing a bblock.
+*)
+
+Remark preg_of_not_FP: forall r, negb (mreg_eq r MFP) = true -> IR FP <> preg_of r.
+Proof.
+ intros. change (IR FP) with (preg_of MFP). red; intros.
+ exploit preg_of_injective; eauto. intros; subst r; discriminate.
+Qed.
+
+Inductive match_states: Machblock.state -> Asmvliw.state -> Prop :=
+ | match_states_intro:
+ forall s fb sp c ep ms m m' rs f tf tc
+ (STACKS: match_stack ge s)
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (MEXT: Mem.extends m m')
+ (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
+ (AG: agree ms sp rs)
+ (DXP: ep = true -> rs#FP = parent_sp s),
+ match_states (Machblock.State s fb sp c ms m)
+ (Asmvliw.State rs m')
+ | match_states_call:
+ forall s fb ms m m' rs
+ (STACKS: match_stack ge s)
+ (MEXT: Mem.extends m m')
+ (AG: agree ms (parent_sp s) rs)
+ (ATPC: rs PC = Vptr fb Ptrofs.zero)
+ (ATLR: rs RA = parent_ra s),
+ match_states (Machblock.Callstate s fb ms m)
+ (Asmvliw.State rs m')
+ | match_states_return:
+ forall s ms m m' rs
+ (STACKS: match_stack ge s)
+ (MEXT: Mem.extends m m')
+ (AG: agree ms (parent_sp s) rs)
+ (ATPC: rs PC = parent_ra s),
+ match_states (Machblock.Returnstate s ms m)
+ (Asmvliw.State rs m').
+
+Record codestate :=
+ Codestate { pstate: state; (**r projection to Asmblock.state *)
+ pheader: list label;
+ pbody1: list basic; (**r list of basic instructions coming from the translation of the Machblock body *)
+ pbody2: list basic; (**r list of basic instructions coming from the translation of the Machblock exit *)
+ pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *)
+ ep: bool; (**r reflects the [ep] variable used in the translation *)
+ rem: list AB.bblock; (**r remaining bblocks to execute *)
+ cur: bblock (**r current bblock to execute - to keep track of its size when incrementing PC *)
+ }.
+
+(* The part that deals with Machblock <-> Codestate agreement
+ * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *)
+Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
+ | match_codestate_intro:
+ forall s sp ms m rs0 m0 f tc ep c bb tbb tbc tbi
+ (STACKS: match_stack ge s)
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (MEXT: Mem.extends m m0)
+ (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc)
+ (TIC: transl_instr_control f (MB.exit bb) = OK tbi)
+ (TBLS: transl_blocks f c false = OK tc)
+ (AG: agree ms sp rs0)
+ (DXP: (if MB.header bb then ep else false) = true -> rs0#FP = parent_sp s)
+ ,
+ match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
+ {| pstate := (Asmvliw.State rs0 m0);
+ pheader := (MB.header bb);
+ pbody1 := tbc;
+ pbody2 := extract_basic tbi;
+ pctl := extract_ctl tbi;
+ ep := ep;
+ rem := tc;
+ cur := tbb
+ |}
+.
+
+(* The part ensuring that the code in Codestate actually resides at [rs PC] *)
+Inductive match_asmstate fb: codestate -> Asmvliw.state -> Prop :=
+ | match_asmstate_some:
+ forall rs f tf tc m tbb ofs ep tbdy tex lhd
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (TRANSF: transf_function f = OK tf)
+ (PCeq: rs PC = Vptr fb ofs)
+ (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
+ ,
+ match_asmstate fb
+ {| pstate := (Asmvliw.State rs m);
+ pheader := lhd;
+ pbody1 := tbdy;
+ pbody2 := extract_basic tex;
+ pctl := extract_ctl tex;
+ ep := ep;
+ rem := tc;
+ cur := tbb |}
+ (Asmvliw.State rs m)
+.
+
+(* Useful for dealing with the many cases in some proofs *)
+Ltac exploreInst :=
+ repeat match goal with
+ | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
+ | [ H : OK _ = OK _ |- _ ] => monadInv H
+ | [ |- context[if ?b then _ else _] ] => destruct b
+ | [ |- context[match ?m with | _ => _ end] ] => destruct m
+ | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
+ | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+ | [ H : Error _ = OK _ |- _ ] => inversion H
+ end.
+
+(** Some translation properties *)
+
+Lemma transl_blocks_nonil:
+ forall f bb c tc ep,
+ transl_blocks f (bb::c) ep = OK tc ->
+ exists tbb tc', tc = tbb :: tc'.
+Proof.
+ intros until ep0. intros TLBS. monadInv TLBS. monadInv EQ. unfold gen_bblocks.
+ destruct (extract_ctl x2).
+ - destruct c0; destruct i; simpl; eauto. destruct x1; simpl; eauto.
+ - destruct x1; simpl; eauto.
+Qed.
+
+Lemma no_builtin_preserved:
+ forall f ex x2,
+ (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
+ transl_instr_control f ex = OK x2 ->
+ (exists i, extract_ctl x2 = Some (PCtlFlow i))
+ \/ extract_ctl x2 = None.
+Proof.
+ intros until x2. intros Hbuiltin TIC.
+ destruct ex.
+ - destruct c.
+ (* MBcall *)
+ + simpl in TIC. exploreInst; simpl; eauto.
+ (* MBtailcall *)
+ + simpl in TIC. exploreInst; simpl; eauto.
+ (* MBbuiltin *)
+ + assert (H: Some (MBbuiltin e l b) <> Some (MBbuiltin e l b)).
+ apply Hbuiltin. contradict H; auto.
+ (* MBgoto *)
+ + simpl in TIC. exploreInst; simpl; eauto.
+ (* MBcond *)
+ + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; simpl; eauto.
+ * unfold transl_opt_compuimm. exploreInst; simpl; eauto.
+ * unfold transl_opt_compluimm. exploreInst; simpl; eauto.
+ * unfold transl_comp_float64. exploreInst; simpl; eauto.
+ * unfold transl_comp_notfloat64. exploreInst; simpl; eauto.
+ * unfold transl_comp_float32. exploreInst; simpl; eauto.
+ * unfold transl_comp_notfloat32. exploreInst; simpl; eauto.
+ (* MBjumptable *)
+ + simpl in TIC. exploreInst; simpl; eauto.
+ (* MBreturn *)
+ + simpl in TIC. monadInv TIC. simpl. eauto.
+ - monadInv TIC. simpl; auto.
+Qed.
+
+Lemma transl_blocks_distrib:
+ forall c f bb tbb tc ep,
+ transl_blocks f (bb::c) ep = OK (tbb::tc)
+ -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res))
+ -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil)
+ /\ transl_blocks f c false = OK tc.
+Proof.
+ intros until ep0. intros TLBS Hbuiltin.
+ destruct bb as [hd bdy ex].
+ monadInv TLBS. monadInv EQ.
+ exploit no_builtin_preserved; eauto. intros Hectl. destruct Hectl.
+ - destruct H as [i Hectl].
+ unfold gen_bblocks in H0. rewrite Hectl in H0. inv H0.
+ simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+ unfold gen_bblocks. rewrite Hectl. auto.
+ - unfold gen_bblocks in H0. rewrite H in H0.
+ destruct x1 as [|bi x1].
+ + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+ unfold gen_bblocks. rewrite H. auto.
+ + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl.
+ unfold gen_bblocks. rewrite H. auto.
+Qed.
+
+Lemma gen_bblocks_nobuiltin:
+ forall thd tbdy tex tbb,
+ (tbdy <> nil \/ extract_ctl tex <> None) ->
+ (forall ef args res, extract_ctl tex <> Some (PExpand (Pbuiltin ef args res))) ->
+ gen_bblocks thd tbdy tex = tbb :: nil ->
+ header tbb = thd
+ /\ body tbb = tbdy ++ extract_basic tex
+ /\ exit tbb = extract_ctl tex.
+Proof.
+ intros until tbb. intros Hnonil Hnobuiltin GENB. unfold gen_bblocks in GENB.
+ destruct (extract_ctl tex) eqn:ECTL.
+ - destruct c.
+ + destruct i; try (inv GENB; simpl; auto; fail).
+ assert False. eapply Hnobuiltin. eauto. destruct H.
+ + inv GENB. simpl. auto.
+ - inversion Hnonil.
+ + destruct tbdy as [|bi tbdy]; try (contradict H; simpl; auto; fail). inv GENB. auto.
+ + contradict H; simpl; auto.
+Qed.
+
+Lemma transl_instr_basic_nonil:
+ forall k f bi ep x,
+ transl_instr_basic f bi ep k = OK x ->
+ x <> nil.
+Proof.
+ intros until x. intros TIB.
+ destruct bi.
+ - simpl in TIB. unfold loadind in TIB. exploreInst; try discriminate.
+ - simpl in TIB. unfold storeind in TIB. exploreInst; try discriminate.
+ - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate.
+ - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate.
+ unfold transl_cond_op in EQ0. exploreInst; try discriminate.
+ unfold transl_cond_float64. exploreInst; try discriminate.
+ unfold transl_cond_notfloat64. exploreInst; try discriminate.
+ unfold transl_cond_float32. exploreInst; try discriminate.
+ unfold transl_cond_notfloat32. exploreInst; try discriminate.
+ - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate.
+ all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate.
+ - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate.
+ all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate.
+Qed.
+
+Lemma transl_basic_code_nonil:
+ forall bdy f x ep,
+ bdy <> nil ->
+ transl_basic_code f bdy ep = OK x ->
+ x <> nil.
+Proof.
+ induction bdy as [|bi bdy].
+ intros. contradict H0; auto.
+ destruct bdy as [|bi2 bdy].
+ - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto.
+ - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
+ monadInv TBC.
+ assert (x0 <> nil).
+ eapply IHbdy; eauto. subst bdy'. discriminate.
+ eapply transl_instr_basic_nonil; eauto.
+Qed.
+
+Lemma transl_instr_control_nonil:
+ forall ex f x,
+ ex <> None ->
+ transl_instr_control f ex = OK x ->
+ extract_ctl x <> None.
+Proof.
+ intros ex f x Hnonil TIC.
+ destruct ex as [ex|].
+ - clear Hnonil. destruct ex.
+ all: try (simpl in TIC; exploreInst; discriminate).
+ + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; try discriminate.
+ * unfold transl_opt_compuimm. exploreInst; try discriminate.
+ * unfold transl_opt_compluimm. exploreInst; try discriminate.
+ * unfold transl_comp_float64. exploreInst; try discriminate.
+ * unfold transl_comp_notfloat64. exploreInst; try discriminate.
+ * unfold transl_comp_float32. exploreInst; try discriminate.
+ * unfold transl_comp_notfloat32. exploreInst; try discriminate.
+ - contradict Hnonil; auto.
+Qed.
+
+Lemma transl_instr_control_nobuiltin:
+ forall f ex x,
+ (forall ef args res, ex <> Some (MBbuiltin ef args res)) ->
+ transl_instr_control f ex = OK x ->
+ (forall ef args res, extract_ctl x <> Some (PExpand (Pbuiltin ef args res))).
+Proof.
+ intros until x. intros Hnobuiltin TIC. intros until res.
+ unfold transl_instr_control in TIC. exploreInst.
+ all: try discriminate.
+ - assert False. eapply Hnobuiltin; eauto. destruct H.
+ - unfold transl_cbranch in TIC. exploreInst.
+ all: try discriminate.
+ * unfold transl_opt_compuimm. exploreInst. all: try discriminate.
+ * unfold transl_opt_compluimm. exploreInst. all: try discriminate.
+ * unfold transl_comp_float64. exploreInst; try discriminate.
+ * unfold transl_comp_notfloat64. exploreInst; try discriminate.
+ * unfold transl_comp_float32. exploreInst; try discriminate.
+ * unfold transl_comp_notfloat32. exploreInst; try discriminate.
+Qed.
+
+(* Proving that one can decompose a [match_state] relation into a [match_codestate]
+ and a [match_asmstate], along with some helpful properties tying both relations together *)
+
+Theorem match_state_codestate:
+ forall mbs abs s fb sp bb c ms m,
+ (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+ (MB.body bb <> nil \/ MB.exit bb <> None) ->
+ mbs = (Machblock.State s fb sp (bb::c) ms m) ->
+ match_states mbs abs ->
+ exists cs fb f tbb tc ep,
+ match_codestate fb mbs cs /\ match_asmstate fb cs abs
+ /\ Genv.find_funct_ptr ge fb = Some (Internal f)
+ /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
+ /\ body tbb = pbody1 cs ++ pbody2 cs
+ /\ exit tbb = pctl cs
+ /\ cur cs = tbb /\ rem cs = tc
+ /\ pstate cs = abs.
+Proof.
+ intros until m. intros Hnobuiltin Hnotempty Hmbs MS. subst. inv MS.
+ inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
+ exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2.
+ monadInv TLB. exploit gen_bblocks_nobuiltin; eauto.
+ { inversion Hnotempty.
+ - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
+ left. eapply transl_basic_code_nonil; eauto.
+ - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
+ right. eapply transl_instr_control_nonil; eauto. }
+ eapply transl_instr_control_nobuiltin; eauto.
+ intros (Hth & Htbdy & Htexit).
+ exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x; pbody2 := extract_basic x0;
+ pctl := extract_ctl x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0.
+ repeat split. 1-2: econstructor; eauto.
+ { destruct (MB.header bb). eauto. discriminate. } eauto.
+ unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl.
+ rewrite TLBS. simpl. rewrite H2.
+ all: simpl; auto.
+Qed.
+
+Definition mb_remove_body (bb: MB.bblock) :=
+ {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
+
+Lemma exec_straight_pnil:
+ forall c rs1 m1 rs2 m2,
+ exec_straight tge c rs1 m1 (Pnop ::g nil) rs2 m2 ->
+ exec_straight tge c rs1 m1 nil rs2 m2.
+Proof.
+ intros. eapply exec_straight_trans. eapply H. econstructor; eauto.
+Qed.
+
+Lemma transl_block_nobuiltin:
+ forall f bb ep tbb,
+ (MB.body bb <> nil \/ MB.exit bb <> None) ->
+ (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+ transl_block f bb ep = OK (tbb :: nil) ->
+ exists c c',
+ transl_basic_code f (MB.body bb) ep = OK c
+ /\ transl_instr_control f (MB.exit bb) = OK c'
+ /\ body tbb = c ++ extract_basic c'
+ /\ exit tbb = extract_ctl c'.
+Proof.
+ intros until tbb. intros Hnonil Hnobuiltin TLB. monadInv TLB. destruct Hnonil.
+ - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
+ left. eapply transl_basic_code_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
+ - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto.
+ right. eapply transl_instr_control_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto.
+Qed.
+
+Lemma nextblock_preserves:
+ forall rs rs' bb r,
+ rs' = nextblock bb rs ->
+ data_preg r = true ->
+ rs r = rs' r.
+Proof.
+ intros. destruct r; try discriminate.
+ subst. Simpl.
+Qed.
+
+Remark cons3_app {A: Type}:
+ forall a b c (l: list A),
+ a :: b :: c :: l = (a :: b :: c :: nil) ++ l.
+Proof.
+ intros. simpl. auto.
+Qed.
+
+Lemma exec_straight_opt_body2:
+ forall c rs1 m1 c' rs2 m2,
+ exec_straight_opt tge c rs1 m1 c' rs2 m2 ->
+ exists body,
+ exec_body tge body rs1 m1 = Next rs2 m2
+ /\ (basics_to_code body) ++g c' = c.
+Proof.
+ intros until m2. intros EXES.
+ inv EXES.
+ - exists nil. split; auto.
+ - eapply exec_straight_body2. auto.
+Qed.
+
+Lemma extract_basics_to_code:
+ forall lb c,
+ extract_basic (basics_to_code lb ++ c) = lb ++ extract_basic c.
+Proof.
+ induction lb; intros; simpl; congruence.
+Qed.
+
+Lemma extract_ctl_basics_to_code:
+ forall lb c,
+ extract_ctl (basics_to_code lb ++ c) = extract_ctl c.
+Proof.
+ induction lb; intros; simpl; congruence.
+Qed.
+
+(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are
+ unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by
+ yourself the steps *)
+Theorem step_simu_control:
+ forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2,
+ MB.body bb' = nil ->
+ (forall ef args res, MB.exit bb' <> Some (MBbuiltin ef args res)) ->
+ Genv.find_funct_ptr tge fb = Some (Internal fn) ->
+ pstate cs2 = (Asmvliw.State rs2 m2) ->
+ pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
+ cur cs2 = tbb ->
+ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
+ match_asmstate fb cs2 (Asmvliw.State rs1 m1) ->
+ exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' ->
+ (exists rs3 m3 rs4 m4,
+ exec_body tge tbdy2 rs2 m2 = Next rs3 m3
+ /\ exec_control_rel tge fn tex tbb rs3 m3 rs4 m4
+ /\ match_states S'' (State rs4 m4)).
+Proof.
+ intros until cs2. intros Hbody Hbuiltin FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
+ inv ESTEP.
+ - inv MCS. inv MAS. simpl in *.
+ inv Hpstate.
+ destruct ctl.
+ + (* MBcall *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ destruct s1 as [rf|fid]; simpl in H7.
+ * (* Indirect call *)
+ monadInv H1.
+ assert (ms' rf = Vptr f' Ptrofs.zero).
+ { unfold find_function_ptr in H14. destruct (ms' rf); try discriminate.
+ revert H14; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+ assert (rs2 x = Vptr f' Ptrofs.zero).
+ { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+ remember (Ptrofs.add _ _) as ofs'.
+ assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+ { econstructor; eauto. }
+ assert (f1 = f) by congruence. subst f1.
+ exploit return_address_offset_correct; eauto. intros; subst ra.
+
+ repeat eexists.
+ rewrite H6. econstructor; eauto.
+ rewrite H7. econstructor; eauto.
+ econstructor; eauto.
+ econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
+ simpl. Simpl. rewrite PCeq. rewrite Heqofs'. simpl. auto.
+
+ * (* Direct call *)
+ monadInv H1.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+ remember (Ptrofs.add _ _) as ofs'.
+ assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+ econstructor; eauto.
+ assert (f1 = f) by congruence. subst f1.
+ exploit return_address_offset_correct; eauto. intros; subst ra.
+ repeat eexists.
+ rewrite H6. econstructor; eauto.
+ rewrite H7. econstructor; eauto.
+ econstructor; eauto.
+ econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl.
+ Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H14. rewrite H14. auto.
+ Simpl. simpl. subst. Simpl. simpl. unfold Val.offset_ptr. rewrite PCeq. auto.
+ + (* MBtailcall *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ exploit Mem.loadv_extends. eauto. eexact H15. auto. simpl. intros [parent' [A B]].
+ destruct s1 as [rf|fid]; simpl in H13.
+ * monadInv H1.
+ assert (ms' rf = Vptr f' Ptrofs.zero).
+ { destruct (ms' rf); try discriminate. revert H13. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+ assert (rs2 x = Vptr f' Ptrofs.zero).
+ { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+
+ assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ { simpl. eauto. }
+ intros EXEB.
+ repeat eexists.
+ rewrite H6. simpl extract_basic. eauto.
+ rewrite H7. simpl extract_ctl. simpl. reflexivity.
+ econstructor; eauto.
+ { apply agree_set_other.
+ - econstructor; auto with asmgen.
+ + apply V.
+ + intro r. destruct r; apply V; auto.
+ - eauto with asmgen. }
+ assert (IR x <> IR GPR12 /\ IR x <> IR GPR32 /\ IR x <> IR GPR16).
+ { clear - EQ. destruct x; repeat split; try discriminate.
+ all: unfold ireg_of in EQ; destruct rf; try discriminate. }
+ Simpl. inv H1. inv H3. rewrite Z; auto; try discriminate.
+ * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H14.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ simpl. eauto.
+ intros EXEB.
+ repeat eexists.
+ rewrite H6. simpl extract_basic. eauto.
+ rewrite H7. simpl extract_ctl. simpl. reflexivity.
+ econstructor; eauto.
+ { apply agree_set_other.
+ - econstructor; auto with asmgen.
+ + apply V.
+ + intro r. destruct r; apply V; auto.
+ - eauto with asmgen. }
+ { Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H13. auto. }
+ + (* MBbuiltin (contradiction) *)
+ assert (MB.exit bb' <> Some (MBbuiltin e l b)) by (apply Hbuiltin).
+ rewrite <- H in H1. contradict H1; auto.
+ + (* MBgoto *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H11.
+ remember (nextblock tbb rs2) as rs2'.
+ exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+ exploit find_label_goto_label.
+ eauto. eauto.
+ instantiate (2 := rs2').
+ { subst. unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. eauto. }
+ eauto.
+ intros (tc' & rs' & GOTO & AT2 & INV).
+
+ eexists. eexists. repeat eexists. repeat split.
+ rewrite H6. simpl extract_basic. simpl. eauto.
+ rewrite H7. simpl extract_ctl. simpl. rewrite <- Heqrs2'. eauto.
+ econstructor; eauto.
+ rewrite Heqrs2' in INV. unfold nextblock, incrPC in INV.
+ eapply agree_exten; eauto with asmgen.
+ assert (forall r : preg, r <> PC -> rs' r = rs2 r).
+ { intros. destruct r.
+ - destruct g. all: rewrite INV; Simpl; auto.
+ - rewrite INV; Simpl; auto.
+ - contradiction. }
+ eauto with asmgen.
+ congruence.
+ + (* MBcond *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ * (* MBcond true *)
+ assert (f0 = f) by congruence. subst f0.
+ exploit eval_condition_lessdef.
+ eapply preg_vals; eauto.
+ all: eauto.
+ intros EC.
+ exploit transl_cbranch_correct_true; eauto. intros (rs' & jmp & A & B & C).
+ exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+ assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
+ rewrite PCeq' in PCeq.
+ assert (f1 = f) by congruence. subst f1.
+ exploit find_label_goto_label.
+ 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs')). rewrite nextblock_pc.
+ unfold Val.offset_ptr. rewrite PCeq. eauto.
+ intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
+ exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+
+ repeat eexists.
+ rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
+ rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
+
+ econstructor; eauto.
+ eapply agree_exten with rs2; eauto with asmgen.
+ { intros. destruct r; try destruct g; try discriminate.
+ all: rewrite Hrs3; try discriminate; unfold nextblock, incrPC; Simpl. }
+ intros. discriminate.
+
+ * (* MBcond false *)
+ assert (f0 = f) by congruence. subst f0.
+ exploit eval_condition_lessdef.
+ eapply preg_vals; eauto.
+ all: eauto.
+ intros EC.
+
+ exploit transl_cbranch_correct_false; eauto. intros (rs' & jmp & A & B & C).
+ exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+ assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. }
+ rewrite PCeq' in PCeq.
+ exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+
+ assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+
+ repeat eexists.
+ rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto.
+ rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto.
+
+ econstructor; eauto.
+ unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto.
+ eapply agree_exten with rs2; eauto with asmgen.
+ { intros. destruct r; try destruct g; try discriminate.
+ all: rewrite <- C; try discriminate; unfold nextblock, incrPC; Simpl. }
+ intros. discriminate.
+ + (* MBjumptable *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ monadInv H1.
+ generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
+ assert (f1 = f) by congruence. subst f1.
+ exploit find_label_goto_label. 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs2) # GPR62 <- Vundef # GPR63 <- Vundef).
+ unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity.
+ exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn.
+
+ intros [tc' [rs' [A [B C]]]].
+ exploit ireg_val; eauto. rewrite H13. intros LD; inv LD.
+
+ repeat eexists.
+ rewrite H6. simpl extract_basic. simpl. eauto.
+ rewrite H7. simpl extract_ctl. simpl. Simpl. rewrite <- H1. unfold Mach.label in H14. unfold label. rewrite H14. eapply A.
+ econstructor; eauto.
+ eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
+ { assert (destroyed_by_jumptable = R62 :: R63 :: nil) by auto. rewrite H2 in H0. simpl in H0. inv H0.
+ destruct (preg_eq r' GPR63). subst. contradiction.
+ destruct (preg_eq r' GPR62). subst. contradiction.
+ destruct r'; Simpl. }
+ discriminate.
+ + (* MBreturn *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ simpl. eauto.
+ intros EXEB.
+ assert (f1 = f) by congruence. subst f1.
+
+ repeat eexists.
+ rewrite H6. simpl extract_basic. eauto.
+ rewrite H7. simpl extract_ctl. simpl. reflexivity.
+ econstructor; eauto.
+ unfold nextblock, incrPC. repeat apply agree_set_other; auto with asmgen.
+
+ - inv MCS. inv MAS. simpl in *. subst. inv Hpstate.
+ destruct bb' as [hd' bdy' ex']; simpl in *. subst.
+ monadInv TBC. monadInv TIC. simpl in *. rewrite H5. rewrite H6.
+ simpl. repeat eexists.
+ econstructor. 4: instantiate (3 := false). all:eauto.
+ unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ assert (f = f0) by congruence. subst f0. econstructor; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
+ eapply agree_exten; eauto. intros. Simpl.
+ discriminate.
+Qed.
+
+Definition mb_remove_first (bb: MB.bblock) :=
+ {| MB.header := MB.header bb; MB.body := tail (MB.body bb); MB.exit := MB.exit bb |}.
+
+Lemma exec_straight_body:
+ forall c c' lc rs1 m1 rs2 m2,
+ exec_straight tge c rs1 m1 c' rs2 m2 ->
+ code_to_basics c = Some lc ->
+ exists l ll,
+ c = l ++ c'
+ /\ code_to_basics l = Some ll
+ /\ exec_body tge ll rs1 m1 = Next rs2 m2.
+Proof.
+ induction c; try (intros; inv H; fail).
+ intros until m2. intros EXES CTB. inv EXES.
+ - exists (i1 ::g nil),(i1::nil). repeat (split; simpl; auto). rewrite H6. auto.
+ - inv CTB. destruct (code_to_basics c); try discriminate. inv H0.
+ eapply IHc in H7; eauto. destruct H7 as (l' & ll & Hc & CTB & EXECB). subst.
+ exists (i ::g l'),(i::ll). repeat (split; simpl; auto).
+ rewrite CTB. auto.
+ rewrite H1. auto.
+Qed.
+
+Lemma basics_to_code_app:
+ forall c l x ll,
+ basics_to_code c = l ++ basics_to_code x ->
+ code_to_basics l = Some ll ->
+ c = ll ++ x.
+Proof.
+ intros. apply (f_equal code_to_basics) in H.
+ erewrite code_to_basics_dist in H; eauto. 2: eapply code_to_basics_id.
+ rewrite code_to_basics_id in H. inv H. auto.
+Qed.
+
+Lemma basics_to_code_app2:
+ forall i c l x ll,
+ (PBasic i) :: basics_to_code c = l ++ basics_to_code x ->
+ code_to_basics l = Some ll ->
+ i :: c = ll ++ x.
+Proof.
+ intros until ll. intros.
+ exploit basics_to_code_app. instantiate (3 := (i::c)). simpl.
+ all: eauto.
+Qed.
+
+(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *)
+Theorem step_simu_basic:
+ forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
+ MB.header bb = nil -> MB.body bb = bi::(bdy) -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+ bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
+ basic_step ge s fb sp ms m bi ms' m' ->
+ pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists rs2 m2 l cs2 tbdy',
+ cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := fp_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |}
+ /\ tbdy = l ++ tbdy'
+ /\ exec_body tge l rs1 m1 = Next rs2 m2
+ /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
+Proof.
+ intros until bdy. intros Hheader Hbody Hnobuiltin (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
+ simpl in *. inv Hpstate.
+ rewrite Hbody in TBC. monadInv TBC.
+ inv BSTEP.
+
+ - (* MBgetstack *)
+ simpl in EQ0.
+ unfold Mach.load_stack in H.
+ exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+ rewrite (sp_val _ _ _ AG) in A.
+ exploit loadind_correct; eauto with asmgen.
+ intros (rs2 & EXECS & Hrs'1 & Hrs'2).
+ eapply exec_straight_body in EXECS.
+ 2: eapply code_to_basics_id; eauto.
+ destruct EXECS as (l & Hlbi & BTC & CTB & EXECB).
+ exists rs2, m1, Hlbi.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+ subst. simpl in Hheadereq.
+
+ eapply match_codestate_intro; eauto.
+ { simpl. simpl in EQ. rewrite <- Hheadereq in EQ. assumption. }
+ eapply agree_set_mreg; eauto with asmgen.
+ intro Hep. simpl in Hep.
+ destruct (andb_prop _ _ Hep). clear Hep.
+ rewrite <- Hheadereq in DXP. subst. rewrite <- DXP. rewrite Hrs'2. reflexivity.
+ discriminate. apply preg_of_not_FP; assumption. reflexivity.
+
+ - (* MBsetstack *)
+ simpl in EQ0.
+ unfold Mach.store_stack in H.
+ assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
+ exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+ exploit storeind_correct; eauto with asmgen.
+ rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs', m2', ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+
+ eapply agree_undef_regs; eauto with asmgen.
+ simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
+ - (* MBgetparam *)
+ simpl in EQ0.
+
+ assert (f0 = f) by congruence; subst f0.
+ unfold Mach.load_stack in *.
+ exploit Mem.loadv_extends. eauto. eexact H0. auto.
+ intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+ exploit Mem.loadv_extends. eauto. eexact H1. auto.
+ intros [v' [C D]].
+
+ monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
+ destruct ep0 eqn:EPeq.
+
+ (* RTMP contains parent *)
+ + exploit loadind_correct. eexact EQ1.
+ instantiate (2 := rs1). rewrite DXP; eauto.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & BTC & CTB & EXECB).
+ exists rs2, m1, ll. eexists.
+ eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ { eapply basics_to_code_app; eauto. }
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+
+ eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+ simpl; intros. rewrite R; auto with asmgen.
+ apply preg_of_not_FP; auto.
+
+ (* RTMP does not contain parent *)
+ + rewrite chunk_of_Tptr in A.
+ exploit loadind_ptr_correct. eexact A. intros [rs2 [P [Q R]]].
+ exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
+ intros [rs3 [S [T U]]].
+
+ exploit exec_straight_trans.
+ eapply P.
+ eapply S.
+ intros EXES.
+
+ eapply exec_straight_body in EXES.
+ 2: simpl. 2: erewrite code_to_basics_id; eauto.
+ destruct EXES as (l & ll & BTC & CTB & EXECB).
+ exists rs3, m1, ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app2; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+ eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+ instantiate (1 := rs2#FP <- (rs3#FP)). intros.
+ rewrite Pregmap.gso; auto with asmgen.
+ congruence.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen.
+ simpl; intros. rewrite U; auto with asmgen.
+ apply preg_of_not_FP; auto.
+ - (* MBop *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (eval_operation tge sp op (map ms args) m' = Some v).
+ rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
+ exploit eval_operation_lessdef.
+ eapply preg_vals; eauto.
+ 2: eexact H0.
+ all: eauto.
+ intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs2, m1, ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+ apply agree_set_undef_mreg with rs1; auto.
+ apply Val.lessdef_trans with v'; auto.
+ simpl; intros. destruct (andb_prop _ _ H1); clear H1.
+ rewrite R; auto. apply preg_of_not_FP; auto.
+Local Transparent destroyed_by_op.
+ destruct op; simpl; auto; congruence.
+ - (* MBload *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (eval_addressing tge sp addr (map ms args) = Some a).
+ rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+ exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+ intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit Mem.loadv_extends; eauto. intros [v' [C D]].
+ exploit transl_load_correct; eauto.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs2, m1, ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ.
+ rewrite <- Hheadereq in EQ. assumption.
+ eapply agree_set_mreg; eauto with asmgen.
+ intro Hep. simpl in Hep.
+ destruct (andb_prop _ _ Hep). clear Hep.
+ subst. rewrite <- DXP. rewrite R; try discriminate. reflexivity.
+ apply preg_of_not_FP; assumption. reflexivity.
+
+ - (* notrap1 cannot happen *)
+ simpl in EQ0. unfold transl_load in EQ0.
+ destruct addr; simpl in H.
+ all: unfold transl_load_rrrXS, transl_load_rrr, transl_load_rro in EQ0;
+ monadInv EQ0; unfold transl_memory_access2XS, transl_memory_access2, transl_memory_access in EQ2;
+ destruct args as [|h0 t0]; try discriminate;
+ destruct t0 as [|h1 t1]; try discriminate;
+ destruct t1 as [|h2 t2]; try discriminate.
+
+ - (* MBload notrap2 TODO *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (eval_addressing tge sp addr (map ms args) = Some a).
+ rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+ exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+ intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+
+ destruct (Mem.loadv chunk m1 a') as [v' | ] eqn:Hload.
+ {
+ exploit transl_load_correct; eauto.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs2, m1, ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *.
+ simpl in EQ. assumption.
+
+ eapply agree_set_undef_mreg; eauto. intros; auto with asmgen.
+
+ simpl. intro.
+ rewrite R; try congruence.
+ apply DXP.
+ destruct ep0; simpl in *; congruence.
+ apply preg_of_not_FP.
+ destruct ep0; simpl in *; congruence.
+ }
+ {
+ exploit transl_load_correct_notrap2; eauto.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs2, m1, ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+(* assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ rewrite <- Hheadereq. *) subst.
+ eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *. simpl in EQ. assumption.
+
+ eapply agree_set_undef_mreg; eauto. intros; auto with asmgen.
+ simpl. intro.
+ rewrite R; try congruence.
+ apply DXP.
+ destruct ep0; simpl in *; congruence.
+ apply preg_of_not_FP.
+ destruct ep0; simpl in *; congruence.
+ }
+ - (* MBstore *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (eval_addressing tge sp addr (map ms args) = Some a).
+ rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+ exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+ intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
+ exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+ exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+
+ eapply exec_straight_body in P.
+ 2: eapply code_to_basics_id; eauto.
+ destruct P as (l & ll & TBC & CTB & EXECB).
+ exists rs2, m2', ll.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ eapply basics_to_code_app; eauto.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ.
+ rewrite <- Hheadereq in EQ. assumption.
+ eapply agree_undef_regs; eauto with asmgen.
+ intro Hep. simpl in Hep.
+ subst. rewrite <- DXP. rewrite Q; try discriminate. reflexivity. reflexivity.
+Qed.
+
+Lemma exec_body_trans:
+ forall l l' rs0 m0 rs1 m1 rs2 m2,
+ exec_body tge l rs0 m0 = Next rs1 m1 ->
+ exec_body tge l' rs1 m1 = Next rs2 m2 ->
+ exec_body tge (l++l') rs0 m0 = Next rs2 m2.
+Proof.
+ induction l.
+ - simpl. congruence.
+ - intros until m2. intros EXEB1 EXEB2.
+ inv EXEB1. destruct (exec_basic_instr _) eqn:EBI; try discriminate.
+ simpl. rewrite EBI. eapply IHl; eauto.
+Qed.
+
+Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.
+
+Program Definition remove_header tbb := {| header := nil; body := body tbb; exit := exit tbb |}.
+Next Obligation.
+ destruct tbb. simpl. auto.
+Qed.
+
+Inductive exec_header: codestate -> codestate -> Prop :=
+ | exec_header_cons: forall cs1,
+ exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1;
+ cur := cur cs1 |}.
+
+(* Theorem (A) in the diagram, the easiest of all *)
+Theorem step_simu_header:
+ forall bb s fb sp c ms m rs1 m1 cs1,
+ pstate cs1 = (State rs1 m1) ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists cs1',
+ exec_header cs1 cs1'
+ /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
+Proof.
+ intros until cs1. intros Hpstate MCS.
+ eexists. split; eauto.
+ econstructor; eauto.
+ inv MCS. simpl in *. inv Hpstate.
+ econstructor; eauto.
+Qed.
+
+Lemma step_matchasm_header:
+ forall fb cs1 cs1' s1,
+ match_asmstate fb cs1 s1 ->
+ exec_header cs1 cs1' ->
+ match_asmstate fb cs1' s1.
+Proof.
+ intros until s1. intros MAS EXH.
+ inv MAS. inv EXH.
+ simpl. econstructor; eauto.
+Qed.
+
+(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *)
+Theorem step_simu_body:
+ forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
+ MB.header bb = nil ->
+ (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+ body_step ge s fb sp (MB.body bb) ms m ms' m' ->
+ pstate cs1 = (State rs1 m1) ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists rs2 m2 cs2 ep,
+ cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |}
+ /\ exec_body tge (pbody1 cs1) rs1 m1 = Next rs2 m2
+ /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
+Proof.
+ intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
+ - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS.
+ inv BSTEP.
+ exists rs1, m1, cs1, (ep cs1).
+ inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
+ econstructor; eauto.
+ - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS. inv BSTEP.
+ rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
+ exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
+ intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
+ simpl in *.
+ exploit IHbdy. auto. 2: eapply H6. 3: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
+ intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
+ exists rs3, m3, cs3, ep.
+ repeat (split; simpl; auto). subst. simpl in *. auto.
+ rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
+Qed.
+
+Lemma exec_body_control:
+ forall b rs1 m1 rs2 m2 rs3 m3 fn,
+ exec_body tge (body b) rs1 m1 = Next rs2 m2 ->
+ exec_control_rel tge fn (exit b) b rs2 m2 rs3 m3 ->
+ exec_bblock_rel tge fn b rs1 m1 rs3 m3.
+Proof.
+ intros until fn. intros EXEB EXECTL.
+ econstructor; eauto. inv EXECTL.
+ unfold exec_bblock. rewrite EXEB. auto.
+Qed.
+
+Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.
+
+Lemma mbsize_eqz:
+ forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
+ remember (length _) as a. remember (length_opt _) as b.
+ assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H.
+ inv H0. inv H1. destruct bdy; destruct ex; auto.
+ all: try discriminate.
+Qed.
+
+Lemma mbsize_neqz:
+ forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *.
+ destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
+ contradict H. unfold mbsize. simpl. auto.
+Qed.
+
+(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *)
+(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *)
+Lemma step_simulation_bblock':
+ forall sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
+ bb' = mb_remove_header bb ->
+ body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
+ bb'' = mb_remove_body bb' ->
+ (forall ef args res, MB.exit bb'' <> Some (MBbuiltin ef args res)) ->
+ exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') E0 s'' ->
+ match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+ exists S2 : state, plus step tge S1 E0 S2 /\ match_states s'' S2.
+Proof.
+ intros until S1. intros Hbb' BSTEP Hbb'' Hbuiltin ESTEP MS.
+ destruct (mbsize bb) eqn:SIZE.
+ - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
+ destruct bb as [hd bdy ex]; simpl in *; subst.
+ inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
+ monadInv H2. simpl in *. inv ESTEP. inv BSTEP.
+ eexists. split. eapply plus_one.
+ exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
+ assert (x = tf) by congruence. subst x.
+ eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
+ unfold exec_bblock. simpl. eauto.
+ econstructor. eauto. eauto. eauto.
+ unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite <- H.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ econstructor; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
+ eapply agree_exten; eauto. intros. Simpl.
+ intros. discriminate.
+ - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
+ intros Hnotempty.
+
+ (* initial setting *)
+ exploit match_state_codestate.
+ 2: eapply Hnotempty.
+ all: eauto.
+ intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
+
+ (* step_simu_header part *)
+ assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
+ destruct H as (rs1 & m1 & Hpstate2). subst.
+ assert (f = fb). { inv MCS. auto. } subst fb.
+ exploit step_simu_header.
+ 2: eapply MCS.
+ all: eauto.
+ intros (cs1' & EXEH & MCS2).
+
+ (* step_simu_body part *)
+ assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
+ exploit step_simu_body.
+ 3: eapply BSTEP.
+ 4: eapply MCS2.
+ all: eauto. rewrite Hpstate'. eauto.
+ intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').
+
+ (* step_simu_control part *)
+ assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
+ { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
+ destruct H as (tf & FIND').
+ assert (exists tex, pbody2 cs1 = extract_basic tex /\ pctl cs1 = extract_ctl tex).
+ { inv MAS. simpl in *. eauto. }
+ destruct H as (tex & Hpbody2 & Hpctl).
+ inv EXEH. simpl in *.
+ subst. exploit step_simu_control.
+ 9: eapply MCS'. all: simpl.
+ 10: eapply ESTEP.
+ all: simpl; eauto.
+ rewrite Hpbody2. rewrite Hpctl.
+ { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto.
+ erewrite exec_body_pc; eauto. }
+ intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').
+
+ (* bringing the pieces together *)
+ exploit exec_body_trans.
+ eapply EXEB.
+ eauto.
+ intros EXEB2.
+ exploit exec_body_control; eauto.
+ rewrite <- Hpbody2 in EXEB2. rewrite <- Hbody in EXEB2. eauto.
+ rewrite Hexit. rewrite Hpctl. eauto.
+ intros EXECB. inv EXECB.
+ exists (State rs4 m4).
+ split; auto. eapply plus_one. rewrite Hpstate2.
+ assert (exists ofs, rs1 PC = Vptr f ofs).
+ { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
+ destruct H0 as (ofs & Hrs1pc).
+ eapply exec_step_internal; eauto.
+
+ (* proving the initial find_bblock *)
+ rewrite Hpstate2 in MAS. inv MAS. simpl in *.
+ assert (f1 = f0) by congruence. subst f0.
+ rewrite PCeq in Hrs1pc. inv Hrs1pc.
+ exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
+ inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ.
+ eapply find_bblock_tail; eauto.
+Qed.
+
+Theorem step_simulation_bblock:
+ forall sf f sp bb ms m ms' m' S2 c,
+ body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
+ (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) ->
+ exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') E0 S2 ->
+ forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+ exists S2' : state, plus step tge S1' E0 S2' /\ match_states S2 S2'.
+Proof.
+ intros until c. intros BSTEP Hbuiltin ESTEP S1' MS.
+ eapply step_simulation_bblock'; eauto.
+ all: destruct bb as [hd bdy ex]; simpl in *; eauto.
+ inv ESTEP.
+ - econstructor. inv H; try (econstructor; eauto; fail).
+ - econstructor.
+Qed.
+
+(** Dealing now with the builtin case *)
+
+Definition split (c: MB.code) :=
+ match c with
+ | nil => nil
+ | bb::c => {| MB.header := MB.header bb; MB.body := MB.body bb; MB.exit := None |}
+ :: {| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |} :: c
+ end.
+
+Lemma cons_ok_eq3 {A: Type} :
+ forall (x:A) y z x' y' z',
+ x = x' -> y = y' -> z = z' ->
+ OK (x::y::z) = OK (x'::y'::z').
+Proof.
+ intros. subst. auto.
+Qed.
+
+Lemma transl_blocks_split_builtin:
+ forall bb c ep f ef args res,
+ MB.exit bb = Some (MBbuiltin ef args res) -> MB.body bb <> nil ->
+ transl_blocks f (split (bb::c)) ep = transl_blocks f (bb::c) ep.
+Proof.
+ intros until res. intros Hexit Hbody. simpl split.
+ unfold transl_blocks. fold transl_blocks. unfold transl_block.
+ simpl. remember (transl_basic_code _ _ _) as tbc. remember (transl_instr_control _ _) as tbi.
+ remember (transl_blocks _ _ _) as tlbs.
+ destruct tbc; destruct tbi; destruct tlbs.
+ all: try simpl; auto.
+ - simpl. rewrite Hexit in Heqtbi. simpl in Heqtbi. monadInv Heqtbi. simpl.
+ unfold gen_bblocks. simpl. destruct l.
+ + exploit transl_basic_code_nonil; eauto. intro. destruct H.
+ + simpl. rewrite app_nil_r. apply cons_ok_eq3. all: try eapply bblock_equality. all: simpl; auto.
+Qed.
+
+Lemma transl_code_at_pc_split_builtin:
+ forall rs f f0 bb c ep tf tc ef args res,
+ MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+ transl_code_at_pc ge (rs PC) f f0 (bb :: c) ep tf tc ->
+ transl_code_at_pc ge (rs PC) f f0 (split (bb :: c)) ep tf tc.
+Proof.
+ intros until res. intros Hbody Hexit AT. inv AT.
+ econstructor; eauto. erewrite transl_blocks_split_builtin; eauto.
+Qed.
+
+Theorem match_states_split_builtin:
+ forall sf f sp bb c rs m ef args res S1,
+ MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+ match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+ match_states (Machblock.State sf f sp (split (bb::c)) rs m) S1.
+Proof.
+ intros until S1. intros Hbody Hexit MS.
+ inv MS.
+ econstructor; eauto.
+ eapply transl_code_at_pc_split_builtin; eauto.
+Qed.
+
+Theorem step_simulation_builtin:
+ forall ef args res bb sf f sp c ms m t S2,
+ MB.body bb = nil -> MB.exit bb = Some (MBbuiltin ef args res) ->
+ exit_step return_address_offset ge (MB.exit bb) (Machblock.State sf f sp (bb :: c) ms m) t S2 ->
+ forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+ exists S2' : state, plus step tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+ intros until S2. intros Hbody Hexit ESTEP S1' MS.
+ inv MS. inv AT. monadInv H2. monadInv EQ.
+ rewrite Hbody in EQ0. monadInv EQ0.
+ rewrite Hexit in EQ. monadInv EQ.
+ rewrite Hexit in ESTEP. inv ESTEP. inv H4.
+
+ exploit functions_transl; eauto. intro FN.
+ generalize (transf_function_no_overflow _ _ H1); intro NOOV.
+ exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+ exploit external_call_mem_extends; eauto.
+ intros [vres' [m2' [A [B [C D]]]]].
+ econstructor; split. apply plus_one.
+ simpl in H3.
+ eapply exec_step_builtin. eauto. eauto.
+ eapply find_bblock_tail; eauto.
+ simpl. eauto.
+ erewrite <- sp_val by eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ eauto.
+ econstructor; eauto.
+ instantiate (2 := tf); instantiate (1 := x0).
+ unfold nextblock, incrPC. rewrite Pregmap.gss.
+ rewrite set_res_other. rewrite undef_regs_other_2. rewrite Pregmap.gso by congruence.
+ rewrite <- H. simpl. econstructor; eauto.
+ eapply code_tail_next_int; eauto.
+ rewrite preg_notin_charact. intros. auto with asmgen.
+ auto with asmgen.
+ apply agree_nextblock. eapply agree_set_res; auto.
+ eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
+ apply Pregmap.gso; auto with asmgen.
+ congruence.
+Qed.
+
+Lemma next_sep:
+ forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+ congruence.
+Qed.
+
+(* Measure to prove finite stuttering, see the other backends *)
+Definition measure (s: MB.state) : nat :=
+ match s with
+ | MB.State _ _ _ _ _ _ => 0%nat
+ | MB.Callstate _ _ _ _ => 0%nat
+ | MB.Returnstate _ _ _ => 1%nat
+ end.
+
+(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs
+ for the internal and external function cases *)
+Theorem step_simulation:
+ forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
+ forall S1' (MS: match_states S1 S1'),
+ (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
+ \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+ induction 1; intros.
+
+- (* bblock *)
+ left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
+ all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock;
+ try (rewrite MBE; try discriminate); eauto).
+ + (* MBbuiltin *)
+ destruct (MB.body bb) eqn:MBB.
+ * inv H. eapply step_simulation_builtin; eauto. rewrite MBE. eauto.
+ * eapply match_states_split_builtin in MS; eauto.
+ 2: rewrite MBB; discriminate.
+ simpl split in MS.
+ rewrite <- MBB in H.
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb1.
+ assert (MB.body bb = MB.body bb1). { subst. simpl. auto. }
+ rewrite H1 in H. subst.
+ exploit step_simulation_bblock. eapply H.
+ discriminate.
+ simpl. constructor.
+ eauto.
+ intros (S2' & PLUS1 & MS').
+ rewrite MBE in MS'.
+ assert (exit_step return_address_offset ge (Some (MBbuiltin e l b))
+ (MB.State sf f sp ({| MB.header := nil; MB.body := nil; MB.exit := Some (MBbuiltin e l b) |}::c)
+ rs' m') t s').
+ { inv H0. inv H3. econstructor. econstructor; eauto. }
+ exploit step_simulation_builtin.
+ 4: eapply MS'.
+ all: simpl; eauto.
+ intros (S3' & PLUS'' & MS'').
+ exists S3'. split; eauto.
+ eapply plus_trans. eapply PLUS1. eapply PLUS''. eauto.
+ + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto.
+
+- (* internal function *)
+ inv MS.
+ exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+ generalize EQ; intros EQ'. monadInv EQ'.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0.
+ unfold Mach.store_stack in *.
+ exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+ intros [m1' [C D]].
+ exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+ intros [m2' [F G]].
+ simpl chunk_of_type in F.
+ exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+ intros [m3' [P Q]].
+ (* Execution of function prologue *)
+ monadInv EQ0.
+ set (tfbody := make_prologue f x0) in *.
+ set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *.
+ set (rs2 := rs0#FP <- (parent_sp s) #SP <- sp #RTMP <- Vundef).
+ exploit (Pget_correct tge GPRA RA nil rs2 m2'); auto.
+ intros (rs' & U' & V').
+ exploit (storeind_ptr_correct tge SP (fn_retaddr_ofs f) GPRA nil rs' m2').
+ { rewrite chunk_of_Tptr in P.
+ assert (rs' GPRA = rs0 RA). { apply V'. }
+ assert (rs' SP = rs2 SP). { apply V'; discriminate. }
+ rewrite H4. rewrite H3.
+ rewrite ATLR.
+ change (rs2 SP) with sp. eexact P. }
+ intros (rs3 & U & V).
+ assert (EXEC_PROLOGUE: exists rs3',
+ exec_straight_blocks tge tf
+ tf.(fn_blocks) rs0 m'
+ x0 rs3' m3'
+ /\ forall r, r <> PC -> rs3' r = rs3 r).
+ { eexists. split.
+ - change (fn_blocks tf) with tfbody; unfold tfbody.
+ econstructor; eauto. unfold exec_bblock. simpl exec_body.
+ rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. simpl in F. rewrite F.
+ Simpl. unfold parexec_store_offset. rewrite Ptrofs.of_int64_to_int64. unfold eval_offset.
+ rewrite chunk_of_Tptr in P. Simpl. rewrite ATLR. unfold Mptr in P. assert (Archi.ptr64 = true) by auto. 2: auto. rewrite H3 in P. rewrite P.
+ simpl. apply next_sep; eauto. reflexivity.
+ - intros. destruct V' as (V'' & V'). destruct r.
+ + Simpl.
+ destruct (gpreg_eq g0 GPR16). { subst. Simpl. rewrite V; try discriminate. rewrite V''. subst rs2. Simpl. }
+ destruct (gpreg_eq g0 GPR32). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+ destruct (gpreg_eq g0 GPR12). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+ destruct (gpreg_eq g0 GPR17). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. }
+ Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. { destruct g0; try discriminate. contradiction. }
+ + Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl.
+ + contradiction.
+ } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
+ exploit exec_straight_steps_2; eauto using functions_transl.
+ simpl fn_blocks. simpl fn_blocks in g. lia. constructor.
+ intros (ofs' & X & Y).
+ left; exists (State rs3' m3'); split.
+ eapply exec_straight_steps_1; eauto.
+ simpl fn_blocks. simpl fn_blocks in g. lia.
+ constructor.
+ econstructor; eauto.
+ rewrite X; econstructor; eauto.
+ apply agree_exten with rs2; eauto with asmgen.
+ unfold rs2.
+ apply agree_set_other; auto with asmgen.
+ apply agree_change_sp with (parent_sp s).
+ apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry.
+ simpl; intros; Simpl.
+ unfold sp; congruence.
+
+ intros.
+ assert (r <> RTMP). { contradict H3; rewrite H3; unfold data_preg; auto. }
+ rewrite Heqrs3'. Simpl. rewrite V. inversion V'. rewrite H6. auto.
+ assert (r <> GPRA). { contradict H3; rewrite H3; unfold data_preg; auto. }
+ assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
+ contradict H3; rewrite H3; unfold data_preg; auto.
+ contradict H3; rewrite H3; unfold data_preg; auto.
+ contradict H3; rewrite H3; unfold data_preg; auto.
+ contradict H3; rewrite H3; unfold data_preg; auto.
+ intros. rewrite Heqrs3'. rewrite V by auto with asmgen.
+ assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. }
+ rewrite H4 by auto with asmgen. reflexivity. discriminate.
+
+- (* external function *)
+ inv MS.
+ exploit functions_translated; eauto.
+ intros [tf [A B]]. simpl in B. inv B.
+ exploit extcall_arguments_match; eauto.
+ intros [args' [C D]].
+ exploit external_call_mem_extends; eauto.
+ intros [res' [m2' [P [Q [R S]]]]].
+ left; econstructor; split.
+ apply plus_one. eapply exec_step_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ econstructor; eauto.
+ unfold loc_external_result.
+ apply agree_set_other; auto.
+ apply agree_set_pair; auto.
+ apply agree_undef_caller_save_regs; auto.
+
+- (* return *)
+ inv MS.
+ inv STACKS. simpl in *.
+ right. split. lia. split. auto.
+ rewrite <- ATPC in H5.
+ econstructor; eauto. congruence.
+Qed.
+
+Lemma transf_initial_states:
+ forall st1, MB.initial_state prog st1 ->
+ exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+ intros. inversion H. unfold ge0 in *.
+ econstructor; split.
+ econstructor.
+ eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+ replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
+ with (Vptr fb Ptrofs.zero).
+ econstructor; eauto.
+ constructor.
+ apply Mem.extends_refl.
+ split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
+ intros. rewrite Mach.Regmap.gi. auto.
+ unfold Genv.symbol_address.
+ rewrite (match_program_main TRANSF).
+ rewrite symbols_preserved.
+ unfold ge; rewrite H1. auto.
+Qed.
+
+Lemma transf_final_states:
+ forall st1 st2 r,
+ match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
+Proof.
+ intros. inv H0. inv H. constructor. assumption.
+ compute in H1. inv H1.
+ generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+Qed.
+
+Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop :=
+ Asmblockgenproof0.return_address_offset.
+
+Theorem transf_program_correct:
+ forward_simulation (MB.semantics return_address_offset prog) (Asmblock.semantics tprog).
+Proof.
+ eapply forward_simulation_star with (measure := measure).
+ - apply senv_preserved.
+ - eexact transf_initial_states.
+ - eexact transf_final_states.
+ - exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/kvx/Asmblockgenproof0.v b/kvx/Asmblockgenproof0.v
new file mode 100644
index 00000000..83b574e7
--- /dev/null
+++ b/kvx/Asmblockgenproof0.v
@@ -0,0 +1,982 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * "block" version of Asmgenproof0
+
+ This module is largely adapted from Asmgenproof0.v of the other backends
+ It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+ It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
+Require Import Coqlib.
+Require Intv.
+Require Import AST.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Events.
+Require Import Smallstep.
+Require Import Locations.
+Require Import Machblock.
+Require Import Asmblock.
+Require Import Asmblockgen.
+Require Import Conventions1.
+Require Import Axioms.
+Require Import Asmblockprops.
+Require Import Lia.
+
+Module MB:=Machblock.
+Module AB:=Asmblock.
+
+Lemma ireg_of_eq:
+ forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r); inv H; auto.
+Qed.
+
+Lemma freg_of_eq:
+ forall r r', freg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+ unfold freg_of; intros. destruct (preg_of r); inv H; auto.
+Qed.
+
+Lemma preg_of_injective:
+ forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
+Proof.
+ destruct r1; destruct r2; simpl; intros; reflexivity || discriminate.
+Qed.
+
+Lemma undef_regs_other:
+ forall r rl rs,
+ (forall r', In r' rl -> r <> r') ->
+ undef_regs rl rs r = rs r.
+Proof.
+ induction rl; simpl; intros. auto.
+ rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+ match rl with
+ | nil => True
+ | r1 :: nil => r <> preg_of r1
+ | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+ end.
+
+Remark preg_notin_charact:
+ forall r rl,
+ preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+ induction rl; simpl; intros.
+ tauto.
+ destruct rl.
+ simpl. split. intros. intuition congruence. auto.
+ rewrite IHrl. split.
+ intros [A B]. intros. destruct H. congruence. auto.
+ auto.
+Qed.
+
+Lemma undef_regs_other_2:
+ forall r rl rs,
+ preg_notin r rl ->
+ undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+ intros. apply undef_regs_other. intros.
+ exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+ rewrite preg_notin_charact in H. auto.
+Qed.
+
+(** * Agreement between Mach registers and processor registers *)
+
+Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree {
+ agree_sp: rs#SP = sp;
+ agree_sp_def: sp <> Vundef;
+ agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r))
+}.
+
+Lemma preg_val:
+ forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r).
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Lemma preg_vals:
+ forall ms sp rs, agree ms sp rs ->
+ forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)).
+Proof.
+ induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto.
+Qed.
+
+Lemma sp_val:
+ forall ms sp rs, agree ms sp rs -> sp = rs#SP.
+Proof.
+ intros. destruct H; auto.
+Qed.
+
+Lemma ireg_val:
+ forall ms sp rs r r',
+ agree ms sp rs ->
+ ireg_of r = OK r' ->
+ Val.lessdef (ms r) rs#r'.
+Proof.
+ intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma freg_val:
+ forall ms sp rs r r',
+ agree ms sp rs ->
+ freg_of r = OK r' ->
+ Val.lessdef (ms r) (rs#r').
+Proof.
+ intros. rewrite <- (freg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma agree_exten:
+ forall ms sp rs rs',
+ agree ms sp rs ->
+ (forall r, data_preg r = true -> rs'#r = rs#r) ->
+ agree ms sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H0; auto. auto.
+ intros. rewrite H0; auto. apply preg_of_data.
+Qed.
+
+(** Preservation of register agreement under various assignments. *)
+
+Lemma agree_set_mreg:
+ forall ms sp rs r v rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP.
+ intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence.
+ rewrite H1. auto. apply preg_of_data.
+ red; intros; elim n. eapply preg_of_injective; eauto.
+Qed.
+
+Corollary agree_set_mreg_parallel:
+ forall ms sp rs r v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs).
+Proof.
+ intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto.
+Qed.
+
+Lemma agree_set_other:
+ forall ms sp rs r v,
+ agree ms sp rs ->
+ data_preg r = false ->
+ agree ms sp (rs#r <- v).
+Proof.
+ intros. apply agree_exten with rs. auto.
+ intros. apply Pregmap.gso. congruence.
+Qed.
+
+Lemma agree_nextblock:
+ forall ms sp rs b,
+ agree ms sp rs -> agree ms sp (nextblock b rs).
+Proof.
+ intros. unfold nextblock. apply agree_set_other. auto. auto.
+Qed.
+
+Lemma agree_set_pair:
+ forall sp p v v' ms rs,
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs).
+Proof.
+ intros. destruct p; simpl.
+- apply agree_set_mreg_parallel; auto.
+- apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto.
+ apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_undef_nondata_regs:
+ forall ms sp rl rs,
+ agree ms sp rs ->
+ (forall r, In r rl -> data_preg r = false) ->
+ agree ms sp (undef_regs rl rs).
+Proof.
+ induction rl; simpl; intros. auto.
+ apply IHrl. apply agree_exten with rs; auto.
+ intros. apply Pregmap.gso. red; intros; subst.
+ assert (data_preg a = false) by auto. congruence.
+ intros. apply H0; auto.
+Qed.
+
+Lemma agree_undef_regs:
+ forall ms sp rl rs rs',
+ agree ms sp rs ->
+ (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.undef_regs rl ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite <- agree_sp0. apply H0; auto.
+ rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
+ intros. destruct (In_dec mreg_eq r rl).
+ rewrite Mach.undef_regs_same; auto.
+ rewrite Mach.undef_regs_other; auto. rewrite H0; auto.
+ apply preg_of_data.
+ rewrite preg_notin_charact. intros; red; intros. elim n.
+ exploit preg_of_injective; eauto. congruence.
+Qed.
+
+Lemma agree_set_undef_mreg:
+ forall ms sp rs r v rl rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'.
+Proof.
+ intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto.
+ apply agree_undef_regs with rs; auto.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)).
+ congruence. auto.
+ intros. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma agree_undef_caller_save_regs:
+ forall ms sp rs,
+ agree ms sp rs ->
+ agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs).
+Proof.
+ intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split.
+- unfold proj_sumbool; rewrite dec_eq_true. auto.
+- auto.
+- intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP).
+ destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl.
++ apply list_in_map_inv in i. destruct i as (mr & A & B).
+ assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A.
+ apply List.filter_In in B. destruct B as [C D]. rewrite D. auto.
++ destruct (is_callee_save r) eqn:CS; auto.
+ elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete.
+Qed.
+
+Lemma agree_change_sp:
+ forall ms sp rs sp',
+ agree ms sp rs -> sp' <> Vundef ->
+ agree ms sp' (rs#SP <- sp').
+Proof.
+ intros. inv H. split; auto.
+ intros. rewrite Pregmap.gso; auto with asmgen.
+Qed.
+
+(** Connection between Mach and Asm calling conventions for external
+ functions. *)
+
+Lemma extcall_arg_match:
+ forall ms sp rs m m' l v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg ms m sp l v ->
+ exists v', AB.extcall_arg rs m' l v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+ exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto.
+ unfold Mach.load_stack in H2.
+ exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+ rewrite (sp_val _ _ _ H) in A.
+ exists v'; split; auto.
+ econstructor. eauto. assumption.
+Qed.
+
+Lemma extcall_arg_pair_match:
+ forall ms sp rs m m' p v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg_pair ms m sp p v ->
+ exists v', AB.extcall_arg_pair rs m' p v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+- exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto.
+- exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1).
+ exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2).
+ exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto.
+Qed.
+
+
+Lemma extcall_args_match:
+ forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall ll vl,
+ list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl ->
+ exists vl', list_forall2 (AB.extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros.
+ exists (@nil val); split. constructor. constructor.
+ exploit extcall_arg_pair_match; eauto. intros [v1' [A B]].
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto.
+Qed.
+
+Lemma extcall_arguments_match:
+ forall ms m m' sp rs sg args,
+ agree ms sp rs -> Mem.extends m m' ->
+ Mach.extcall_arguments ms m sp sg args ->
+ exists args', AB.extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'.
+Proof.
+ unfold Mach.extcall_arguments, AB.extcall_arguments; intros.
+ eapply extcall_args_match; eauto.
+Qed.
+
+Remark builtin_arg_match:
+ forall ge (rs: regset) sp m a v,
+ eval_builtin_arg ge (fun r => rs (preg_of r)) sp m a v ->
+ eval_builtin_arg ge rs sp m (map_builtin_arg preg_of a) v.
+Proof.
+ induction 1; simpl; eauto with barg.
+Qed.
+
+Lemma builtin_args_match:
+ forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall al vl, eval_builtin_args ge ms sp m al vl ->
+ exists vl', eval_builtin_args ge rs sp m' (map (map_builtin_arg preg_of) al) vl'
+ /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros; simpl.
+ exists (@nil val); split; constructor.
+ exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto.
+ intros; eapply preg_val; eauto.
+ intros (v1' & A & B).
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto.
+Qed.
+
+Lemma agree_set_res:
+ forall res ms sp rs v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_res res v ms) sp (AB.set_res (map_builtin_res preg_of res) v' rs).
+Proof.
+ induction res; simpl; intros.
+- eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto.
+ intros. apply Pregmap.gso; auto.
+- auto.
+- apply IHres2. apply IHres1. auto.
+ apply Val.hiword_lessdef; auto.
+ apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma set_res_other:
+ forall r res v rs,
+ data_preg r = false ->
+ set_res (map_builtin_res preg_of res) v rs r = rs r.
+Proof.
+ induction res; simpl; intros.
+- apply Pregmap.gso. red; intros; subst r. rewrite preg_of_data in H; discriminate.
+- auto.
+- rewrite IHres2, IHres1; auto.
+Qed.
+
+(* inspired from Mach *)
+
+Lemma find_label_tail:
+ forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c.
+Proof.
+ induction c; simpl; intros. discriminate.
+ destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib.
+Qed.
+
+(* inspired from Asmgenproof0 *)
+
+(* ... skip ... *)
+
+(** The ``code tail'' of an instruction list [c] is the list of instructions
+ starting at PC [pos]. *)
+
+Inductive code_tail: Z -> bblocks -> bblocks -> Prop :=
+ | code_tail_0: forall c,
+ code_tail 0 c c
+ | code_tail_S: forall pos bi c1 c2,
+ code_tail pos c1 c2 ->
+ code_tail (pos + (size bi)) (bi :: c1) c2.
+
+Lemma code_tail_pos:
+ forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0.
+Proof.
+ induction 1. lia. generalize (size_positive bi); intros; lia.
+Qed.
+
+Lemma find_bblock_tail:
+ forall c1 bi c2 pos,
+ code_tail pos c1 (bi :: c2) ->
+ find_bblock pos c1 = Some bi.
+Proof.
+ induction c1; simpl; intros.
+ inversion H.
+ destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; lia.
+ destruct (zeq pos 0). subst pos.
+ inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; lia.
+ inv H. congruence. replace (pos0 + size a - size a) with pos0 by lia.
+ eauto.
+Qed.
+
+
+Local Hint Resolve code_tail_0 code_tail_S: core.
+
+Lemma code_tail_next:
+ forall fn ofs c0,
+ code_tail ofs fn c0 ->
+ forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1.
+Proof.
+ induction 1; intros.
+ - subst; eauto.
+ - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto.
+ lia.
+Qed.
+
+Lemma size_blocks_pos c: 0 <= size_blocks c.
+Proof.
+ induction c as [| a l ]; simpl; try lia.
+ generalize (size_positive a); lia.
+Qed.
+
+Remark code_tail_positive:
+ forall fn ofs c,
+ code_tail ofs fn c -> 0 <= ofs.
+Proof.
+ induction 1; intros; simpl.
+ - lia.
+ - generalize (size_positive bi). lia.
+Qed.
+
+Remark code_tail_size:
+ forall fn ofs c,
+ code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c.
+Proof.
+ induction 1; intros; simpl; try lia.
+Qed.
+
+Remark code_tail_bounds fn ofs c:
+ code_tail ofs fn c -> 0 <= ofs <= size_blocks fn.
+Proof.
+ intro H;
+ exploit code_tail_size; eauto.
+ generalize (code_tail_positive _ _ _ H), (size_blocks_pos c).
+ lia.
+Qed.
+
+Local Hint Resolve code_tail_next: core.
+
+Lemma code_tail_next_int:
+ forall fn ofs bi c,
+ size_blocks fn <= Ptrofs.max_unsigned ->
+ code_tail (Ptrofs.unsigned ofs) fn (bi :: c) ->
+ code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c.
+Proof.
+ intros.
+ exploit code_tail_size; eauto.
+ simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c).
+ intros.
+ rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr.
+ - rewrite Ptrofs.unsigned_repr; eauto.
+ lia.
+ - rewrite Ptrofs.unsigned_repr; lia.
+Qed.
+
+(** Predictor for return addresses in generated Asm code.
+
+ The [return_address_offset] predicate defined here is used in the
+ semantics for Mach to determine the return addresses that are
+ stored in activation records. *)
+
+(** Consider a Mach function [f] and a sequence [c] of Mach instructions
+ representing the Mach code that remains to be executed after a
+ function call returns. The predicate [return_address_offset f c ofs]
+ holds if [ofs] is the integer offset of the PPC instruction
+ following the call in the Asm code obtained by translating the
+ code of [f]. Graphically:
+<<
+ Mach function f |--------- Mcall ---------|
+ Mach code c | |--------|
+ | \ \
+ | \ \
+ | \ \
+ Asm code | |--------|
+ Asm function |------------- Pcall ---------|
+
+ <-------- ofs ------->
+>>
+*)
+
+Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop :=
+ forall tf tc,
+ transf_function f = OK tf ->
+ transl_blocks f c false = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc.
+
+Lemma transl_blocks_tail:
+ forall f c1 c2, is_tail c1 c2 ->
+ forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 ->
+ exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2.
+Proof.
+ induction 1; simpl; intros.
+ exists tc2; exists ep2; split; auto with coqlib.
+ monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B).
+ exists tc1; exists ep1; split. auto.
+ eapply is_tail_trans with x0; eauto with coqlib.
+Qed.
+
+Lemma is_tail_code_tail:
+ forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1.
+Proof.
+ induction 1; eauto.
+ destruct IHis_tail; eauto.
+Qed.
+
+Section RETADDR_EXISTS.
+
+Hypothesis transf_function_inv:
+ forall f tf, transf_function f = OK tf ->
+ exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf).
+
+Hypothesis transf_function_len:
+ forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
+
+
+Lemma return_address_exists:
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros. destruct (transf_function f) as [tf|] eqn:TF.
+ + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
+ exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
+ monadInv TR2.
+ assert (TL3: is_tail x0 (fn_blocks tf)).
+ { apply is_tail_trans with tc1; auto.
+ apply is_tail_trans with (x++x0); auto. eapply is_tail_app.
+ }
+ exploit is_tail_code_tail. eexact TL3. intros [ofs CT].
+ exists (Ptrofs.repr ofs). red; intros.
+ rewrite Ptrofs.unsigned_repr. congruence.
+ exploit code_tail_bounds; eauto.
+ intros; apply transf_function_len in TF. lia.
+ + exists Ptrofs.zero; red; intros. congruence.
+Qed.
+
+End RETADDR_EXISTS.
+
+(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
+ within the Asmblock code generated by translating Machblock function [f],
+ and [tc] is the tail of the generated code at the position corresponding
+ to the code pointer [pc]. *)
+
+Inductive transl_code_at_pc (ge: MB.genv):
+ val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop :=
+ transl_code_at_pc_intro:
+ forall b ofs f c ep tf tc,
+ Genv.find_funct_ptr ge b = Some(Internal f) ->
+ transf_function f = Errors.OK tf ->
+ transl_blocks f c ep = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc ->
+ transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc.
+
+Remark code_tail_no_bigger:
+ forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat.
+Proof.
+ induction 1; simpl; lia.
+Qed.
+
+Remark code_tail_unique:
+ forall fn c pos pos',
+ code_tail pos fn c -> code_tail pos' fn c -> pos = pos'.
+Proof.
+ induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia.
+ f_equal. eauto.
+Qed.
+
+Lemma return_address_offset_correct:
+ forall ge b ofs fb f c tf tc ofs',
+ transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc ->
+ return_address_offset f c ofs' ->
+ ofs' = ofs.
+Proof.
+ intros. inv H. red in H0.
+ exploit code_tail_unique. eexact H12. eapply H0; eauto. intro.
+ rewrite <- (Ptrofs.repr_unsigned ofs).
+ rewrite <- (Ptrofs.repr_unsigned ofs').
+ congruence.
+Qed.
+
+(** The [find_label] function returns the code tail starting at the
+ given label. A connection with [code_tail] is then established. *)
+
+Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks :=
+ match c with
+ | nil => None
+ | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl
+ end.
+
+Lemma label_pos_code_tail:
+ forall lbl c pos c',
+ find_label lbl c = Some c' ->
+ exists pos',
+ label_pos lbl pos c = Some pos'
+ /\ code_tail (pos' - pos) c c'
+ /\ pos <= pos' <= pos + size_blocks c.
+Proof.
+ induction c.
+ simpl; intros. discriminate.
+ simpl; intros until c'.
+ case (is_label lbl a).
+ - intros. inv H. exists pos. split; auto. split.
+ replace (pos - pos) with 0 by lia. constructor. constructor; try lia.
+ generalize (size_blocks_pos c). generalize (size_positive a). lia.
+ - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]].
+ exists pos'. split. auto. split.
+ replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by lia.
+ constructor. auto. generalize (size_positive a). lia.
+Qed.
+
+(** Helper lemmas to reason about
+- the "code is tail of" property
+- correct translation of labels. *)
+
+Definition tail_nolabel (k c: bblocks) : Prop :=
+ is_tail k c /\ forall lbl, find_label lbl c = find_label lbl k.
+
+Lemma tail_nolabel_refl:
+ forall c, tail_nolabel c c.
+Proof.
+ intros; split. apply is_tail_refl. auto.
+Qed.
+
+Lemma tail_nolabel_trans:
+ forall c1 c2 c3, tail_nolabel c2 c3 -> tail_nolabel c1 c2 -> tail_nolabel c1 c3.
+Proof.
+ intros. destruct H; destruct H0; split.
+ eapply is_tail_trans; eauto.
+ intros. rewrite H1; auto.
+Qed.
+
+Definition nolabel (b: bblock) :=
+ match (header b) with nil => True | _ => False end.
+
+Hint Extern 1 (nolabel _) => exact I : labels.
+
+Lemma tail_nolabel_cons:
+ forall b c k,
+ nolabel b -> tail_nolabel k c -> tail_nolabel k (b :: c).
+Proof.
+ intros. destruct H0. split.
+ constructor; auto.
+ intros. simpl. rewrite <- H1. destruct b as [hd bdy ex]; simpl in *.
+ destruct hd as [|l hd]; simpl in *.
+ - assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+ { apply is_label_correct_false. simpl header. apply in_nil. }
+ rewrite H2. auto.
+ - contradiction.
+Qed.
+
+Hint Resolve tail_nolabel_refl: labels.
+
+Ltac TailNoLabel :=
+ eauto with labels;
+ match goal with
+ | [ |- tail_nolabel _ (_ :: _) ] => apply tail_nolabel_cons; [auto; exact I | TailNoLabel]
+ | [ H: Error _ = OK _ |- _ ] => discriminate
+ | [ H: assertion_failed = OK _ |- _ ] => discriminate
+ | [ H: OK _ = OK _ |- _ ] => inv H; TailNoLabel
+ | [ H: bind _ _ = OK _ |- _ ] => monadInv H; TailNoLabel
+ | [ H: (if ?x then _ else _) = OK _ |- _ ] => destruct x; TailNoLabel
+ | [ H: match ?x with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct x; TailNoLabel
+ | _ => idtac
+ end.
+
+Remark tail_nolabel_find_label:
+ forall lbl k c, tail_nolabel k c -> find_label lbl c = find_label lbl k.
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Remark tail_nolabel_is_tail:
+ forall k c, tail_nolabel k c -> is_tail k c.
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Lemma exec_body_pc:
+ forall ge l rs1 m1 rs2 m2,
+ exec_body ge l rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction l.
+ - intros. inv H. auto.
+ - intros until m2. intro EXEB.
+ inv EXEB. destruct (exec_basic_instr _ _ _ _) eqn:EBI; try discriminate.
+ eapply IHl in H0. rewrite H0.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Section STRAIGHTLINE.
+
+Variable ge: genv.
+Variable fn: function.
+
+(** Straight-line code is composed of processor instructions that execute
+ in sequence (no branches, no function calls and returns).
+ The following inductive predicate relates the machine states
+ before and after executing a straight-line sequence of instructions.
+ Instructions are taken from the first list instead of being fetched
+ from memory. *)
+
+Inductive exec_straight: list instruction -> regset -> mem ->
+ list instruction -> regset -> mem -> Prop :=
+ | exec_straight_one:
+ forall i1 c rs1 m1 rs2 m2,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_straight ((PBasic i1) ::g c) rs1 m1 c rs2 m2
+ | exec_straight_step:
+ forall i c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_basic_instr ge i rs1 m1 = Next rs2 m2 ->
+ exec_straight c rs2 m2 c' rs3 m3 ->
+ exec_straight ((PBasic i) :: c) rs1 m1 c' rs3 m3.
+
+Inductive exec_control_rel: option control -> bblock -> regset -> mem ->
+ regset -> mem -> Prop :=
+ | exec_control_rel_intro:
+ forall rs1 m1 b rs1' ctl rs2 m2,
+ rs1' = nextblock b rs1 ->
+ exec_control ge fn ctl rs1' m1 = Next rs2 m2 ->
+ exec_control_rel ctl b rs1 m1 rs2 m2.
+
+Inductive exec_bblock_rel: bblock -> regset -> mem -> regset -> mem -> Prop :=
+ | exec_bblock_rel_intro:
+ forall rs1 m1 b rs2 m2,
+ exec_bblock ge fn b rs1 m1 = Next rs2 m2 ->
+ exec_bblock_rel b rs1 m1 rs2 m2.
+
+Lemma exec_straight_body:
+ forall c l rs1 m1 rs2 m2,
+ exec_straight c rs1 m1 nil rs2 m2 ->
+ code_to_basics c = Some l ->
+ exec_body ge l rs1 m1 = Next rs2 m2.
+Proof.
+ induction c as [|i c].
+ - intros until m2. intros EXES CTB. inv EXES.
+ - intros until m2. intros EXES CTB. inv EXES.
+ + inv CTB. simpl. rewrite H6. auto.
+ + inv CTB. destruct (code_to_basics c); try discriminate. inv H0. eapply IHc in H7; eauto.
+ rewrite <- H7. simpl. rewrite H1. auto.
+Qed.
+
+Lemma exec_straight_body2:
+ forall c rs1 m1 c' rs2 m2,
+ exec_straight c rs1 m1 c' rs2 m2 ->
+ exists body,
+ exec_body ge body rs1 m1 = Next rs2 m2
+ /\ (basics_to_code body) ++g c' = c.
+Proof.
+ intros until m2. induction 1.
+ - exists (i1::nil). split; auto. simpl. rewrite H. auto.
+ - destruct IHexec_straight as (bdy & EXEB & BTC).
+ exists (i:: bdy). split; simpl.
+ + rewrite H. auto.
+ + congruence.
+Qed.
+
+Lemma exec_straight_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_step with rs2 m2; auto.
+Qed.
+
+Lemma exec_straight_two:
+ forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_straight (i1 ::g i2 ::g c) rs1 m1 c rs3 m3.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_one; auto.
+Qed.
+
+Lemma exec_straight_three:
+ forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_basic_instr ge i3 rs3 m3 = Next rs4 m4 ->
+ exec_straight (i1 ::g i2 ::g i3 ::g c) rs1 m1 c rs4 m4.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ eapply exec_straight_two; eauto.
+Qed.
+
+(** Like exec_straight predicate, but on blocks *)
+
+Inductive exec_straight_blocks: bblocks -> regset -> mem ->
+ bblocks -> regset -> mem -> Prop :=
+ | exec_straight_blocks_one:
+ forall b1 c rs1 m1 rs2 m2,
+ exec_bblock ge fn b1 rs1 m1 = Next rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) ->
+ exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2
+ | exec_straight_blocks_step:
+ forall b c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_bblock ge fn b rs1 m1 = Next rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) ->
+ exec_straight_blocks c rs2 m2 c' rs3 m3 ->
+ exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_blocks_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight_blocks c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight_blocks c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight_blocks c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ apply exec_straight_blocks_step with rs2 m2; auto.
+ apply exec_straight_blocks_step with rs2 m2; auto.
+Qed.
+
+(** Linking exec_straight with exec_straight_blocks *)
+
+Lemma exec_straight_pc:
+ forall c c' rs1 m1 rs2 m2,
+ exec_straight c rs1 m1 c' rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction c; intros; try (inv H; fail).
+ inv H.
+ - eapply exec_basic_instr_pc; eauto.
+ - rewrite (IHc c' rs3 m3 rs2 m2); auto.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Lemma regset_same_assign (rs: regset) r:
+ rs # r <- (rs r) = rs.
+Proof.
+ apply functional_extensionality. intros x. destruct (preg_eq x r); subst; Simpl.
+Qed.
+
+Lemma exec_straight_through_singleinst:
+ forall a b rs1 m1 rs2 m2 rs2' m2' lb,
+ bblock_single_inst (PBasic a) = b ->
+ exec_straight (a ::g nil) rs1 m1 nil rs2 m2 ->
+ nextblock b rs2 = rs2' -> m2 = m2' ->
+ exec_straight_blocks (b::lb) rs1 m1 lb rs2' m2'.
+Proof.
+ intros. subst. constructor 1. unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
+ simpl. rewrite regset_same_assign. auto.
+ simpl; auto. unfold nextblock, incrPC; simpl. Simpl. erewrite exec_straight_pc; eauto.
+Qed.
+
+(** The following lemmas show that straight-line executions
+ (predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
+
+Lemma exec_straight_steps_1:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ plus step ge (State rs m) E0 (State rs' m').
+Proof.
+ induction 1; intros.
+ apply plus_one.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ eapply plus_left'.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity.
+ auto.
+ apply code_tail_next_int; auto.
+ traceEq.
+Qed.
+
+Lemma exec_straight_steps_2:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ exists ofs',
+ rs'#PC = Vptr b ofs'
+ /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'.
+Proof.
+ induction 1; intros.
+ exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split.
+ rewrite H0. rewrite H2. auto.
+ apply code_tail_next_int; auto.
+ apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity. auto.
+ apply code_tail_next_int; auto.
+Qed.
+
+End STRAIGHTLINE.
+
+(** * Properties of the Machblock call stack *)
+
+Section MATCH_STACK.
+
+Variable ge: MB.genv.
+
+Inductive match_stack: list MB.stackframe -> Prop :=
+ | match_stack_nil:
+ match_stack nil
+ | match_stack_cons: forall fb sp ra c s f tf tc,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transl_code_at_pc ge ra fb f c false tf tc ->
+ sp <> Vundef ->
+ match_stack s ->
+ match_stack (Stackframe fb sp ra c :: s).
+
+Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ auto.
+Qed.
+
+Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ inv H0. congruence.
+Qed.
+
+Lemma lessdef_parent_sp:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s.
+Proof.
+ intros. inv H0. auto. exploit parent_sp_def; eauto. tauto.
+Qed.
+
+Lemma lessdef_parent_ra:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s.
+Proof.
+ intros. inv H0. auto. exploit parent_ra_def; eauto. tauto.
+Qed.
+
+End MATCH_STACK.
diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v
new file mode 100644
index 00000000..a18afec8
--- /dev/null
+++ b/kvx/Asmblockgenproof1.v
@@ -0,0 +1,2500 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Proof of correctness for individual instructions *)
+
+Require Import Coqlib Errors Maps.
+Require Import AST Integers Floats Values Memory Globalenvs.
+Require Import Op Locations Machblock Conventions.
+Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
+Require Import Chunks.
+Require Import Lia.
+
+Import PArithCoercions.
+
+(** Decomposition of integer constants. *)
+
+Lemma make_immed32_sound:
+ forall n,
+ match make_immed32 n with
+ | Imm32_single imm => n = imm
+ end.
+Proof.
+ intros; unfold make_immed32. set (lo := Int.sign_ext 12 n).
+ predSpec Int.eq Int.eq_spec n lo; auto.
+Qed.
+
+Lemma make_immed64_sound:
+ forall n,
+ match make_immed64 n with
+ | Imm64_single imm => n = imm
+ end.
+Proof.
+ intros; unfold make_immed64. set (lo := Int64.sign_ext 12 n).
+ predSpec Int64.eq Int64.eq_spec n lo.
+- auto.
+- set (m := Int64.sub n lo).
+ set (p := Int64.zero_ext 20 (Int64.shru m (Int64.repr 12))).
+ predSpec Int64.eq Int64.eq_spec n (Int64.add (Int64.sign_ext 32 (Int64.shl p (Int64.repr 12))) lo).
+ auto.
+ auto.
+Qed.
+
+
+(** Properties of registers *)
+
+Lemma ireg_of_not_RTMP:
+ forall m r, ireg_of m = OK r -> IR r <> IR RTMP.
+Proof.
+ intros. erewrite <- ireg_of_eq; eauto with asmgen.
+Qed.
+
+Lemma ireg_of_not_RTMP':
+ forall m r, ireg_of m = OK r -> r <> RTMP.
+Proof.
+ intros. apply ireg_of_not_RTMP in H. congruence.
+Qed.
+
+Hint Resolve ireg_of_not_RTMP ireg_of_not_RTMP': asmgen.
+
+
+(** Useful simplification tactic *)
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(** * Correctness of RISC-V constructor functions *)
+
+Section CONSTRUCTORS.
+
+Variable ge: genv.
+Variable fn: function.
+
+Lemma loadimm32_correct:
+ forall rd n k rs m,
+ exists rs',
+ exec_straight ge (loadimm32 rd n ::g k) rs m k rs' m
+ /\ rs'#rd = Vint n
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm32; intros. generalize (make_immed32_sound n); intros E.
+ destruct (make_immed32 n).
+- subst imm. econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+Qed.
+
+Lemma loadimm64_correct:
+ forall rd n k rs m,
+ exists rs',
+ exec_straight ge (loadimm64 rd n ::g k) rs m k rs' m
+ /\ rs'#rd = Vlong n
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ unfold loadimm64; intros. generalize (make_immed64_sound n); intros E.
+ destruct (make_immed64 n).
+- subst imm. econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+Qed.
+
+Lemma opimm64_correct:
+ forall (op: arith_name_rrr)
+ (opi: arith_name_rri64)
+ (sem: val -> val -> val) m,
+ (forall d s1 s2 rs,
+ exec_basic_instr ge (op d s1 s2) rs m = Next ((rs#d <- (sem rs#s1 rs#s2))) m) ->
+ (forall d s n rs,
+ exec_basic_instr ge (opi d s n) rs m = Next ((rs#d <- (sem rs#s (Vlong n)))) m) ->
+ forall rd r1 n k rs,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (opimm64 op opi rd r1 n ::g k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Vlong n)
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. unfold opimm64. generalize (make_immed64_sound n); intros E.
+ destruct (make_immed64 n).
+- subst imm. econstructor; split.
+ apply exec_straight_one. rewrite H0. simpl; eauto. auto.
+ split. Simpl. intros; Simpl.
+Qed.
+
+(** Add offset to pointer *)
+
+Lemma addptrofs_correct:
+ forall rd r1 n k rs m,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (addptrofs rd r1 n ::g k) rs m k rs' m
+ /\ Val.lessdef (Val.offset_ptr rs#r1 n) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ unfold addptrofs; intros.
+ destruct (Ptrofs.eq_dec n Ptrofs.zero).
+- subst n. econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. destruct (rs r1); simpl; auto. rewrite Ptrofs.add_zero; auto.
+ intros; Simpl.
+- unfold addimm64.
+ exploit (opimm64_correct Paddl Paddil Val.addl); eauto. intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split; auto.
+ rewrite B. destruct (rs r1); simpl; auto.
+ rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+Qed.
+
+Ltac ArgsInv :=
+ repeat (match goal with
+ | [ H: Error _ = OK _ |- _ ] => discriminate
+ | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+ | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+ | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+ | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+ end);
+ subst;
+ repeat (match goal with
+ | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+ | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+ end).
+
+Inductive exec_straight_opt: list instruction -> regset -> mem -> list instruction -> regset -> mem -> Prop :=
+ | exec_straight_opt_refl: forall c rs m,
+ exec_straight_opt c rs m c rs m
+ | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2,
+ exec_straight ge c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight_opt c1 rs1 m1 c2 rs2 m2.
+
+Remark exec_straight_opt_right:
+ forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+ exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight ge c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight ge c1 rs1 m1 c3 rs3 m3.
+Proof.
+ destruct 1; intros. auto. eapply exec_straight_trans; eauto.
+Qed.
+
+Lemma transl_comp_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp cmp Signed r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmp_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_comp. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Signed) rs # r1 rs # r2)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Signed) rs # r1 rs # r2)).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmp_bool cmp rs#r1 rs#r2) as cmpbool.
+ destruct cmp; simpl;
+ unfold Val.cmp; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compi_correct:
+ forall cmp r1 n lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compi cmp Signed r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmp_bool cmp rs#r1 (Vint n) = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compi. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Signed) rs # r1 (Vint n))) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Signed) rs # r1 (Vint n))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmp_bool cmp rs#r1 (Vint n)) as cmpbool.
+ destruct cmp; simpl;
+ unfold Val.cmp; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compu_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ (Val.mxcmpu_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_comp. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.mxcmpu_bool cmp rs#r1 rs#r2) as cmpubool.
+ destruct cmp; simpl; unfold Val.mxcmpu;
+ rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compui_correct:
+ forall cmp r1 n lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compi cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ (Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b ->
+ exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compi. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Unsigned) rs # r1 (Vint n))) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 (Vint n))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.mxcmpu_bool cmp rs#r1 (Vint n)) as cmpubool.
+ destruct cmp; simpl; unfold Val.mxcmpu;
+ rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compl_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compl cmp Signed r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmpl_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compl. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Signed) rs # r1 rs # r2)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Signed) rs # r1 rs # r2)).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpl_bool cmp rs#r1 rs#r2) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_long, Val.cmpl;
+ rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compil_correct:
+ forall cmp r1 n lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compil cmp Signed r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmpl_bool cmp rs#r1 (Vlong n) = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compil. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Signed) rs # r1 (Vlong n))) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Signed) rs # r1 (Vlong n))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpl_bool cmp rs#r1 (Vlong n)) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_long, Val.cmpl;
+ rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma swap_comparison_cmpf_eq:
+ forall v1 v2 cmp,
+ (Val.cmpf cmp v1 v2) = (Val.cmpf (swap_comparison cmp) v2 v1).
+Proof.
+ intros. unfold Val.cmpf. unfold Val.cmpf_bool. destruct v1; destruct v2; auto.
+ rewrite Float.cmp_swap. auto.
+Qed.
+
+Lemma swap_comparison_cmpf_bool:
+ forall cmp ft v1 v2,
+ ftest_for_cmp cmp = Reversed ft ->
+ Val.cmpf_bool cmp v1 v2 = Val.cmpf_bool (swap_comparison cmp) v2 v1.
+Proof.
+ intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_swap. reflexivity.
+Qed.
+
+Lemma transl_compf_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp_float64 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmpf_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. unfold transl_comp_float64. destruct (ftest_for_cmp cmp) eqn:FT.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r1) (rs r2))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_float;
+ unfold Val.cmpf; simpl in FT; inversion FT; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r2) (rs r1))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool.
+ erewrite swap_comparison_cmpf_bool in Heqcmpbool; eauto.
+ destruct cmp; simpl;
+ unfold compare_float;
+ unfold Val.cmpf; simpl in FT; inversion FT; simpl in Heqcmpbool; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma cmpf_bool_ne_eq:
+ forall v1 v2,
+ Val.cmpf_bool Cne v1 v2 = option_map negb (Val.cmpf_bool Ceq v1 v2).
+Proof.
+ intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_ne_eq. simpl. reflexivity.
+Qed.
+
+Lemma cmpf_bool_ne_eq_rev:
+ forall v1 v2,
+ Val.cmpf_bool Ceq v1 v2 = option_map negb (Val.cmpf_bool Cne v1 v2).
+Proof.
+ intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_ne_eq. simpl. rewrite negb_involutive. reflexivity.
+Qed.
+
+Lemma option_map_negb_negb:
+ forall v,
+ option_map negb (option_map negb v) = v.
+Proof.
+ destruct v; simpl; auto. rewrite negb_involutive. reflexivity.
+Qed.
+
+Lemma notbool_option_map_negb:
+ forall v, Val.notbool (Val.of_optbool v) = Val.of_optbool (option_map negb v).
+Proof.
+ unfold Val.notbool. unfold Val.of_optbool.
+ destruct v; auto. destruct b; auto.
+Qed.
+
+Lemma swap_comparison_cmpf_bool_notftest:
+ forall cmp ft v1 v2,
+ notftest_for_cmp cmp = Reversed ft ->
+ Val.cmpf_bool cmp v1 v2 = Val.cmpf_bool (swap_comparison cmp) v2 v1.
+Proof.
+ intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_swap. reflexivity.
+Qed.
+
+Lemma transl_compnotf_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp_notfloat64 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ (option_map negb (Val.cmpf_bool cmp rs#r1 rs#r2) = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. unfold transl_comp_notfloat64. destruct (notftest_for_cmp cmp) eqn:FT.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r1) (rs r2))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (option_map negb (Val.cmpf_bool cmp rs#r1 rs#r2)) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_float;
+ unfold Val.cmpf; simpl in FT; inversion FT.
+ * rewrite cmpf_bool_ne_eq; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite cmpf_bool_ne_eq_rev. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r2) (rs r1))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool.
+ erewrite swap_comparison_cmpf_bool_notftest in Heqcmpbool; eauto.
+ destruct cmp; simpl;
+ unfold compare_float;
+ unfold Val.cmpf; simpl in FT; inversion FT; simpl in Heqcmpbool.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma swap_comparison_cmpfs_bool:
+ forall cmp ft v1 v2,
+ ftest_for_cmp cmp = Reversed ft ->
+ Val.cmpfs_bool cmp v1 v2 = Val.cmpfs_bool (swap_comparison cmp) v2 v1.
+Proof.
+ intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_swap. reflexivity.
+Qed.
+
+Lemma transl_compfs_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp_float32 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.cmpfs_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. unfold transl_comp_float32. destruct (ftest_for_cmp cmp) eqn:FT.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r1) (rs r2))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_single;
+ unfold Val.cmpfs; simpl in FT; inversion FT; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r2) (rs r1))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool.
+ erewrite swap_comparison_cmpfs_bool in Heqcmpbool; eauto.
+ destruct cmp; simpl;
+ unfold compare_single;
+ unfold Val.cmpfs; simpl in FT; inversion FT; simpl in Heqcmpbool; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma swap_comparison_cmpfs_bool_notftest:
+ forall cmp ft v1 v2,
+ notftest_for_cmp cmp = Reversed ft ->
+ Val.cmpfs_bool cmp v1 v2 = Val.cmpfs_bool (swap_comparison cmp) v2 v1.
+Proof.
+ intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_swap. reflexivity.
+Qed.
+
+Lemma cmpfs_bool_ne_eq:
+ forall v1 v2,
+ Val.cmpfs_bool Cne v1 v2 = option_map negb (Val.cmpfs_bool Ceq v1 v2).
+Proof.
+ intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_ne_eq. simpl. reflexivity.
+Qed.
+
+Lemma cmpfs_bool_ne_eq_rev:
+ forall v1 v2,
+ Val.cmpfs_bool Ceq v1 v2 = option_map negb (Val.cmpfs_bool Cne v1 v2).
+Proof.
+ intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_ne_eq. simpl. rewrite negb_involutive. reflexivity.
+Qed.
+
+Lemma transl_compnotfs_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_comp_notfloat32 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ (option_map negb (Val.cmpfs_bool cmp rs#r1 rs#r2) = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. unfold transl_comp_notfloat32. destruct (notftest_for_cmp cmp) eqn:FT.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r1) (rs r2))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (option_map negb (Val.cmpfs_bool cmp rs#r1 rs#r2)) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_single;
+ unfold Val.cmpfs; simpl in FT; inversion FT.
+ * rewrite cmpfs_bool_ne_eq; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite cmpfs_bool_ne_eq_rev. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+ * esplit. split.
+ - apply exec_straight_one; simpl; eauto.
+ - split.
+ + intros; Simpl.
+ + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r2) (rs r1))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool.
+ erewrite swap_comparison_cmpfs_bool_notftest in Heqcmpbool; eauto.
+ destruct cmp; simpl;
+ unfold compare_single;
+ unfold Val.cmpfs; simpl in FT; inversion FT; simpl in Heqcmpbool.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_complu_correct:
+ forall cmp r1 r2 lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compl cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.mxcmplu_bool cmp rs#r1 rs#r2 = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compl. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.mxcmplu_bool cmp rs#r1 rs#r2) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_compilu_correct:
+ forall cmp r1 n lbl k rs m tbb b,
+ exists rs',
+ exec_straight ge (transl_compil cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b ->
+ exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m
+ = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros. esplit. split.
+- unfold transl_compil. apply exec_straight_one; simpl; eauto.
+- split.
+ + intros; Simpl.
+ + intros.
+ remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Unsigned) rs # r1 (Vlong n))) as rs'.
+ simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b).
+ {
+ assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 (Vlong n))).
+ { rewrite Heqrs'. auto. }
+ rewrite H0. rewrite <- H.
+ remember (Val.mxcmplu_bool cmp rs#r1 (Vlong n)) as cmpbool.
+ destruct cmp; simpl;
+ unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto;
+ destruct b0; simpl; auto.
+ }
+ rewrite H0. simpl; auto.
+Qed.
+
+Lemma transl_opt_compuimm_correct:
+ forall n cmp r1 lbl k rs m b tbb c,
+ select_comp n cmp = Some c ->
+ exists rs', exists insn,
+ exec_straight_opt (transl_opt_compuimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b ->
+ exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros.
+(* unfold transl_opt_compuimm. unfold select_comp in H. rewrite H; simpl. *)
+ remember c as c'.
+ destruct c'.
+ - (* c = Ceq *)
+ assert (Int.eq n Int.zero = true) as H'.
+ { remember (Int.eq n Int.zero) as termz. destruct termz; auto.
+ generalize H. unfold select_comp; rewrite <- Heqtermz; simpl.
+ discriminate. }
+ assert (n = (Int.repr 0)) as H0. {
+ destruct (Int.eq_dec n (Int.repr 0)) as [Ha|Ha]; auto.
+ generalize (Int.eq_false _ _ Ha). unfold Int.zero in H'.
+ rewrite H'. discriminate.
+ }
+ assert (Ceq = cmp). {
+ remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_comp;
+ rewrite H'; simpl; auto;
+ intros; contradict H; discriminate.
+ }
+ unfold transl_opt_compuimm. subst. rewrite H'.
+
+ exists rs, (Pcbu BTweqz r1 lbl).
+ split.
+ * constructor.
+ * split; auto. simpl. intros.
+ assert (rs r1 = (nextblock tbb rs) r1).
+ unfold nextblock, incrPC. Simpl. rewrite H1 in H0.
+ (*assert (Val.cmp_bool Ceq (rs r1) (Vint (Int.repr 0)) = Some b) as EVAL'S.
+ { rewrite <- H2. rewrite <- H0. rewrite <- H1. auto. }*)
+ auto;
+ unfold eval_branch. rewrite H0; auto.
+ - (* c = Cne *)
+ assert (Int.eq n Int.zero = true) as H'.
+ { remember (Int.eq n Int.zero) as termz. destruct termz; auto.
+ generalize H. unfold select_comp; rewrite <- Heqtermz; simpl.
+ discriminate. }
+ assert (n = (Int.repr 0)) as H0. {
+ destruct (Int.eq_dec n (Int.repr 0)) as [Ha|Ha]; auto.
+ generalize (Int.eq_false _ _ Ha). unfold Int.zero in H'.
+ rewrite H'. discriminate.
+ }
+ assert (Cne = cmp). {
+ remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_comp;
+ rewrite H'; simpl; auto;
+ intros; contradict H; discriminate.
+ }
+ unfold transl_opt_compuimm. subst. rewrite H'.
+
+ exists rs, (Pcbu BTwnez r1 lbl).
+ split.
+ * constructor.
+ * split; auto. simpl. intros.
+ assert (rs r1 = (nextblock tbb rs) r1).
+ unfold nextblock, incrPC. Simpl. rewrite H1 in H0.
+ auto;
+ unfold eval_branch. rewrite H0. auto.
+ - (* c = Clt *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero);
+ destruct cmp; discriminate.
+ - (* c = Cle *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero);
+ destruct cmp; discriminate.
+ - (* c = Cgt *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero);
+ destruct cmp; discriminate.
+ - (* c = Cge *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero);
+ destruct cmp; discriminate.
+Qed.
+
+Lemma transl_opt_compluimm_correct:
+ forall n cmp r1 lbl k rs m b tbb c,
+ select_compl n cmp = Some c ->
+ exists rs', exists insn,
+ exec_straight_opt (transl_opt_compluimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m
+ /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r)
+ /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b ->
+ exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b))
+ .
+Proof.
+ intros.
+(* unfold transl_opt_compluimm; rewrite H; simpl. *)
+ remember c as c'.
+ destruct c'.
+ - (* c = Ceq *)
+ assert (Int64.eq n Int64.zero = true) as H'.
+ { remember (Int64.eq n Int64.zero) as termz. destruct termz; auto.
+ generalize H. unfold select_compl; rewrite <- Heqtermz; simpl.
+ discriminate. }
+ assert (n = (Int64.repr 0)) as H0. {
+ destruct (Int64.eq_dec n (Int64.repr 0)) as [Ha|Ha]; auto.
+ generalize (Int64.eq_false _ _ Ha). unfold Int64.zero in H'.
+ rewrite H'. discriminate.
+ }
+ assert (Ceq = cmp). {
+ remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_compl;
+ rewrite H'; simpl; auto;
+ intros; contradict H; discriminate.
+ }
+ unfold transl_opt_compluimm; subst; rewrite H'.
+
+ exists rs, (Pcbu BTdeqz r1 lbl).
+ split.
+ * constructor.
+ * split; auto. simpl. intros.
+ assert (rs r1 = (nextblock tbb rs) r1).
+ unfold nextblock, incrPC. Simpl. rewrite H1 in H0.
+ auto;
+ unfold eval_branch. rewrite H0; auto.
+ - (* c = Cne *)
+ assert (Int64.eq n Int64.zero = true) as H'.
+ { remember (Int64.eq n Int64.zero) as termz. destruct termz; auto.
+ generalize H. unfold select_compl; rewrite <- Heqtermz; simpl.
+ discriminate. }
+ assert (n = (Int64.repr 0)) as H0. {
+ destruct (Int64.eq_dec n (Int64.repr 0)) as [Ha|Ha]; auto.
+ generalize (Int64.eq_false _ _ Ha). unfold Int64.zero in H'.
+ rewrite H'. discriminate.
+ }
+ assert (Cne = cmp). {
+ remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_compl;
+ rewrite H'; simpl; auto;
+ intros; contradict H; discriminate.
+ }
+ unfold transl_opt_compluimm; subst; rewrite H'.
+
+ exists rs, (Pcbu BTdnez r1 lbl).
+ split.
+ * constructor.
+ * split; auto. simpl. intros.
+ assert (rs r1 = (nextblock tbb rs) r1).
+ unfold nextblock, incrPC. Simpl. rewrite H1 in H0.
+ auto;
+ unfold eval_branch. rewrite H0; auto.
+ - (* c = Clt *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero);
+ destruct cmp; discriminate.
+ - (* c = Cle *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero);
+ destruct cmp; discriminate.
+ - (* c = Cgt *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero);
+ destruct cmp; discriminate.
+ - (* c = Cge *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero);
+ destruct cmp; discriminate.
+Qed.
+
+Local Hint Resolve Val.mxcmpu_bool_correct Val.mxcmplu_bool_correct: core.
+
+Lemma transl_cbranch_correct_1:
+ forall cond args lbl k c m ms b sp rs m' tbb,
+ transl_cbranch cond args lbl k = OK c ->
+ eval_condition cond (List.map ms args) m = Some b ->
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ exists rs', exists insn,
+ exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m'
+ /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = eval_branch fn lbl (nextblock tbb rs') m' (Some b)
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until tbb; intros TRANSL EVAL AG MEXT.
+ set (vl' := map rs (map preg_of args)).
+ assert (EVAL': eval_condition cond vl' m' = Some b).
+ { apply eval_condition_lessdef with (map ms args) m; auto. eapply preg_vals; eauto. }
+ clear EVAL MEXT AG.
+ destruct cond; simpl in TRANSL; ArgsInv.
+(* Ccomp *)
+- exploit (transl_comp_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+(* Ccompu *)
+- exploit (transl_compu_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; eauto.
+(* Ccompimm *)
+- remember (Int.eq n Int.zero) as eqz.
+ destruct eqz.
+ + assert (n = (Int.repr 0)). {
+ destruct (Int.eq_dec n (Int.repr 0)) as [H|H]; auto.
+ generalize (Int.eq_false _ _ H). unfold Int.zero in Heqeqz.
+ rewrite <- Heqeqz. discriminate.
+ }
+ exists rs, (Pcb (btest_for_cmpswz c0) x lbl).
+ split.
+ * constructor.
+ * split; auto.
+ assert (rs x = (nextblock tbb rs) x).
+ unfold nextblock, incrPC. Simpl. rewrite H0 in EVAL'. clear H0.
+ destruct c0; simpl; auto;
+ unfold eval_branch; rewrite <- H; rewrite EVAL'; auto.
+ + exploit (transl_compi_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C').
+ exists rs'2, (Pcb BTwnez RTMP lbl).
+ split.
+ * constructor. eexact A'.
+ * split; auto.
+ { apply C'; auto. }
+(* Ccompuimm *)
+- remember (select_comp n c0) as selcomp.
+ destruct selcomp.
+ + exploit (transl_opt_compuimm_correct n c0 x lbl k). apply eq_sym. apply Heqselcomp.
+ intros (rs' & i & A & B & C).
+ exists rs', i.
+ split.
+ * apply A.
+ * split; auto. apply C. apply EVAL'.
+ + assert (transl_opt_compuimm n c0 x lbl k = transl_compi c0 Unsigned x n lbl k).
+ { unfold transl_opt_compuimm.
+ destruct (Int.eq n Int.zero) eqn:EQN.
+ all: unfold select_comp in Heqselcomp; rewrite EQN in Heqselcomp; destruct c0; simpl in *; auto.
+ all: discriminate. }
+ rewrite H. clear H.
+ exploit (transl_compui_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C').
+ exists rs'2, (Pcb BTwnez RTMP lbl).
+ split.
+ * constructor. eexact A'.
+ * split; auto.
+ { apply C'; auto. }
+(* Ccompl *)
+- exploit (transl_compl_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+(* Ccomplu *)
+- exploit (transl_complu_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; eauto.
+(* Ccomplimm *)
+- remember (Int64.eq n Int64.zero) as eqz.
+ destruct eqz.
+ + assert (n = (Int64.repr 0)). {
+ destruct (Int64.eq_dec n (Int64.repr 0)) as [H|H]; auto.
+ generalize (Int64.eq_false _ _ H). unfold Int64.zero in Heqeqz.
+ rewrite <- Heqeqz. discriminate.
+ }
+ exists rs, (Pcb (btest_for_cmpsdz c0) x lbl).
+ split.
+ * constructor.
+ * split; auto.
+ assert (rs x = (nextblock tbb rs) x).
+ unfold nextblock, incrPC. Simpl. rewrite H0 in EVAL'. clear H0.
+ destruct c0; simpl; auto;
+ unfold eval_branch; rewrite <- H; rewrite EVAL'; auto.
+ + exploit (transl_compil_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C').
+ exists rs'2, (Pcb BTwnez RTMP lbl).
+ split.
+ * constructor. eexact A'.
+ * split; auto.
+ { apply C'; auto. }
+
+(* Ccompluimm *)
+- remember (select_compl n c0) as selcomp.
+ destruct selcomp.
+ + exploit (transl_opt_compluimm_correct n c0 x lbl k). apply eq_sym. apply Heqselcomp.
+ intros (rs' & i & A & B & C).
+ exists rs', i.
+ split.
+ * apply A.
+ * split; eauto. (* apply C. apply EVAL'. *)
+ + assert (transl_opt_compluimm n c0 x lbl k = transl_compil c0 Unsigned x n lbl k).
+ { unfold transl_opt_compluimm.
+ destruct (Int64.eq n Int64.zero) eqn:EQN.
+ all: unfold select_compl in Heqselcomp; rewrite EQN in Heqselcomp; destruct c0; simpl in *; auto.
+ all: discriminate. }
+ rewrite H. clear H.
+ exploit (transl_compilu_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C').
+ exists rs'2, (Pcb BTwnez RTMP lbl).
+ split.
+ * constructor. eexact A'.
+ * split; auto.
+ { apply C'; auto. eapply Val.mxcmplu_bool_correct; eauto. }
+
+(* Ccompf *)
+- exploit (transl_compf_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+
+(* Cnotcompf *)
+- exploit (transl_compnotf_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+
+(* Ccompfs *)
+- exploit (transl_compfs_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+
+(* Cnotcompfs *)
+- exploit (transl_compnotfs_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C).
+ exists rs', (Pcb BTwnez RTMP lbl).
+ split.
+ + constructor. eexact A.
+ + split; auto. apply C; auto.
+Qed.
+
+Lemma transl_cbranch_correct_true:
+ forall cond args lbl k c m ms sp rs m' tbb,
+ transl_cbranch cond args lbl k = OK c ->
+ eval_condition cond (List.map ms args) m = Some true ->
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ exists rs', exists insn,
+ exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m'
+ /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = goto_label fn lbl (nextblock tbb rs') m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. eapply transl_cbranch_correct_1 with (b := true); eauto.
+Qed.
+
+Lemma transl_cbranch_correct_false:
+ forall cond args lbl k c m ms sp rs tbb m',
+ transl_cbranch cond args lbl k = OK c ->
+ eval_condition cond (List.map ms args) m = Some false ->
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ exists rs', exists insn,
+ exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m'
+ /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = Next (nextblock tbb rs') m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. exploit transl_cbranch_correct_1. all: eauto. simpl eval_branch. instantiate (1 := tbb).
+ intros (rs' & insn & A & B & C). rewrite regset_same_assign in B.
+ eexists; eexists. split; try split. all: eassumption.
+Qed.
+
+(** Translation of condition operators *)
+
+Lemma transl_cond_int32s_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_int32s cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.cmp cmp rs#r1 rs#r2) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+
+Lemma transl_cond_int32u_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_int32u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = Val.mxcmpu cmp rs#r1 rs#r2
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_cond_int64s_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_int64s cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 rs#r2)) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_cond_int64u_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_int64u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = Val.mxcmplu cmp rs#r1 rs#r2
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_condimm_int32s_correct:
+ forall cmp rd r1 n k rs m,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code (transl_condimm_int32s cmp rd r1 n k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Local Hint Resolve Val.mxcmpu_correct Val.mxcmplu_correct: core.
+
+Lemma transl_condimm_int32u_correct:
+ forall cmp rd r1 n k rs m,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code (transl_condimm_int32u cmp rd r1 n k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_condimm_int64s_correct:
+ forall cmp rd r1 n k rs m,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code (transl_condimm_int64s cmp rd r1 n k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_condimm_int64u_correct:
+ forall cmp rd r1 n k rs m,
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code (transl_condimm_int64u cmp rd r1 n k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl;
+ (econstructor; split;
+ [ apply exec_straight_one; [simpl; eauto] |
+ split; intros; Simpl; unfold compare_long; eauto]).
+Qed.
+
+Lemma swap_comparison_cmpfs:
+ forall v1 v2 cmp,
+ Val.lessdef (Val.cmpfs cmp v1 v2) (Val.cmpfs (swap_comparison cmp) v2 v1).
+Proof.
+ intros. unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto.
+ rewrite Float32.cmp_swap. auto.
+Qed.
+
+Lemma transl_cond_float32_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_float32 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.cmpfs cmp rs#r1 rs#r2) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl. apply swap_comparison_cmpfs.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl. apply swap_comparison_cmpfs.
+- econstructor; split. apply exec_straight_one; [simpl;
+ eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_cond_nofloat32_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_notfloat32 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.of_optbool (option_map negb (Val.cmpfs_bool cmp (rs r1) (rs r2)))) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto.
+ rewrite Float32.cmp_ne_eq. auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto.
+ rewrite Float32.cmp_ne_eq. simpl. destruct (Float32.cmp Ceq f f0); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ destruct (Float32.cmp Clt f f0); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ replace (Cge) with (swap_comparison Cle); auto. rewrite Float32.cmp_swap.
+ destruct (Float32.cmp _ _ _); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ replace (Clt) with (swap_comparison Cgt); auto. rewrite Float32.cmp_swap.
+ destruct (Float32.cmp _ _ _); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ destruct (Float32.cmp _ _ _); auto.
+Qed.
+
+Lemma swap_comparison_cmpf:
+ forall v1 v2 cmp,
+ Val.lessdef (Val.cmpf cmp v1 v2) (Val.cmpf (swap_comparison cmp) v2 v1).
+Proof.
+ intros. unfold Val.cmpf. unfold Val.cmpf_bool. destruct v1; destruct v2; auto.
+ rewrite Float.cmp_swap. auto.
+Qed.
+
+Lemma transl_cond_float64_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_float64 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.cmpf cmp rs#r1 rs#r2) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl. apply swap_comparison_cmpf.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl. apply swap_comparison_cmpf.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+Qed.
+
+Lemma transl_cond_nofloat64_correct:
+ forall cmp rd r1 r2 k rs m,
+ exists rs',
+ exec_straight ge (basics_to_code (transl_cond_notfloat64 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.of_optbool (option_map negb (Val.cmpf_bool cmp (rs r1) (rs r2)))) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. destruct cmp; simpl.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto.
+ rewrite Float.cmp_ne_eq. auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto.
+ rewrite Float.cmp_ne_eq. simpl. destruct (Float.cmp Ceq f f0); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ destruct (Float.cmp Clt f f0); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ replace (Cge) with (swap_comparison Cle); auto. rewrite Float.cmp_swap.
+ destruct (Float.cmp _ _ _); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ replace (Clt) with (swap_comparison Cgt); auto. rewrite Float.cmp_swap.
+ destruct (Float.cmp _ _ _); auto.
+- econstructor; split. apply exec_straight_one; [simpl; eauto].
+ split; intros; Simpl.
+ unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl.
+ destruct (Float.cmp _ _ _); auto.
+Qed.
+
+Lemma transl_cond_op_correct:
+ forall cond rd args k c rs m,
+ transl_cond_op cond rd args k = OK c ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)) rs'#rd
+ /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ assert (MKTOT: forall ob, Val.of_optbool ob = Val.maketotal (option_map Val.of_bool ob)).
+ { destruct ob as [[]|]; reflexivity. }
+ intros until m; intros TR.
+ destruct cond; simpl in TR; ArgsInv.
++ (* cmp *)
+ exploit transl_cond_int32s_correct; eauto. simpl. intros (rs' & A & B & C). exists rs'; eauto.
++ (* cmpu *)
+ exploit transl_cond_int32u_correct; eauto. simpl. intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto. rewrite B; eapply Val.mxcmpu_correct.
++ (* cmpimm *)
+ apply transl_condimm_int32s_correct; eauto with asmgen.
++ (* cmpuimm *)
+ apply transl_condimm_int32u_correct; eauto with asmgen.
++ (* cmpl *)
+ exploit transl_cond_int64s_correct; eauto. simpl. intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmplu *)
+ exploit transl_cond_int64u_correct; eauto. simpl. intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto. rewrite B, MKTOT; eauto.
+ eapply Val.mxcmplu_correct.
++ (* cmplimm *)
+ exploit transl_condimm_int64s_correct; eauto. instantiate (1 := x); eauto with asmgen. simpl.
+ intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmpluimm *)
+ exploit transl_condimm_int64u_correct; eauto. instantiate (1 := x); eauto with asmgen. simpl.
+ intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto. rewrite MKTOT; eauto.
++ (* cmpfloat *)
+ exploit transl_cond_float64_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
++ (* cmpnosingle *)
+ exploit transl_cond_nofloat64_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
++ (* cmpsingle *)
+ exploit transl_cond_float32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
++ (* cmpnosingle *)
+ exploit transl_cond_nofloat32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
+Qed.
+
+(* Translation of arithmetic operations *)
+
+Ltac SimplEval H :=
+ match type of H with
+ | Some _ = None _ => discriminate
+ | Some _ = Some _ => inv H
+ | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
+
+Ltac TranslOpSimpl :=
+ econstructor; split;
+ [ apply exec_straight_one; reflexivity
+ | split; [ apply Val.lessdef_same; simpl; Simpl; fail | intros; simpl; Simpl; fail ] ].
+
+Lemma int_eq_comm:
+ forall (x y: int),
+ (Int.eq x y) = (Int.eq y x).
+Proof.
+ intros.
+ unfold Int.eq.
+ unfold zeq.
+ destruct (Z.eq_dec _ _); destruct (Z.eq_dec _ _); congruence.
+Qed.
+
+Lemma int64_eq_comm:
+ forall (x y: int64),
+ (Int64.eq x y) = (Int64.eq y x).
+Proof.
+ intros.
+ unfold Int64.eq.
+ unfold zeq.
+ destruct (Z.eq_dec _ _); destruct (Z.eq_dec _ _); congruence.
+Qed.
+
+Lemma select_same_lessdef:
+ forall ty c v,
+ Val.lessdef (Val.select c v v ty) v.
+Proof.
+ intros.
+ unfold Val.select.
+ destruct c; try econstructor.
+ replace (if b then v else v) with v by (destruct b ; trivial).
+ destruct v; destruct ty; simpl; econstructor.
+Qed.
+
+Lemma if_neg : forall X,
+ forall a,
+ forall b c : X,
+ (if (negb a) then b else c) = (if a then c else b).
+Proof.
+ destruct a; reflexivity.
+Qed.
+
+Lemma int_ltu_to_neq:
+ forall x,
+ Int.ltu Int.zero x = negb (Int.eq x Int.zero).
+Proof.
+ intros.
+ unfold Int.ltu, Int.eq.
+ change (Int.unsigned Int.zero) with 0.
+ pose proof (Int.unsigned_range x) as RANGE.
+ unfold zlt, zeq.
+ destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; lia.
+Qed.
+
+Lemma int64_ltu_to_neq:
+ forall x,
+ Int64.ltu Int64.zero x = negb (Int64.eq x Int64.zero).
+Proof.
+ intros.
+ unfold Int64.ltu, Int64.eq.
+ change (Int64.unsigned Int64.zero) with 0.
+ pose proof (Int64.unsigned_range x) as RANGE.
+ unfold zlt, zeq.
+ destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; lia.
+Qed.
+
+Ltac splitall := repeat match goal with |- _ /\ _ => split end.
+
+Lemma transl_op_correct:
+ forall op args res k (rs: regset) m v c,
+ transl_op op args res k = OK c ->
+ eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ Val.lessdef v rs'#(preg_of res)
+ /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+Proof.
+ assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. }
+Opaque Int.eq.
+ intros until c; intros TR EV.
+ unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
+- (* Omove *)
+ destruct (preg_of res), (preg_of m0); inv TR; TranslOpSimpl.
+- (* Oaddrsymbol *)
+ destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)).
++ set (rs1 := (rs#x <- (Genv.symbol_address ge id Ptrofs.zero))).
+ exploit (addptrofs_correct x x ofs (basics_to_code k) rs1 m); eauto with asmgen.
+ intros (rs2 & A & B & C).
+ exists rs2; split.
+ apply exec_straight_step with rs1 m; auto.
+ split. replace ofs with (Ptrofs.add Ptrofs.zero ofs) by (apply Ptrofs.add_zero_l).
+ rewrite Genv.shift_symbol_address.
+ replace (rs1 x) with (Genv.symbol_address ge id Ptrofs.zero) in B by (unfold rs1; Simpl).
+ exact B.
+ intros. rewrite C by eauto with asmgen. unfold rs1; Simpl.
++ TranslOpSimpl.
+- (* Oaddrstack *)
+ exploit addptrofs_correct. instantiate (1 := SP); auto with asmgen. intros (rs' & A & B & C).
+ exists rs'; split; eauto. auto with asmgen.
+- (* Ocast8signed *)
+ econstructor; split.
+ eapply exec_straight_two. simpl;eauto. simpl;eauto.
+ repeat split; intros; simpl; Simpl.
+ assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
+ destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A.
+ apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. compute; intuition congruence.
+- (* Ocast16signed *)
+ econstructor; split.
+ eapply exec_straight_two. simpl;eauto. simpl;eauto.
+ repeat split; intros; Simpl.
+ assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
+ destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A.
+ apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. compute; intuition congruence.
+- (* Oshrximm *)
+ econstructor; split.
+ + apply exec_straight_one. simpl. eauto.
+ + repeat split.
+ * rewrite Pregmap.gss.
+ destruct (rs x0); simpl; trivial.
+ unfold Val.maketotal.
+ destruct (Int.ltu _ _); simpl; trivial.
+ * intros.
+ rewrite Pregmap.gso; trivial.
+- (* Oshrxlimm *)
+ econstructor; split.
+ + apply exec_straight_one. simpl. eauto.
+ + repeat split.
+ * rewrite Pregmap.gss.
+ destruct (rs x0); simpl; trivial.
+ unfold Val.maketotal.
+ destruct (Int.ltu _ _); simpl; trivial.
+ * intros.
+ rewrite Pregmap.gso; trivial.
+
+- (* Ocmp *)
+ exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
+ exists rs'; repeat split; eauto with asmgen.
+
+- (* Osel *)
+ unfold conditional_move in *.
+ destruct (ireg_eq _ _).
+ {
+ subst x. inv EQ2.
+ econstructor; split.
+ {
+ apply exec_straight_one.
+ simpl. reflexivity.
+ }
+ split.
+ { apply select_same_lessdef. }
+ intros; trivial.
+ }
+
+ destruct c0; simpl in *.
+
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ2.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x1); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
+
+- (* Oselimm *)
+ unfold conditional_move_imm32 in *.
+ destruct c0; simpl in *.
+
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ0.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x0); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
+
+- (* Osellimm *)
+ unfold conditional_move_imm64 in *.
+ destruct c0; simpl in *.
+
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ0.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x0); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
+Qed.
+
+(** Memory accesses *)
+
+Lemma indexed_memory_access_correct:
+ forall mk_instr base ofs k rs m,
+ exists base' ofs' rs' ptr',
+ exec_straight_opt (indexed_memory_access mk_instr base ofs ::g k) rs m
+ (mk_instr base' ofs' ::g k) rs' m
+ /\ eval_offset ofs' = OK ptr'
+ /\ Val.offset_ptr rs'#base' ptr' = Val.offset_ptr rs#base ofs
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ unfold indexed_memory_access; intros.
+ (* destruct Archi.ptr64 eqn:SF. *)
+ assert (Archi.ptr64 = true) as SF; auto.
+- generalize (make_immed64_sound (Ptrofs.to_int64 ofs)); intros EQ.
+ destruct (make_immed64 (Ptrofs.to_int64 ofs)).
++ econstructor; econstructor; econstructor; econstructor; split.
+ apply exec_straight_opt_refl.
+ split; auto. simpl. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+Qed.
+
+
+Lemma indexed_load_access_correct:
+ forall trap chunk (mk_instr: ireg -> offset -> basic) rd m,
+ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) ->
+ forall (base: ireg) ofs k (rs: regset) v,
+ Mem.loadv chunk m (Val.offset_ptr rs#base ofs) = Some v ->
+ exists rs',
+ exec_straight ge (indexed_memory_access mk_instr base ofs ::g k) rs m k rs' m
+ /\ rs'#rd = v
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until m; intros EXEC; intros until v; intros LOAD.
+ exploit indexed_memory_access_correct; eauto.
+ intros (base' & ofs' & rs' & ptr' & A & PtrEq & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite EXEC.
+ unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq. rewrite B, LOAD. eauto. Simpl.
+ split; intros; Simpl. auto.
+Qed.
+
+Lemma indexed_store_access_correct:
+ forall chunk (mk_instr: ireg -> offset -> basic) r1 m,
+ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk rs m r1 base ofs) ->
+ forall (base: ireg) ofs k (rs: regset) m',
+ Mem.storev chunk m (Val.offset_ptr rs#base ofs) (rs#r1) = Some m' ->
+ exists rs',
+ exec_straight ge (indexed_memory_access mk_instr base ofs ::g k) rs m k rs' m'
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ intros until m; intros EXEC; intros until m'; intros STORE.
+ exploit indexed_memory_access_correct. (* instantiate (1 := base). eauto. *)
+ intros (base' & ofs' & rs' & ptr' & A & PtrEq & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eapply A. apply exec_straight_one. rewrite EXEC.
+ unfold exec_store_offset. unfold parexec_store_offset. rewrite PtrEq. rewrite B, C, STORE.
+ eauto.
+ discriminate.
+ auto.
+Qed.
+
+Lemma loadind_correct:
+ forall (base: ireg) ofs ty dst k c (rs: regset) m v,
+ loadind base ofs ty dst k = OK c ->
+ Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#(preg_of dst) = v
+ /\ forall r, r <> PC -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+ intros until v; intros TR LOAD.
+ assert (A: exists mk_instr rd,
+ preg_of dst = IR rd
+ /\ c = indexed_memory_access mk_instr base ofs :: k
+ /\ forall base' ofs' rs',
+ exec_basic_instr ge (mk_instr base' ofs') rs' m =
+ exec_load_offset TRAP (chunk_of_type ty) rs' m rd base' ofs').
+ { unfold loadind in TR.
+ destruct ty, (preg_of dst); inv TR; econstructor; esplit; eauto. }
+ destruct A as (mk_instr & rd & rdEq & B & C). subst c. rewrite rdEq.
+ eapply indexed_load_access_correct; eauto with asmgen.
+Qed.
+
+Lemma storeind_correct:
+ forall (base: ireg) ofs ty src k c (rs: regset) m m',
+ storeind src base ofs ty k = OK c ->
+ Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m'
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ intros until m'; intros TR STORE.
+ assert (A: exists mk_instr rr,
+ preg_of src = IR rr
+ /\ c = indexed_memory_access mk_instr base ofs :: k
+ /\ forall base' ofs' rs',
+ exec_basic_instr ge (mk_instr base' ofs') rs' m =
+ exec_store_offset (chunk_of_type ty) rs' m rr base' ofs').
+ { unfold storeind in TR. destruct ty, (preg_of src); inv TR; econstructor; esplit; eauto. }
+ destruct A as (mk_instr & rr & rsEq & B & C). subst c.
+ eapply indexed_store_access_correct; eauto with asmgen.
+ congruence.
+Qed.
+
+Ltac bsimpl := unfold exec_bblock; simpl.
+
+Lemma Pget_correct:
+ forall (dst: gpreg) (src: preg) k (rs: regset) m,
+ src = RA ->
+ exists rs',
+ exec_straight ge (Pget dst src ::g k) rs m k rs' m
+ /\ rs'#dst = rs#src
+ /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r.
+Proof.
+ intros. econstructor; econstructor; econstructor.
+- rewrite H. bsimpl. auto.
+- Simpl.
+- intros. Simpl.
+Qed.
+
+Lemma Pset_correct:
+ forall (dst: preg) (src: gpreg) k (rs: regset) m,
+ dst = RA ->
+ exists rs',
+ exec_straight ge (Pset dst src ::g k) rs m k rs' m
+ /\ rs'#dst = rs#src
+ /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r.
+Proof.
+ intros. econstructor; econstructor; econstructor; simpl.
+ rewrite H. auto.
+ Simpl.
+ Simpl.
+ intros. rewrite H. Simpl.
+Qed.
+
+Lemma loadind_ptr_correct:
+ forall (base: ireg) ofs (dst: ireg) k (rs: regset) m v,
+ Mem.loadv Mptr m (Val.offset_ptr rs#base ofs) = Some v ->
+ exists rs',
+ exec_straight ge (loadind_ptr base ofs dst ::g k) rs m k rs' m
+ /\ rs'#dst = v
+ /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r.
+Proof.
+ intros. eapply indexed_load_access_correct; eauto with asmgen.
+ intros. unfold Mptr. assert (Archi.ptr64 = true). auto. rewrite H0.
+ instantiate (1 := TRAP).
+ auto.
+Qed.
+
+Lemma storeind_ptr_correct:
+ forall (base: ireg) ofs (src: ireg) k (rs: regset) m m',
+ Mem.storev Mptr m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+ exists rs',
+ exec_straight ge (storeind_ptr src base ofs ::g k) rs m k rs' m'
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ intros. eapply indexed_store_access_correct with (r1 := src); eauto with asmgen.
+ intros. unfold Mptr. assert (Archi.ptr64 = true); auto.
+Qed.
+
+Lemma transl_memory_access_correct:
+ forall mk_instr addr args k c (rs: regset) m v,
+ transl_memory_access mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ exists base ofs rs' ptr,
+ exec_straight_opt (basics_to_code c) rs m (mk_instr base ofs ::g (basics_to_code k)) rs' m
+ /\ eval_offset ofs = OK ptr
+ /\ Val.offset_ptr rs'#base ptr = v
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until v; intros TR EV.
+ unfold transl_memory_access in TR; destruct addr; ArgsInv.
+- (* indexed *)
+ inv EV. exploit indexed_memory_access_correct; eauto. intros (base' & ofs' & rs' & ptr' & EOPT & EVALOFF & VALOFF & RSEQ).
+ eexists; eexists; eexists; eexists. split; try split; try split.
+ eapply EOPT. unfold eval_offset in EVALOFF. inv EVALOFF. eauto.
+ { intros. destruct r; rewrite RSEQ; auto. }
+- (* global *)
+ simpl in EV. inv EV. inv TR. econstructor; econstructor; econstructor; econstructor; split.
+ constructor. apply exec_straight_one. simpl; eauto. auto.
+ split; split; intros; Simpl.
+ assert (Val.lessdef (Val.offset_ptr (Genv.symbol_address ge i i0) Ptrofs.zero) (Genv.symbol_address ge i i0)).
+ { apply Val.offset_ptr_zero. }
+ remember (Genv.symbol_address ge i i0) as symbol.
+ destruct symbol; auto.
+ + contradict Heqsymbol; unfold Genv.symbol_address.
+ destruct (Genv.find_symbol ge i); discriminate.
+ + contradict Heqsymbol; unfold Genv.symbol_address;
+ destruct (Genv.find_symbol ge i); discriminate.
+ + contradict Heqsymbol; unfold Genv.symbol_address;
+ destruct (Genv.find_symbol ge i); discriminate.
+ + contradict Heqsymbol; unfold Genv.symbol_address;
+ destruct (Genv.find_symbol ge i); discriminate.
+ + simpl. rewrite Ptrofs.add_zero; auto.
+- (* stack *)
+ inv TR. inv EV.
+ exploit indexed_memory_access_correct; eauto. intros (base' & ofs' & rs' & ptr' & EOPT & EVALOFF & VALOFF & RSEQ).
+ eexists; eexists; eexists; eexists. split; try split; try split.
+ eapply EOPT. unfold eval_offset in EVALOFF. inv EVALOFF. eauto.
+ { intros. destruct r; rewrite RSEQ; auto. }
+Qed.
+
+Lemma transl_memory_access2_correct:
+ forall mk_instr addr args k c (rs: regset) m v,
+ transl_memory_access2 mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ exists base ro mro mr1 rs',
+ args = mr1 :: mro :: nil
+ /\ ireg_of mro = OK ro
+ /\ exec_straight_opt (basics_to_code c) rs m (mk_instr base ro ::g (basics_to_code k)) rs' m
+ /\ Val.addl rs'#base rs'#ro = v
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until v; intros TR EV.
+ unfold transl_memory_access2 in TR; destruct addr; ArgsInv.
+ inv EV. repeat eexists. eassumption. econstructor; eauto.
+Qed.
+
+Lemma transl_memory_access2XS_correct:
+ forall chunk mk_instr (scale : Z) args k c (rs: regset) m v,
+ transl_memory_access2XS chunk mk_instr scale args k = OK c ->
+ eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
+ exists base ro mro mr1 rs',
+ args = mr1 :: mro :: nil
+ /\ ireg_of mro = OK ro
+ /\ exec_straight_opt (basics_to_code c) rs m (mk_instr base ro ::g (basics_to_code k)) rs' m
+ /\ Val.addl rs'#base (Val.shll rs'#ro (Vint (Int.repr scale))) = v
+ /\ (forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r)
+ /\ scale = (zscale_of_chunk chunk).
+Proof.
+ intros until v; intros TR EV.
+ unfold transl_memory_access2XS in TR; ArgsInv.
+ inv EV. repeat eexists. eassumption. econstructor; eauto.
+ symmetry.
+ apply Z.eqb_eq.
+ assumption.
+Qed.
+
+Lemma transl_load_access2_correct:
+ forall trap chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro v',
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro) ->
+ transl_memory_access2 mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = Some v' ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = v'
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until v'; intros ARGS IREGE INSTR TR EV LOAD.
+ exploit transl_memory_access2_correct; eauto.
+ intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_load_reg. unfold parexec_load_reg. rewrite B, LOAD. reflexivity. Simpl.
+ split; intros; Simpl. auto.
+Qed.
+
+Lemma transl_load_access2_correct_notrap2:
+ forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro,
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro) ->
+ transl_memory_access2 mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = None ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = Vundef
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until ro; intros ARGS IREGE INSTR TR EV LOAD.
+ exploit transl_memory_access2_correct; eauto.
+ intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_load_reg. unfold parexec_load_reg. rewrite B, LOAD. reflexivity. Simpl.
+ split; intros; Simpl. auto.
+Qed.
+
+Lemma transl_load_access2XS_correct:
+ forall trap chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro v',
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro) ->
+ transl_memory_access2XS chunk mk_instr scale args k = OK c ->
+ eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = Some v' ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = v'
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until v'; intros ARGS IREGE INSTR TR EV LOAD.
+ exploit transl_memory_access2XS_correct; eauto.
+ intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_load_regxs. unfold parexec_load_regxs.
+ unfold scale_of_chunk.
+ subst scale.
+ rewrite B, LOAD. reflexivity. Simpl.
+ split. trivial. intros. Simpl.
+Qed.
+
+Lemma transl_load_access2XS_correct_notrap2:
+ forall chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro,
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro) ->
+ transl_memory_access2XS chunk mk_instr scale args k = OK c ->
+ eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = None ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = Vundef
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until ro; intros ARGS IREGE INSTR TR EV LOAD.
+ exploit transl_memory_access2XS_correct; eauto.
+ intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_load_regxs. unfold parexec_load_regxs.
+ unfold scale_of_chunk.
+ subst scale.
+ rewrite B, LOAD. reflexivity. Simpl.
+ split. trivial. intros. Simpl.
+Qed.
+
+Lemma transl_load_access_correct:
+ forall trap chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v v',
+ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) ->
+ transl_memory_access mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = Some v' ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = v'
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until v'; intros INSTR TR EV LOAD.
+ exploit transl_memory_access_correct; eauto.
+ intros (base & ofs & rs' & ptr & A & PtrEq & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one.
+ rewrite INSTR. unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq, B, LOAD. reflexivity. Simpl.
+ split; intros; Simpl. auto.
+Qed.
+
+Lemma transl_load_access_correct_notrap2:
+ forall chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v,
+ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs) ->
+ transl_memory_access mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.loadv chunk m v = None ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#rd = Vundef
+ /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until v; intros INSTR TR EV LOAD.
+ exploit transl_memory_access_correct; eauto.
+ intros (base & ofs & rs' & ptr & A & PtrEq & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one.
+ rewrite INSTR. unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq, B, LOAD. reflexivity. Simpl.
+ split. trivial. intros. Simpl.
+Qed.
+
+Lemma transl_load_memory_access_ok:
+ forall addr trap chunk args dst k c rs a v m,
+ (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) ->
+ transl_load trap chunk addr args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = Some v ->
+ exists mk_instr rd,
+ preg_of dst = IR rd
+ /\ transl_memory_access mk_instr addr args k = OK c
+ /\ forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs.
+Proof.
+ intros until m. intros ADDR TR ? ?.
+ unfold transl_load in TR. destruct addr; try contradiction.
+ - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto).
+ - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+ [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+ | eauto ].
+ - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+ [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+ | eauto ].
+Qed.
+
+Lemma transl_load_memory_access_ok_notrap2:
+ forall addr chunk args dst k c rs a m,
+ (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) ->
+ transl_load NOTRAP chunk addr args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = None ->
+ exists mk_instr rd,
+ preg_of dst = IR rd
+ /\ transl_memory_access mk_instr addr args k = OK c
+ /\ forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs.
+Proof.
+ intros until m. intros ADDR TR ? ?.
+ unfold transl_load in TR. destruct addr; try contradiction.
+ - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto).
+ - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+ [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+ | eauto ].
+ - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split;
+ [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity
+ | eauto ].
+Qed.
+
+Lemma transl_load_memory_access2_ok:
+ forall trap chunk args dst k c rs a v m,
+ transl_load trap chunk Aindexed2 args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = Some v ->
+ exists mk_instr mr0 mro rd ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of dst = IR rd
+ /\ preg_of mro = IR ro
+ /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c
+ /\ forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro.
+Proof.
+ intros until m. intros TR ? ?.
+ unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+ | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity
+ | eauto].
+Qed.
+
+
+Lemma transl_load_memory_access2_ok_notrap2:
+ forall chunk args dst k c rs a m,
+ transl_load NOTRAP chunk Aindexed2 args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = None ->
+ exists mk_instr mr0 mro rd ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of dst = IR rd
+ /\ preg_of mro = IR ro
+ /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c
+ /\ forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro.
+Proof.
+ intros until m. intros TR ? ?.
+ unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+ | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity
+ | eauto].
+Qed.
+
+Lemma transl_load_memory_access2XS_ok:
+ forall scale trap chunk args dst k c rs a v m,
+ transl_load trap chunk (Aindexed2XS scale) args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = Some v ->
+ exists mk_instr mr0 mro rd ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of dst = IR rd
+ /\ preg_of mro = IR ro
+ /\ transl_memory_access2XS chunk mk_instr scale args k = OK c
+ /\ forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro.
+Proof.
+ intros until m. intros TR ? ?.
+ unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+ | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto
+ | eauto].
+Qed.
+
+
+Lemma transl_load_memory_access2XS_ok_notrap2:
+ forall scale chunk args dst k c rs a m,
+ transl_load NOTRAP chunk (Aindexed2XS scale) args dst k = OK c ->
+ eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = None ->
+ exists mk_instr mr0 mro rd ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of dst = IR rd
+ /\ preg_of mro = IR ro
+ /\ transl_memory_access2XS chunk mk_instr scale args k = OK c
+ /\ forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro.
+Proof.
+ intros until m. intros TR ? ?.
+ unfold transl_load in TR. subst. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity
+ | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto
+ | eauto].
+Qed.
+
+Lemma transl_load_correct:
+ forall trap chunk addr args dst k c (rs: regset) m a v,
+ transl_load trap chunk addr args dst k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = Some v ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#(preg_of dst) = v
+ /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+ intros until v; intros TR EV LOAD. destruct addr.
+ - exploit transl_load_memory_access2XS_ok; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+ rewrite rdEq. eapply transl_load_access2XS_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+ - exploit transl_load_memory_access2_ok; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+ rewrite rdEq. eapply transl_load_access2_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+ - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct; eauto with asmgen.
+ - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct; eauto with asmgen.
+ - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct; eauto with asmgen.
+Qed.
+
+Lemma transl_load_correct_notrap2:
+ forall chunk addr args dst k c (rs: regset) m a,
+ transl_load NOTRAP chunk addr args dst k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
+ Mem.loadv chunk m a = None ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
+ /\ rs'#(preg_of dst) = Vundef
+ /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+ intros until a; intros TR EV LOAD. destruct addr.
+ - exploit transl_load_memory_access2XS_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+ rewrite rdEq. eapply transl_load_access2XS_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+ - exploit transl_load_memory_access2_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C).
+ rewrite rdEq. eapply transl_load_access2_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity.
+ - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct_notrap2; eauto with asmgen.
+ - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct_notrap2; eauto with asmgen.
+ - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity).
+ intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq;
+ eapply transl_load_access_correct_notrap2; eauto with asmgen.
+Qed.
+
+Lemma transl_store_access2_correct:
+ forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c r1 (rs: regset) m v mr1 mro ro m',
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_store_reg chunk rs m r1 base ro) ->
+ transl_memory_access2 mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.storev chunk m v rs#r1 = Some m' ->
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until m'; intros ARGS IREG INSTR TR EV STORE NOT31.
+ exploit transl_memory_access2_correct; eauto.
+ intros (base & ro2 & mr2 & mro2 & rs' & ARGSS & IREGG & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mro2 mr2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_store_reg. unfold parexec_store_reg. rewrite B. rewrite C; try discriminate. rewrite STORE. auto.
+ intro. inv H. contradiction.
+ auto.
+Qed.
+
+Lemma transl_store_access2XS_correct:
+ forall chunk (mk_instr: ireg -> ireg -> basic) scale args k c r1 (rs: regset) m v mr1 mro ro m',
+ args = mr1 :: mro :: nil ->
+ ireg_of mro = OK ro ->
+ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_store_regxs chunk rs m r1 base ro) ->
+ transl_memory_access2XS chunk mk_instr scale args k = OK c ->
+ eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v ->
+ Mem.storev chunk m v rs#r1 = Some m' ->
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until m'; intros ARGS IREG INSTR TR EV STORE NOT31.
+ exploit transl_memory_access2XS_correct; eauto.
+ intros (base & ro2 & mr2 & mro2 & rs' & ARGSS & IREGG & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mro2 mr2. clear ARGS.
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2.
+ rewrite INSTR. unfold exec_store_regxs. unfold parexec_store_regxs.
+ unfold scale_of_chunk.
+ subst scale.
+ rewrite B. rewrite C; try discriminate. rewrite STORE. auto.
+ intro. inv H. contradiction.
+ auto.
+Qed.
+
+Lemma transl_store_access_correct:
+ forall chunk (mk_instr: ireg -> offset -> basic) addr args k c r1 (rs: regset) m v m',
+ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk rs m r1 base ofs) ->
+ transl_memory_access mk_instr addr args k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v ->
+ Mem.storev chunk m v rs#r1 = Some m' ->
+ r1 <> RTMP ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until m'; intros INSTR TR EV STORE NOT31.
+ exploit transl_memory_access_correct; eauto.
+ intros (base & ofs & rs' & ptr & A & PtrEq & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A. apply exec_straight_one.
+ rewrite INSTR. unfold exec_store_offset. unfold parexec_store_offset. rewrite PtrEq, B. rewrite C; try discriminate. rewrite STORE. auto.
+ intro. inv H. contradiction.
+ auto.
+Qed.
+
+
+Remark exec_store_offset_8_sign rs m x base ofs:
+ exec_store_offset Mint8unsigned rs m x base ofs = exec_store_offset Mint8signed rs m x base ofs.
+Proof.
+ unfold exec_store_offset. unfold parexec_store_offset. unfold eval_offset; auto. unfold Mem.storev.
+ destruct (Val.offset_ptr _ _); auto. erewrite <- Mem.store_signed_unsigned_8. reflexivity.
+Qed.
+
+Remark exec_store_offset_16_sign rs m x base ofs:
+ exec_store_offset Mint16unsigned rs m x base ofs = exec_store_offset Mint16signed rs m x base ofs.
+Proof.
+ unfold exec_store_offset. unfold parexec_store_offset. unfold eval_offset; auto. unfold Mem.storev.
+ destruct (Val.offset_ptr _ _); auto. erewrite <- Mem.store_signed_unsigned_16. reflexivity.
+Qed.
+
+Lemma transl_store_memory_access_ok:
+ forall addr chunk args src k c rs a m m',
+ (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) ->
+ transl_store chunk addr args src k = OK c ->
+ eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
+ Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
+ exists mk_instr chunk' rr,
+ preg_of src = IR rr
+ /\ transl_memory_access mk_instr addr args k = OK c
+ /\ (forall base ofs rs,
+ exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk' rs m rr base ofs)
+ /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src).
+Proof.
+ intros until m'. intros ? TR ? ?.
+ unfold transl_store in TR. destruct addr; try contradiction.
+ - monadInv TR. destruct chunk. all:
+ ArgsInv; eexists; eexists; eexists; split; try split; [
+ repeat (destruct args; try discriminate); eassumption
+ | split; eauto; intros; simpl; try reflexivity].
+ eapply exec_store_offset_8_sign.
+ eapply exec_store_offset_16_sign.
+ - monadInv TR. destruct chunk. all:
+ ArgsInv; eexists; eexists; eexists; split; try split;
+ [ repeat (destruct args; try discriminate); instantiate (1 := PStoreRRO _ x); simpl; eassumption
+ | split; eauto; intros; simpl; try reflexivity].
+ eapply exec_store_offset_8_sign.
+ eapply exec_store_offset_16_sign.
+ - monadInv TR. destruct chunk. all:
+ ArgsInv; eexists; eexists; eexists; split; try split;
+ [ repeat (destruct args; try discriminate); instantiate (1 := PStoreRRO _ x); simpl; eassumption
+ | split; eauto; intros; simpl; try reflexivity].
+ eapply exec_store_offset_8_sign.
+ eapply exec_store_offset_16_sign.
+Qed.
+
+Remark exec_store_reg_8_sign rs m x base ofs:
+ exec_store_reg Mint8unsigned rs m x base ofs = exec_store_reg Mint8signed rs m x base ofs.
+Proof.
+ unfold exec_store_reg. unfold parexec_store_reg. unfold Mem.storev. destruct (Val.addl _ _); auto.
+ erewrite <- Mem.store_signed_unsigned_8. reflexivity.
+Qed.
+
+Remark exec_store_reg_16_sign rs m x base ofs:
+ exec_store_reg Mint16unsigned rs m x base ofs = exec_store_reg Mint16signed rs m x base ofs.
+Proof.
+ unfold exec_store_reg. unfold parexec_store_reg. unfold Mem.storev. destruct (Val.addl _ _); auto.
+ erewrite <- Mem.store_signed_unsigned_16. reflexivity.
+Qed.
+
+Remark exec_store_regxs_8_sign rs m x base ofs:
+ exec_store_regxs Mint8unsigned rs m x base ofs = exec_store_regxs Mint8signed rs m x base ofs.
+Proof.
+ unfold exec_store_regxs. unfold parexec_store_regxs. unfold Mem.storev. destruct (Val.addl _ _); auto.
+ erewrite <- Mem.store_signed_unsigned_8. reflexivity.
+Qed.
+
+Remark exec_store_regxs_16_sign rs m x base ofs:
+ exec_store_regxs Mint16unsigned rs m x base ofs = exec_store_regxs Mint16signed rs m x base ofs.
+Proof.
+ unfold exec_store_regxs. unfold parexec_store_regxs. unfold Mem.storev. destruct (Val.addl _ _); auto.
+ erewrite <- Mem.store_signed_unsigned_16. reflexivity.
+Qed.
+
+Lemma transl_store_memory_access2_ok:
+ forall addr chunk args src k c rs a m m',
+ addr = Aindexed2 ->
+ transl_store chunk addr args src k = OK c ->
+ eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a ->
+ Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
+ exists mk_instr chunk' rr mr0 mro ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of mro = IR ro
+ /\ preg_of src = IR rr
+ /\ transl_memory_access2 mk_instr addr args k = OK c
+ /\ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_store_reg chunk' rs m rr base ro)
+ /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src).
+Proof.
+ intros until m'. intros ? TR ? ?.
+ unfold transl_store in TR. subst addr. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ ArgsInv; reflexivity
+ | rewrite EQ1; rewrite EQ0; instantiate (1 := (PStoreRRR _ x)); simpl; reflexivity
+ | eauto ].
+ - simpl. intros. eapply exec_store_reg_8_sign.
+ - simpl. intros. eapply exec_store_reg_16_sign.
+Qed.
+
+Lemma transl_store_memory_access2XS_ok:
+ forall scale chunk args src k c rs a m m',
+ transl_store chunk (Aindexed2XS scale) args src k = OK c ->
+ eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a ->
+ Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
+ exists mk_instr chunk' rr mr0 mro ro,
+ args = mr0 :: mro :: nil
+ /\ preg_of mro = IR ro
+ /\ preg_of src = IR rr
+ /\ transl_memory_access2XS chunk' mk_instr scale args k = OK c
+ /\ (forall base rs,
+ exec_basic_instr ge (mk_instr base ro) rs m = exec_store_regxs chunk' rs m rr base ro)
+ /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src).
+Proof.
+ intros until m'. intros TR ? ?.
+ unfold transl_store in TR. monadInv TR. destruct chunk. all:
+ unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists;
+ [ ArgsInv; reflexivity
+ | rewrite EQ1; rewrite EQ0; instantiate (1 := (PStoreRRRXS _ x)); simpl; rewrite Heqb; eauto
+ | eauto ].
+ - simpl. intros. eapply exec_store_regxs_8_sign.
+ - simpl. intros. eapply exec_store_regxs_16_sign.
+Qed.
+
+Lemma transl_store_correct:
+ forall chunk addr args src k c (rs: regset) m a m',
+ transl_store chunk addr args src k = OK c ->
+ eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a ->
+ Mem.storev chunk m a rs#(preg_of src) = Some m' ->
+ exists rs',
+ exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m'
+ /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r.
+Proof.
+ intros until m'; intros TR EV STORE. destruct addr.
+ - exploit transl_store_memory_access2XS_ok; eauto. intros (mk_instr & chunk' & rr & mr0 & mro & ro & argsEq & roEq & srcEq & A & B & C).
+ eapply transl_store_access2XS_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. congruence.
+ destruct rr; try discriminate. destruct src; simpl in srcEq; try discriminate.
+ - exploit transl_store_memory_access2_ok; eauto. intros (mk_instr & chunk' & rr & mr0 & mro & ro & argsEq & roEq & srcEq & A & B & C).
+ eapply transl_store_access2_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. congruence.
+ destruct rr; try discriminate. destruct src; simpl in srcEq; try discriminate.
+ - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intro A;
+ destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D);
+ rewrite D in STORE; clear D;
+ eapply transl_store_access_correct; eauto with asmgen; try congruence;
+ destruct rr; try discriminate; destruct src; try discriminate.
+ - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intro A;
+ destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D);
+ rewrite D in STORE; clear D;
+ eapply transl_store_access_correct; eauto with asmgen; try congruence;
+ destruct rr; try discriminate; destruct src; try discriminate.
+ - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity).
+ intro A;
+ destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D);
+ rewrite D in STORE; clear D;
+ eapply transl_store_access_correct; eauto with asmgen; try congruence;
+ destruct rr; try discriminate; destruct src; try discriminate.
+Qed.
+
+Lemma make_epilogue_correct:
+ forall ge0 f m stk soff cs m' ms rs k tm,
+ Mach.load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+ Mach.load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+ Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+ agree ms (Vptr stk soff) rs ->
+ Mem.extends m tm ->
+ match_stack ge0 cs ->
+ exists rs', exists tm',
+ exec_straight ge (make_epilogue f k) rs tm k rs' tm'
+ /\ agree ms (parent_sp cs) rs'
+ /\ Mem.extends m' tm'
+ /\ rs'#RA = parent_ra cs
+ /\ rs'#SP = parent_sp cs
+ /\ (forall r, r <> PC -> r <> RA -> r <> SP -> r <> RTMP -> r <> GPRA -> rs'#r = rs#r).
+Proof.
+ intros until tm; intros LP LRA FREE AG MEXT MCS.
+ exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+ exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+ exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+ exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+ exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+ unfold make_epilogue.
+ rewrite chunk_of_Tptr in *.
+
+ exploit ((loadind_ptr_correct SP (fn_retaddr_ofs f) GPRA (Pset RA GPRA ::g Pfreeframe (fn_stacksize f) (fn_link_ofs f) ::g k))
+ rs tm).
+ - rewrite <- (sp_val _ _ rs AG). simpl. eexact LRA'.
+ - intros (rs1 & A1 & B1 & C1).
+ assert (agree ms (Vptr stk soff) rs1) as AG1.
+ + destruct AG.
+ apply mkagree; auto.
+ rewrite C1; discriminate || auto.
+ intro. rewrite C1; auto; destruct r; simpl; try discriminate.
+ + exploit (Pset_correct RA GPRA (Pfreeframe (fn_stacksize f) (fn_link_ofs f) ::g k) rs1 tm). auto.
+ intros (rs2 & A2 & B2 & C2).
+ econstructor; econstructor; split.
+ * eapply exec_straight_trans.
+ { eexact A1. }
+ { eapply exec_straight_trans.
+ { eapply A2. }
+ { apply exec_straight_one. simpl.
+ rewrite (C2 SP) by auto with asmgen. rewrite <- (sp_val _ _ rs1 AG1). simpl; rewrite LP'.
+ rewrite FREE'. eauto. } }
+ * split. apply agree_set_other; auto with asmgen.
+ apply agree_change_sp with (Vptr stk soff).
+ apply agree_exten with rs; auto. intros; rewrite C2; auto with asmgen.
+ eapply parent_sp_def; eauto.
+ split. auto.
+ split. Simpl. rewrite B2. auto.
+ split. Simpl.
+ intros. Simpl.
+ rewrite C2; auto.
+Qed.
+
+End CONSTRUCTORS.
+
+
diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v
new file mode 100644
index 00000000..a732d29b
--- /dev/null
+++ b/kvx/Asmblockprops.v
@@ -0,0 +1,357 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Common definition and proofs on Asmblock required by various modules *)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Values.
+Require Import Asmblock.
+Require Import Axioms.
+
+Definition bblock_simu (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+ forall rs m,
+ exec_bblock ge f bb rs m <> Stuck ->
+ exec_bblock ge f bb rs m = exec_bblock ge f bb' rs m.
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma preg_of_data:
+ forall r, data_preg (preg_of r) = true.
+Proof.
+ intros. destruct r; reflexivity.
+Qed.
+Hint Resolve preg_of_data: asmgen.
+
+Lemma data_diff:
+ forall r r',
+ data_preg r = true -> data_preg r' = false -> r <> r'.
+Proof.
+ congruence.
+Qed.
+Hint Resolve data_diff: asmgen.
+
+Lemma preg_of_not_PC:
+ forall r, preg_of r <> PC.
+Proof.
+ intros. apply data_diff; auto with asmgen.
+Qed.
+
+Lemma preg_of_not_SP:
+ forall r, preg_of r <> SP.
+Proof.
+ intros. unfold preg_of; destruct r; cbn; congruence.
+Qed.
+
+Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
+
+
+Lemma nextblock_pc:
+ forall b rs, (nextblock b rs)#PC = Val.offset_ptr rs#PC (Ptrofs.repr (size b)).
+Proof.
+ intros. apply Pregmap.gss.
+Qed.
+
+Lemma nextblock_inv:
+ forall b r rs, r <> PC -> (nextblock b rs)#r = rs#r.
+Proof.
+ intros. unfold nextblock. apply Pregmap.gso. red; intro; subst. auto.
+Qed.
+
+Lemma nextblock_inv1:
+ forall b r rs, data_preg r = true -> (nextblock b rs)#r = rs#r.
+Proof.
+ intros. apply nextblock_inv. red; intro; subst; discriminate.
+Qed.
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(* For Asmblockgenproof0 *)
+
+Theorem exec_basic_instr_pc:
+ forall ge b rs1 m1 rs2 m2,
+ exec_basic_instr ge b rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ intros. destruct b; try destruct i; try destruct i.
+ all: try (inv H; Simpl).
+ 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail.
+
+ 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail.
+
+ { (* PLoadQRRO *)
+ unfold parexec_load_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ inv H1. Simpl. }
+ { (* PLoadORRO *)
+ unfold parexec_load_o_offset in H1.
+ destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ inv H1. Simpl. }
+ 1-8: unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]; fail.
+ 1-8: unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+ 1-8: unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+
+ { (* PStoreQRRO *)
+ unfold parexec_store_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ unfold eval_offset in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity. }
+ { (* PStoreORRO *)
+ unfold parexec_store_o_offset in H1.
+ destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+ unfold eval_offset in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity. }
+ - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
+ - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
+ destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
+ - destruct rs; try discriminate. inv H1. Simpl.
+ - destruct rd; try discriminate. inv H1; Simpl.
+ - reflexivity.
+Qed.
+
+(* For PostpassSchedulingproof *)
+
+Lemma regset_double_set:
+ forall r1 r2 (rs: regset) v1 v2,
+ r1 <> r2 ->
+ (rs # r1 <- v1 # r2 <- v2) = (rs # r2 <- v2 # r1 <- v1).
+Proof.
+ intros. apply functional_extensionality. intros r. destruct (preg_eq r r1).
+ - subst. rewrite Pregmap.gso; auto. repeat (rewrite Pregmap.gss). auto.
+ - destruct (preg_eq r r2).
+ + subst. rewrite Pregmap.gss. rewrite Pregmap.gso; auto. rewrite Pregmap.gss. auto.
+ + repeat (rewrite Pregmap.gso; auto).
+Qed.
+
+Lemma next_eq:
+ forall (rs rs': regset) m m',
+ rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+ intros; apply f_equal2; auto.
+Qed.
+
+Lemma exec_load_offset_pc_var:
+ forall trap t rs m rd ra ofs rs' m' v,
+ exec_load_offset trap t rs m rd ra ofs = Next rs' m' ->
+ exec_load_offset trap t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_offset in *. unfold parexec_load_offset in *. rewrite Pregmap.gso; try discriminate. destruct (eval_offset ofs); try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_reg_pc_var:
+ forall trap t rs m rd ra ro rs' m' v,
+ exec_load_reg trap t rs m rd ra ro = Next rs' m' ->
+ exec_load_reg trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_reg in *. unfold parexec_load_reg in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_regxs_pc_var:
+ forall trap t rs m rd ra ro rs' m' v,
+ exec_load_regxs trap t rs m rd ra ro = Next rs' m' ->
+ exec_load_regxs trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_regxs in *. unfold parexec_load_regxs in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_offset_q_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_load_q_offset rs m rd ra ofs = Next rs' m' ->
+ exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *.
+ destruct (gpreg_q_expand rd) as [rd0 rd1].
+ (* destruct (ireg_eq rd0 ra); try discriminate. *)
+ rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ inv H.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ inv H1. f_equal.
+ rewrite (regset_double_set PC rd0) by discriminate.
+ rewrite (regset_double_set PC rd1) by discriminate.
+ reflexivity.
+Qed.
+
+Lemma exec_load_offset_o_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_load_o_offset rs m rd ra ofs = Next rs' m' ->
+ exec_load_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_o_offset in *. unfold parexec_load_o_offset in *.
+ destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3].
+(*
+ destruct (ireg_eq rd0 ra); try discriminate.
+ destruct (ireg_eq rd1 ra); try discriminate.
+ destruct (ireg_eq rd2 ra); try discriminate.
+*)
+ rewrite Pregmap.gso; try discriminate.
+ cbn in *.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ rewrite (regset_double_set PC rd0) by discriminate.
+ rewrite (regset_double_set PC rd1) by discriminate.
+ rewrite (regset_double_set PC rd2) by discriminate.
+ rewrite (regset_double_set PC rd3) by discriminate.
+ inv H.
+ trivial.
+Qed.
+
+Lemma exec_store_offset_pc_var:
+ forall t rs m rd ra ofs rs' m' v,
+ exec_store_offset t rs m rd ra ofs = Next rs' m' ->
+ exec_store_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_offset in *. unfold parexec_store_offset in *. rewrite Pregmap.gso; try discriminate.
+ destruct (eval_offset ofs); try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Lemma exec_store_q_offset_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_store_q_offset rs m rd ra ofs = Next rs' m' ->
+ exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
+ cbn in *.
+ destruct (gpreg_q_expand _) as [s0 s1].
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ inv H. apply next_eq; auto.
+Qed.
+
+Lemma exec_store_o_offset_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_store_o_offset rs m rd ra ofs = Next rs' m' ->
+ exec_store_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros.
+ unfold exec_store_o_offset in *. unfold parexec_store_o_offset in *.
+ destruct (gpreg_o_expand _) as [[[s0 s1] s2] s3].
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ inv H.
+ trivial.
+Qed.
+
+Lemma exec_store_reg_pc_var:
+ forall t rs m rd ra ro rs' m' v,
+ exec_store_reg t rs m rd ra ro = Next rs' m' ->
+ exec_store_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_reg in *. unfold parexec_store_reg in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Lemma exec_store_regxs_pc_var:
+ forall t rs m rd ra ro rs' m' v,
+ exec_store_regxs t rs m rd ra ro = Next rs' m' ->
+ exec_store_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_regxs in *. unfold parexec_store_regxs in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Theorem exec_basic_instr_pc_var:
+ forall ge i rs m rs' m' v,
+ exec_basic_instr ge i rs m = Next rs' m' ->
+ exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
+Proof.
+ intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
+ - unfold exec_arith_instr in *. destruct i; destruct i.
+ all: try (exploreInst; inv H; apply next_eq; auto;
+ apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
+(*
+ (* Some cases treated seperately because exploreInst destructs too much *)
+ all: try (inv H; apply next_eq; auto; apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). *)
+ - destruct i.
+ + exploreInst; apply exec_load_offset_pc_var; auto.
+ + exploreInst; apply exec_load_reg_pc_var; auto.
+ + exploreInst; apply exec_load_regxs_pc_var; auto.
+ + apply exec_load_offset_q_pc_var; auto.
+ + apply exec_load_offset_o_pc_var; auto.
+ - destruct i.
+ + exploreInst; apply exec_store_offset_pc_var; auto.
+ + exploreInst; apply exec_store_reg_pc_var; auto.
+ + exploreInst; apply exec_store_regxs_pc_var; auto.
+ + apply exec_store_q_offset_pc_var; auto.
+ + apply exec_store_o_offset_pc_var; auto.
+ - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate).
+ destruct (Mem.storev _ _ _ _); try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros.
+ rewrite (regset_double_set GPR32 PC); try discriminate.
+ rewrite (regset_double_set GPR12 PC); try discriminate.
+ rewrite (regset_double_set FP PC); try discriminate. reflexivity.
+ - repeat (rewrite Pregmap.gso; try discriminate).
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (rs GPR12); try discriminate.
+ destruct (Mem.free _ _ _ _); try discriminate.
+ inv H. apply next_eq; auto.
+ rewrite (regset_double_set GPR32 PC).
+ rewrite (regset_double_set GPR12 PC). reflexivity.
+ all: discriminate.
+ - destruct rs0; try discriminate. inv H. apply next_eq; auto.
+ repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+ - destruct rd; try discriminate. inv H. apply next_eq; auto.
+ repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+ - inv H. apply next_eq; auto.
+Qed.
+
+
diff --git a/kvx/Asmexpand.ml b/kvx/Asmexpand.ml
new file mode 100644
index 00000000..f84cf22d
--- /dev/null
+++ b/kvx/Asmexpand.ml
@@ -0,0 +1,642 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+ of the RISC-V assembly code. *)
+
+open Asm
+open Asmexpandaux
+open AST
+open Camlcoq
+
+exception Error of string
+
+(* Useful constants and helper functions *)
+
+let _0 = Integers.Int.zero
+let _1 = Integers.Int.one
+let _2 = coqint_of_camlint 2l
+let _4 = coqint_of_camlint 4l
+let _8 = coqint_of_camlint 8l
+let _16 = coqint_of_camlint 16l
+let _m1 = coqint_of_camlint (-1l)
+
+let wordsize = if Archi.ptr64 then 8 else 4
+
+let align n a = (n + a - 1) land (-a)
+
+let stack_pointer = Asmvliw.GPR12
+
+(* Emit instruction sequences that set or offset a register by a constant. *)
+(*
+ let expand_loadimm32 dst n =
+ List.iter emit (Asmgen.loadimm32 dst n [])
+*)
+let expand_addptrofs dst src n =
+ List.iter emit (basic_to_instruction (Asmvliw.PArith (Asmblockgen.addptrofs dst src n)) :: [])
+let expand_storeind_ptr src base ofs =
+ List.iter emit (basic_to_instruction (Asmblockgen.storeind_ptr src base ofs) :: [])
+let expand_loadind_ptr dst base ofs =
+ List.iter emit (basic_to_instruction (Asmblockgen.loadind_ptr base ofs dst) :: [])
+
+(* Built-ins. They come in two flavors:
+ - annotation statements: take their arguments in registers or stack
+ locations; generate no code;
+ - inlined by the compiler: take their arguments in arbitrary
+ registers.
+*)
+
+(* Fix-up code around calls to variadic functions. Floating-point arguments
+ residing in FP registers need to be moved to integer registers. *)
+
+let int_param_regs = let open Asmvliw in [| GPR0; GPR1; GPR2; GPR3; GPR4; GPR5; GPR6; GPR7; GPR8; GPR9; GPR10; GPR11 |]
+(* let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |] *)
+let float_param_regs = [| |]
+
+let fixup_variadic_call pos tyl = assert false
+(*if pos < 8 then
+ match tyl with
+ | [] ->
+ ()
+ | (Tint | Tany32) :: tyl ->
+ fixup_variadic_call (pos + 1) tyl
+ | Tsingle :: tyl ->
+ let rs =float_param_regs.(pos)
+ and rd = int_param_regs.(pos) in
+ emit (Pfmvxs(rd, rs));
+ fixup_variadic_call (pos + 1) tyl
+ | Tlong :: tyl ->
+ let pos' = if Archi.ptr64 then pos + 1 else align pos 2 + 2 in
+ fixup_variadic_call pos' tyl
+ | (Tfloat | Tany64) :: tyl ->
+ if Archi.ptr64 then begin
+ let rs = float_param_regs.(pos)
+ and rd = int_param_regs.(pos) in
+ emit (Pfmvxd(rd, rs));
+ fixup_variadic_call (pos + 1) tyl
+ end else begin
+ let pos = align pos 2 in
+ if pos < 8 then begin
+ let rs = float_param_regs.(pos)
+ and rd1 = int_param_regs.(pos)
+ and rd2 = int_param_regs.(pos + 1) in
+ emit (Paddiw(X2, X X2, Integers.Int.neg _16));
+ emit (Pfsd(rs, X2, Ofsimm _0));
+ emit (Plw(rd1, X2, Ofsimm _0));
+ emit (Plw(rd2, X2, Ofsimm _4));
+ emit (Paddiw(X2, X X2, _16));
+ fixup_variadic_call (pos + 2) tyl
+ end
+ end
+*)
+
+let fixup_call sg =
+ if sg.sig_cc.cc_vararg <> None then fixup_variadic_call 0 sg.sig_args
+
+(* Handling of annotations *)
+
+let expand_annot_val kind txt targ args res =
+ emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
+ match args, res with
+ | [BA(Asmvliw.IR src)], BR(Asmvliw.IR dst) ->
+ if dst <> src then emit (Pmv (dst, src))
+ | _, _ ->
+ raise (Error "ill-formed __builtin_annot_val")
+
+(* Handling of memcpy *)
+
+let emit_move dst r =
+ if dst <> r
+ then emit (Paddil(dst, r, Z.zero));;
+
+(* FIXME DMonniaux this is probably not complete *)
+let get_builtin_arg dst arg =
+ match arg with
+ | BA (Asmvliw.IR reg) -> emit_move dst reg
+ | BA (ireg) -> failwith "get_builtin_arg: BA_int(not ireg)"
+ | BA_int _ -> failwith "get_builtin_arg: BA_int"
+ | BA_long _ -> failwith "get_builtin_arg: BA_long"
+ | BA_float _ -> failwith "get_builtin_arg: BA_float"
+ | BA_single _ -> failwith "get_builtin_arg: BA_single"
+ | BA_loadstack _ -> failwith "get_builtin_arg: BA_loadstack"
+ | BA_addrstack ofs -> emit (Paddil(dst, stack_pointer, ofs))
+ | BA_loadglobal _ -> failwith "get_builtin_arg: BA_loadglobal"
+ | BA_addrglobal _ -> failwith "get_builtin_arg: BA_addrglobal"
+ | BA_splitlong _ -> failwith "get_builtin_arg: BA_splitlong"
+ | BA_addptr _ -> failwith "get_builtin_arg: BA_addptr";;
+
+let smart_memcpy = true
+
+(* FIXME DMonniaux this is really suboptimal (byte per byte) *)
+let expand_builtin_memcpy_big sz al src dst =
+ assert (sz > Z.zero);
+ let dstptr = Asmvliw.GPR62
+ and srcptr = Asmvliw.GPR63
+ and tmpbuf = Asmvliw.GPR61
+ and tmpbuf2 = Asmvliw.R60R61
+ and caml_sz = camlint64_of_coqint sz in
+ get_builtin_arg dstptr dst;
+ get_builtin_arg srcptr src;
+ let caml_sz_div16 = Int64.shift_right caml_sz 4
+ and sixteen = coqint_of_camlint64 16L in
+ if smart_memcpy
+ then
+ let remaining = ref caml_sz
+ and offset = ref 0L in
+ let cpy buf size load store =
+ (if !remaining >= size
+ then
+ let zofs = coqint_of_camlint64 !offset in
+ begin
+ emit Psemi;
+ emit (load buf srcptr (AOff zofs));
+ emit Psemi;
+ emit (store buf dstptr (AOff zofs));
+ remaining := Int64.sub !remaining size;
+ offset := Int64.add !offset size
+ end) in
+ begin
+ (if caml_sz_div16 >= 2L
+ then
+ begin
+ emit (Pmake (tmpbuf, (coqint_of_camlint64 caml_sz_div16)));
+ emit Psemi;
+ let lbl = new_label() in
+ emit (Ploopdo (tmpbuf, lbl));
+ emit Psemi;
+ emit (Plq (tmpbuf2, srcptr, AOff Z.zero));
+ emit (Paddil (srcptr, srcptr, sixteen));
+ emit Psemi;
+ emit (Psq (tmpbuf2, dstptr, AOff Z.zero));
+ emit (Paddil (dstptr, dstptr, sixteen));
+ emit Psemi;
+ emit (Plabel lbl);
+ remaining := Int64.sub !remaining (Int64.shift_left caml_sz_div16 4)
+ end);
+
+ cpy tmpbuf2 16L (fun x y z -> Plq(x, y, z)) (fun x y z -> Psq(x, y, z));
+ cpy tmpbuf 8L (fun x y z -> Pld(TRAP, x, y, z)) (fun x y z -> Psd(x, y, z));
+ cpy tmpbuf 4L (fun x y z -> Plw(TRAP, x, y, z)) (fun x y z -> Psw(x, y, z));
+ cpy tmpbuf 2L (fun x y z -> Plh(TRAP, x, y, z)) (fun x y z -> Psh(x, y, z));
+ cpy tmpbuf 1L (fun x y z -> Plb(TRAP, x, y, z)) (fun x y z -> Psb(x, y, z));
+ assert (!remaining = 0L)
+ end
+ else
+ begin
+ emit (Pmake (tmpbuf, sz));
+ emit Psemi;
+ let lbl = new_label() in
+ emit (Ploopdo (tmpbuf, lbl));
+ emit Psemi;
+ emit (Plb (TRAP, tmpbuf, srcptr, AOff Z.zero));
+ emit (Paddil (srcptr, srcptr, Z.one));
+ emit Psemi;
+ emit (Psb (tmpbuf, dstptr, AOff Z.zero));
+ emit (Paddil (dstptr, dstptr, Z.one));
+ emit Psemi;
+ emit (Plabel lbl);
+ end;;
+
+let expand_builtin_memcpy sz al args =
+ match args with
+ | [dst; src] ->
+ expand_builtin_memcpy_big sz al src dst
+ | _ -> assert false;;
+
+(* Handling of volatile reads and writes *)
+(* FIXME probably need to check for size of displacement *)
+let expand_builtin_vload_common chunk base ofs res =
+ match chunk, res with
+ | Mint8unsigned, BR(Asmvliw.IR res) ->
+ emit (Plbu (TRAP, res, base, AOff ofs))
+ | Mint8signed, BR(Asmvliw.IR res) ->
+ emit (Plb (TRAP, res, base, AOff ofs))
+ | Mint16unsigned, BR(Asmvliw.IR res) ->
+ emit (Plhu (TRAP, res, base, AOff ofs))
+ | Mint16signed, BR(Asmvliw.IR res) ->
+ emit (Plh (TRAP, res, base, AOff ofs))
+ | Mint32, BR(Asmvliw.IR res) ->
+ emit (Plw (TRAP, res, base, AOff ofs))
+ | Mint64, BR(Asmvliw.IR res) ->
+ emit (Pld (TRAP, res, base, AOff ofs))
+ | Mint64, BR_splitlong(BR(Asmvliw.IR res1), BR(Asmvliw.IR res2)) ->
+ let ofs' = Integers.Ptrofs.add ofs _4 in
+ if base <> res2 then begin
+ emit (Plw (TRAP, res2, base, AOff ofs));
+ emit (Plw (TRAP, res1, base, AOff ofs'))
+ end else begin
+ emit (Plw (TRAP, res1, base, AOff ofs'));
+ emit (Plw (TRAP, res2, base, AOff ofs))
+ end
+ | Mfloat32, BR(Asmvliw.IR res) ->
+ emit (Pfls (TRAP, res, base, AOff ofs))
+ | Mfloat64, BR(Asmvliw.IR res) ->
+ emit (Pfld (TRAP, res, base, AOff ofs))
+ | _ ->
+ assert false
+
+let expand_builtin_vload chunk args res =
+ match args with
+ | [BA(Asmvliw.IR addr)] ->
+ expand_builtin_vload_common chunk addr _0 res
+ | [BA_addrstack ofs] ->
+ expand_builtin_vload_common chunk stack_pointer ofs res
+ | [BA_addptr(BA(Asmvliw.IR addr), (BA_int ofs | BA_long ofs))] ->
+ expand_builtin_vload_common chunk addr ofs res
+ | _ ->
+ assert false
+
+
+let expand_builtin_vstore_common chunk base ofs src =
+ match chunk, src with
+ | (Mint8signed | Mint8unsigned), BA(Asmvliw.IR src) ->
+ emit (Psb (src, base, AOff ofs))
+ | (Mint16signed | Mint16unsigned), BA(Asmvliw.IR src) ->
+ emit (Psh (src, base, AOff ofs))
+ | Mint32, BA(Asmvliw.IR src) ->
+ emit (Psw (src, base, AOff ofs))
+ | Mint64, BA(Asmvliw.IR src) ->
+ emit (Psd (src, base, AOff ofs))
+ | Mint64, BA_splitlong(BA(Asmvliw.IR src1), BA(Asmvliw.IR src2)) ->
+ let ofs' = Integers.Ptrofs.add ofs _4 in
+ emit (Psw (src2, base, AOff ofs));
+ emit (Psw (src1, base, AOff ofs'))
+ | Mfloat32, BA(Asmvliw.IR src) ->
+ emit (Pfss (src, base, AOff ofs))
+ | Mfloat64, BA(Asmvliw.IR src) ->
+ emit (Pfsd (src, base, AOff ofs))
+ | _ ->
+ assert false
+
+let expand_builtin_vstore chunk args =
+ match args with
+ | [BA(Asmvliw.IR addr); src] ->
+ expand_builtin_vstore_common chunk addr _0 src
+ | [BA_addrstack ofs; src] ->
+ expand_builtin_vstore_common chunk stack_pointer ofs src
+ | [BA_addptr(BA(Asmvliw.IR addr), (BA_int ofs | BA_long ofs)); src] ->
+ expand_builtin_vstore_common chunk addr ofs src
+ | _ ->
+ assert false
+
+(* Handling of varargs *)
+
+(* Size in words of the arguments to a function. This includes both
+ arguments passed in registers and arguments passed on stack. *)
+
+let rec args_size sz = function
+ | [] -> sz
+ | (Tint | Tsingle | Tany32) :: l ->
+ args_size (sz + 1) l
+ | (Tlong | Tfloat | Tany64) :: l ->
+ args_size (if Archi.ptr64 then sz + 1 else align sz 2 + 2) l
+
+let arguments_size sg =
+ args_size 0 sg.sig_args
+
+let _nbregargs_ = 12
+let _alignment_ = 8
+
+let save_arguments first_reg base_ofs = let open Asmvliw in
+ for i = first_reg to (_nbregargs_ - 1) do begin
+ expand_storeind_ptr
+ int_param_regs.(i)
+ GPR12
+ (Integers.Ptrofs.repr (Z.add base_ofs (Z.of_uint ((i - first_reg) * wordsize))));
+ emit Psemi
+ end done
+
+let vararg_start_ofs : Z.t option ref = ref None
+
+let expand_builtin_va_start r = (* assert false *)
+match !vararg_start_ofs with
+ | None ->
+ invalid_arg "Fatal error: va_start used in non-vararg function"
+ | Some ofs ->
+ expand_addptrofs Asmvliw.GPR32 stack_pointer (Integers.Ptrofs.repr ofs);
+ emit Psemi;
+ expand_storeind_ptr Asmvliw.GPR32 r Integers.Ptrofs.zero
+
+(* Auxiliary for 64-bit integer arithmetic built-ins. They expand to
+ two instructions, one computing the low 32 bits of the result,
+ followed by another computing the high 32 bits. In cases where
+ the first instruction would overwrite arguments to the second
+ instruction, we must go through X31 to hold the low 32 bits of the result.
+*)
+
+let expand_int64_arith conflict rl fn = assert false
+(*if conflict then (fn X31; emit (Pmv(rl, X31))) else fn rl *)
+
+(* Byte swaps. There are no specific instructions, so we use standard,
+ not-very-efficient formulas. *)
+
+let expand_bswap16 d s = let open Asmvliw in
+ (* d = (s & 0xFF) << 8 | (s >> 8) & 0xFF *)
+ emit (Pandiw(GPR32, s, coqint_of_camlint 0xFFl)); emit Psemi;
+ emit (Pslliw(GPR32, GPR32, _8)); emit Psemi;
+ emit (Psrliw(d, s, _8)); emit Psemi;
+ emit (Pandiw(d, d, coqint_of_camlint 0xFFl));
+ emit (Porw(d, GPR32, d)); emit Psemi
+
+let expand_bswap32 d s = let open Asmvliw in
+ (* d = (s << 24)
+ | (((s >> 8) & 0xFF) << 16)
+ | (((s >> 16) & 0xFF) << 8)
+ | (s >> 24) *)
+ emit (Pslliw(GPR16, s, coqint_of_camlint 24l)); emit Psemi;
+ emit (Psrliw(GPR32, s, _8)); emit Psemi;
+ emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi;
+ emit (Pslliw(GPR32, GPR32, _16)); emit Psemi;
+ emit (Porw(GPR16, GPR16, GPR32)); emit Psemi;
+ emit (Psrliw(GPR32, s, _16)); emit Psemi;
+ emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi;
+ emit (Pslliw(GPR32, GPR32, _8)); emit Psemi;
+ emit (Porw(GPR16, GPR16, GPR32)); emit Psemi;
+ emit (Psrliw(GPR32, s, coqint_of_camlint 24l)); emit Psemi;
+ emit (Porw(d, GPR16, GPR32)); emit Psemi
+
+let expand_bswap64 d s = let open Asmvliw in
+ (* d = s << 56
+ | (((s >> 8) & 0xFF) << 48)
+ | (((s >> 16) & 0xFF) << 40)
+ | (((s >> 24) & 0xFF) << 32)
+ | (((s >> 32) & 0xFF) << 24)
+ | (((s >> 40) & 0xFF) << 16)
+ | (((s >> 48) & 0xFF) << 8)
+ | s >> 56 *)
+ emit (Psllil(GPR16, s, coqint_of_camlint 56l)); emit Psemi;
+ List.iter
+ (fun (n1, n2) ->
+ emit (Psrlil(GPR32, s, coqint_of_camlint n1)); emit Psemi;
+ emit (Pandil(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi;
+ emit (Psllil(GPR32, GPR32, coqint_of_camlint n2)); emit Psemi;
+ emit (Porl(GPR16, GPR16, GPR32)); emit Psemi;)
+ [(8l,48l); (16l,40l); (24l,32l); (32l,24l); (40l,16l); (48l,8l)];
+ emit (Psrlil(GPR32, s, coqint_of_camlint 56l)); emit Psemi;
+ emit (Porl(d, GPR16, GPR32)); emit Psemi
+
+(* Handling of compiler-inlined builtins *)
+let last_system_register = 511l
+let not_system_register cn =cn<0l || cn>last_system_register
+
+let expand_builtin_inline name args res = let open Asmvliw in
+ match name, args, res with
+ (* Synchronization *)
+ | "__builtin_membar", [], _ ->
+ ()
+ (* Vararg stuff *)
+ | "__builtin_va_start", [BA(IR a)], _ ->
+ expand_builtin_va_start a
+ | "__builtin_kvx_clzw", [BA(IR a)], BR(IR res) ->
+ emit (Pclzw(res, a))
+ | "__builtin_clzll", [BA(IR a)], BR(IR res) ->
+ emit (Pclzll(res, a))
+ | "__builtin_kvx_ctzw", [BA(IR a)], BR(IR res) ->
+ emit (Pctzw(res, a))
+ | "__builtin_ctzll", [BA(IR a)], BR(IR res) ->
+ emit (Pctzll(res, a))
+ | "__builtin_kvx_stsud", [BA(IR a1); BA(IR a2)], BR(IR res) ->
+ emit (Pstsud(res, a1, a2))
+ | "__builtin_kvx_get", [BA_int(n)], BR(IR res) ->
+ let cn = camlint_of_coqint n in
+ (if not_system_register cn
+ then failwith (Printf.sprintf "__builtin_kvx_get(n): n must be between 0 and %ld, was %ld" last_system_register cn)
+ else emit (Pgetn(n, res)))
+ | "__builtin_kvx_set", [BA_int(n); BA(IR src)], _ ->
+ let cn = camlint_of_coqint n in
+ (if not_system_register cn
+ then failwith (Printf.sprintf "__builtin_kvx_set(n, val): n must be between 0 and %ld, was %ld" last_system_register cn)
+ else emit (Psetn(n, src)))
+ | "__builtin_kvx_wfxl", [BA_int(n); BA(IR src)], _ ->
+ let cn = camlint_of_coqint n in
+ (if not_system_register cn
+ then failwith (Printf.sprintf "__builtin_kvx_wfxl(n, val): n must be between 0 and %ld, was %ld" last_system_register cn)
+ else emit (Pwfxl(n, src)))
+ | "__builtin_kvx_wfxm", [BA_int(n); BA(IR src)], _ ->
+ let cn = camlint_of_coqint n in
+ (if not_system_register cn
+ then failwith (Printf.sprintf "__builtin_kvx_wfxm(n, val): n must be between 0 and %ld, was %ld" last_system_register cn)
+ else emit (Pwfxm(n, src)))
+ | "__builtin_kvx_ldu", [BA(IR addr)], BR(IR res) ->
+ emit (Pldu(res, addr))
+ | "__builtin_kvx_lbzu", [BA(IR addr)], BR(IR res) ->
+ emit (Plbzu(res, addr))
+ | "__builtin_kvx_lhzu", [BA(IR addr)], BR(IR res) ->
+ emit (Plhzu(res, addr))
+ | "__builtin_kvx_lwzu", [BA(IR addr)], BR(IR res) ->
+ emit (Plwzu(res, addr))
+ | "__builtin_kvx_alclrd", [BA(IR addr)], BR(IR res) ->
+ emit (Palclrd(res, addr))
+ | "__builtin_kvx_alclrw", [BA(IR addr)], BR(IR res) ->
+ emit (Palclrw(res, addr))
+ | "__builtin_kvx_await", [], _ ->
+ emit Pawait
+ | "__builtin_kvx_sleep", [], _ ->
+ emit Psleep
+ | "__builtin_kvx_stop", [], _ ->
+ emit Pstop
+ | "__builtin_kvx_barrier", [], _ ->
+ emit Pbarrier
+ | "__builtin_kvx_fence", [], _ ->
+ emit Pfence
+ | "__builtin_kvx_dinval", [], _ ->
+ emit Pdinval
+ | "__builtin_kvx_dinvall", [BA(IR addr)], _ ->
+ emit (Pdinvall addr)
+ | "__builtin_kvx_dtouchl", [BA(IR addr)], _ ->
+ emit (Pdtouchl addr)
+ | "__builtin_kvx_iinval", [], _ ->
+ emit Piinval
+ | "__builtin_kvx_iinvals", [BA(IR addr)], _ ->
+ emit (Piinvals addr)
+ | "__builtin_kvx_itouchl", [BA(IR addr)], _ ->
+ emit (Pitouchl addr)
+ | "__builtin_kvx_dzerol", [BA(IR addr)], _ ->
+ emit (Pdzerol addr)
+(*| "__builtin_kvx_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
+ (if res <> incr_res
+ then (emit (Asm.Pmv(res, incr_res)); emit Psemi));
+ emit (Pafaddd(addr, res))
+ | "__builtin_kvx_afaddw", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
+ (if res <> incr_res
+ then (emit (Asm.Pmv(res, incr_res)); emit Psemi));
+ emit (Pafaddw(addr, res)) *) (* see #157 *)
+ | "__builtin_alclrd", [BA(IR addr)], BR(IR res) ->
+ emit (Palclrd(res, addr))
+ | "__builtin_alclrw", [BA(IR addr)], BR(IR res) ->
+ emit (Palclrw(res, addr))
+ | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+ expand_bswap16 res a1
+ | ("__builtin_bswap"| "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
+ expand_bswap32 res a1
+ | "__builtin_bswap64", [BA(IR src)], BR(IR res) ->
+ expand_bswap64 res src
+
+ (* Byte swaps *)
+(*| "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+ expand_bswap16 res a1
+ | "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
+ emit (Pfabsd(res, a1))
+*)
+ (* Catch-all *)
+ | _ ->
+ raise (Error ("unrecognized builtin " ^ name))
+
+(* Expansion of instructions *)
+
+let expand_instruction instr =
+ match instr with
+ | Pallocframe (sz, ofs) ->
+ let sg = get_current_function_sig() in
+ emit (Pmv (Asmvliw.GPR17, stack_pointer));
+ if sg.sig_cc.cc_vararg <> None then begin
+ let n = arguments_size sg in
+ let extra_sz = if n >= _nbregargs_ then 0 else (* align _alignment_ *) ((_nbregargs_ - n) * wordsize) in
+ let full_sz = Z.add sz (Z.of_uint extra_sz) in
+ expand_addptrofs stack_pointer stack_pointer (Integers.Ptrofs.repr (Z.neg full_sz));
+ emit Psemi;
+ expand_storeind_ptr Asmvliw.GPR17 stack_pointer ofs;
+ emit Psemi;
+ let va_ofs =
+ let extra_ofs = if n <= _nbregargs_ then 0 else ((n - _nbregargs_) * wordsize) in
+ Z.add sz (Z.of_sint extra_ofs) in
+ vararg_start_ofs := Some va_ofs;
+ save_arguments n va_ofs
+ end else begin
+ let below = Integers.Ptrofs.repr (Z.neg sz) in
+ expand_addptrofs stack_pointer stack_pointer below;
+ expand_storeind_ptr stack_pointer stack_pointer (Integers.Ptrofs.add ofs below);
+ emit Psemi; (* Psemi required to fit in resource constraints *)
+ vararg_start_ofs := None
+ end
+ | Pfreeframe (sz, ofs) ->
+ let sg = get_current_function_sig() in
+ let extra_sz =
+ if sg.sig_cc.cc_vararg <> None then begin
+ let n = arguments_size sg in
+ if n >= _nbregargs_ then 0 else (* align _alignment_ *) ((_nbregargs_ - n) * wordsize)
+ end else 0 in
+ expand_addptrofs stack_pointer stack_pointer (Integers.Ptrofs.repr (Z.add sz (Z.of_uint extra_sz)))
+
+(*| Pseqw(rd, rs1, rs2) ->
+ (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *)
+ if rs2 = X0 then begin
+ emit (Psltiuw(rd, rs1, Int.one))
+ end else begin
+ emit (Pxorw(rd, rs1, rs2)); emit (Psltiuw(rd, X rd, Int.one))
+ end
+ | Psnew(rd, rs1, rs2) ->
+ (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *)
+ if rs2 = X0 then begin
+ emit (Psltuw(rd, X0, rs1))
+ end else begin
+ emit (Pxorw(rd, rs1, rs2)); emit (Psltuw(rd, X0, X rd))
+ end
+ | Pseql(rd, rs1, rs2) ->
+ (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *)
+ if rs2 = X0 then begin
+ emit (Psltiul(rd, rs1, Int64.one))
+ end else begin
+ emit (Pxorl(rd, rs1, rs2)); emit (Psltiul(rd, X rd, Int64.one))
+ end
+ | Psnel(rd, rs1, rs2) ->
+ (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *)
+ if rs2 = X0 then begin
+ emit (Psltul(rd, X0, rs1))
+ end else begin
+ emit (Pxorl(rd, rs1, rs2)); emit (Psltul(rd, X0, X rd))
+ end
+*)| Pcvtl2w (rd, rs) ->
+ assert Archi.ptr64;
+ emit (Paddiw (rd, rs, Integers.Int.zero)) (* 32-bit sign extension *)
+
+(*| Pjal_r(r, sg) ->
+ fixup_call sg; emit instr
+ | Pjal_s(symb, sg) ->
+ fixup_call sg; emit instr
+ | Pj_r(r, sg) when r <> X1 ->
+ fixup_call sg; emit instr
+ | Pj_s(symb, sg) ->
+ fixup_call sg; emit instr
+
+*)| Pbuiltin (ef,args,res) ->
+ begin match ef with
+ | EF_builtin (name,sg) ->
+ expand_builtin_inline (camlstring_of_coqstring name) args res
+ | EF_vload chunk ->
+ expand_builtin_vload chunk args res
+ | EF_vstore chunk ->
+ expand_builtin_vstore chunk args
+(* | EF_annot_val (kind,txt,targ) ->
+ expand_annot_val kind txt targ args res *)
+ | EF_memcpy(sz, al) ->
+ expand_builtin_memcpy sz al args
+ (* | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ emit instr
+ *)
+ | EF_malloc -> failwith "asmexpand: malloc"
+ | EF_free -> failwith "asmexpand: free"
+ | EF_debug _ -> failwith "asmexpand: debug"
+ | EF_annot _ -> emit instr
+ | EF_annot_val (kind, txt, targ) -> expand_annot_val kind txt targ args res
+ | EF_external _ -> failwith "asmexpand: external"
+ | EF_inline_asm _ -> emit instr
+ | EF_runtime _ -> failwith "asmexpand: runtime"
+ | EF_profiling _ -> emit instr
+ end
+ | _ ->
+ emit instr
+
+(* NOTE: Dwarf register maps for RV32G are not yet specified
+ officially. This is just a placeholder. *)
+let int_reg_to_dwarf = let open Asmvliw in function
+ | GPR0 -> 1 | GPR1 -> 2 | GPR2 -> 3 | GPR3 -> 4 | GPR4 -> 5
+ | GPR5 -> 6 | GPR6 -> 7 | GPR7 -> 8 | GPR8 -> 9 | GPR9 -> 10
+ | GPR10 -> 11 | GPR11 -> 12 | GPR12 -> 13 | GPR13 -> 14 | GPR14 -> 15
+ | GPR15 -> 16 | GPR16 -> 17 | GPR17 -> 18 | GPR18 -> 19 | GPR19 -> 20
+ | GPR20 -> 21 | GPR21 -> 22 | GPR22 -> 23 | GPR23 -> 24 | GPR24 -> 25
+ | GPR25 -> 26 | GPR26 -> 27 | GPR27 -> 28 | GPR28 -> 29 | GPR29 -> 30
+ | GPR30 -> 31 | GPR31 -> 32 | GPR32 -> 33 | GPR33 -> 34 | GPR34 -> 35
+ | GPR35 -> 36 | GPR36 -> 37 | GPR37 -> 38 | GPR38 -> 39 | GPR39 -> 40
+ | GPR40 -> 41 | GPR41 -> 42 | GPR42 -> 43 | GPR43 -> 44 | GPR44 -> 45
+ | GPR45 -> 46 | GPR46 -> 47 | GPR47 -> 48 | GPR48 -> 49 | GPR49 -> 50
+ | GPR50 -> 51 | GPR51 -> 52 | GPR52 -> 53 | GPR53 -> 54 | GPR54 -> 55
+ | GPR55 -> 56 | GPR56 -> 57 | GPR57 -> 58 | GPR58 -> 59 | GPR59 -> 60
+ | GPR60 -> 61 | GPR61 -> 62 | GPR62 -> 63 | GPR63 -> 64
+
+let preg_to_dwarf = let open Asmvliw in function
+ | IR r -> int_reg_to_dwarf r
+ | RA -> 65 (* FIXME - No idea what is $ra DWARF number in k1-gdb *)
+ | _ -> assert false
+
+let expand_function id fn =
+ try
+ set_current_function fn;
+ expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code;
+ Errors.OK (get_current_function ())
+ with Error s ->
+ Errors.Error (Errors.msg (coqstring_of_camlstring s))
+
+let expand_fundef id = function
+ | Internal f ->
+ begin match expand_function id f with
+ | Errors.OK tf -> Errors.OK (Internal tf)
+ | Errors.Error msg -> Errors.Error msg
+ end
+ | External ef ->
+ Errors.OK (External ef)
+
+let expand_program (p: Asm.program) : Asm.program Errors.res =
+ AST.transform_partial_program2 expand_fundef (fun id v -> Errors.OK v) p
diff --git a/kvx/Asmgen.v b/kvx/Asmgen.v
new file mode 100644
index 00000000..61856acf
--- /dev/null
+++ b/kvx/Asmgen.v
@@ -0,0 +1,41 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Integers.
+Require Import Mach Asm Asmblock Asmblockgen Machblockgen.
+Require Import PostpassScheduling.
+Require Import Errors String.
+Require Compopts.
+
+Local Open Scope error_monad_scope.
+
+Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := Compopts.time name f.
+
+Definition transf_program (p: Mach.program) : res Asm.program :=
+ let mbp := (time "Machblock generation" Machblockgen.transf_program) p in
+ do abp <- (time "Asmblock generation" Asmblockgen.transf_program) mbp;
+ do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;
+ OK ((time "Asm generation" Asm.transf_program) abp').
+
+Definition transf_function (f: Mach.function) : res Asm.function :=
+ let mbf := Machblockgen.transf_function f in
+ do abf <- Asmblockgen.transf_function mbf;
+ OK (Asm.transf_function abf).
+
+Definition transl_code (f: Mach.function) (l: Mach.code) : res (list Asm.instruction) :=
+ let mbf := Machblockgen.transf_function f in
+ let mbc := Machblockgen.trans_code l in
+ do abc <- transl_blocks mbf mbc true;
+ OK (unfold abc).
diff --git a/kvx/Asmgenproof.v b/kvx/Asmgenproof.v
new file mode 100644
index 00000000..636c105f
--- /dev/null
+++ b/kvx/Asmgenproof.v
@@ -0,0 +1,96 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Composing all passes from Mach to KVX Asm *)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Mach Conventions Asm Asmgen Machblockgen Asmblockgen.
+Require Import Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
+
+Local Open Scope linking_scope.
+
+Definition block_passes :=
+ mkpass Machblockgenproof.match_prog
+ ::: mkpass Asmblockgenproof.match_prog
+ ::: mkpass PostpassSchedulingproof.match_prog
+ ::: mkpass Asm.match_prog
+ ::: pass_nil _.
+
+Definition match_prog := pass_match (compose_passes block_passes).
+
+Lemma transf_program_match:
+ forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros p tp H.
+ unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
+ inversion_clear H. apply bind_inversion in H1. destruct H1.
+ inversion_clear H. inversion H2. unfold time, Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
+ unfold match_prog; cbn.
+ exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
+ exists x; split. apply Asmblockgenproof.transf_program_match; auto.
+ exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto.
+ exists tp; split. apply Asm.transf_program_match; auto. auto.
+Qed.
+
+(** Return Address Offset for Mach *)
+
+Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop :=
+ Mach_return_address_offset Asmblockgenproof.return_address_offset.
+
+Lemma return_address_exists:
+ forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros; unfold return_address_offset; eapply Mach_return_address_exists; eauto.
+ intros; eapply Asmblockgenproof.return_address_exists; eauto.
+Qed.
+
+(** Main preservation theorem: from Mach to KVX Asm *)
+
+Section PRESERVATION.
+
+Variable prog: Mach.program.
+Variable tprog: program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+ forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
+Proof.
+ unfold match_prog in TRANSF. cbn in TRANSF.
+ inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
+ eapply compose_forward_simulations.
+ exploit Machblockgenproof.transf_program_correct; eauto.
+ unfold Machblockgenproof.inv_trans_rao.
+ eapply compose_forward_simulations. apply Asmblockgenproof.transf_program_correct; eauto.
+ eapply compose_forward_simulations. apply PostpassSchedulingproof.transf_program_correct; eauto.
+ apply Asm.transf_program_correct. eauto.
+Qed.
+
+End PRESERVATION.
+
+Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes).
+
+(*******************************************)
+(** Stub actually needed by driver/Compiler *)
+
+Module Asmgenproof0.
+
+Definition return_address_offset := return_address_offset.
+
+End Asmgenproof0.
diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v
new file mode 100644
index 00000000..45b230e6
--- /dev/null
+++ b/kvx/Asmvliw.v
@@ -0,0 +1,1729 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Abstract syntax and semantics for VLIW semantics of KVX assembly language. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import ExtValues.
+Require Import Memory.
+Require Import Events.
+Require Import Globalenvs.
+Require Import Smallstep.
+Require Import Locations.
+Require Stacklayout.
+Require Import Conventions.
+Require Import Errors.
+Require Import Sorting.Permutation.
+Require Import Chunks.
+Require Import Lia.
+
+(** * Abstract syntax *)
+
+(** A KVX program is syntactically given as a list of functions.
+ Each function is associated to a list of bundles of type [bblock] below.
+ Hence, syntactically, we view each bundle as a basic block:
+ this view induces our sequential semantics of bundles defined in [Asmblock].
+*)
+
+(** ** General Purpose registers. *)
+
+Inductive gpreg: Type :=
+ | GPR0: gpreg | GPR1: gpreg | GPR2: gpreg | GPR3: gpreg | GPR4: gpreg
+ | GPR5: gpreg | GPR6: gpreg | GPR7: gpreg | GPR8: gpreg | GPR9: gpreg
+ | GPR10: gpreg | GPR11: gpreg | GPR12: gpreg | GPR13: gpreg | GPR14: gpreg
+ | GPR15: gpreg | GPR16: gpreg | GPR17: gpreg | GPR18: gpreg | GPR19: gpreg
+ | GPR20: gpreg | GPR21: gpreg | GPR22: gpreg | GPR23: gpreg | GPR24: gpreg
+ | GPR25: gpreg | GPR26: gpreg | GPR27: gpreg | GPR28: gpreg | GPR29: gpreg
+ | GPR30: gpreg | GPR31: gpreg | GPR32: gpreg | GPR33: gpreg | GPR34: gpreg
+ | GPR35: gpreg | GPR36: gpreg | GPR37: gpreg | GPR38: gpreg | GPR39: gpreg
+ | GPR40: gpreg | GPR41: gpreg | GPR42: gpreg | GPR43: gpreg | GPR44: gpreg
+ | GPR45: gpreg | GPR46: gpreg | GPR47: gpreg | GPR48: gpreg | GPR49: gpreg
+ | GPR50: gpreg | GPR51: gpreg | GPR52: gpreg | GPR53: gpreg | GPR54: gpreg
+ | GPR55: gpreg | GPR56: gpreg | GPR57: gpreg | GPR58: gpreg | GPR59: gpreg
+ | GPR60: gpreg | GPR61: gpreg | GPR62: gpreg | GPR63: gpreg.
+
+Definition ireg := gpreg.
+Definition freg := gpreg.
+
+Lemma gpreg_eq: forall (x y: gpreg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Inductive gpreg_q : Type :=
+| R0R1 | R2R3 | R4R5 | R6R7 | R8R9
+| R10R11 | R12R13 | R14R15 | R16R17 | R18R19
+| R20R21 | R22R23 | R24R25 | R26R27 | R28R29
+| R30R31 | R32R33 | R34R35 | R36R37 | R38R39
+| R40R41 | R42R43 | R44R45 | R46R47 | R48R49
+| R50R51 | R52R53 | R54R55 | R56R57 | R58R59
+| R60R61 | R62R63.
+
+Lemma gpreg_q_eq : forall (x y : gpreg_q), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Definition gpreg_q_expand (x : gpreg_q) : gpreg * gpreg :=
+ match x with
+ | R0R1 => (GPR0, GPR1)
+ | R2R3 => (GPR2, GPR3)
+ | R4R5 => (GPR4, GPR5)
+ | R6R7 => (GPR6, GPR7)
+ | R8R9 => (GPR8, GPR9)
+ | R10R11 => (GPR10, GPR11)
+ | R12R13 => (GPR12, GPR13)
+ | R14R15 => (GPR14, GPR15)
+ | R16R17 => (GPR16, GPR17)
+ | R18R19 => (GPR18, GPR19)
+ | R20R21 => (GPR20, GPR21)
+ | R22R23 => (GPR22, GPR23)
+ | R24R25 => (GPR24, GPR25)
+ | R26R27 => (GPR26, GPR27)
+ | R28R29 => (GPR28, GPR29)
+ | R30R31 => (GPR30, GPR31)
+ | R32R33 => (GPR32, GPR33)
+ | R34R35 => (GPR34, GPR35)
+ | R36R37 => (GPR36, GPR37)
+ | R38R39 => (GPR38, GPR39)
+ | R40R41 => (GPR40, GPR41)
+ | R42R43 => (GPR42, GPR43)
+ | R44R45 => (GPR44, GPR45)
+ | R46R47 => (GPR46, GPR47)
+ | R48R49 => (GPR48, GPR49)
+ | R50R51 => (GPR50, GPR51)
+ | R52R53 => (GPR52, GPR53)
+ | R54R55 => (GPR54, GPR55)
+ | R56R57 => (GPR56, GPR57)
+ | R58R59 => (GPR58, GPR59)
+ | R60R61 => (GPR60, GPR61)
+ | R62R63 => (GPR62, GPR63)
+ end.
+
+Inductive gpreg_o : Type :=
+| R0R1R2R3 | R4R5R6R7 | R8R9R10R11 | R12R13R14R15
+| R16R17R18R19 | R20R21R22R23 | R24R25R26R27 | R28R29R30R31
+| R32R33R34R35 | R36R37R38R39 | R40R41R42R43 | R44R45R46R47
+| R48R49R50R51 | R52R53R54R55 | R56R57R58R59 | R60R61R62R63.
+
+Definition gpreg_o_expand (x : gpreg_o) : gpreg * gpreg * gpreg * gpreg :=
+ match x with
+ | R0R1R2R3 => (GPR0, GPR1, GPR2, GPR3)
+ | R4R5R6R7 => (GPR4, GPR5, GPR6, GPR7)
+ | R8R9R10R11 => (GPR8, GPR9, GPR10, GPR11)
+ | R12R13R14R15 => (GPR12, GPR13, GPR14, GPR15)
+ | R16R17R18R19 => (GPR16, GPR17, GPR18, GPR19)
+ | R20R21R22R23 => (GPR20, GPR21, GPR22, GPR23)
+ | R24R25R26R27 => (GPR24, GPR25, GPR26, GPR27)
+ | R28R29R30R31 => (GPR28, GPR29, GPR30, GPR31)
+ | R32R33R34R35 => (GPR32, GPR33, GPR34, GPR35)
+ | R36R37R38R39 => (GPR36, GPR37, GPR38, GPR39)
+ | R40R41R42R43 => (GPR40, GPR41, GPR42, GPR43)
+ | R44R45R46R47 => (GPR44, GPR45, GPR46, GPR47)
+ | R48R49R50R51 => (GPR48, GPR49, GPR50, GPR51)
+ | R52R53R54R55 => (GPR52, GPR53, GPR54, GPR55)
+ | R56R57R58R59 => (GPR56, GPR57, GPR58, GPR59)
+ | R60R61R62R63 => (GPR60, GPR61, GPR62, GPR63)
+ end.
+
+Lemma gpreg_o_eq : forall (x y : gpreg_o), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Inductive preg: Type :=
+ | IR: gpreg -> preg (**r integer general purpose registers *)
+ | RA: preg
+ | PC: preg
+ .
+
+Coercion IR: gpreg >-> preg.
+
+Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
+Proof. decide equality. apply ireg_eq. Defined.
+
+Module PregEq.
+ Definition t := preg.
+ Definition eq := preg_eq.
+End PregEq.
+
+(* FIXME - R16 and R32 are excluded *)
+Definition preg_of (r: mreg) : preg :=
+ match r with
+ | R0 => GPR0 | R1 => GPR1 | R2 => GPR2 | R3 => GPR3 | R4 => GPR4
+ | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R8 => GPR8 | R9 => GPR9
+ | R10 => GPR10 | R11 => GPR11 (* | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *)
+ | R15 => GPR15 (* | R16 => GPR16 *) | R17 => GPR17 | R18 => GPR18 | R19 => GPR19
+ | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24
+ | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29
+ | R30 => GPR30 | R31 => GPR31 (* | R32 => GPR32 *) | R33 => GPR33 | R34 => GPR34
+ | R35 => GPR35 | R36 => GPR36 | R37 => GPR37 | R38 => GPR38 | R39 => GPR39
+ | R40 => GPR40 | R41 => GPR41 | R42 => GPR42 | R43 => GPR43 | R44 => GPR44
+ | R45 => GPR45 | R46 => GPR46 | R47 => GPR47 | R48 => GPR48 | R49 => GPR49
+ | R50 => GPR50 | R51 => GPR51 | R52 => GPR52 | R53 => GPR53 | R54 => GPR54
+ | R55 => GPR55 | R56 => GPR56 | R57 => GPR57 | R58 => GPR58 | R59 => GPR59
+ | R60 => GPR60 | R61 => GPR61 | R62 => GPR62 | R63 => GPR63
+ end.
+
+Definition ireg_of (r: mreg) : res ireg :=
+ match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.ireg_of") end.
+
+Definition freg_of (r: mreg) : res freg :=
+ match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
+
+Module Pregmap := EMap(PregEq).
+
+(** ** Conventional names for stack pointer ([SP]), return address ([RA]), frame pointer ([FP]) and other temporaries used *)
+
+Notation "'SP'" := GPR12 (only parsing) : asm.
+Notation "'FP'" := GPR17 (only parsing) : asm.
+Notation "'MFP'" := R17 (only parsing) : asm.
+Notation "'GPRA'" := GPR16 (only parsing) : asm.
+Notation "'RTMP'" := GPR32 (only parsing) : asm.
+
+(** ** Names of tests in comparisons *)
+
+Inductive btest: Type :=
+ | BTdnez (**r Double Not Equal to Zero *)
+ | BTdeqz (**r Double Equal to Zero *)
+ | BTdltz (**r Double Less Than Zero *)
+ | BTdgez (**r Double Greater Than or Equal to Zero *)
+ | BTdlez (**r Double Less Than or Equal to Zero *)
+ | BTdgtz (**r Double Greater Than Zero *)
+ | BTwnez (**r Word Not Equal to Zero *)
+ | BTweqz (**r Word Equal to Zero *)
+ | BTwltz (**r Word Less Than Zero *)
+ | BTwgez (**r Word Greater Than or Equal to Zero *)
+ | BTwlez (**r Word Less Than or Equal to Zero *)
+ | BTwgtz (**r Word Greater Than Zero *)
+ .
+
+Inductive itest: Type :=
+ | ITne (**r Not Equal *)
+ | ITeq (**r Equal *)
+ | ITlt (**r Less Than *)
+ | ITge (**r Greater Than or Equal *)
+ | ITle (**r Less Than or Equal *)
+ | ITgt (**r Greater Than *)
+ | ITneu (**r Unsigned Not Equal *)
+ | ITequ (**r Unsigned Equal *)
+ | ITltu (**r Less Than Unsigned *)
+ | ITgeu (**r Greater Than or Equal Unsigned *)
+ | ITleu (**r Less Than or Equal Unsigned *)
+ | ITgtu (**r Greater Than Unsigned *)
+ .
+
+Inductive ftest: Type :=
+ | FTone (**r Ordered and Not Equal *)
+ | FTueq (**r Unordered or Equal *)
+ | FToeq (**r Ordered and Equal *)
+ | FTune (**r Unordered or Not Equal *)
+ | FTolt (**r Ordered and Less Than *)
+ | FTuge (**r Unordered or Greater Than or Equal *)
+ | FToge (**r Ordered and Greater Than or Equal *)
+ | FTult (**r Unordered or Less Than *)
+ .
+
+(** *** Offsets for load and store instructions. *)
+
+Definition offset : Type := ptrofs.
+
+(** *** Labels for goto (in the current function) *)
+
+Definition label := positive.
+
+(** ** Instructions *)
+
+(** We model a subset of the KVX instruction set.
+
+- Although it is possible to use the 32-bits mode, for now we don't support it. When mapping to actual instructions, the OCaml code in TargetPrinter.ml
+ throws an error if we are not in 64-bits mode.
+
+- We follow a design close to the one used for the Risc-V port: one set of
+ pseudo-instructions for 32-bit integer arithmetic, with suffix W, another
+ set for 64-bit integer arithmetic, with suffix L.
+
+- With respect to other CompCert assemblies, we define a type hierarchy of instructions (instead of a flat type).
+ This helps us to factorize similar cases for the scheduling verifier.
+
+*)
+
+(** *** Instructions to be expanded in control-flow *)
+Inductive ex_instruction : Type :=
+ (* Pseudo-instructions *)
+ | Pbuiltin: external_function -> list (builtin_arg preg)
+ -> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *)
+.
+
+(** Similarly to other CompCert assembly languages, the pseudo-instructions are the following:
+
+- [Ploadsymbol]: load the address of a symbol in an integer register.
+
+- [Pallocframe sz pos]: in the formal semantics, this
+ pseudo-instruction allocates a memory block with bounds [0] and
+ [sz], stores the value of the stack pointer at offset [pos] in this
+ block, and sets the stack pointer to the address of the bottom of
+ this block.
+
+ This cannot be expressed in our memory model, which does not reflect
+ the fact that stack frames are adjacent and allocated/freed
+ following a stack discipline.
+
+- [Pfreeframe sz pos]: in the formal semantics, this pseudo-instruction
+ reads the word at [pos] of the block pointed by the stack pointer,
+ frees this block, and sets the stack pointer to the value read.
+ Again, our memory model cannot comprehend that this operation
+ frees (logically) the current stack frame.
+
+- [Pbtbl reg table]: this is a N-way branch, implemented via a jump table
+*)
+
+(** *** Control Flow instructions *)
+Inductive cf_instruction : Type :=
+ | Pret (**r return *)
+ | Pcall (l: label) (**r function call *)
+ | Picall (r: ireg) (**r function call on register value *)
+ | Pjumptable (r: ireg) (labels: list label) (**r N-way branch through a jump table (pseudo) *)
+
+ (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *)
+ | Pgoto (l: label) (**r goto *)
+ | Pigoto (r: ireg) (**r goto from register *)
+ | Pj_l (l: label) (**r jump to label *)
+
+ (* Conditional branches *)
+ | Pcb (bt: btest) (r: ireg) (l: label) (**r branch based on btest *)
+ | Pcbu (bt: btest) (r: ireg) (l: label) (**r branch based on btest with unsigned semantics *)
+.
+
+(** *** Loads *)
+
+(* What follows was the original spec, but is subtly incorrect.
+ Our definition of the assembly-level memory model is already an abstraction of the real world.
+ In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level.
+ This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value.
+ match chunk with
+ | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned
+ | Mint32 => Vint Int.zero
+ | Mint64 => Vlong Int64.zero
+ | Many32 | Many64 => Vundef
+ | Mfloat32 => Vsingle Float32.zero
+ | Mfloat64 => Vfloat Float.zero
+ end. *)
+
+Inductive load_name : Type :=
+ | Plb (**r load byte *)
+ | Plbu (**r load byte unsigned *)
+ | Plh (**r load half word *)
+ | Plhu (**r load half word unsigned *)
+ | Plw (**r load int32 *)
+ | Plw_a (**r load any32 *)
+ | Pld (**r load int64 *)
+ | Pld_a (**r load any64 *)
+ | Pfls (**r load float *)
+ | Pfld (**r load 64-bit float *)
+.
+
+Inductive ld_instruction : Type :=
+ | PLoadRRO (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (ofs: offset)
+ | PLoadRRR (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
+ | PLoadRRRXS (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg)
+ | PLoadQRRO (rd: gpreg_q) (ra: ireg) (ofs: offset)
+ | PLoadORRO (rd: gpreg_o) (ra: ireg) (ofs: offset)
+.
+
+(** *** Stores *)
+Inductive store_name : Type :=
+ | Psb (**r store byte *)
+ | Psh (**r store half byte *)
+ | Psw (**r store int32 *)
+ | Psw_a (**r store any32 *)
+ | Psd (**r store int64 *)
+ | Psd_a (**r store any64 *)
+ | Pfss (**r store float *)
+ | Pfsd (**r store 64-bit float *)
+.
+
+Inductive st_instruction : Type :=
+ | PStoreRRO (i: store_name) (rs: ireg) (ra: ireg) (ofs: offset)
+ | PStoreRRR (i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
+ | PStoreRRRXS(i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
+ | PStoreQRRO (rs: gpreg_q) (ra: ireg) (ofs: offset)
+ | PStoreORRO (rs: gpreg_o) (ra: ireg) (ofs: offset)
+.
+
+(** *** Arithmetic instructions *)
+Inductive arith_name_r : Type :=
+ | Ploadsymbol (id: ident) (ofs: ptrofs) (**r load the address of a symbol *)
+.
+
+Inductive arith_name_rr : Type :=
+ | Pmv (**r register move *)
+ | Pnegw (**r negate word *)
+ | Pnegl (**r negate long *)
+ | Pcvtl2w (**r Convert Long to Word *)
+ | Psxwd (**r Sign Extend Word to Double Word *)
+ | Pzxwd (**r Zero Extend Word to Double Word *)
+ | Pextfz (stop : Z) (start : Z) (**r extract bit field, unsigned *)
+ | Pextfs (stop : Z) (start : Z) (**r extract bit field, signed *)
+ | Pextfzl (stop : Z) (start : Z) (**r extract bit field, unsigned *)
+ | Pextfsl (stop : Z) (start : Z) (**r extract bit field, signed *)
+
+ | Pfabsd (**r float absolute double *)
+ | Pfabsw (**r float absolute word *)
+ | Pfnegd (**r float negate double *)
+ | Pfnegw (**r float negate word *)
+ | Pfinvw (**r float invert word *)
+ | Pfnarrowdw (**r float narrow 64 -> 32 bits *)
+ | Pfwidenlwd (**r Floating Point widen from 32 bits to 64 bits *)
+ | Pfloatwrnsz (**r Floating Point conversion from integer (int -> SINGLE) *)
+ | Pfloatuwrnsz (**r Floating Point conversion from integer (unsigned int -> SINGLE) *)
+ | Pfloatudrnsz (**r Floating Point Conversion from integer (unsigned long -> float) *)
+ | Pfloatdrnsz (**r Floating Point Conversion from integer (long -> float) *)
+ | Pfixedwrzz (**r Integer conversion from floating point (single -> int) *)
+ | Pfixeduwrzz (**r Integer conversion from floating point (single -> unsigned int) *)
+ | Pfixeddrzz (**r Integer conversion from floating point (float -> long) *)
+ | Pfixedudrzz (**r Integer conversion from floating point (float -> unsigned long) *)
+ | Pfixeddrzz_i32 (**r Integer conversion from floating point (float -> int) *)
+ | Pfixedudrzz_i32 (**r Integer conversion from floating point (float -> unsigned int) *)
+.
+
+Inductive arith_name_ri32 : Type :=
+ | Pmake (**r load immediate *)
+.
+
+Inductive arith_name_ri64 : Type :=
+ | Pmakel (**r load immediate long *)
+.
+
+Inductive arith_name_rf32 : Type :=
+ | Pmakefs (**r load immediate single *)
+.
+
+Inductive arith_name_rf64 : Type :=
+ | Pmakef (**r load immediate float *)
+.
+
+Inductive arith_name_rrr : Type :=
+ | Pcompw (it: itest) (**r comparison word *)
+ | Pcompl (it: itest) (**r comparison long *)
+ | Pfcompw (ft: ftest) (**r comparison float32 *)
+ | Pfcompl (ft: ftest) (**r comparison float64 *)
+
+ | Paddw (**r add word *)
+ | Paddxw (shift : shift1_4) (**r add shift *)
+ | Psubw (**r sub word word *)
+ | Prevsubxw (shift : shift1_4) (**r sub shift word *)
+ | Pmulw (**r mul word *)
+ | Pandw (**r and word *)
+ | Pnandw (**r nand word *)
+ | Porw (**r or word *)
+ | Pnorw (**r nor word *)
+ | Pxorw (**r xor word *)
+ | Pnxorw (**r nxor word *)
+ | Pandnw (**r andn word *)
+ | Pornw (**r orn word *)
+ | Psraw (**r shift right arithmetic word *)
+ | Psrxw (**r shift right arithmetic word round to 0*)
+ | Psrlw (**r shift right logical word *)
+ | Psllw (**r shift left logical word *)
+
+ | Paddl (**r add long *)
+ | Paddxl (shift : shift1_4) (**r add shift long *)
+ | Psubl (**r sub long *)
+ | Prevsubxl (shift : shift1_4) (**r sub shift long *)
+ | Pandl (**r and long *)
+ | Pnandl (**r nand long *)
+ | Porl (**r or long *)
+ | Pnorl (**r nor long *)
+ | Pxorl (**r xor long *)
+ | Pnxorl (**r nxor long *)
+ | Pandnl (**r andn long *)
+ | Pornl (**r orn long *)
+ | Pmull (**r mul long (low part) *)
+ | Pslll (**r shift left logical long *)
+ | Psrll (**r shift right logical long *)
+ | Psrxl (**r shift right logical long round to 0*)
+ | Psral (**r shift right arithmetic long *)
+
+ | Pfaddd (**r float add double *)
+ | Pfaddw (**r float add word *)
+ | Pfsbfd (**r float sub double *)
+ | Pfsbfw (**r float sub word *)
+ | Pfmuld (**r float multiply double *)
+ | Pfmulw (**r float multiply word *)
+ | Pfmind (**r float min double *)
+ | Pfminw (**r float min word *)
+ | Pfmaxd (**r float max double *)
+ | Pfmaxw (**r float max word *)
+.
+
+Inductive arith_name_rri32 : Type :=
+ | Pcompiw (it: itest) (**r comparison imm word *)
+
+ | Paddiw (**r add imm word *)
+ | Paddxiw (shift : shift1_4)
+ | Prevsubiw (**r add imm word *)
+ | Prevsubxiw (shift : shift1_4)
+ | Pmuliw (**r add imm word *)
+ | Pandiw (**r and imm word *)
+ | Pnandiw (**r nand imm word *)
+ | Poriw (**r or imm word *)
+ | Pnoriw (**r nor imm word *)
+ | Pxoriw (**r xor imm word *)
+ | Pnxoriw (**r nxor imm word *)
+ | Pandniw (**r andn word *)
+ | Porniw (**r orn word *)
+ | Psraiw (**r shift right arithmetic imm word *)
+ | Psrxiw (**r shift right arithmetic imm word round to 0*)
+ | Psrliw (**r shift right logical imm word *)
+ | Pslliw (**r shift left logical imm word *)
+ | Proriw (**r rotate right imm word *)
+ | Psllil (**r shift left logical immediate long *)
+ | Psrlil (**r shift right logical immediate long *)
+ | Psrail (**r shift right arithmetic immediate long *)
+ | Psrxil (**r shift right arithmetic immediate long round to 0*)
+.
+
+Inductive arith_name_rri64 : Type :=
+ | Pcompil (it: itest) (**r comparison imm long *)
+ | Paddil (**r add immediate long *)
+ | Paddxil (shift : shift1_4)
+ | Prevsubil
+ | Prevsubxil (shift : shift1_4)
+ | Pmulil (**r mul immediate long *)
+ | Pandil (**r and immediate long *)
+ | Pnandil (**r nand immediate long *)
+ | Poril (**r or immediate long *)
+ | Pnoril (**r nor immediate long *)
+ | Pxoril (**r xor immediate long *)
+ | Pnxoril (**r nxor immediate long *)
+ | Pandnil (**r andn immediate long *)
+ | Pornil (**r orn immediate long *)
+.
+
+Inductive arith_name_arrr : Type :=
+ | Pmaddw (**r multiply add word *)
+ | Pmaddl (**r multiply add long *)
+ | Pmsubw (**r multiply subtract word *)
+ | Pmsubl (**r multiply subtract long *)
+ | Pcmove (bt: btest) (**r conditional move *)
+ | Pcmoveu (bt: btest) (**r conditional move, test on unsigned semantics *)
+ | Pfmaddfw (**r float fused multiply add word *)
+ | Pfmaddfl (**r float fused multiply add long *)
+ | Pfmsubfw (**r float fused multiply subtract word *)
+ | Pfmsubfl (**r float fused multiply subtract long *)
+.
+
+Inductive arith_name_arri32 : Type :=
+ | Pmaddiw (**r multiply add word *)
+ | Pcmoveiw (bt: btest)
+ | Pcmoveuiw (bt: btest)
+.
+
+Inductive arith_name_arri64 : Type :=
+ | Pmaddil (**r multiply add long *)
+ | Pcmoveil (bt: btest)
+ | Pcmoveuil (bt: btest)
+.
+
+Inductive arith_name_arr : Type :=
+ | Pinsf (stop : Z) (start : Z) (**r insert bit field *)
+ | Pinsfl (stop : Z) (start : Z) (**r insert bit field *)
+.
+
+Inductive ar_instruction : Type :=
+ | PArithR (i: arith_name_r) (rd: ireg)
+ | PArithRR (i: arith_name_rr) (rd rs: ireg)
+ | PArithRI32 (i: arith_name_ri32) (rd: ireg) (imm: int)
+ | PArithRI64 (i: arith_name_ri64) (rd: ireg) (imm: int64)
+ | PArithRF32 (i: arith_name_rf32) (rd: ireg) (imm: float32)
+ | PArithRF64 (i: arith_name_rf64) (rd: ireg) (imm: float)
+ | PArithRRR (i: arith_name_rrr) (rd rs1 rs2: ireg)
+ | PArithRRI32 (i: arith_name_rri32) (rd rs: ireg) (imm: int)
+ | PArithRRI64 (i: arith_name_rri64) (rd rs: ireg) (imm: int64)
+ | PArithARRR (i: arith_name_arrr) (rd rs1 rs2: ireg)
+ | PArithARR (i: arith_name_arr) (rd rs: ireg)
+ | PArithARRI32 (i: arith_name_arri32) (rd rs: ireg) (imm: int)
+ | PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64)
+.
+
+Module PArithCoercions.
+
+Coercion PArithR: arith_name_r >-> Funclass.
+Coercion PArithRR: arith_name_rr >-> Funclass.
+Coercion PArithRI32: arith_name_ri32 >-> Funclass.
+Coercion PArithRI64: arith_name_ri64 >-> Funclass.
+Coercion PArithRF32: arith_name_rf32 >-> Funclass.
+Coercion PArithRF64: arith_name_rf64 >-> Funclass.
+Coercion PArithRRR: arith_name_rrr >-> Funclass.
+Coercion PArithRRI32: arith_name_rri32 >-> Funclass.
+Coercion PArithRRI64: arith_name_rri64 >-> Funclass.
+Coercion PArithARRR: arith_name_arrr >-> Funclass.
+Coercion PArithARR: arith_name_arr >-> Funclass.
+Coercion PArithARRI32: arith_name_arri32 >-> Funclass.
+Coercion PArithARRI64: arith_name_arri64 >-> Funclass.
+
+End PArithCoercions.
+
+(** ** Basic instructions *)
+
+Inductive basic : Type :=
+ | PArith (i: ar_instruction)
+ | PLoad (i: ld_instruction)
+ | PStore (i: st_instruction)
+ | Pallocframe (sz: Z) (pos: ptrofs) (**r allocate new stack frame *)
+ | Pfreeframe (sz: Z) (pos: ptrofs) (**r deallocate stack frame and restore previous frame *)
+ | Pget (rd: ireg) (rs: preg) (**r get system register *)
+ | Pset (rd: preg) (rs: ireg) (**r set system register *)
+ | Pnop (**r virtual instruction that does nothing *)
+.
+
+Coercion PLoad: ld_instruction >-> basic.
+Coercion PStore: st_instruction >-> basic.
+Coercion PArith: ar_instruction >-> basic.
+
+(** ** Control-flow instructions *)
+
+Inductive control : Type :=
+ | PExpand (i: ex_instruction)
+ | PCtlFlow (i: cf_instruction)
+.
+
+Coercion PExpand: ex_instruction >-> control.
+Coercion PCtlFlow: cf_instruction >-> control.
+
+
+(** * Definition of a bblock (ie a bundle) *)
+
+(** A bundle/bblock must contain at least one instruction.
+
+This choice simplifies the definition of [find_bblock] below:
+indeed, each address of a code block identifies at most one bundle
+(which depends on the number of instructions in the bundles of lower addresses).
+
+*)
+
+Definition non_empty_body (body: list basic): bool :=
+ match body with
+ | nil => false
+ | _ => true
+ end.
+
+Definition non_empty_exit (exit: option control): bool :=
+ match exit with
+ | None => false
+ | _ => true
+ end.
+
+Definition non_empty_bblockb (body: list basic) (exit: option control): bool := non_empty_body body || non_empty_exit exit.
+
+
+(** For now, we consider a builtin is alone in a bundle (and a basic block).
+ Is there a way to avoid that ? (TODO)
+ *)
+Definition builtin_aloneb (body: list basic) (exit: option control) :=
+ match exit with
+ | Some (PExpand (Pbuiltin _ _ _)) =>
+ match body with
+ | nil => true
+ | _ => false
+ end
+ | _ => true
+ end.
+
+Definition wf_bblockb (body: list basic) (exit: option control) :=
+ (non_empty_bblockb body exit) && (builtin_aloneb body exit).
+
+(** A bblock is well-formed if he contains at least one instruction,
+ and if there is a builtin then it must be alone in this bblock. *)
+
+Record bblock := mk_bblock {
+ header: list label;
+ body: list basic;
+ exit: option control;
+ correct: Is_true (wf_bblockb body exit)
+}.
+
+(* FIXME? redundant with definition in Machblock *)
+Definition length_opt {A} (o: option A) : nat :=
+ match o with
+ | Some o => 1
+ | None => 0
+ end.
+
+(** The notion of size induces the notion of "valid" code address given by [find_bblock]
+ The result is in Z to be compatible with operations on PC.
+
+ WARNING: this notion of size is not the same than in Machblock !
+ We ignore labels here...
+
+*)
+Definition size (b:bblock): Z := Z.of_nat (length (body b) + length_opt (exit b)).
+
+Definition bblocks := list bblock.
+
+Fixpoint size_blocks (l: bblocks): Z :=
+ match l with
+ | nil => 0
+ | b :: l =>
+ (size b) + (size_blocks l)
+ end
+ .
+
+Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks }.
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+
+(** * Parallel Semantics of bundles *)
+
+(** The semantics operates over a single mapping from registers
+ (type [preg]) to values. We maintain
+ the convention that integer registers are mapped to values of
+ type [Tint] or [Tlong] (in 64 bit mode),
+ and float registers to values of type [Tsingle] or [Tfloat]. *)
+
+Definition regset := Pregmap.t val.
+
+Definition genv := Genv.t fundef unit.
+
+Notation "a # b" := (a b) (at level 1, only parsing) : asm.
+Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm.
+
+Open Scope asm.
+
+(** *** Undefining some registers *)
+
+Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
+ match l with
+ | nil => rs
+ | r :: l' => undef_regs l' (rs#r <- Vundef)
+ end.
+
+
+(** *** Assigning a register pair *)
+Definition set_pair (p: rpair preg) (v: val) (rs: regset) : regset :=
+ match p with
+ | One r => rs#r <- v
+ | Twolong rhi rlo => rs#rhi <- (Val.hiword v) #rlo <- (Val.loword v)
+ end.
+
+
+(** *** Assigning the result of a builtin *)
+
+Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset :=
+ match res with
+ | BR r => rs#r <- v
+ | BR_none => rs
+ | BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs)
+ end.
+
+Local Open Scope asm.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+(** The parallel semantics on bundles is purely small-step and defined as a relation
+ from the current state (a register set + a memory state) to either [Next rs' m']
+ where [rs'] and [m'] are the updated register set and memory state after execution
+ of the instruction at [rs#PC], or [Stuck] if the processor is stuck.
+
+ The parallel semantics of each instructions handles two states in input:
+ - the actual input state of the bundle which is only read
+ - and the other on which every "write" is performed:
+ it represents a temporary "writes" buffer, from which the final state
+ of the bundle is computed.
+
+ NB: the sequential semantics defined in [Asmblock] is derived
+ from the parallel semantics of each instruction by identifying
+ the read state and the write state.
+
+*)
+
+Inductive outcome: Type :=
+ | Next (rs:regset) (m:mem)
+ | Stuck
+.
+
+(** *** Arithmetic Expressions (including comparisons) *)
+
+Inductive signedness: Type := Signed | Unsigned.
+
+Inductive intsize: Type := Int | Long.
+
+Definition itest_for_cmp (c: comparison) (s: signedness) :=
+ match c, s with
+ | Cne, Signed => ITne
+ | Ceq, Signed => ITeq
+ | Clt, Signed => ITlt
+ | Cge, Signed => ITge
+ | Cle, Signed => ITle
+ | Cgt, Signed => ITgt
+ | Cne, Unsigned => ITneu
+ | Ceq, Unsigned => ITequ
+ | Clt, Unsigned => ITltu
+ | Cge, Unsigned => ITgeu
+ | Cle, Unsigned => ITleu
+ | Cgt, Unsigned => ITgtu
+ end.
+
+Inductive oporder_ftest :=
+ | Normal (ft: ftest)
+ | Reversed (ft: ftest)
+.
+
+Definition ftest_for_cmp (c: comparison) :=
+ match c with
+ | Ceq => Normal FToeq
+ | Cne => Normal FTune
+ | Clt => Normal FTolt
+ | Cle => Reversed FToge
+ | Cgt => Reversed FTolt
+ | Cge => Normal FToge
+ end.
+
+Definition notftest_for_cmp (c: comparison) :=
+ match c with
+ | Ceq => Normal FTune
+ | Cne => Normal FToeq
+ | Clt => Normal FTuge
+ | Cle => Reversed FTult
+ | Cgt => Reversed FTuge
+ | Cge => Normal FTult
+ end.
+
+(* **** CoMPare Signed Words to Zero *)
+Definition btest_for_cmpswz (c: comparison) :=
+ match c with
+ | Cne => BTwnez
+ | Ceq => BTweqz
+ | Clt => BTwltz
+ | Cge => BTwgez
+ | Cle => BTwlez
+ | Cgt => BTwgtz
+ end.
+
+(* **** CoMPare Signed Doubles to Zero *)
+Definition btest_for_cmpsdz (c: comparison) :=
+ match c with
+ | Cne => BTdnez
+ | Ceq => BTdeqz
+ | Clt => BTdltz
+ | Cge => BTdgez
+ | Cle => BTdlez
+ | Cgt => BTdgtz
+ end.
+
+Definition cmp_for_btest (bt: btest) :=
+ match bt with
+ | BTwnez => (Some Cne, Int)
+ | BTweqz => (Some Ceq, Int)
+ | BTwltz => (Some Clt, Int)
+ | BTwgez => (Some Cge, Int)
+ | BTwlez => (Some Cle, Int)
+ | BTwgtz => (Some Cgt, Int)
+
+ | BTdnez => (Some Cne, Long)
+ | BTdeqz => (Some Ceq, Long)
+ | BTdltz => (Some Clt, Long)
+ | BTdgez => (Some Cge, Long)
+ | BTdlez => (Some Cle, Long)
+ | BTdgtz => (Some Cgt, Long)
+ end.
+
+Definition cmpu_for_btest (bt: btest) :=
+ match bt with
+ | BTwnez => (Some Cne, Int)
+ | BTweqz => (Some Ceq, Int)
+ | BTdnez => (Some Cne, Long)
+ | BTdeqz => (Some Ceq, Long)
+ | _ => (None, Int)
+ end.
+
+
+(** **** Comparing integers *)
+Definition compare_int (t: itest) (v1 v2: val): val :=
+ match t with
+ | ITne => Val.cmp Cne v1 v2
+ | ITeq => Val.cmp Ceq v1 v2
+ | ITlt => Val.cmp Clt v1 v2
+ | ITge => Val.cmp Cge v1 v2
+ | ITle => Val.cmp Cle v1 v2
+ | ITgt => Val.cmp Cgt v1 v2
+ | ITneu => Val.mxcmpu Cne v1 v2
+ | ITequ => Val.mxcmpu Ceq v1 v2
+ | ITltu => Val.mxcmpu Clt v1 v2
+ | ITgeu => Val.mxcmpu Cge v1 v2
+ | ITleu => Val.mxcmpu Cle v1 v2
+ | ITgtu => Val.mxcmpu Cgt v1 v2
+ end.
+
+Definition compare_long (t: itest) (v1 v2: val): val :=
+ let res := match t with
+ | ITne => Val.cmpl Cne v1 v2
+ | ITeq => Val.cmpl Ceq v1 v2
+ | ITlt => Val.cmpl Clt v1 v2
+ | ITge => Val.cmpl Cge v1 v2
+ | ITle => Val.cmpl Cle v1 v2
+ | ITgt => Val.cmpl Cgt v1 v2
+ | ITneu => Some (Val.mxcmplu Cne v1 v2)
+ | ITequ => Some (Val.mxcmplu Ceq v1 v2)
+ | ITltu => Some (Val.mxcmplu Clt v1 v2)
+ | ITgeu => Some (Val.mxcmplu Cge v1 v2)
+ | ITleu => Some (Val.mxcmplu Cle v1 v2)
+ | ITgtu => Some (Val.mxcmplu Cgt v1 v2)
+ end in
+ match res with
+ | Some v => v
+ | None => Vundef
+ end
+ .
+
+Definition compare_single (t: ftest) (v1 v2: val): val :=
+ match t with
+ | FTone | FTueq => Vundef (* unused *)
+ | FToeq => Val.cmpfs Ceq v1 v2
+ | FTune => Val.cmpfs Cne v1 v2
+ | FTolt => Val.cmpfs Clt v1 v2
+ | FTuge => Val.notbool (Val.cmpfs Clt v1 v2)
+ | FToge => Val.cmpfs Cge v1 v2
+ | FTult => Val.notbool (Val.cmpfs Cge v1 v2)
+ end.
+
+Definition compare_float (t: ftest) (v1 v2: val): val :=
+ match t with
+ | FTone | FTueq => Vundef (* unused *)
+ | FToeq => Val.cmpf Ceq v1 v2
+ | FTune => Val.cmpf Cne v1 v2
+ | FTolt => Val.cmpf Clt v1 v2
+ | FTuge => Val.notbool (Val.cmpf Clt v1 v2)
+ | FToge => Val.cmpf Cge v1 v2
+ | FTult => Val.notbool (Val.cmpf Cge v1 v2)
+ end.
+
+(** **** Arithmetic evaluators *)
+
+Definition arith_eval_r n :=
+ match n with
+ | Ploadsymbol s ofs => Genv.symbol_address ge s ofs
+ end
+.
+
+Definition arith_eval_rr n v :=
+ match n with
+ | Pmv => v
+ | Pnegw => Val.neg v
+ | Pnegl => Val.negl v
+ | Pcvtl2w => Val.loword v
+ | Psxwd => Val.longofint v
+ | Pzxwd => Val.longofintu v
+ | Pextfz stop start => extfz stop start v
+ | Pextfs stop start => extfs stop start v
+ | Pextfzl stop start => extfzl stop start v
+ | Pextfsl stop start => extfsl stop start v
+ | Pfnegd => Val.negf v
+ | Pfnegw => Val.negfs v
+ | Pfabsd => Val.absf v
+ | Pfabsw => Val.absfs v
+ | Pfinvw => ExtValues.invfs v
+ | Pfnarrowdw => Val.singleoffloat v
+ | Pfwidenlwd => Val.floatofsingle v
+ | Pfloatwrnsz => Val.maketotal (Val.singleofint v)
+ | Pfloatuwrnsz => Val.maketotal (Val.singleofintu v)
+ | Pfloatudrnsz => Val.maketotal (Val.floatoflongu v)
+ | Pfloatdrnsz => Val.maketotal (Val.floatoflong v)
+ | Pfixedwrzz => Val.maketotal (Val.intofsingle v)
+ | Pfixeduwrzz => Val.maketotal (Val.intuofsingle v)
+ | Pfixeddrzz => Val.maketotal (Val.longoffloat v)
+ | Pfixedudrzz => Val.maketotal (Val.longuoffloat v)
+ | Pfixeddrzz_i32 => Val.maketotal (Val.intoffloat v)
+ | Pfixedudrzz_i32 => Val.maketotal (Val.intuoffloat v)
+ end.
+
+Definition arith_eval_ri32 n i :=
+ match n with
+ | Pmake => Vint i
+ end.
+
+Definition arith_eval_ri64 n i :=
+ match n with
+ | Pmakel => Vlong i
+ end.
+
+Definition arith_eval_rf32 n i :=
+ match n with
+ | Pmakefs => Vsingle i
+ end.
+
+Definition arith_eval_rf64 n i :=
+ match n with
+ | Pmakef => Vfloat i
+ end.
+
+Definition arith_eval_rrr n v1 v2 :=
+ match n with
+ | Pcompw c => compare_int c v1 v2
+ | Pcompl c => compare_long c v1 v2
+ | Pfcompw c => compare_single c v1 v2
+ | Pfcompl c => compare_float c v1 v2
+
+ | Paddw => Val.add v1 v2
+ | Psubw => Val.sub v1 v2
+ | Pmulw => Val.mul v1 v2
+ | Pandw => Val.and v1 v2
+ | Pnandw => Val.notint (Val.and v1 v2)
+ | Porw => Val.or v1 v2
+ | Pnorw => Val.notint (Val.or v1 v2)
+ | Pxorw => Val.xor v1 v2
+ | Pnxorw => Val.notint (Val.xor v1 v2)
+ | Pandnw => Val.and (Val.notint v1) v2
+ | Pornw => Val.or (Val.notint v1) v2
+ | Psrlw => Val.shru v1 v2
+ | Psraw => Val.shr v1 v2
+ | Psllw => Val.shl v1 v2
+ | Psrxw => ExtValues.val_shrx v1 v2
+
+ | Paddl => Val.addl v1 v2
+ | Psubl => Val.subl v1 v2
+ | Pandl => Val.andl v1 v2
+ | Pnandl => Val.notl (Val.andl v1 v2)
+ | Porl => Val.orl v1 v2
+ | Pnorl => Val.notl (Val.orl v1 v2)
+ | Pxorl => Val.xorl v1 v2
+ | Pnxorl => Val.notl (Val.xorl v1 v2)
+ | Pandnl => Val.andl (Val.notl v1) v2
+ | Pornl => Val.orl (Val.notl v1) v2
+ | Pmull => Val.mull v1 v2
+ | Pslll => Val.shll v1 v2
+ | Psrll => Val.shrlu v1 v2
+ | Psral => Val.shrl v1 v2
+ | Psrxl => ExtValues.val_shrxl v1 v2
+
+ | Pfaddd => Val.addf v1 v2
+ | Pfaddw => Val.addfs v1 v2
+ | Pfsbfd => Val.subf v1 v2
+ | Pfsbfw => Val.subfs v1 v2
+ | Pfmuld => Val.mulf v1 v2
+ | Pfmulw => Val.mulfs v1 v2
+
+ | Pfmind => ExtValues.minf v1 v2
+ | Pfminw => ExtValues.minfs v1 v2
+ | Pfmaxd => ExtValues.maxf v1 v2
+ | Pfmaxw => ExtValues.maxfs v1 v2
+
+ | Paddxw shift => ExtValues.addx (int_of_shift1_4 shift) v1 v2
+ | Paddxl shift => ExtValues.addxl (int_of_shift1_4 shift) v1 v2
+
+ | Prevsubxw shift => ExtValues.revsubx (int_of_shift1_4 shift) v1 v2
+ | Prevsubxl shift => ExtValues.revsubxl (int_of_shift1_4 shift) v1 v2
+ end.
+
+Definition arith_eval_rri32 n v i :=
+ match n with
+ | Pcompiw c => compare_int c v (Vint i)
+ | Paddiw => Val.add v (Vint i)
+ | Prevsubiw => Val.sub (Vint i) v
+ | Pmuliw => Val.mul v (Vint i)
+ | Pandiw => Val.and v (Vint i)
+ | Pnandiw => Val.notint (Val.and v (Vint i))
+ | Poriw => Val.or v (Vint i)
+ | Pnoriw => Val.notint (Val.or v (Vint i))
+ | Pxoriw => Val.xor v (Vint i)
+ | Pnxoriw => Val.notint (Val.xor v (Vint i))
+ | Pandniw => Val.and (Val.notint v) (Vint i)
+ | Porniw => Val.or (Val.notint v) (Vint i)
+ | Psraiw => Val.shr v (Vint i)
+ | Psrxiw => ExtValues.val_shrx v (Vint i)
+ | Psrliw => Val.shru v (Vint i)
+ | Pslliw => Val.shl v (Vint i)
+ | Proriw => Val.ror v (Vint i)
+ | Psllil => Val.shll v (Vint i)
+ | Psrxil => ExtValues.val_shrxl v (Vint i)
+ | Psrlil => Val.shrlu v (Vint i)
+ | Psrail => Val.shrl v (Vint i)
+ | Paddxiw shift => ExtValues.addx (int_of_shift1_4 shift) v (Vint i)
+ | Prevsubxiw shift => ExtValues.revsubx (int_of_shift1_4 shift) v (Vint i)
+ end.
+
+Definition arith_eval_rri64 n v i :=
+ match n with
+ | Pcompil c => compare_long c v (Vlong i)
+ | Paddil => Val.addl v (Vlong i)
+ | Prevsubil => Val.subl (Vlong i) v
+ | Pmulil => Val.mull v (Vlong i)
+ | Pandil => Val.andl v (Vlong i)
+ | Pnandil => Val.notl (Val.andl v (Vlong i))
+ | Poril => Val.orl v (Vlong i)
+ | Pnoril => Val.notl (Val.orl v (Vlong i))
+ | Pxoril => Val.xorl v (Vlong i)
+ | Pnxoril => Val.notl (Val.xorl v (Vlong i))
+ | Pandnil => Val.andl (Val.notl v) (Vlong i)
+ | Pornil => Val.orl (Val.notl v) (Vlong i)
+ | Paddxil shift => ExtValues.addxl (int_of_shift1_4 shift) v (Vlong i)
+ | Prevsubxil shift => ExtValues.revsubxl (int_of_shift1_4 shift) v (Vlong i)
+ end.
+
+Definition cmove bt v1 v2 v3 :=
+ match cmp_for_btest bt with
+ | (Some c, Int) =>
+ match Val.cmp_bool c v2 (Vint Int.zero) with
+ | None => Vundef
+ | Some true => v3
+ | Some false => v1
+ end
+ | (Some c, Long) =>
+ match Val.cmpl_bool c v2 (Vlong Int64.zero) with
+ | None => Vundef
+ | Some true => v3
+ | Some false => v1
+ end
+ | (None, _) => Vundef
+ end.
+
+Definition cmoveu bt v1 v2 v3 :=
+ match cmpu_for_btest bt with
+ | (Some c, Int) =>
+ match Val.mxcmpu_bool c v2 (Vint Int.zero) with
+ | None => Vundef
+ | Some true => v3
+ | Some false => v1
+ end
+ | (Some c, Long) =>
+ match Val.mxcmplu_bool c v2 (Vlong Int64.zero) with
+ | None => Vundef
+ | Some true => v3
+ | Some false => v1
+ end
+ | (None, _) => Vundef
+ end.
+
+Definition arith_eval_arrr n v1 v2 v3 :=
+ match n with
+ | Pmaddw => Val.add v1 (Val.mul v2 v3)
+ | Pmaddl => Val.addl v1 (Val.mull v2 v3)
+ | Pmsubw => Val.sub v1 (Val.mul v2 v3)
+ | Pmsubl => Val.subl v1 (Val.mull v2 v3)
+ | Pcmove bt => cmove bt v1 v2 v3
+ | Pcmoveu bt => cmoveu bt v1 v2 v3
+ | Pfmaddfw => ExtValues.fmaddfs v1 v2 v3
+ | Pfmaddfl => ExtValues.fmaddf v1 v2 v3
+ | Pfmsubfw => ExtValues.fmsubfs v1 v2 v3
+ | Pfmsubfl => ExtValues.fmsubf v1 v2 v3
+ end.
+
+Definition arith_eval_arr n v1 v2 :=
+ match n with
+ | Pinsf stop start => ExtValues.insf stop start v1 v2
+ | Pinsfl stop start => ExtValues.insfl stop start v1 v2
+ end.
+
+Definition arith_eval_arri32 n v1 v2 v3 :=
+ match n with
+ | Pmaddiw => Val.add v1 (Val.mul v2 (Vint v3))
+ | Pcmoveiw bt => cmove bt v1 v2 (Vint v3)
+ | Pcmoveuiw bt => cmoveu bt v1 v2 (Vint v3)
+ end.
+
+Definition arith_eval_arri64 n v1 v2 v3 :=
+ match n with
+ | Pmaddil => Val.addl v1 (Val.mull v2 (Vlong v3))
+ | Pcmoveil bt => cmove bt v1 v2 (Vlong v3)
+ | Pcmoveuil bt => cmoveu bt v1 v2 (Vlong v3)
+ end.
+
+Definition parexec_arith_instr (ai: ar_instruction) (rsr rsw: regset): regset :=
+ match ai with
+ | PArithR n d => rsw#d <- (arith_eval_r n)
+
+ | PArithRR n d s => rsw#d <- (arith_eval_rr n rsr#s)
+
+ | PArithRI32 n d i => rsw#d <- (arith_eval_ri32 n i)
+ | PArithRI64 n d i => rsw#d <- (arith_eval_ri64 n i)
+ | PArithRF32 n d i => rsw#d <- (arith_eval_rf32 n i)
+ | PArithRF64 n d i => rsw#d <- (arith_eval_rf64 n i)
+
+ | PArithRRR n d s1 s2 => rsw#d <- (arith_eval_rrr n rsr#s1 rsr#s2)
+ | PArithRRI32 n d s i => rsw#d <- (arith_eval_rri32 n rsr#s i)
+ | PArithRRI64 n d s i => rsw#d <- (arith_eval_rri64 n rsr#s i)
+
+ | PArithARRR n d s1 s2 => rsw#d <- (arith_eval_arrr n rsr#d rsr#s1 rsr#s2)
+ | PArithARR n d s => rsw#d <- (arith_eval_arr n rsr#d rsr#s)
+ | PArithARRI32 n d s i => rsw#d <- (arith_eval_arri32 n rsr#d rsr#s i)
+ | PArithARRI64 n d s i => rsw#d <- (arith_eval_arri64 n rsr#d rsr#s i)
+ end.
+
+Definition eval_offset (ofs: offset) : res ptrofs := OK ofs.
+
+(** *** load/store instructions *)
+
+Definition parexec_incorrect_load trap d rsw mw :=
+ match trap with
+ | TRAP => Stuck
+ | NOTRAP => Next (rsw#d <- Vundef) mw
+ end.
+
+Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) :=
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with
+ | None => parexec_incorrect_load trap d rsw mw
+ | Some v => Next (rsw#d <- v) mw
+ end
+ | _ => Stuck
+ end.
+
+Definition parexec_load_q_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_q) (a: ireg) (ofs: offset) :=
+ let (rd0, rd1) := gpreg_q_expand d in
+(* NB: By construction of [gpreg_q], register rd0 and rd1 are distinct, thus, the register writes cannot overlap.
+ But we do not need to express/prove this in the semantics.
+*)
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) ofs) with
+ | None => Stuck
+ | Some v0 =>
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) with
+ | None => Stuck
+ | Some v1 => Next (rsw#rd0 <- v0 #rd1 <- v1) mw
+ end
+ end.
+
+Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a: ireg) (ofs: offset) :=
+ match gpreg_o_expand d with
+ | (rd0, rd1, rd2, rd3) =>
+(* NB: By construction of [gpreg_o], the four destination registers are pairwise distinct, thus, the register writes cannot overlap.
+ But we do not need to express/prove this in the semantics.
+*)
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) ofs) with
+ | None => Stuck
+ | Some v0 =>
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) with
+ | None => Stuck
+ | Some v1 =>
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 16))) with
+ | None => Stuck
+ | Some v2 =>
+ match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 24))) with
+ | None => Stuck
+ | Some v3 =>
+ Next (rsw#rd0 <- v0 #rd1 <- v1 #rd2 <- v2 #rd3 <- v3) mw
+ end
+ end
+ end
+ end
+ end.
+
+Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
+ match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with
+ | None => parexec_incorrect_load trap d rsw mw
+ | Some v => Next (rsw#d <- v) mw
+ end.
+
+Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
+ match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with
+ | None => parexec_incorrect_load trap d rsw mw
+ | Some v => Next (rsw#d <- v) mw
+ end.
+
+Definition parexec_store_offset (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a: ireg) (ofs: offset) :=
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.storev chunk mr (Val.offset_ptr (rsr a) ptr) (rsr s) with
+ | None => Stuck
+ | Some m' => Next rsw m'
+ end
+ | _ => Stuck
+ end.
+
+Definition parexec_store_reg
+ (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) :=
+ match Mem.storev chunk mr (Val.addl (rsr a) (rsr ro)) (rsr s) with
+ | None => Stuck
+ | Some m' => Next rsw m'
+ end.
+
+Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) :=
+ match Mem.storev chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) (rsr s) with
+ | None => Stuck
+ | Some m' => Next rsw m'
+ end.
+
+Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) :=
+ let (s0, s1) := gpreg_q_expand s in
+ match Mem.storev Many64 mr (Val.offset_ptr (rsr a) ofs) (rsr s0) with
+ | None => Stuck
+ | Some m1 =>
+ match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) (rsr s1) with
+ | None => Stuck
+ | Some m2 => Next rsw m2
+ end
+ end.
+
+Definition parexec_store_o_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_o) (a: ireg) (ofs: offset) :=
+ match gpreg_o_expand s with
+ | (s0, s1, s2, s3) =>
+ match Mem.storev Many64 mr (Val.offset_ptr (rsr a) ofs) (rsr s0) with
+ | None => Stuck
+ | Some m1 =>
+ match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) (rsr s1) with
+ | None => Stuck
+ | Some m2 =>
+ match Mem.storev Many64 m2 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 16))) (rsr s2) with
+ | None => Stuck
+ | Some m3 =>
+ match Mem.storev Many64 m3 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 24))) (rsr s3) with
+ | None => Stuck
+ | Some m4 => Next rsw m4
+ end
+ end
+ end
+ end
+ end.
+
+
+Definition load_chunk n :=
+ match n with
+ | Plb => Mint8signed
+ | Plbu => Mint8unsigned
+ | Plh => Mint16signed
+ | Plhu => Mint16unsigned
+ | Plw => Mint32
+ | Plw_a => Many32
+ | Pld => Mint64
+ | Pld_a => Many64
+ | Pfls => Mfloat32
+ | Pfld => Mfloat64
+ end.
+
+Definition store_chunk n :=
+ match n with
+ | Psb => Mint8unsigned
+ | Psh => Mint16unsigned
+ | Psw => Mint32
+ | Psw_a => Many32
+ | Psd => Mint64
+ | Psd_a => Many64
+ | Pfss => Mfloat32
+ | Pfsd => Mfloat64
+ end.
+
+(** ** Basic (instruction) step *)
+
+Definition bstep (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
+ match bi with
+ | PArith ai => Next (parexec_arith_instr ai rsr rsw) mw
+
+ | PLoad (PLoadRRO trap n d a ofs) => parexec_load_offset trap (load_chunk n) rsr rsw mr mw d a ofs
+ | PLoad (PLoadRRR trap n d a ro) => parexec_load_reg trap (load_chunk n) rsr rsw mr mw d a ro
+ | PLoad (PLoadRRRXS trap n d a ro) => parexec_load_regxs trap (load_chunk n) rsr rsw mr mw d a ro
+ | PLoad (PLoadQRRO d a ofs) =>
+ parexec_load_q_offset rsr rsw mr mw d a ofs
+ | PLoad (PLoadORRO d a ofs) =>
+ parexec_load_o_offset rsr rsw mr mw d a ofs
+
+ | PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs
+ | PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro
+ | PStoreRRRXS n s a ro => parexec_store_regxs (store_chunk n) rsr rsw mr mw s a ro
+ | PStoreQRRO s a ofs =>
+ parexec_store_q_offset rsr rsw mr mw s a ofs
+ | PStoreORRO s a ofs =>
+ parexec_store_o_offset rsr rsw mr mw s a ofs
+
+ | Pallocframe sz pos =>
+ let (mw, stk) := Mem.alloc mr 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr mw (Val.offset_ptr sp pos) rsr#SP with
+ | None => Stuck
+ | Some mw => Next (rsw #FP <- (rsr SP) #SP <- sp #RTMP <- Vundef) mw
+ end
+
+ | Pfreeframe sz pos =>
+ match Mem.loadv Mptr mr (Val.offset_ptr rsr#SP pos) with
+ | None => Stuck
+ | Some v =>
+ match rsr SP with
+ | Vptr stk ofs =>
+ match Mem.free mr stk 0 sz with
+ | None => Stuck
+ | Some mw => Next (rsw#SP <- v #RTMP <- Vundef) mw
+ end
+ | _ => Stuck
+ end
+ end
+ | Pget rd ra =>
+ match ra with
+ | RA => Next (rsw#rd <- (rsr#ra)) mw
+ | _ => Stuck
+ end
+ | Pset ra rd =>
+ match ra with
+ | RA => Next (rsw#ra <- (rsr#rd)) mw
+ | _ => Stuck
+ end
+ | Pnop => Next rsw mw
+end.
+
+(** *** parexec with writes-in-order *)
+Fixpoint parexec_wio_body (body: list basic) (rsr rsw: regset) (mr mw: mem) :=
+ match body with
+ | nil => Next rsw mw
+ | bi::body' =>
+ match bstep bi rsr rsw mr mw with
+ | Next rsw mw => parexec_wio_body body' rsr rsw mr mw
+ | Stuck => Stuck
+ end
+ end.
+
+(* TODO: redundant w.r.t Machblock ?? *)
+Lemma in_dec (lbl: label) (l: list label): { List.In lbl l } + { ~(List.In lbl l) }.
+Proof.
+ apply List.in_dec.
+ apply Pos.eq_dec.
+Qed.
+
+
+
+(* Note: copy-paste from Machblock *)
+Definition is_label (lbl: label) (bb: bblock) : bool :=
+ if in_dec lbl (header bb) then true else false.
+
+Lemma is_label_correct_true lbl bb:
+ List.In lbl (header bb) <-> is_label lbl bb = true.
+Proof.
+ unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
+Qed.
+
+Lemma is_label_correct_false lbl bb:
+ ~(List.In lbl (header bb)) <-> is_label lbl bb = false.
+Proof.
+ unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition.
+Qed.
+
+
+
+(** **** convert a label into a position in the code *)
+Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z :=
+ match lb with
+ | nil => None
+ | b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb'
+ end.
+
+Definition par_goto_label (f: function) (lbl: label) (rsr rsw: regset) (mw: mem) :=
+ match label_pos lbl 0 (fn_blocks f) with
+ | None => Stuck
+ | Some pos =>
+ match rsr#PC with
+ | Vptr b ofs => Next (rsw#PC <- (Vptr b (Ptrofs.repr pos))) mw
+ | _ => Stuck
+ end
+ end.
+
+(** **** Parallel Evaluation of a branch *)
+
+(** Warning: PC is assumed to be already pointing on the next bundle ! *)
+
+Definition par_eval_branch (f: function) (l: label) (rsr rsw: regset) (mw: mem) (res: option bool) :=
+ match res with
+ | Some true => par_goto_label f l rsr rsw mw
+ | Some false => Next (rsw # PC <- (rsr PC)) mw
+ | None => Stuck
+ end.
+
+
+(** **** Parallel execution of a control-flow instruction *)
+
+(** As above: PC is assumed to be incremented on the next block before the control-flow instruction
+*)
+
+Definition parexec_control (f: function) (oc: option control) (rsr rsw: regset) (mw: mem) :=
+ match oc with
+ | None => Next (rsw#PC <- (rsr#PC)) mw
+ | Some ic => (**r Branch Control Unit instructions *)
+ match ic with
+ | Pret =>
+ Next (rsw#PC <- (rsr#RA)) mw
+ | Pcall s =>
+ Next (rsw#RA <- (rsr#PC) #PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw
+ | Picall r =>
+ Next (rsw#RA <- (rsr#PC) #PC <- (rsr#r)) mw
+ | Pjumptable r tbl =>
+ match rsr#r with
+ | Vint n =>
+ match list_nth_z tbl (Int.unsigned n) with
+ | None => Stuck
+ | Some lbl => par_goto_label f lbl rsr (rsw #GPR62 <- Vundef #GPR63 <- Vundef) mw
+ end
+ | _ => Stuck
+ end
+ | Pgoto s =>
+ Next (rsw#PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw
+ | Pigoto r =>
+ Next (rsw#PC <- (rsr#r)) mw
+ | Pj_l l =>
+ par_goto_label f l rsr rsw mw
+ | Pcb bt r l =>
+ match cmp_for_btest bt with
+ | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.cmp_bool c rsr#r (Vint (Int.repr 0)))
+ | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.cmpl_bool c rsr#r (Vlong (Int64.repr 0)))
+ | (None, _) => Stuck
+ end
+ | Pcbu bt r l =>
+ match cmpu_for_btest bt with
+ | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.mxcmpu_bool c rsr#r (Vint (Int.repr 0)))
+ | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.mxcmplu_bool c rsr#r (Vlong (Int64.repr 0)))
+ | (None, _) => Stuck
+ end
+ (**r Pseudo-instructions *)
+ | Pbuiltin ef args res =>
+ Stuck (**r treated specially below *)
+ end
+ end.
+
+
+Definition incrPC size_b (rs: regset) :=
+ rs#PC <- (Val.offset_ptr rs#PC size_b).
+
+(** parallel execution of the exit instruction of a bundle *)
+Definition estep (f: function) ext size_b (rsr rsw: regset) (mw: mem)
+ := parexec_control f ext (incrPC size_b rsr) rsw mw.
+
+Definition parexec_wio f bdy ext size_b (rs: regset) (m: mem): outcome :=
+ match parexec_wio_body bdy rs rs m m with
+ | Next rsw mw => estep f ext size_b rs rsw mw
+ | Stuck => Stuck
+ end.
+
+(** *** non-deterministic (out-of-order writes) parallel execution of bundles *)
+Definition parexec_bblock (f: function) (bundle: bblock) (rs: regset) (m: mem) (o: outcome): Prop :=
+ exists bdy1 bdy2, Permutation (bdy1++bdy2) (body bundle) /\
+ o=match parexec_wio f bdy1 (exit bundle) (Ptrofs.repr (size bundle)) rs m with
+ | Next rsw mw => parexec_wio_body bdy2 rs rsw m mw
+ | Stuck => Stuck
+ end.
+
+(** *** deterministic parallel (out-of-order writes) execution of bundles *)
+Definition det_parexec (f: function) (bundle: bblock) (rs: regset) (m: mem) rs' m': Prop :=
+ forall o, parexec_bblock f bundle rs m o -> o = Next rs' m'.
+
+
+(** *** Translation of the LTL/Linear/Mach view of machine registers to
+ the assembly view. Note that no LTL register maps to [X31]. This
+ register is reserved as temporary, to be used by the generated RV32G
+ code. *)
+
+
+(** **** Undefine all registers except SP and callee-save registers *)
+
+Definition undef_caller_save_regs (rs: regset) : regset :=
+ fun r =>
+ if preg_eq r SP
+ || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
+ then rs r
+ else Vundef.
+
+(** **** Extract the values of the arguments of an external call.
+ We exploit the calling conventions from module [Conventions], except that
+ we use assembly registers instead of locations. *)
+
+Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
+ | extcall_arg_reg: forall r,
+ extcall_arg rs m (R r) (rs (preg_of r))
+ | extcall_arg_stack: forall ofs ty bofs v,
+ bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
+ Mem.loadv (chunk_of_type ty) m
+ (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
+ extcall_arg rs m (S Outgoing ofs ty) v.
+
+Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
+ | extcall_arg_one: forall l v,
+ extcall_arg rs m l v ->
+ extcall_arg_pair rs m (One l) v
+ | extcall_arg_twolong: forall hi lo vhi vlo,
+ extcall_arg rs m hi vhi ->
+ extcall_arg rs m lo vlo ->
+ extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
+
+Definition extcall_arguments
+ (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
+ list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
+
+
+Definition loc_external_result (sg: signature) : rpair preg :=
+ map_rpair preg_of (loc_result sg).
+
+
+(** ** Looking up bblocks in a code sequence by position. *)
+Fixpoint find_bblock (pos: Z) (lb: bblocks) {struct lb} : option bblock :=
+ match lb with
+ | nil => None
+ | b :: il =>
+ if zlt pos 0 then None (*r NOTE: It is impossible to branch inside a block *)
+ else if zeq pos 0 then Some b
+ else find_bblock (pos - (size b)) il
+ end.
+
+
+Inductive state: Type :=
+ | State: regset -> mem -> state.
+
+Definition nextblock (b:bblock) (rs: regset) :=
+ incrPC (Ptrofs.repr (size b)) rs.
+
+Inductive step: state -> trace -> state -> Prop :=
+ | exec_step_internal:
+ forall b ofs f bundle rs m rs' m',
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ det_parexec f bundle rs m rs' m' ->
+ step (State rs m) E0 (State rs' m')
+ | exec_step_builtin:
+ forall b ofs f ef args res rs m vargs t vres rs' m' bi,
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) f.(fn_blocks) = Some bi ->
+ exit bi = Some (PExpand (Pbuiltin ef args res)) ->
+ eval_builtin_args ge rs (rs SP) m args vargs ->
+ external_call ef ge vargs m t vres m' ->
+ rs' = nextblock bi
+ (set_res res vres
+ (undef_regs (map preg_of (destroyed_by_builtin ef))
+ (rs#RTMP <- Vundef))) ->
+ step (State rs m) t (State rs' m')
+ | exec_step_external:
+ forall b ef args res rs m t rs' m',
+ rs PC = Vptr b Ptrofs.zero ->
+ Genv.find_funct_ptr ge b = Some (External ef) ->
+ external_call ef ge args m t res m' ->
+ extcall_arguments rs m (ef_sig ef) args ->
+ rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
+ step (State rs m) t (State rs' m')
+ .
+
+
+(** parallel in-order writes execution of bundles *)
+Definition parexec_wio_bblock (f: function) (b: bblock) (rs: regset) (m: mem): outcome :=
+ parexec_wio f (body b) (exit b) (Ptrofs.repr (size b)) rs m.
+
+
+Lemma parexec_bblock_write_in_order f b rs m:
+ parexec_bblock f b rs m (parexec_wio_bblock f b rs m).
+Proof.
+ exists (body b). exists nil.
+ constructor 1.
+ - rewrite app_nil_r; auto.
+ - unfold parexec_wio_bblock.
+ destruct (parexec_wio f _ _ _); cbn; auto.
+Qed.
+
+
+Local Hint Resolve parexec_bblock_write_in_order: core.
+
+Lemma det_parexec_write_in_order f b rs m rs' m':
+ det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'.
+Proof.
+ unfold det_parexec; auto.
+Qed.
+
+End RELSEM.
+
+(** ** Execution of whole programs. *)
+
+Inductive initial_state (p: program): state -> Prop :=
+ | initial_state_intro: forall m0,
+ let ge := Genv.globalenv p in
+ let rs0 :=
+ (Pregmap.init Vundef)
+ # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero)
+ # SP <- Vnullptr
+ # RA <- Vnullptr in
+ Genv.init_mem p = Some m0 ->
+ initial_state p (State rs0 m0).
+
+Inductive final_state: state -> int -> Prop :=
+ | final_state_intro: forall rs m r,
+ rs PC = Vnullptr ->
+ rs GPR0 = Vint r ->
+ final_state (State rs m) r.
+
+Definition semantics (p: program) :=
+ Semantics step (initial_state p) final_state (Genv.globalenv p).
+
+Remark extcall_arguments_determ:
+ forall rs m sg args1 args2,
+ extcall_arguments rs m sg args1 -> extcall_arguments rs m sg args2 -> args1 = args2.
+Proof.
+ intros until m.
+ assert (A: forall l v1 v2,
+ extcall_arg rs m l v1 -> extcall_arg rs m l v2 -> v1 = v2).
+ { intros. inv H; inv H0; congruence. }
+ assert (B: forall p v1 v2,
+ extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2).
+ { intros. inv H; inv H0.
+ eapply A; eauto.
+ f_equal; eapply A; eauto. }
+ assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 ->
+ forall vl2, list_forall2 (extcall_arg_pair rs m) ll vl2 -> vl1 = vl2).
+ {
+ induction 1; intros vl2 EA; inv EA.
+ auto.
+ f_equal; eauto. }
+ intros. eapply C; eauto.
+Qed.
+
+Lemma semantics_determinate p: determinate (semantics p).
+Proof.
+Ltac Equalities :=
+ match goal with
+ | [ H1: ?a = ?b, H2: ?a = ?c |- _ ] =>
+ rewrite H1 in H2; inv H2; Equalities
+ | _ => idtac
+ end.
+Ltac Det_WIO X :=
+ match goal with
+ | [ H: det_parexec _ _ _ _ _ _ _ |- _ ] =>
+ exploit det_parexec_write_in_order; [ eapply H | idtac]; clear H; intro X
+ | _ => idtac
+ end.
+ intros; constructor; cbn.
+- (* determ *) intros s t1 s1 t2 s2 H H0. inv H; Det_WIO X1;
+ inv H0; Det_WIO X2; Equalities.
+ + split. constructor. auto.
+ + unfold parexec_wio_bblock, parexec_wio in X1. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate.
+ rewrite H8 in X1. discriminate.
+ + unfold parexec_wio_bblock, parexec_wio in X2. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate.
+ rewrite H4 in X2. discriminate.
+ + assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0.
+ exploit external_call_determ. eexact H6. eexact H13. intros [A B].
+ split. auto. intros. destruct B; auto. subst. auto.
+ + assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
+ exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+ split. auto. intros. destruct B; auto. subst. auto.
+- (* trace length *)
+ red; intros. inv H; cbn.
+ lia.
+ eapply external_call_trace_length; eauto.
+ eapply external_call_trace_length; eauto.
+- (* initial states *)
+ intros s1 s2 H H0; inv H; inv H0; f_equal; congruence.
+- (* final no step *)
+ intros s r H; assert (NOTNULL: forall b ofs, Vnullptr <> Vptr b ofs).
+ { intros; unfold Vnullptr; destruct Archi.ptr64; congruence. }
+ inv H. red; intros; red; intros.
+ inv H; rewrite H0 in *; eelim NOTNULL; eauto.
+- (* final states *)
+ intros s r1 r2 H H0; inv H; inv H0. congruence.
+Qed.
diff --git a/kvx/BTL_SEsimplify.v b/kvx/BTL_SEsimplify.v
new file mode 120000
index 00000000..f190e6d5
--- /dev/null
+++ b/kvx/BTL_SEsimplify.v
@@ -0,0 +1 @@
+../aarch64/BTL_SEsimplify.v \ No newline at end of file
diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v
new file mode 100644
index 00000000..441345bf
--- /dev/null
+++ b/kvx/Builtins1.v
@@ -0,0 +1,61 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Platform-specific built-in functions *)
+
+Require Import String Coqlib.
+Require Import AST Integers Floats Values ExtFloats.
+Require Import Builtins0.
+
+Inductive platform_builtin : Type :=
+| BI_fmin
+| BI_fmax
+| BI_fminf
+| BI_fmaxf
+| BI_fma
+| BI_fmaf.
+
+Local Open Scope string_scope.
+
+Definition platform_builtin_table : list (string * platform_builtin) :=
+ ("__builtin_fmin", BI_fmin)
+ :: ("__builtin_fmax", BI_fmax)
+ :: ("__builtin_fminf", BI_fminf)
+ :: ("__builtin_fmaxf", BI_fmaxf)
+ :: ("__builtin_fma", BI_fma)
+ :: ("__builtin_fmaf", BI_fmaf)
+ :: nil.
+
+Definition platform_builtin_sig (b: platform_builtin) : signature :=
+ match b with
+ | BI_fmin | BI_fmax =>
+ mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default
+ | BI_fminf | BI_fmaxf =>
+ mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default
+ | BI_fma =>
+ mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default
+ | BI_fmaf =>
+ mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default
+ end.
+
+Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) :=
+ match b with
+ | BI_fmin => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.min
+ | BI_fmax => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.max
+ | BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min
+ | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max
+ | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma
+ | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
+ end.
diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml
new file mode 100644
index 00000000..7398e0f4
--- /dev/null
+++ b/kvx/CBuiltins.ml
@@ -0,0 +1,145 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* Processor-dependent builtin C functions *)
+
+open C
+
+let builtins = {
+ builtin_typedefs = [
+ "__builtin_va_list", TPtr(TVoid [], [])
+ ];
+ (* The builtin list is inspired from the GCC file builtin_kvx.h *)
+ builtin_functions = [ (* Some builtins are commented out because their opcode is not present (yet?) *)
+ (* BCU Instructions *)
+ "__builtin_kvx_await", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_barrier", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_doze", (TVoid [], [], false); (* opcode not supported in assembly, not in documentation *)
+ "__builtin_kvx_wfxl", (TVoid [], [TInt(IUChar, []); TInt(ILongLong, [])], false); (* DONE *)
+ "__builtin_kvx_wfxm", (TVoid [], [TInt(IUChar, []); TInt(ILongLong, [])], false); (* DONE *)
+ "__builtin_kvx_sleep", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_stop", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_syncgroup", (TVoid [], [TInt(IULongLong, [])], false);
+ "__builtin_kvx_tlbread", (TVoid [], [], false);
+ "__builtin_kvx_tlbwrite", (TVoid [], [], false);
+ "__builtin_kvx_tlbprobe", (TVoid [], [], false);
+ "__builtin_kvx_tlbdinval", (TVoid [], [], false);
+ "__builtin_kvx_tlbiinval", (TVoid [], [], false);
+
+ "__builtin_kvx_get", (TInt(IULongLong, []), [TInt(IInt, [])], false); (* DONE *)
+ "__builtin_kvx_set", (TVoid [], [TInt(IInt, []); TInt(IULongLong, [])], false); (* DONE *)
+
+ (* LSU Instructions *)
+ (* acswapd and acswapw done using headers and assembly *)
+(* "__builtin_kvx_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false);
+ "__builtin_kvx_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false); *) (* see #157 *)
+ "__builtin_kvx_alclrd", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_alclrw", (TInt(IUInt, []), [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_dinval", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_dinvall", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_dtouchl", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_dzerol", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_fence", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_iinval", (TVoid [], [], false); (* DONE *)
+ "__builtin_kvx_iinvals", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_itouchl", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE [not supported by assembler but in documentation] *)
+ "__builtin_kvx_lbsu", (TInt(IChar, []), [TPtr(TVoid [], [])], false);
+ "__builtin_kvx_lbzu", (TInt(IUChar, []), [TPtr(TVoid [], [])], false);
+ "__builtin_kvx_ldu", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *)
+ "__builtin_kvx_lhsu", (TInt(IShort, []), [TPtr(TVoid [], [])], false);
+ "__builtin_kvx_lhzu", (TInt(IUShort, []), [TPtr(TVoid [], [])], false);
+ "__builtin_kvx_lwzu", (TInt(IUInt, []), [TPtr(TVoid [], [])], false);
+
+ (* ALU Instructions *)
+ (* "__builtin_kvx_addhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
+ (* "__builtin_kvx_adds", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
+ (* "__builtin_kvx_bwlu", (TInt(IUInt, []),
+ [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, []); TInt(IUShort, [])], false); *)
+ (* "__builtin_kvx_bwluhp", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_bwluwp", (TInt(IULongLong, []),
+ [TInt(IULongLong, []); TInt(IULongLong, []); TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_cbs", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_cbsdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
+ (* "__builtin_kvx_clz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
+ "__builtin_kvx_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+ "__builtin_clzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+ "__builtin_kvx_ctzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+ "__builtin_ctzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+ (* "__builtin_kvx_clzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
+ (* "__builtin_kvx_cmove", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *)
+ (* "__builtin_kvx_ctz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
+ "__builtin_kvx_ctzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+ "__builtin_kvx_ctzd", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+ (* "__builtin_kvx_ctzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
+ (* "__builtin_kvx_extfz", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_landhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *)
+ (* "__builtin_kvx_sat", (TInt(IInt, []), [TInt(IInt, []); TInt(IUChar, [])], false); *)
+ "__builtin_kvx_satd", (TInt(ILongLong, []), [TInt(ILongLong, []); TInt(IUChar, [])], false);
+ (* "__builtin_kvx_sbfhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
+ "__builtin_kvx_sbmm8", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false);
+ "__builtin_kvx_sbmmt8", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false);
+ (* "__builtin_kvx_sllhps", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_srahps", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *)
+ (* "__builtin_kvx_stsu", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *)
+ "__builtin_kvx_stsud", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false);
+
+
+ (* Synchronization *)
+(* "__builtin_fence",
+ (TVoid [], [], false); *)
+(* (* Float arithmetic *)
+ "__builtin_fmadd",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmsub",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fnmadd",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fnmsub",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); *)
+ "__builtin_fabsf",
+ (TFloat(FFloat, []),
+ [TFloat(FFloat, [])], false);
+ "__builtin_fmax",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmin",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmaxf",
+ (TFloat(FFloat, []),
+ [TFloat(FFloat, []); TFloat(FFloat, [])], false);
+ "__builtin_fminf",
+ (TFloat(FFloat, []),
+ [TFloat(FFloat, []); TFloat(FFloat, [])], false);
+ "__builtin_fma",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmaf",
+ (TFloat(FFloat, []),
+ [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false);
+]
+}
+
+let va_list_type = TPtr(TVoid [], []) (* to check! *)
+let size_va_list = if Archi.ptr64 then 8 else 4
+let va_list_scalar = true
+
+(* Expand memory references inside extended asm statements. Used in C2C. *)
+
+let asm_mem_argument arg = Printf.sprintf "0(%s)" arg
diff --git a/kvx/CSE2deps.v b/kvx/CSE2deps.v
new file mode 100644
index 00000000..c0deacf0
--- /dev/null
+++ b/kvx/CSE2deps.v
@@ -0,0 +1,35 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* David Monniaux CNRS, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ else true
+ | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+ negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ | _, _, _, _ => true
+ end.
diff --git a/kvx/CSE2depsproof.v b/kvx/CSE2depsproof.v
new file mode 100644
index 00000000..a5f7b317
--- /dev/null
+++ b/kvx/CSE2depsproof.v
@@ -0,0 +1,146 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* David Monniaux CNRS, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+ unfold Ptrofs.modulus.
+ rewrite ptrofs_size.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+ \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ cbn in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: try rewrite ptrofs_modulus in *.
+ all: destruct Archi.ptr64.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ - (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ cbn in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ cbn in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ - (* Ainstack / Ainstack *)
+ destruct args. 2: discriminate.
+ destruct args'. 2: discriminate.
+ cbn in OVERLAP.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ cbn in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+Qed.
+
+End SOUNDNESS.
diff --git a/kvx/Chunks.v b/kvx/Chunks.v
new file mode 100644
index 00000000..86d4f0ac
--- /dev/null
+++ b/kvx/Chunks.v
@@ -0,0 +1,36 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import AST.
+Require Import Values.
+Require Import Integers.
+Require Import Coq.ZArith.BinIntDef.
+Require Import BinNums.
+
+Local Open Scope Z_scope.
+
+Definition zscale_of_chunk (chunk: memory_chunk) : Z :=
+ match chunk with
+ | Mint8signed => 0
+ | Mint8unsigned => 0
+ | Mint16signed => 1
+ | Mint16unsigned => 1
+ | Mint32 => 2
+ | Mint64 => 3
+ | Mfloat32 => 2
+ | Mfloat64 => 3
+ | Many32 => 2
+ | Many64 => 3
+ end.
+Definition scale_of_chunk chunk := Vint (Int.repr (zscale_of_chunk chunk)).
diff --git a/kvx/CombineOp.v b/kvx/CombineOp.v
new file mode 100644
index 00000000..ff1db3cd
--- /dev/null
+++ b/kvx/CombineOp.v
@@ -0,0 +1,141 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Recognition of combined operations, addressing modes and conditions
+ during the [CSE] phase. *)
+
+Require Import Coqlib.
+Require Import AST.
+Require Import Integers.
+Require Import Op.
+Require Import CSEdomain.
+
+Section COMBINE.
+
+Variable get: valnum -> option rhs.
+
+Function combine_compimm_ne_0 (x: valnum) : option(condition * list valnum) :=
+ match get x with
+ | Some(Op (Ocmp c) ys) => Some (c, ys)
+ | _ => None
+ end.
+
+Function combine_compimm_eq_0 (x: valnum) : option(condition * list valnum) :=
+ match get x with
+ | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys)
+ | _ => None
+ end.
+
+Function combine_compimm_eq_1 (x: valnum) : option(condition * list valnum) :=
+ match get x with
+ | Some(Op (Ocmp c) ys) => Some (c, ys)
+ | _ => None
+ end.
+
+Function combine_compimm_ne_1 (x: valnum) : option(condition * list valnum) :=
+ match get x with
+ | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys)
+ | _ => None
+ end.
+
+Function combine_cond (cond: condition) (args: list valnum) : option(condition * list valnum) :=
+ match cond, args with
+ | Ccompimm Cne n, x::nil =>
+ if Int.eq_dec n Int.zero then combine_compimm_ne_0 x
+ else if Int.eq_dec n Int.one then combine_compimm_ne_1 x
+ else None
+ | Ccompimm Ceq n, x::nil =>
+ if Int.eq_dec n Int.zero then combine_compimm_eq_0 x
+ else if Int.eq_dec n Int.one then combine_compimm_eq_1 x
+ else None
+ | Ccompuimm Cne n, x::nil =>
+ if Int.eq_dec n Int.zero then combine_compimm_ne_0 x
+ else if Int.eq_dec n Int.one then combine_compimm_ne_1 x
+ else None
+ | Ccompuimm Ceq n, x::nil =>
+ if Int.eq_dec n Int.zero then combine_compimm_eq_0 x
+ else if Int.eq_dec n Int.one then combine_compimm_eq_1 x
+ else None
+ | _, _ => None
+ end.
+
+Function combine_addr (addr: addressing) (args: list valnum) : option(addressing * list valnum) :=
+ match addr, args with
+ | Aindexed n, x::nil =>
+ match get x with
+ | Some(Op (Oaddimm m) ys) =>
+ if Archi.ptr64 then None else Some(Aindexed (Ptrofs.add (Ptrofs.of_int m) n), ys)
+ | Some(Op (Oaddlimm m) ys) =>
+ if Archi.ptr64 then Some(Aindexed (Ptrofs.add (Ptrofs.of_int64 m) n), ys) else None
+ | _ => None
+ end
+ | _, _ => None
+ end.
+
+Function combine_op (op: operation) (args: list valnum) : option(operation * list valnum) :=
+ match op, args with
+ | Oaddimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oaddimm m) ys) => Some(Oaddimm (Int.add m n), ys)
+ | _ => None
+ end
+ | Oandimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oandimm m) ys) =>
+ Some(let p := Int.and m n in
+ if Int.eq p m then (Omove, x :: nil) else (Oandimm p, ys))
+ | _ => None
+ end
+ | Oorimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oorimm m) ys) => Some(Oorimm (Int.or m n), ys)
+ | _ => None
+ end
+ | Oxorimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oxorimm m) ys) => Some(Oxorimm (Int.xor m n), ys)
+ | _ => None
+ end
+ | Oaddlimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oaddlimm m) ys) => Some(Oaddlimm (Int64.add m n), ys)
+ | _ => None
+ end
+ | Oandlimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oandlimm m) ys) =>
+ Some(let p := Int64.and m n in
+ if Int64.eq p m then (Omove, x :: nil) else (Oandlimm p, ys))
+ | _ => None
+ end
+ | Oorlimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oorlimm m) ys) => Some(Oorlimm (Int64.or m n), ys)
+ | _ => None
+ end
+ | Oxorlimm n, x :: nil =>
+ match get x with
+ | Some(Op (Oxorlimm m) ys) => Some(Oxorlimm (Int64.xor m n), ys)
+ | _ => None
+ end
+ | Ocmp cond, _ =>
+ match combine_cond cond args with
+ | Some(cond', args') => Some(Ocmp cond', args')
+ | None => None
+ end
+ | _, _ => None
+ end.
+
+End COMBINE.
diff --git a/kvx/CombineOpproof.v b/kvx/CombineOpproof.v
new file mode 100644
index 00000000..5dffc565
--- /dev/null
+++ b/kvx/CombineOpproof.v
@@ -0,0 +1,176 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Recognition of combined operations, addressing modes and conditions
+ during the [CSE] phase. *)
+
+Require Import FunInd.
+Require Import Coqlib.
+Require Import AST.
+Require Import Integers.
+Require Import Values.
+Require Import Memory.
+Require Import Op.
+Require Import Registers.
+Require Import RTL.
+Require Import CSEdomain.
+Require Import CombineOp.
+
+Section COMBINE.
+
+Variable ge: genv.
+Variable sp: val.
+Variable m: mem.
+Variable get: valnum -> option rhs.
+Variable valu: valnum -> val.
+Hypothesis get_sound: forall v rhs, get v = Some rhs -> rhs_eval_to valu ge sp m rhs (valu v).
+
+Lemma get_op_sound:
+ forall v op vl, get v = Some (Op op vl) -> eval_operation ge sp op (map valu vl) m = Some (valu v).
+Proof.
+ intros. exploit get_sound; eauto. intros REV; inv REV; auto.
+Qed.
+
+Ltac UseGetSound :=
+ match goal with
+ | [ H: get _ = Some _ |- _ ] =>
+ let x := fresh "EQ" in (generalize (get_op_sound _ _ _ H); intros x; cbn in x; FuncInv)
+ end.
+
+Lemma combine_compimm_ne_0_sound:
+ forall x cond args,
+ combine_compimm_ne_0 get x = Some(cond, args) ->
+ eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.zero) /\
+ eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.zero).
+Proof.
+ intros until args. functional induction (combine_compimm_ne_0 get x); intros EQ; inv EQ.
+ (* of cmp *)
+ UseGetSound. rewrite <- H.
+ destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_eq_0_sound:
+ forall x cond args,
+ combine_compimm_eq_0 get x = Some(cond, args) ->
+ eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.zero) /\
+ eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.zero).
+Proof.
+ intros until args. functional induction (combine_compimm_eq_0 get x); intros EQ; inv EQ.
+ (* of cmp *)
+ UseGetSound. rewrite <- H.
+ rewrite eval_negate_condition.
+ destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_eq_1_sound:
+ forall x cond args,
+ combine_compimm_eq_1 get x = Some(cond, args) ->
+ eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.one) /\
+ eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.one).
+Proof.
+ intros until args. functional induction (combine_compimm_eq_1 get x); intros EQ; inv EQ.
+ (* of cmp *)
+ UseGetSound. rewrite <- H.
+ destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto.
+Qed.
+
+Lemma combine_compimm_ne_1_sound:
+ forall x cond args,
+ combine_compimm_ne_1 get x = Some(cond, args) ->
+ eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.one) /\
+ eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.one).
+Proof.
+ intros until args. functional induction (combine_compimm_ne_1 get x); intros EQ; inv EQ.
+ (* of cmp *)
+ UseGetSound. rewrite <- H.
+ rewrite eval_negate_condition.
+ destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto.
+Qed.
+
+Theorem combine_cond_sound:
+ forall cond args cond' args',
+ combine_cond get cond args = Some(cond', args') ->
+ eval_condition cond' (map valu args') m = eval_condition cond (map valu args) m.
+Proof.
+ intros. functional inversion H; subst.
+ (* compimm ne zero *)
+ - cbn; eapply combine_compimm_ne_0_sound; eauto.
+ (* compimm ne one *)
+ - cbn; eapply combine_compimm_ne_1_sound; eauto.
+ (* compimm eq zero *)
+ - cbn; eapply combine_compimm_eq_0_sound; eauto.
+ (* compimm eq one *)
+ - cbn; eapply combine_compimm_eq_1_sound; eauto.
+ (* compuimm ne zero *)
+ - cbn; eapply combine_compimm_ne_0_sound; eauto.
+ (* compuimm ne one *)
+ - cbn; eapply combine_compimm_ne_1_sound; eauto.
+ (* compuimm eq zero *)
+ - cbn; eapply combine_compimm_eq_0_sound; eauto.
+ (* compuimm eq one *)
+ - cbn; eapply combine_compimm_eq_1_sound; eauto.
+Qed.
+
+Theorem combine_addr_sound:
+ forall addr args addr' args',
+ combine_addr get addr args = Some(addr', args') ->
+ eval_addressing ge sp addr' (map valu args') = eval_addressing ge sp addr (map valu args).
+Proof.
+ intros. functional inversion H; subst.
+- (* indexed - addimm *)
+ UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn.
+ rewrite Ptrofs.add_assoc. auto.
+- (* indexed - addimml *)
+ UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn.
+ rewrite Ptrofs.add_assoc. auto.
+Qed.
+
+Theorem combine_op_sound:
+ forall op args op' args',
+ combine_op get op args = Some(op', args') ->
+ eval_operation ge sp op' (map valu args') m = eval_operation ge sp op (map valu args) m.
+Proof.
+ intros. functional inversion H; subst.
+ (* addimm - addimm *)
+ - UseGetSound. FuncInv. cbn.
+ rewrite <- H0. rewrite Val.add_assoc. auto.
+ (* andimm - andimm *)
+ - UseGetSound; cbn.
+ generalize (Int.eq_spec p m0); rewrite H7; intros.
+ rewrite <- H0. rewrite Val.and_assoc. cbn. fold p. rewrite H1. auto.
+ - UseGetSound; cbn.
+ rewrite <- H0. rewrite Val.and_assoc. auto.
+ (* orimm - orimm *)
+ - UseGetSound. cbn. rewrite <- H0. rewrite Val.or_assoc. auto.
+ (* xorimm - xorimm *)
+ - UseGetSound. cbn. rewrite <- H0. rewrite Val.xor_assoc. auto.
+ (* addlimm - addlimm *)
+ - UseGetSound. FuncInv. cbn.
+ rewrite <- H0. rewrite Val.addl_assoc. auto.
+ (* andlimm - andlimm *)
+ - UseGetSound; cbn.
+ generalize (Int64.eq_spec p m0); rewrite H7; intros.
+ rewrite <- H0. rewrite Val.andl_assoc. cbn. fold p. rewrite H1. auto.
+ - UseGetSound; cbn.
+ rewrite <- H0. rewrite Val.andl_assoc. auto.
+ (* orlimm - orlimm *)
+ - UseGetSound. cbn. rewrite <- H0. rewrite Val.orl_assoc. auto.
+ (* xorlimm - xorlimm *)
+ - UseGetSound. cbn. rewrite <- H0. rewrite Val.xorl_assoc. auto.
+ (* cmp *)
+ - cbn. decEq; decEq. eapply combine_cond_sound; eauto.
+Qed.
+
+End COMBINE.
diff --git a/kvx/ConstpropOp.vp b/kvx/ConstpropOp.vp
new file mode 100644
index 00000000..2a428020
--- /dev/null
+++ b/kvx/ConstpropOp.vp
@@ -0,0 +1,312 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Strength reduction for operators and conditions.
+ This is the machine-dependent part of [Constprop]. *)
+
+Require Archi.
+Require Import Coqlib Compopts.
+Require Import AST Integers Floats.
+Require Import Op Registers.
+Require Import ValueDomain.
+
+(** * Converting known values to constants *)
+
+Definition const_for_result (a: aval) : option operation :=
+ match a with
+ | I n => Some(Ointconst n)
+ | L n => if Archi.ptr64 then Some(Olongconst n) else None
+ | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
+ | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
+ | Ptr(Gl id ofs) => Some(Oaddrsymbol id ofs)
+ | Ptr(Stk ofs) => Some(Oaddrstack ofs)
+ | _ => None
+ end.
+
+(** * Operator strength reduction *)
+
+(** We now define auxiliary functions for strength reduction of
+ operators and addressing modes: replacing an operator with a cheaper
+ one if some of its arguments are statically known. These are again
+ large pattern-matchings expressed in indirect style. *)
+
+Nondetfunction cond_strength_reduction
+ (cond: condition) (args: list reg) (vl: list aval) :=
+ match cond, args, vl with
+ | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
+ (Ccompimm (swap_comparison c) n1, r2 :: nil)
+ | Ccomp c, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+ (Ccompimm c n2, r1 :: nil)
+ | Ccompu c, r1 :: r2 :: nil, I n1 :: v2 :: nil =>
+ (Ccompuimm (swap_comparison c) n1, r2 :: nil)
+ | Ccompu c, r1 :: r2 :: nil, v1 :: I n2 :: nil =>
+ (Ccompuimm c n2, r1 :: nil)
+ | Ccompl c, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
+ (Ccomplimm (swap_comparison c) n1, r2 :: nil)
+ | Ccompl c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+ (Ccomplimm c n2, r1 :: nil)
+ | Ccomplu c, r1 :: r2 :: nil, L n1 :: v2 :: nil =>
+ (Ccompluimm (swap_comparison c) n1, r2 :: nil)
+ | Ccomplu c, r1 :: r2 :: nil, v1 :: L n2 :: nil =>
+ (Ccompluimm c n2, r1 :: nil)
+ | _, _, _ =>
+ (cond, args)
+ end.
+
+Definition make_cmp_base (c: condition) (args: list reg) (vl: list aval) :=
+ let (c', args') := cond_strength_reduction c args vl in (Ocmp c', args').
+
+Definition make_cmp_imm_eq (c: condition) (args: list reg) (vl: list aval)
+ (n: int) (r1: reg) (v1: aval) :=
+ if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil)
+ else if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil)
+ else make_cmp_base c args vl.
+
+Definition make_cmp_imm_ne (c: condition) (args: list reg) (vl: list aval)
+ (n: int) (r1: reg) (v1: aval) :=
+ if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil)
+ else if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil)
+ else make_cmp_base c args vl.
+
+Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) :=
+ match c, args, vl with
+ | Ccompimm Ceq n, r1 :: nil, v1 :: nil =>
+ make_cmp_imm_eq c args vl n r1 v1
+ | Ccompimm Cne n, r1 :: nil, v1 :: nil =>
+ make_cmp_imm_ne c args vl n r1 v1
+ | Ccompuimm Ceq n, r1 :: nil, v1 :: nil =>
+ make_cmp_imm_eq c args vl n r1 v1
+ | Ccompuimm Cne n, r1 :: nil, v1 :: nil =>
+ make_cmp_imm_ne c args vl n r1 v1
+ | _, _, _ =>
+ make_cmp_base c args vl
+ end.
+
+Definition make_addimm (n: int) (r: reg) :=
+ if Int.eq n Int.zero
+ then (Omove, r :: nil)
+ else (Oaddimm n, r :: nil).
+
+Definition make_shlimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int.iwordsize then (Oshlimm n, r1 :: nil)
+ else (Oshl, r1 :: r2 :: nil).
+
+Definition make_shrimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int.iwordsize then (Oshrimm n, r1 :: nil)
+ else (Oshr, r1 :: r2 :: nil).
+
+Definition make_shruimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int.iwordsize then (Oshruimm n, r1 :: nil)
+ else (Oshru, r1 :: r2 :: nil).
+
+Definition make_mulimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then
+ (Ointconst Int.zero, nil)
+ else if Int.eq n Int.one then
+ (Omove, r1 :: nil)
+ else
+ match Int.is_power2 n with
+ | Some l => (Oshlimm l, r1 :: nil)
+ | None => (Omul, r1 :: r2 :: nil)
+ end.
+
+Definition make_andimm (n: int) (r: reg) (a: aval) :=
+ if Int.eq n Int.zero then (Ointconst Int.zero, nil)
+ else if Int.eq n Int.mone then (Omove, r :: nil)
+ else if match a with Uns _ m => Int.eq (Int.zero_ext m (Int.not n)) Int.zero
+ | _ => false end
+ then (Omove, r :: nil)
+ else (Oandimm n, r :: nil).
+
+Definition make_orimm (n: int) (r: reg) :=
+ if Int.eq n Int.zero then (Omove, r :: nil)
+ else if Int.eq n Int.mone then (Ointconst Int.mone, nil)
+ else (Oorimm n, r :: nil).
+
+Definition make_xorimm (n: int) (r: reg) :=
+ if Int.eq n Int.zero then (Omove, r :: nil)
+ else (Oxorimm n, r :: nil).
+
+Definition make_divimm n (r1 r2: reg) :=
+ if Int.eq n Int.one then
+ (Omove, r1 :: nil)
+ else
+ match Int.is_power2 n with
+ | Some l => if Int.ltu l (Int.repr 31)
+ then (Oshrximm l, r1 :: nil)
+ else (Odiv, r1 :: r2 :: nil)
+ | None => (Odiv, r1 :: r2 :: nil)
+ end.
+
+Definition make_divuimm n (r1 r2: reg) :=
+ if Int.eq n Int.one then
+ (Omove, r1 :: nil)
+ else
+ match Int.is_power2 n with
+ | Some l => (Oshruimm l, r1 :: nil)
+ | None => (Odivu, r1 :: r2 :: nil)
+ end.
+
+Definition make_moduimm n (r1 r2: reg) :=
+ match Int.is_power2 n with
+ | Some l => (Oandimm (Int.sub n Int.one), r1 :: nil)
+ | None => (Omodu, r1 :: r2 :: nil)
+ end.
+
+Definition make_addlimm (n: int64) (r: reg) :=
+ if Int64.eq n Int64.zero
+ then (Omove, r :: nil)
+ else (Oaddlimm n, r :: nil).
+
+Definition make_shllimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int64.iwordsize' then (Oshllimm n, r1 :: nil)
+ else (Oshll, r1 :: r2 :: nil).
+
+Definition make_shrlimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int64.iwordsize' then (Oshrlimm n, r1 :: nil)
+ else (Oshrl, r1 :: r2 :: nil).
+
+Definition make_shrluimm (n: int) (r1 r2: reg) :=
+ if Int.eq n Int.zero then (Omove, r1 :: nil)
+ else if Int.ltu n Int64.iwordsize' then (Oshrluimm n, r1 :: nil)
+ else (Oshrlu, r1 :: r2 :: nil).
+
+Definition make_mullimm (n: int64) (r1 r2: reg) :=
+ if Int64.eq n Int64.zero then
+ (Olongconst Int64.zero, nil)
+ else if Int64.eq n Int64.one then
+ (Omove, r1 :: nil)
+ else
+ match Int64.is_power2' n with
+ | Some l => (Oshllimm l, r1 :: nil)
+ | None => (Omull, r1 :: r2 :: nil)
+ end.
+
+Definition make_andlimm (n: int64) (r: reg) (a: aval) :=
+ if Int64.eq n Int64.zero then (Olongconst Int64.zero, nil)
+ else if Int64.eq n Int64.mone then (Omove, r :: nil)
+ else (Oandlimm n, r :: nil).
+
+Definition make_orlimm (n: int64) (r: reg) :=
+ if Int64.eq n Int64.zero then (Omove, r :: nil)
+ else if Int64.eq n Int64.mone then (Olongconst Int64.mone, nil)
+ else (Oorlimm n, r :: nil).
+
+Definition make_xorlimm (n: int64) (r: reg) :=
+ if Int64.eq n Int64.zero then (Omove, r :: nil)
+ else (Oxorlimm n, r :: nil).
+
+Definition make_divlimm n (r1 r2: reg) :=
+ match Int64.is_power2' n with
+ | Some l => if Int.ltu l (Int.repr 63)
+ then (Oshrxlimm l, r1 :: nil)
+ else (Odivl, r1 :: r2 :: nil)
+ | None => (Odivl, r1 :: r2 :: nil)
+ end.
+
+Definition make_divluimm n (r1 r2: reg) :=
+ match Int64.is_power2' n with
+ | Some l => (Oshrluimm l, r1 :: nil)
+ | None => (Odivlu, r1 :: r2 :: nil)
+ end.
+
+Definition make_modluimm n (r1 r2: reg) :=
+ match Int64.is_power2 n with
+ | Some l => (Oandlimm (Int64.sub n Int64.one), r1 :: nil)
+ | None => (Omodlu, r1 :: r2 :: nil)
+ end.
+
+Definition make_mulfimm (n: float) (r r1 r2: reg) :=
+ if Float.eq_dec n (Float.of_int (Int.repr 2))
+ then (Oaddf, r :: r :: nil)
+ else (Omulf, r1 :: r2 :: nil).
+
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+ if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+ then (Oaddfs, r :: r :: nil)
+ else (Omulfs, r1 :: r2 :: nil).
+
+Definition make_cast8signed (r: reg) (a: aval) :=
+ if vincl a (Sgn Ptop 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
+Definition make_cast16signed (r: reg) (a: aval) :=
+ if vincl a (Sgn Ptop 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
+
+Nondetfunction op_strength_reduction
+ (op: operation) (args: list reg) (vl: list aval) :=
+ match op, args, vl with
+ | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1
+ | Ocast16signed, r1 :: nil, v1 :: nil => make_cast16signed r1 v1
+ | Oadd, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_addimm n1 r2
+ | Oadd, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm n2 r1
+ | Osub, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg n2) r1
+ | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2 r1
+ | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1 r2
+ | Odiv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divimm n2 r1 r2
+ | Odivu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divuimm n2 r1 r2
+ | Omodu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_moduimm n2 r1 r2
+ | Oand, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_andimm n1 r2 v2
+ | Oand, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm n2 r1 v1
+ | Oandimm n, r1 :: nil, v1 :: nil => make_andimm n r1 v1
+ | Oor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_orimm n1 r2
+ | Oor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm n2 r1
+ | Oxor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_xorimm n1 r2
+ | Oxor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm n2 r1
+ | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
+ | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
+ | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
+ | Oaddl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_addlimm n1 r2
+ | Oaddl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm n2 r1
+ | Osubl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg n2) r1
+ | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2 r1
+ | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1 r2
+ | Odivl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divlimm n2 r1 r2
+ | Odivlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divluimm n2 r1 r2
+ | Omodlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_modluimm n2 r1 r2
+ | Oandl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_andlimm n1 r2 v2
+ | Oandl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm n2 r1 v1
+ | Oandlimm n, r1 :: nil, v1 :: nil => make_andlimm n r1 v1
+ | Oorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_orlimm n1 r2
+ | Oorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm n2 r1
+ | Oxorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_xorlimm n1 r2
+ | Oxorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm n2 r1
+ | Oshll, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shllimm n2 r1 r2
+ | Oshrl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrlimm n2 r1 r2
+ | Oshrlu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrluimm n2 r1 r2
+ | Ocmp c, args, vl => make_cmp c args vl
+ | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
+ | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+ | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+ | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
+ | _, _, _ => (op, args)
+ end.
+
+Nondetfunction addr_strength_reduction
+ (addr: addressing) (args: list reg) (vl: list aval) :=
+ match addr, args, vl with
+ | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil =>
+ if (orb (Archi.pic_code tt) (negb (Compopts.optim_globaladdrtmp tt)))
+ then (addr, args)
+ else (Aglobal symb (Ptrofs.add n1 n), nil)
+ | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil =>
+ (Ainstack (Ptrofs.add n1 n), nil)
+ | _, _, _ =>
+ (addr, args)
+ end.
+
diff --git a/kvx/ConstpropOpproof.v b/kvx/ConstpropOpproof.v
new file mode 100644
index 00000000..f67b8a4e
--- /dev/null
+++ b/kvx/ConstpropOpproof.v
@@ -0,0 +1,749 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Correctness proof for operator strength reduction. *)
+
+Require Import Coqlib Compopts.
+Require Import Integers Floats Values Memory Globalenvs Events.
+Require Import Op Registers RTL ValueDomain.
+Require Import ConstpropOp.
+Require Import Lia.
+
+Section STRENGTH_REDUCTION.
+
+Variable bc: block_classification.
+Variable ge: genv.
+Hypothesis GENV: genv_match bc ge.
+Variable sp: block.
+Hypothesis STACK: bc sp = BCstack.
+Variable ae: AE.t.
+Variable e: regset.
+Variable m: mem.
+Hypothesis MATCH: ematch bc e ae.
+
+Lemma match_G:
+ forall r id ofs,
+ AE.get r ae = Ptr(Gl id ofs) -> Val.lessdef e#r (Genv.symbol_address ge id ofs).
+Proof.
+ intros. apply vmatch_ptr_gl with bc; auto. rewrite <- H. apply MATCH.
+Qed.
+
+Lemma match_S:
+ forall r ofs,
+ AE.get r ae = Ptr(Stk ofs) -> Val.lessdef e#r (Vptr sp ofs).
+Proof.
+ intros. apply vmatch_ptr_stk with bc; auto. rewrite <- H. apply MATCH.
+Qed.
+
+Ltac InvApproxRegs :=
+ match goal with
+ | [ H: _ :: _ = _ :: _ |- _ ] =>
+ injection H; clear H; intros; InvApproxRegs
+ | [ H: ?v = AE.get ?r ae |- _ ] =>
+ generalize (MATCH r); rewrite <- H; clear H; intro; InvApproxRegs
+ | _ => idtac
+ end.
+
+Ltac SimplVM :=
+ match goal with
+ | [ H: vmatch _ ?v (I ?n) |- _ ] =>
+ let E := fresh in
+ assert (E: v = Vint n) by (inversion H; auto);
+ rewrite E in *; clear H; SimplVM
+ | [ H: vmatch _ ?v (L ?n) |- _ ] =>
+ let E := fresh in
+ assert (E: v = Vlong n) by (inversion H; auto);
+ rewrite E in *; clear H; SimplVM
+ | [ H: vmatch _ ?v (F ?n) |- _ ] =>
+ let E := fresh in
+ assert (E: v = Vfloat n) by (inversion H; auto);
+ rewrite E in *; clear H; SimplVM
+ | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+ let E := fresh in
+ assert (E: v = Vsingle n) by (inversion H; auto);
+ rewrite E in *; clear H; SimplVM
+ | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
+ let E := fresh in
+ assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto);
+ clear H; SimplVM
+ | [ H: vmatch _ ?v (Ptr(Stk ?ofs)) |- _ ] =>
+ let E := fresh in
+ assert (E: Val.lessdef v (Vptr sp ofs)) by (eapply vmatch_ptr_stk; eauto);
+ clear H; SimplVM
+ | _ => idtac
+ end.
+
+Lemma const_for_result_correct:
+ forall a op v,
+ const_for_result a = Some op ->
+ vmatch bc v a ->
+ exists v', eval_operation ge (Vptr sp Ptrofs.zero) op nil m = Some v' /\ Val.lessdef v v'.
+Proof.
+ unfold const_for_result. generalize Archi.ptr64; intros ptr64; intros.
+ destruct a; inv H; SimplVM.
+- (* integer *)
+ exists (Vint n); auto.
+- (* long *)
+ destruct ptr64; inv H2. exists (Vlong n); auto.
+- (* float *)
+ destruct (Compopts.generate_float_constants tt); inv H2. exists (Vfloat f); auto.
+- (* single *)
+ destruct (Compopts.generate_float_constants tt); inv H2. exists (Vsingle f); auto.
+- (* pointer *)
+ destruct p; try discriminate; SimplVM.
+ + (* global *)
+ inv H2. exists (Genv.symbol_address ge id ofs); auto.
+ + (* stack *)
+ inv H2. exists (Vptr sp ofs); split; auto. cbn. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
+Lemma cond_strength_reduction_correct:
+ forall cond args vl,
+ vl = map (fun r => AE.get r ae) args ->
+ let (cond', args') := cond_strength_reduction cond args vl in
+ eval_condition cond' e##args' m = eval_condition cond e##args m.
+Proof.
+ intros until vl. unfold cond_strength_reduction.
+ case (cond_strength_reduction_match cond args vl); cbn; intros; InvApproxRegs; SimplVM.
+- apply Val.swap_cmp_bool.
+- auto.
+- apply Val.swap_cmpu_bool.
+- auto.
+- apply Val.swap_cmpl_bool.
+- auto.
+- apply Val.swap_cmplu_bool.
+- auto.
+- auto.
+Qed.
+
+Lemma make_cmp_base_correct:
+ forall c args vl,
+ vl = map (fun r => AE.get r ae) args ->
+ let (op', args') := make_cmp_base c args vl in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+ /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v.
+Proof.
+ intros. unfold make_cmp_base.
+ generalize (cond_strength_reduction_correct c args vl H).
+ destruct (cond_strength_reduction c args vl) as [c' args']. intros EQ.
+ econstructor; split. cbn; eauto. rewrite EQ. auto.
+Qed.
+
+Lemma make_cmp_correct:
+ forall c args vl,
+ vl = map (fun r => AE.get r ae) args ->
+ let (op', args') := make_cmp c args vl in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v
+ /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v.
+Proof.
+ intros c args vl.
+ assert (Y: forall r, vincl (AE.get r ae) (Uns Ptop 1) = true ->
+ e#r = Vundef \/ e#r = Vint Int.zero \/ e#r = Vint Int.one).
+ { intros. apply vmatch_Uns_1 with bc Ptop. eapply vmatch_ge. eapply vincl_ge; eauto. apply MATCH. }
+ unfold make_cmp. case (make_cmp_match c args vl); intros.
+- unfold make_cmp_imm_eq.
+ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
++ cbn in H; inv H. InvBooleans. subst n.
+ exists (e#r1); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
+* cbn in H; inv H. InvBooleans. subst n.
+ exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_ne.
+ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
++ cbn in H; inv H. InvBooleans. subst n.
+ exists (e#r1); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
+* cbn in H; inv H. InvBooleans. subst n.
+ exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_eq.
+ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
++ cbn in H; inv H. InvBooleans. subst n.
+ exists (e#r1); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
+* cbn in H; inv H. InvBooleans. subst n.
+ exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
+* apply make_cmp_base_correct; auto.
+- unfold make_cmp_imm_ne.
+ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0.
++ cbn in H; inv H. InvBooleans. subst n.
+ exists (e#r1); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1.
+* cbn in H; inv H. InvBooleans. subst n.
+ exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn.
+ exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto.
+* apply make_cmp_base_correct; auto.
+- apply make_cmp_base_correct; auto.
+Qed.
+
+Lemma make_addimm_correct:
+ forall n r,
+ let (op, args) := make_addimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.add e#r (Vint n)) v.
+Proof.
+ intros. unfold make_addimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros.
+ subst. exists (e#r); split; auto.
+ destruct (e#r); cbn; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto.
+ exists (Val.add e#r (Vint n)); split; auto.
+Qed.
+
+Lemma make_shlimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shlimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shl e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shlimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shl_zero. auto.
+ destruct (Int.ltu n Int.iwordsize).
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shrimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shr e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shrimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shr_zero. auto.
+ destruct (Int.ltu n Int.iwordsize).
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shruimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shruimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shru e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shruimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shru_zero. auto.
+ destruct (Int.ltu n Int.iwordsize).
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_mulimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_mulimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mul e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_mulimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (Vint Int.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_one; auto.
+ destruct (Int.is_power2 n) eqn:?; intros.
+ rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. cbn; eauto. auto.
+ econstructor; split; eauto. cbn. rewrite H; auto.
+Qed.
+
+Lemma make_divimm_correct:
+ forall n r1 r2 v,
+ Val.divs e#r1 e#r2 = Some v ->
+ e#r2 = Vint n ->
+ let (op, args) := make_divimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_divimm.
+ predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
+ destruct (e#r1) eqn:?;
+ try (rewrite Val.divs_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto);
+ inv H; auto.
+ destruct (Int.is_power2 n) eqn:?.
+ destruct (Int.ltu i (Int.repr 31)) eqn:?.
+ exists v; split; auto. cbn.
+ erewrite Val.divs_pow2; eauto. reflexivity. congruence.
+ exists v; auto.
+ exists v; auto.
+Qed.
+
+Lemma make_divuimm_correct:
+ forall n r1 r2 v,
+ Val.divu e#r1 e#r2 = Some v ->
+ e#r2 = Vint n ->
+ let (op, args) := make_divuimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_divuimm.
+ predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
+ destruct (e#r1) eqn:?;
+ try (rewrite Val.divu_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto);
+ inv H; auto.
+ destruct (Int.is_power2 n) eqn:?.
+ econstructor; split. cbn; eauto.
+ rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
+ exists v; auto.
+Qed.
+
+Lemma make_moduimm_correct:
+ forall n r1 r2 v,
+ Val.modu e#r1 e#r2 = Some v ->
+ e#r2 = Vint n ->
+ let (op, args) := make_moduimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_moduimm.
+ destruct (Int.is_power2 n) eqn:?.
+ exists v; split; auto. cbn. decEq. eapply Val.modu_pow2; eauto. congruence.
+ exists v; auto.
+Qed.
+
+Lemma make_andimm_correct:
+ forall n r x,
+ vmatch bc e#r x ->
+ let (op, args) := make_andimm n r x in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.and e#r (Vint n)) v.
+Proof.
+ intros; unfold make_andimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros.
+ subst n. exists (Vint Int.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.mone; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_mone; auto.
+ destruct (match x with Uns _ k => Int.eq (Int.zero_ext k (Int.not n)) Int.zero
+ | _ => false end) eqn:UNS.
+ destruct x; try congruence.
+ exists (e#r); split; auto.
+ inv H; auto. cbn. replace (Int.and i n) with i; auto.
+ generalize (Int.eq_spec (Int.zero_ext n0 (Int.not n)) Int.zero); rewrite UNS; intro EQ.
+ Int.bit_solve. destruct (zlt i0 n0).
+ replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)).
+ rewrite Int.bits_zero. cbn. rewrite andb_true_r. auto.
+ rewrite <- EQ. rewrite Int.bits_zero_ext by lia. rewrite zlt_true by auto.
+ rewrite Int.bits_not by auto. apply negb_involutive.
+ rewrite H6 by auto. auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_orimm_correct:
+ forall n r,
+ let (op, args) := make_orimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.or e#r (Vint n)) v.
+Proof.
+ intros; unfold make_orimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.mone; intros.
+ subst n. exists (Vint Int.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_mone; auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_xorimm_correct:
+ forall n r,
+ let (op, args) := make_xorimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xor e#r (Vint n)) v.
+Proof.
+ intros; unfold make_xorimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.xor_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.mone; intros.
+ subst n. exists (Val.notint e#r); split; auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_addlimm_correct:
+ forall n r,
+ let (op, args) := make_addlimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.addl e#r (Vlong n)) v.
+Proof.
+ intros. unfold make_addlimm.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+ subst. exists (e#r); split; auto.
+ destruct (e#r); cbn; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto.
+ exists (Val.addl e#r (Vlong n)); split; auto.
+Qed.
+
+Lemma make_shllimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shllimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shll e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shllimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
+ unfold Int64.shl'. rewrite Z.shiftl_0_r, Int64.repr_unsigned. auto.
+ destruct (Int.ltu n Int64.iwordsize').
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrlimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shrlimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrl e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shrlimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
+ unfold Int64.shr'. rewrite Z.shiftr_0_r, Int64.repr_signed. auto.
+ destruct (Int.ltu n Int64.iwordsize').
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_shrluimm_correct:
+ forall n r1 r2,
+ e#r2 = Vint n ->
+ let (op, args) := make_shrluimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrlu e#r1 (Vint n)) v.
+Proof.
+ intros; unfold make_shrluimm.
+ predSpec Int.eq Int.eq_spec n Int.zero; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto.
+ unfold Int64.shru'. rewrite Z.shiftr_0_r, Int64.repr_unsigned. auto.
+ destruct (Int.ltu n Int64.iwordsize').
+ econstructor; split. cbn. eauto. auto.
+ econstructor; split. cbn. eauto. rewrite H; auto.
+Qed.
+
+Lemma make_mullimm_correct:
+ forall n r1 r2,
+ e#r2 = Vlong n ->
+ let (op, args) := make_mullimm n r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mull e#r1 (Vlong n)) v.
+Proof.
+ intros; unfold make_mullimm.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. subst.
+ exists (Vlong Int64.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.one; intros. subst.
+ exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_one; auto.
+ destruct (Int64.is_power2' n) eqn:?; intros.
+ exists (Val.shll e#r1 (Vint i)); split; auto.
+ destruct (e#r1); cbn; auto.
+ erewrite Int64.is_power2'_range by eauto.
+ erewrite Int64.mul_pow2' by eauto. auto.
+ econstructor; split; eauto. cbn; rewrite H; auto.
+Qed.
+
+Lemma make_divlimm_correct:
+ forall n r1 r2 v,
+ Val.divls e#r1 e#r2 = Some v ->
+ e#r2 = Vlong n ->
+ let (op, args) := make_divlimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_divlimm.
+ destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
+ rewrite H0 in H. econstructor; split. cbn; eauto.
+ erewrite Val.divls_pow2; eauto. auto.
+ exists v; auto.
+ exists v; auto.
+Qed.
+
+Lemma make_divluimm_correct:
+ forall n r1 r2 v,
+ Val.divlu e#r1 e#r2 = Some v ->
+ e#r2 = Vlong n ->
+ let (op, args) := make_divluimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_divluimm.
+ destruct (Int64.is_power2' n) eqn:?.
+ econstructor; split. cbn; eauto.
+ rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
+ cbn.
+ erewrite Int64.is_power2'_range by eauto.
+ erewrite Int64.divu_pow2' by eauto. auto.
+ exists v; auto.
+Qed.
+
+Lemma make_modluimm_correct:
+ forall n r1 r2 v,
+ Val.modlu e#r1 e#r2 = Some v ->
+ e#r2 = Vlong n ->
+ let (op, args) := make_modluimm n r1 r2 in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
+Proof.
+ intros; unfold make_modluimm.
+ destruct (Int64.is_power2 n) eqn:?.
+ exists v; split; auto. cbn. decEq.
+ rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
+ cbn. erewrite Int64.modu_and by eauto. auto.
+ exists v; auto.
+Qed.
+
+Lemma make_andlimm_correct:
+ forall n r x,
+ let (op, args) := make_andlimm n r x in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.andl e#r (Vlong n)) v.
+Proof.
+ intros; unfold make_andlimm.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+ subst n. exists (Vlong Int64.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_mone; auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_orlimm_correct:
+ forall n r,
+ let (op, args) := make_orlimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.orl e#r (Vlong n)) v.
+Proof.
+ intros; unfold make_orlimm.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+ subst n. exists (Vlong Int64.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_mone; auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_xorlimm_correct:
+ forall n r,
+ let (op, args) := make_xorlimm n r in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xorl e#r (Vlong n)) v.
+Proof.
+ intros; unfold make_xorlimm.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero; intros.
+ subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.xor_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.mone; intros.
+ subst n. exists (Val.notl e#r); split; auto.
+ econstructor; split; eauto. auto.
+Qed.
+
+Lemma make_mulfimm_correct:
+ forall n r1 r2,
+ e#r2 = Vfloat n ->
+ let (op, args) := make_mulfimm n r1 r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
+Proof.
+ intros; unfold make_mulfimm.
+ destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
+ cbn. econstructor; split. eauto. rewrite H; subst n.
+ destruct (e#r1); cbn; auto. rewrite Float.mul2_add; auto.
+ cbn. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfimm_correct_2:
+ forall n r1 r2,
+ e#r1 = Vfloat n ->
+ let (op, args) := make_mulfimm n r2 r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
+Proof.
+ intros; unfold make_mulfimm.
+ destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros.
+ cbn. econstructor; split. eauto. rewrite H; subst n.
+ destruct (e#r2); cbn; auto. rewrite Float.mul2_add; auto.
+ rewrite Float.mul_commut; auto.
+ cbn. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfsimm_correct:
+ forall n r1 r2,
+ e#r2 = Vsingle n ->
+ let (op, args) := make_mulfsimm n r1 r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+ intros; unfold make_mulfsimm.
+ destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
+ cbn. econstructor; split. eauto. rewrite H; subst n.
+ destruct (e#r1); cbn; auto. rewrite Float32.mul2_add; auto.
+ cbn. econstructor; split; eauto.
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+ forall n r1 r2,
+ e#r1 = Vsingle n ->
+ let (op, args) := make_mulfsimm n r2 r1 r2 in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+ intros; unfold make_mulfsimm.
+ destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros.
+ cbn. econstructor; split. eauto. rewrite H; subst n.
+ destruct (e#r2); cbn; auto. rewrite Float32.mul2_add; auto.
+ rewrite Float32.mul_commut; auto.
+ cbn. econstructor; split; eauto.
+Qed.
+
+Lemma make_cast8signed_correct:
+ forall r x,
+ vmatch bc e#r x ->
+ let (op, args) := make_cast8signed r x in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.sign_ext 8 e#r) v.
+Proof.
+ intros; unfold make_cast8signed. destruct (vincl x (Sgn Ptop 8)) eqn:INCL.
+ exists e#r; split; auto.
+ assert (V: vmatch bc e#r (Sgn Ptop 8)).
+ { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
+ inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
+ econstructor; split; cbn; eauto.
+Qed.
+
+Lemma make_cast16signed_correct:
+ forall r x,
+ vmatch bc e#r x ->
+ let (op, args) := make_cast16signed r x in
+ exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.sign_ext 16 e#r) v.
+Proof.
+ intros; unfold make_cast16signed. destruct (vincl x (Sgn Ptop 16)) eqn:INCL.
+ exists e#r; split; auto.
+ assert (V: vmatch bc e#r (Sgn Ptop 16)).
+ { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
+ inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto.
+ econstructor; split; cbn; eauto.
+Qed.
+
+Lemma op_strength_reduction_correct:
+ forall op args vl v,
+ vl = map (fun r => AE.get r ae) args ->
+ eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v ->
+ let (op', args') := op_strength_reduction op args vl in
+ exists w, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some w /\ Val.lessdef v w.
+Proof.
+ intros until v; unfold op_strength_reduction;
+ case (op_strength_reduction_match op args vl); cbn; intros.
+- (* cast8signed *)
+ InvApproxRegs; SimplVM; inv H0. apply make_cast8signed_correct; auto.
+- (* cast16signed *)
+ InvApproxRegs; SimplVM; inv H0. apply make_cast16signed_correct; auto.
+- (* add 1 *)
+ rewrite Val.add_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* add 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto.
+- (* sub *)
+ InvApproxRegs; SimplVM; inv H0. rewrite Val.sub_add_opp. apply make_addimm_correct; auto.
+- (* mul 1 *)
+ rewrite Val.mul_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
+- (* mul 2*)
+ InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
+- (* divs *)
+ assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_divimm_correct; auto.
+- (* divu *)
+ assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_divuimm_correct; auto.
+- (* modu *)
+ assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_moduimm_correct; auto.
+- (* and 1 *)
+ rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* and 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
+- (* andimm *)
+ inv H; inv H0. apply make_andimm_correct; auto.
+- (* or 1 *)
+ rewrite Val.or_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* or 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto.
+- (* xor 1 *)
+ rewrite Val.xor_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* xor 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto.
+- (* shl *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shlimm_correct; auto.
+- (* shr *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shrimm_correct; auto.
+- (* shru *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shruimm_correct; auto.
+- (* addl 1 *)
+ rewrite Val.addl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* addl 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto.
+- (* subl *)
+ InvApproxRegs; SimplVM; inv H0.
+ replace (Val.subl e#r1 (Vlong n2)) with (Val.addl e#r1 (Vlong (Int64.neg n2))).
+ apply make_addlimm_correct; auto.
+ unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto.
+ rewrite Int64.sub_add_opp; auto.
+ rewrite Ptrofs.sub_add_opp. do 2 f_equal. auto with ptrofs.
+ rewrite Int64.sub_add_opp; auto.
+- (* mull 1 *)
+ rewrite Val.mull_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
+- (* mull 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
+- (* divl *)
+ assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_divlimm_correct; auto.
+- (* divlu *)
+ assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_divluimm_correct; auto.
+- (* modlu *)
+ assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
+ apply make_modluimm_correct; auto.
+- (* andl 1 *)
+ rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andl 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
+- (* andlimm *)
+ inv H; inv H0. apply make_andlimm_correct; auto.
+- (* orl 1 *)
+ rewrite Val.orl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* orl 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto.
+- (* xorl 1 *)
+ rewrite Val.xorl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* xorl 2 *)
+ InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto.
+- (* shll *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shllimm_correct; auto.
+- (* shrl *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shrlimm_correct; auto.
+- (* shrlu *)
+ InvApproxRegs; SimplVM; inv H0. apply make_shrluimm_correct; auto.
+- (* cond *)
+ inv H0. apply make_cmp_correct; auto.
+- (* mulf 1 *)
+ InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
+- (* mulf 2 *)
+ InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
+ rewrite <- H2. apply make_mulfimm_correct_2; auto.
+- (* mulfs 1 *)
+ InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+- (* mulfs 2 *)
+ InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2).
+ rewrite <- H2. apply make_mulfsimm_correct_2; auto.
+- (* default *)
+ exists v; auto.
+Qed.
+
+Lemma addr_strength_reduction_correct:
+ forall addr args vl res,
+ vl = map (fun r => AE.get r ae) args ->
+ eval_addressing ge (Vptr sp Ptrofs.zero) addr e##args = Some res ->
+ let (addr', args') := addr_strength_reduction addr args vl in
+ exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'.
+Proof.
+ intros until res. unfold addr_strength_reduction.
+ destruct (addr_strength_reduction_match addr args vl); cbn;
+ intros VL EA; InvApproxRegs; SimplVM; try (inv EA).
+- destruct (orb _ _).
++ exists (Val.offset_ptr e#r1 n); auto.
++ cbn. rewrite Genv.shift_symbol_address. econstructor; split; eauto.
+ inv H0; cbn; auto.
+- rewrite Ptrofs.add_zero_l. econstructor; split; eauto.
+ change (Vptr sp (Ptrofs.add n1 n)) with (Val.offset_ptr (Vptr sp n1) n).
+ inv H0; cbn; auto.
+- exists res; auto.
+Qed.
+
+End STRENGTH_REDUCTION.
diff --git a/kvx/Conventions1.v b/kvx/Conventions1.v
new file mode 100644
index 00000000..d8eff34e
--- /dev/null
+++ b/kvx/Conventions1.v
@@ -0,0 +1,431 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Function calling conventions and other conventions regarding the use of
+ machine registers and stack slots. *)
+
+Require Import Coqlib Decidableplus.
+Require Import AST Machregs Locations.
+
+(** * Classification of machine registers *)
+
+(** Machine registers (type [mreg] in module [Locations]) are divided in
+ the following groups:
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
+
+ We follow the RISC-V application binary interface (ABI) in our choice
+ of callee- and caller-save registers.
+*)
+
+Definition is_callee_save (r: mreg) : bool :=
+ match r with
+ (* | R15 | R16 | R17 *) | R18 | R19 | R20 | R21 | R22
+ | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 => true
+ | _ => false
+ end.
+
+Definition int_caller_save_regs :=
+ R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9
+ :: R10 :: R11 :: R15 (* :: R16 *) :: R17
+ (* :: R32 *) :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39 :: R40 :: R41
+ :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49 :: R50 :: R51
+ :: R52 :: R53 :: R54 :: R55 :: R56 :: R57 :: R58 :: R59 :: R60 :: R61
+ :: R62 :: R63 :: nil.
+
+Definition float_caller_save_regs : list mreg := nil.
+
+Definition int_callee_save_regs :=
+ (* R15 :: R16 :: R17 :: *)R18 :: R19 :: R20 :: R21 :: R22
+ :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil.
+
+Definition float_callee_save_regs : list mreg := nil.
+
+Definition destroyed_at_call :=
+ List.filter (fun r => negb (is_callee_save r)) all_mregs.
+
+Definition dummy_int_reg := R63. (**r Used in [Coloring]. *)
+Definition dummy_float_reg := R62. (**r Used in [Coloring]. *)
+
+Definition callee_save_type := mreg_type.
+
+Definition is_float_reg (r: mreg) := false.
+
+(** * Function calling conventions *)
+
+(** The functions in this section determine the locations (machine registers
+ and stack slots) used to communicate arguments and results between the
+ caller and the callee during function calls. These locations are functions
+ of the signature of the function and of the call instruction.
+ Agreement between the caller and the callee on the locations to use
+ is guaranteed by our dynamic semantics for Cminor and RTL, which demand
+ that the signature of the call instruction is identical to that of the
+ called function.
+
+ Calling conventions are largely arbitrary: they must respect the properties
+ proved in this section (such as no overlapping between the locations
+ of function arguments), but this leaves much liberty in choosing actual
+ locations. To ensure binary interoperability of code generated by our
+ compiler with libraries compiled by another compiler, we
+ implement the standard RISC-V conventions. *)
+
+(** ** Location of function result *)
+
+(** The result value of a function is passed back to the caller in
+ registers [R10] or [F10] or [R10,R11], depending on the type of the
+ returned value. We treat a function without result as a function
+ with one integer result. *)
+
+
+Definition loc_result (s: signature) : rpair mreg :=
+ match s.(sig_res) with
+ | Tvoid => One R0
+ | Tint8signed => One R0
+ | Tint8unsigned => One R0
+ | Tint16signed => One R0
+ | Tint16unsigned => One R0
+ | Tint | Tany32 => One R0
+ | Tfloat | Tsingle | Tany64 => One R0
+ | Tlong => if Archi.ptr64 then One R0 else One R0
+ end.
+
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+ forall sig,
+ subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true.
+Proof.
+ intros. unfold proj_sig_res, loc_result, mreg_type.
+ destruct (sig_res sig); try destruct Archi.ptr64; cbn; trivial; destruct t; trivial.
+Qed.
+
+(** The result locations are caller-save registers *)
+
+Lemma loc_result_caller_save:
+ forall (s: signature),
+ forall_rpair (fun r => is_callee_save r = false) (loc_result s).
+Proof.
+ intros. unfold loc_result, is_callee_save;
+ destruct (sig_res s); cbn; auto; try destruct Archi.ptr64; cbn; auto; try destruct t; cbn; auto.
+Qed.
+
+(** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *)
+
+Lemma loc_result_pair:
+ forall sg,
+ match loc_result sg with
+ | One _ => True
+ | Twolong r1 r2 =>
+ r1 <> r2 /\ proj_sig_res sg = Tlong
+ /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true
+ /\ Archi.ptr64 = false
+ end.
+Proof.
+ intros.
+ unfold loc_result; destruct (sig_res sg); auto;
+ unfold mreg_type; try destruct Archi.ptr64; auto;
+ destruct t; auto.
+Qed.
+
+(** The location of the result depends only on the result part of the signature *)
+
+Lemma loc_result_exten:
+ forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2.
+Proof.
+ intros. unfold loc_result. rewrite H; auto.
+Qed.
+
+(** ** Location of function arguments *)
+
+(** The RISC-V ABI states the following convention for passing arguments
+ to a function:
+
+- Arguments are passed in registers when possible.
+
+- Up to eight integer registers (ai: int_param_regs) and up to eight
+ floating-point registers (fai: float_param_regs) are used for this
+ purpose.
+
+- If the arguments to a function are conceptualized as fields of a C
+ struct, each with pointer alignment, the argument registers are a
+ shadow of the first eight pointer-words of that struct. If argument
+ i < 8 is a floating-point type, it is passed in floating-point
+ register fa_i; otherwise, it is passed in integer register a_i.
+
+- When primitive arguments twice the size of a pointer-word are passed
+ on the stack, they are naturally aligned. When they are passed in the
+ integer registers, they reside in an aligned even-odd register pair,
+ with the even register holding the least-significant bits.
+
+- Floating-point arguments to variadic functions (except those that
+ are explicitly named in the parameter list) are passed in integer
+ registers.
+
+- The portion of the conceptual struct that is not passed in argument
+ registers is passed on the stack. The stack pointer sp points to the
+ first argument not passed in a register.
+
+The bit about variadic functions doesn't quite fit CompCert's model.
+We do our best by passing the FP arguments in registers, as usual,
+and reserving the corresponding integer registers, so that fixup
+code can be introduced in the Asmexpand pass.
+*)
+
+Definition param_regs :=
+ R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: nil.
+
+Definition one_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
+ (rec: Z -> Z -> list (rpair loc)) :=
+ match list_nth_z regs rn with
+ | Some r =>
+ One(R r) :: rec (rn + 1) ofs
+ | None =>
+ let ofs := align ofs (typealign ty) in
+ One(S Outgoing ofs ty) :: rec rn (ofs + (if Archi.ptr64 then 2 else typesize ty))
+ end.
+
+Definition two_args (regs: list mreg) (rn: Z) (ofs: Z)
+ (rec: Z -> Z -> list (rpair loc)) :=
+ let rn := align rn 2 in
+ match list_nth_z regs rn, list_nth_z regs (rn + 1) with
+ | Some r1, Some r2 =>
+ Twolong (R r2) (R r1) :: rec (rn + 2) ofs
+ | _, _ =>
+ let ofs := align ofs 2 in
+ Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
+ rec rn (ofs + 2)
+ end.
+
+Definition hybrid_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
+ (rec: Z -> Z -> list (rpair loc)) :=
+ let rn := align rn 2 in
+ match list_nth_z regs rn with
+ | Some r =>
+ One (R r) :: rec (rn + 2) ofs
+ | None =>
+ let ofs := align ofs 2 in
+ One (S Outgoing ofs ty) :: rec rn (ofs + 2)
+ end.
+
+Fixpoint loc_arguments_rec (va: bool)
+ (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+ match tyl with
+ | nil => nil
+ | ty :: tys => one_arg param_regs r ofs ty (loc_arguments_rec va tys)
+(*
+ | (Tint | Tany32) as ty :: tys =>
+ one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
+ | Tsingle as ty :: tys =>
+ one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+ | Tlong as ty :: tys =>
+ if Archi.ptr64
+ then one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
+ else two_args int_param_regs r ofs (loc_arguments_rec va tys)
+ | (Tfloat | Tany64) as ty :: tys =>
+ if va && negb Archi.ptr64
+ then hybrid_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+ else one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+*)
+ end.
+
+(* FIX Sylvain: not sure to understand what I have done... *)
+Definition has_va (s: signature) : bool :=
+ match s.(sig_cc).(cc_vararg) with
+ | Some n => true
+ | None => false
+ end.
+
+(** [loc_arguments s] returns the list of locations where to store arguments
+ when calling a function with signature [s]. *)
+
+Definition loc_arguments (s: signature) : list (rpair loc) :=
+ loc_arguments_rec (has_va s) s.(sig_args) 0 0.
+
+(** [size_arguments s] returns the number of [Outgoing] slots used
+ to call a function with signature [s]. *)
+
+Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=
+ match l with
+ | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)
+ | _ => accu
+ end.
+
+Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=
+ match rl with
+ | One l => max_outgoing_1 accu l
+ | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2
+ end.
+
+Definition size_arguments (s: signature) : Z :=
+ List.fold_left max_outgoing_2 (loc_arguments s) 0.
+
+(** Argument locations are either non-temporary registers or [Outgoing]
+ stack slots at nonnegative offsets. *)
+
+Definition loc_argument_acceptable (l: loc) : Prop :=
+ match l with
+ | R r => is_callee_save r = false
+ | S Outgoing ofs ty => ofs >= 0 /\ (typealign ty | ofs)
+ | _ => False
+ end.
+
+Lemma loc_arguments_rec_charact:
+ forall va tyl rn ofs p,
+ ofs >= 0 ->
+ In p (loc_arguments_rec va tyl rn ofs) -> forall_rpair loc_argument_acceptable p.
+Proof.
+ set (OK := fun (l: list (rpair loc)) =>
+ forall p, In p l -> forall_rpair loc_argument_acceptable p).
+ set (OKF := fun (f: Z -> Z -> list (rpair loc)) =>
+ forall rn ofs, ofs >= 0 -> OK (f rn ofs)).
+ set (OKREGS := fun (l: list mreg) => forall r, In r l -> is_callee_save r = false).
+ assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typealign ty) >= 0).
+ { intros.
+ assert (ofs <= align ofs (typealign ty)) by (apply align_le; apply typealign_pos).
+ lia. }
+ assert (SK: (if Archi.ptr64 then 2 else 1) > 0).
+ { destruct Archi.ptr64; lia. }
+ assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0).
+ { intros. destruct Archi.ptr64. lia. apply typesize_pos. }
+ assert (A: forall regs rn ofs ty f,
+ OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)).
+ { intros until f; intros OR OF OO; red; unfold one_arg; intros.
+ destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H.
+ - subst p; cbn. apply OR. eapply list_nth_z_in; eauto.
+ - eapply OF; eauto.
+ - subst p; cbn. auto using align_divides, typealign_pos.
+ - eapply OF; [idtac|eauto].
+ generalize (AL ofs ty OO) (SKK ty); lia.
+ }
+ assert (B: forall regs rn ofs f,
+ OKREGS regs -> OKF f -> ofs >= 0 -> OK (two_args regs rn ofs f)).
+ { intros until f; intros OR OF OO; unfold two_args.
+ set (rn' := align rn 2).
+ set (ofs' := align ofs 2).
+ assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto).
+ assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint)
+ :: f rn' (ofs' + 2))).
+ { red; cbn; intros. destruct H.
+ - subst p; cbn.
+ repeat split; auto using Z.divide_1_l. lia.
+ - eapply OF; [idtac|eauto]. lia.
+ }
+ destruct (list_nth_z regs rn') as [r1|] eqn:NTH1;
+ destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2;
+ try apply DFL.
+ red; cbn; intros; destruct H.
+ - subst p; cbn. split; apply OR; eauto using list_nth_z_in.
+ - eapply OF; [idtac|eauto]. auto.
+ }
+ assert (C: forall regs rn ofs ty f,
+ OKREGS regs -> OKF f -> ofs >= 0 -> typealign ty = 1 -> OK (hybrid_arg regs rn ofs ty f)).
+ { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros.
+ set (rn' := align rn 2) in *.
+ destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H.
+ - subst p; cbn. apply OR. eapply list_nth_z_in; eauto.
+ - eapply OF; eauto.
+ - subst p; cbn. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l.
+ - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); cbn; lia.
+ }
+ assert (D: OKREGS param_regs).
+ { red. decide_goal. }
+ assert (E: OKREGS param_regs).
+ { red. decide_goal. }
+
+ cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)).
+ unfold OK. eauto.
+ induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; cbn.
+ - red; cbn; tauto.
+ - destruct ty1.
++ (* int *) apply A; auto.
++ (* float *)
+ apply A; auto.
++ (* long *)
+ apply A; auto.
++ (* single *)
+ apply A; auto.
++ (* any32 *)
+ apply A; auto.
++ (* any64 *)
+ apply A; auto.
+Qed.
+
+Lemma loc_arguments_acceptable:
+ forall (s: signature) (p: rpair loc),
+ In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
+Proof.
+ unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. lia.
+Qed.
+
+(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
+
+Remark fold_max_outgoing_above:
+ forall l n, fold_left max_outgoing_2 l n >= n.
+Proof.
+ assert (A: forall n l, max_outgoing_1 n l >= n).
+ { intros; unfold max_outgoing_1. destruct l as [_ | []]; lia. }
+ induction l; cbn; intros.
+ - lia.
+ - eapply Zge_trans. eauto.
+ destruct a; cbn. apply A. eapply Zge_trans; eauto.
+Qed.
+
+Lemma size_arguments_above:
+ forall s, size_arguments s >= 0.
+Proof.
+ intros. apply fold_max_outgoing_above.
+Qed.
+
+Lemma loc_arguments_bounded:
+ forall (s: signature) (ofs: Z) (ty: typ),
+ In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
+ ofs + typesize ty <= size_arguments s.
+Proof.
+ intros until ty.
+ assert (A: forall n l, n <= max_outgoing_1 n l).
+ { intros; unfold max_outgoing_1. destruct l as [_ | []]; lia. }
+ assert (B: forall p n,
+ In (S Outgoing ofs ty) (regs_of_rpair p) ->
+ ofs + typesize ty <= max_outgoing_2 n p).
+ { intros. destruct p; cbn in H; intuition; subst; cbn.
+ - lia.
+ - eapply Z.le_trans. 2: apply A. lia.
+ - lia. }
+ assert (C: forall l n,
+ In (S Outgoing ofs ty) (regs_of_rpairs l) ->
+ ofs + typesize ty <= fold_left max_outgoing_2 l n).
+ { induction l; cbn; intros.
+ - contradiction.
+ - rewrite in_app_iff in H. destruct H.
+ + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above.
+ + apply IHl; auto.
+ }
+ apply C.
+Qed.
+
+Lemma loc_arguments_main:
+ loc_arguments signature_main = nil.
+Proof.
+ reflexivity.
+Qed.
+
+
+(** ** Normalization of function results and parameters *)
+
+(** No normalization needed. *)
+
+Definition return_value_needs_normalization (t: rettype): bool := false.
+Definition parameter_needs_normalization (t: rettype): bool := false.
+
diff --git a/kvx/DecBoolOps.v b/kvx/DecBoolOps.v
new file mode 100644
index 00000000..1e0a6187
--- /dev/null
+++ b/kvx/DecBoolOps.v
@@ -0,0 +1,30 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Set Implicit Arguments.
+
+Theorem and_dec : forall A B C D : Prop,
+ { A } + { B } -> { C } + { D } ->
+ { A /\ C } + { (B /\ C) \/ (B /\ D) \/ (A /\ D) }.
+Proof.
+ intros A B C D AB CD.
+ destruct AB; destruct CD.
+ - left. tauto.
+ - right. tauto.
+ - right. tauto.
+ - right. tauto.
+Qed.
+
+
diff --git a/kvx/DuplicateOpcodeHeuristic.ml b/kvx/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..38702e1b
--- /dev/null
+++ b/kvx/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,41 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml
new file mode 120000
index 00000000..ee2674bf
--- /dev/null
+++ b/kvx/ExpansionOracle.ml
@@ -0,0 +1 @@
+../aarch64/ExpansionOracle.ml \ No newline at end of file
diff --git a/kvx/ExtFloats.v b/kvx/ExtFloats.v
new file mode 100644
index 00000000..b08503a5
--- /dev/null
+++ b/kvx/ExtFloats.v
@@ -0,0 +1,54 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Floats Integers ZArith.
+
+Module ExtFloat.
+(** TODO check with the actual KVX;
+ this is what happens on x86 and may be inappropriate. *)
+
+Definition min (x : float) (y : float) : float :=
+ match Float.compare x y with
+ | Some Eq | Some Lt => x
+ | Some Gt | None => y
+ end.
+
+Definition max (x : float) (y : float) : float :=
+ match Float.compare x y with
+ | Some Eq | Some Gt => x
+ | Some Lt | None => y
+ end.
+End ExtFloat.
+
+Module ExtFloat32.
+(** TODO check with the actual KVX *)
+
+Definition min (x : float32) (y : float32) : float32 :=
+ match Float32.compare x y with
+ | Some Eq | Some Lt => x
+ | Some Gt | None => y
+ end.
+
+Definition max (x : float32) (y : float32) : float32 :=
+ match Float32.compare x y with
+ | Some Eq | Some Gt => x
+ | Some Lt | None => y
+ end.
+
+Definition one := Float32.of_int (Int.repr (1%Z)).
+Definition inv (x : float32) : float32 :=
+ Float32.div one x.
+
+End ExtFloat32.
diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v
new file mode 100644
index 00000000..b4e14898
--- /dev/null
+++ b/kvx/ExtValues.v
@@ -0,0 +1,756 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Values.
+Require Import Floats ExtFloats.
+Require Import Lia.
+
+Open Scope Z_scope.
+
+Definition abs_diff (x y : Z) := Z.abs (x - y).
+Definition abs_diff2 (x y : Z) :=
+ if x <=? y then y - x else x - y.
+Lemma abs_diff2_correct :
+ forall x y : Z, (abs_diff x y) = (abs_diff2 x y).
+Proof.
+ intros.
+ unfold abs_diff, abs_diff2.
+ unfold Z.leb.
+ pose proof (Z.compare_spec x y) as Hspec.
+ inv Hspec.
+ - rewrite Z.abs_eq; lia.
+ - rewrite Z.abs_neq; lia.
+ - rewrite Z.abs_eq; lia.
+Qed.
+
+Inductive shift1_4 : Type :=
+| SHIFT1 | SHIFT2 | SHIFT3 | SHIFT4.
+
+Definition z_of_shift1_4 (x : shift1_4) :=
+ match x with
+ | SHIFT1 => 1
+ | SHIFT2 => 2
+ | SHIFT3 => 3
+ | SHIFT4 => 4
+ end.
+
+Definition shift1_4_of_z (x : Z) :=
+ if Z.eq_dec x 1 then Some SHIFT1
+ else if Z.eq_dec x 2 then Some SHIFT2
+ else if Z.eq_dec x 3 then Some SHIFT3
+ else if Z.eq_dec x 4 then Some SHIFT4
+ else None.
+
+Lemma shift1_4_of_z_correct :
+ forall z,
+ match shift1_4_of_z z with
+ | Some x => z_of_shift1_4 x = z
+ | None => True
+ end.
+Proof.
+ intro. unfold shift1_4_of_z.
+ destruct (Z.eq_dec _ _); cbn; try congruence.
+ destruct (Z.eq_dec _ _); cbn; try congruence.
+ destruct (Z.eq_dec _ _); cbn; try congruence.
+ destruct (Z.eq_dec _ _); cbn; try congruence.
+ trivial.
+Qed.
+
+Definition int_of_shift1_4 (x : shift1_4) :=
+ Int.repr (z_of_shift1_4 x).
+
+Definition is_bitfield stop start :=
+ (Z.leb start stop)
+ && (Z.geb start Z.zero)
+ && (Z.ltb stop Int.zwordsize).
+
+Definition extfz stop start v :=
+ if is_bitfield stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | Vint w =>
+ Vint (Int.shru (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start))))
+ | _ => Vundef
+ end
+ else Vundef.
+
+
+Definition extfs stop start v :=
+ if is_bitfield stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | Vint w =>
+ Vint (Int.shr (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start))))
+ | _ => Vundef
+ end
+ else Vundef.
+
+Definition zbitfield_mask stop start :=
+ (Z.shiftl 1 (Z.succ stop)) - (Z.shiftl 1 start).
+
+Definition bitfield_mask stop start :=
+ Vint(Int.repr (zbitfield_mask stop start)).
+
+Definition bitfield_maskl stop start :=
+ Vlong(Int64.repr (zbitfield_mask stop start)).
+
+Definition insf stop start prev fld :=
+ let mask := bitfield_mask stop start in
+ if is_bitfield stop start
+ then
+ Val.or (Val.and prev (Val.notint mask))
+ (Val.and (Val.shl fld (Vint (Int.repr start))) mask)
+ else Vundef.
+
+Definition is_bitfieldl stop start :=
+ (Z.leb start stop)
+ && (Z.geb start Z.zero)
+ && (Z.ltb stop Int64.zwordsize).
+
+Definition extfzl stop start v :=
+ if is_bitfieldl stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | Vlong w =>
+ Vlong (Int64.shru' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start))))
+ | _ => Vundef
+ end
+ else Vundef.
+
+
+Definition extfsl stop start v :=
+ if is_bitfieldl stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | Vlong w =>
+ Vlong (Int64.shr' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start))))
+ | _ => Vundef
+ end
+ else Vundef.
+
+Definition insfl stop start prev fld :=
+ let mask := bitfield_maskl stop start in
+ if is_bitfieldl stop start
+ then
+ Val.orl (Val.andl prev (Val.notl mask))
+ (Val.andl (Val.shll fld (Vint (Int.repr start))) mask)
+ else Vundef.
+
+Fixpoint highest_bit (x : Z) (n : nat) : Z :=
+ match n with
+ | O => 0
+ | S n1 =>
+ let n' := Z.of_N (N_of_nat n) in
+ if Z.testbit x n'
+ then n'
+ else highest_bit x n1
+ end.
+
+Definition int_highest_bit (x : int) : Z :=
+ highest_bit (Int.unsigned x) (31%nat).
+
+
+Definition int64_highest_bit (x : int64) : Z :=
+ highest_bit (Int64.unsigned x) (63%nat).
+
+Definition val_shrx (v1 v2: val): val :=
+ match v1, v2 with
+ | Vint n1, Vint n2 =>
+ if Int.ltu n2 (Int.repr 31)
+ then Vint(Int.shrx n1 n2)
+ else Vundef
+ | _, _ => Vundef
+ end.
+
+Definition val_shrxl (v1 v2: val): val :=
+ match v1, v2 with
+ | Vlong n1, Vint n2 =>
+ if Int.ltu n2 (Int.repr 63)
+ then Vlong(Int64.shrx' n1 n2)
+ else Vundef
+ | _, _ => Vundef
+ end.
+
+Remark modulus_fits_64: Int.modulus < Int64.max_unsigned.
+Proof.
+ compute.
+ trivial.
+Qed.
+
+Remark unsigned64_repr :
+ forall i,
+ -1 < i < Int.modulus ->
+ Int64.unsigned (Int64.repr i) = i.
+Proof.
+ intros i H.
+ destruct H as [Hlow Hhigh].
+ apply Int64.unsigned_repr.
+ split. { lia. }
+ pose proof modulus_fits_64.
+ lia.
+Qed.
+
+Theorem divu_is_divlu: forall v1 v2 : val,
+ Val.divu v1 v2 =
+ match Val.divlu (Val.longofintu v1) (Val.longofintu v2) with
+ | None => None
+ | Some q => Some (Val.loword q)
+ end.
+Proof.
+ intros.
+ destruct v1; cbn; trivial.
+ destruct v2; cbn; trivial.
+ destruct i as [i_val i_range].
+ destruct i0 as [i0_val i0_range].
+ cbn.
+ unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
+ cbn.
+ rewrite Int.unsigned_repr by (compute; split; discriminate).
+ rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate).
+ rewrite (unsigned64_repr i0_val) by assumption.
+ destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial.
+ f_equal. f_equal.
+ unfold Int.divu, Int64.divu. cbn.
+ rewrite (unsigned64_repr i_val) by assumption.
+ rewrite (unsigned64_repr i0_val) by assumption.
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr.
+ reflexivity.
+ destruct (Z.eq_dec i0_val 1).
+ {subst i0_val.
+ pose proof modulus_fits_64.
+ rewrite Zdiv_1_r.
+ lia.
+ }
+ destruct (Z.eq_dec i_val 0).
+ { subst i_val. compute.
+ split;
+ intro ABSURD;
+ discriminate ABSURD. }
+ assert ((i_val / i0_val) < i_val).
+ { apply Z_div_lt; lia. }
+ split.
+ { apply Z_div_pos; lia. }
+ pose proof modulus_fits_64.
+ lia.
+Qed.
+
+Theorem modu_is_modlu: forall v1 v2 : val,
+ Val.modu v1 v2 =
+ match Val.modlu (Val.longofintu v1) (Val.longofintu v2) with
+ | None => None
+ | Some q => Some (Val.loword q)
+ end.
+Proof.
+ intros.
+ destruct v1; cbn; trivial.
+ destruct v2; cbn; trivial.
+ destruct i as [i_val i_range].
+ destruct i0 as [i0_val i0_range].
+ cbn.
+ unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
+ cbn.
+ rewrite Int.unsigned_repr by (compute; split; discriminate).
+ rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate).
+ rewrite (unsigned64_repr i0_val) by assumption.
+ destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial.
+ f_equal. f_equal.
+ unfold Int.modu, Int64.modu. cbn.
+ rewrite (unsigned64_repr i_val) by assumption.
+ rewrite (unsigned64_repr i0_val) by assumption.
+ unfold Int64.loword.
+ rewrite Int64.unsigned_repr.
+ reflexivity.
+ assert((i_val mod i0_val) < i0_val).
+ apply Z_mod_lt.
+ lia.
+ split.
+ { apply Z_mod_lt.
+ lia. }
+ pose proof modulus_fits_64.
+ lia.
+Qed.
+
+Remark if_zlt_0_half_modulus :
+ forall T : Type,
+ forall x y: T,
+ (if (zlt 0 Int.half_modulus) then x else y) = x.
+Proof.
+ reflexivity.
+Qed.
+
+Remark if_zlt_mone_half_modulus :
+ forall T : Type,
+ forall x y: T,
+ (if (zlt (Int.unsigned Int.mone) Int.half_modulus) then x else y) = y.
+Proof.
+ reflexivity.
+Qed.
+
+Remark if_zlt_min_signed_half_modulus :
+ forall T : Type,
+ forall x y: T,
+ (if (zlt (Int.unsigned (Int.repr Int.min_signed))
+ Int.half_modulus)
+ then x
+ else y) = y.
+Proof.
+ reflexivity.
+Qed.
+
+Lemma repr_unsigned64_repr:
+ forall x, Int.repr (Int64.unsigned (Int64.repr x)) = Int.repr x.
+Proof.
+ intros.
+ apply Int.eqm_samerepr.
+ unfold Int.eqm.
+ unfold Zbits.eqmod.
+ pose proof (Int64.eqm_unsigned_repr x) as H64.
+ unfold Int64.eqm in H64.
+ unfold Zbits.eqmod in H64.
+ destruct H64 as [k64 H64].
+ change Int64.modulus with 18446744073709551616 in *.
+ change Int.modulus with 4294967296.
+ exists (-4294967296 * k64).
+ set (y := Int64.unsigned (Int64.repr x)) in *.
+ rewrite H64.
+ clear H64.
+ lia.
+Qed.
+
+(*
+Theorem divs_is_divls: forall v1 v2 : val,
+ match Val.divs v1 v2 with
+ | Some q =>
+ match Val.divls (Val.longofint v1) (Val.longofint v2) with
+ | None => False
+ | Some q' => q = Val.loword q'
+ end
+ | None => True
+ end.
+Proof.
+ intros.
+ destruct v1; cbn; trivial.
+ destruct v2; cbn; trivial.
+ destruct i as [i_val i_range].
+ destruct i0 as [i0_val i0_range].
+ cbn.
+ unfold Int.eq, Int64.eq, Int.zero, Int64.zero.
+ cbn.
+ replace (Int.unsigned (Int.repr 0)) with 0 in * by reflexivity.
+ destruct (zeq _ _) as [H0' | Hnot0]; cbn; trivial.
+ destruct (zeq i_val (Int.unsigned (Int.repr Int.min_signed))) as [Hmin | Hnotmin]; cbn.
+ { subst.
+ destruct (zeq i0_val (Int.unsigned Int.mone)) as [Hmone | Hnotmone]; trivial.
+ unfold Int.signed. cbn.
+ replace (Int64.unsigned (Int64.repr 0)) with 0 in * by reflexivity.
+ rewrite if_zlt_min_signed_half_modulus.
+ replace (if
+ zeq
+ (Int64.unsigned
+ (Int64.repr
+ (Int.unsigned (Int.repr Int.min_signed) - Int.modulus)))
+ (Int64.unsigned (Int64.repr Int64.min_signed))
+ then true
+ else false) with false by reflexivity.
+ cbn.
+ rewrite orb_false_r.
+ destruct (zlt i0_val Int.half_modulus) as [Hlt_half | Hge_half].
+ {
+ replace Int.half_modulus with 2147483648 in * by reflexivity.
+ rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia).
+ destruct (zeq _ _) as [ | Hneq0]; try lia. clear Hneq0.
+ unfold Val.loword.
+ f_equal.
+ unfold Int64.divs, Int.divs, Int64.loword.
+ unfold Int.signed, Int64.signed. cbn.
+ rewrite if_zlt_min_signed_half_modulus.
+ change Int.half_modulus with 2147483648 in *.
+ destruct (zlt _ _) as [discard|]; try lia. clear discard.
+ change (Int64.unsigned
+ (Int64.repr
+ (Int.unsigned (Int.repr Int.min_signed) - Int.modulus)))
+ with 18446744071562067968.
+ change Int64.half_modulus with 9223372036854775808.
+ change Int64.modulus with 18446744073709551616.
+ cbn.
+ rewrite (Int64.unsigned_repr i0_val) by (change Int64.max_unsigned with 18446744073709551615; lia).
+ destruct (zlt i0_val 9223372036854775808) as [discard |]; try lia.
+ clear discard.
+ change (Int.unsigned (Int.repr Int.min_signed) - Int.modulus) with (-2147483648).
+ destruct (Z.eq_dec i0_val 1) as [H1 | Hnot1].
+ { subst.
+ rewrite Z.quot_1_r.
+ apply Int.eqm_samerepr.
+ unfold Int.eqm.
+ change (Int64.unsigned (Int64.repr (-2147483648))) with 18446744071562067968.
+ unfold Zbits.eqmod.
+ change Int.modulus with 4294967296.
+ exists (-4294967296).
+ compute.
+ reflexivity.
+ }
+ change (-2147483648) with (-(2147483648)).
+ rewrite Z.quot_opp_l by assumption.
+ rewrite repr_unsigned64_repr.
+ reflexivity.
+ }
+ destruct (zeq _ _) as [Hmod|Hnmod].
+ {
+ rewrite Int64.unsigned_repr_eq in Hmod.
+ set (delta := (i0_val - Int.modulus)) in *.
+ assert (delta = Int64.modulus*(delta/Int64.modulus)) as Hdelta.
+ { apply Z_div_exact_full_2.
+ compute. lia.
+ assumption. }
+ set (k := (delta / Int64.modulus)) in *.
+ change Int64.modulus with 18446744073709551616 in *.
+ change Int.modulus with 4294967296 in *.
+ change Int.half_modulus with 2147483648 in *.
+ change (Int.unsigned Int.mone) with 4294967295 in *.
+ lia.
+ }
+ unfold Int.divs, Int64.divs, Val.loword, Int64.loword.
+ change (Int.unsigned (Int.repr Int.min_signed)) with 2147483648.
+ change Int.modulus with 4294967296.
+ change (Int64.signed (Int64.repr (2147483648 - 4294967296))) with (-2147483648).
+ f_equal.
+ change (Int.signed {| Int.intval := 2147483648; Int.intrange := i_range |})
+ with (-2147483648).
+ rewrite Int64.signed_repr.
+ {
+ replace (Int.signed {| Int.intval := i0_val; Int.intrange := i0_range |}) with (i0_val - 4294967296).
+ { rewrite repr_unsigned64_repr.
+ reflexivity.
+ }
+ *)
+
+Lemma big_unsigned_signed:
+ forall x,
+ (Int.unsigned x >= Int.half_modulus) ->
+ (Int.signed x) = (Int.unsigned x) - Int.modulus.
+Proof.
+ destruct x as [xval xrange].
+ intro BIG.
+ unfold Int.signed, Int.unsigned in *. cbn in *.
+ destruct (zlt _ _).
+ lia.
+ trivial.
+Qed.
+
+(*
+Lemma signed_0_eqb :
+ forall x, (Z.eqb (Int.signed x) 0) = Int.eq x Int.zero.
+Qed.
+ *)
+
+Lemma Z_quot_le: forall a b,
+ 0 <= a -> 1 <= b -> Z.quot a b <= a.
+Proof.
+ intros a b Ha Hb.
+ destruct (Z.eq_dec b 1) as [Hb1 | Hb1].
+ { (* b=1 *)
+ subst.
+ rewrite Z.quot_1_r.
+ auto with zarith.
+ }
+ destruct (Z.eq_dec a 0) as [Ha0 | Ha0].
+ { (* a=0 *)
+ subst.
+ rewrite Z.quot_0_l.
+ auto with zarith.
+ lia.
+ }
+ assert ((Z.quot a b) < a).
+ {
+ apply Z.quot_lt; lia.
+ }
+ auto with zarith.
+Qed.
+
+(*
+Lemma divs_is_quot: forall v1 v2 : val,
+ Val.divs v1 v2 =
+ match v1, v2 with
+ | (Vint w1), (Vint w2) =>
+ let q := Z.quot (Int.signed w1) (Int.signed w2) in
+ if (negb (Z.eqb (Int.signed w2) 0))
+ && (Z.geb q Int.min_signed) && (Z.leb q Int.max_signed)
+ then Some (Vint (Int.repr q))
+ else None
+ | _, _ => None
+ end.
+
+Proof.
+ destruct v1; destruct v2; cbn; trivial.
+ unfold Int.divs.
+ rewrite signed_0_eqb.
+ destruct (Int.eq i0 Int.zero) eqn:Eeq0; cbn; trivial.
+ destruct (Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone) eqn:EXCEPTION.
+ { replace (Int.signed i0) with (-1).
+ replace (Int.signed i) with Int.min_signed.
+ change Int.min_signed with (-2147483648).
+ change Int.max_signed with (2147483647).
+ compute.
+ reflexivity.
+ { unfold Int.eq in EXCEPTION.
+ destruct (zeq _ _) as [Hmin | ] in EXCEPTION; try discriminate.
+ change Int.min_signed with (-2147483648).
+ change (Int.unsigned (Int.repr Int.min_signed)) with (2147483648) in *.
+ rewrite big_unsigned_signed.
+ change Int.modulus with 4294967296.
+ lia.
+ change Int.half_modulus with 2147483648.
+ lia.
+ }
+ unfold Int.eq in EXCEPTION.
+ destruct (zeq _ _) in EXCEPTION; try discriminate.
+ destruct (zeq _ _) as [Hmone | ] in EXCEPTION; try discriminate.
+ destruct i0 as [i0val i0range]; unfold Int.signed in *; cbn in *.
+ rewrite Hmone.
+ reflexivity.
+ }
+ replace (Int.signed i ÷ Int.signed i0 >=? Int.min_signed) with true.
+ replace (Int.signed i ÷ Int.signed i0 <=? Int.max_signed) with true.
+ reflexivity.
+ { assert (Int.signed i ÷ Int.signed i0 <= Int.max_signed).
+ {
+ destruct (Z_lt_le_dec (Int.signed i) 0).
+ {
+ apply Z.le_trans with (m:=0).
+ rewrite <- (Z.quot_0_l (Int.signed i0)).
+ Require Import Coq.ZArith.Zquot.
+ apply Z_quot_monotone.
+ }
+ assert ( Int.signed i ÷ Int.signed i0 <= Int.signed i).
+ apply Z_quot_le.
+ }
+ }
+
+ *)
+
+Require Import Coq.ZArith.Zquot.
+Lemma Z_quot_pos_pos_bound: forall a b m,
+ 0 <= a <= m -> 1 <= b -> 0 <= Z.quot a b <= m.
+Proof.
+ intros.
+ split.
+ { rewrite <- (Z.quot_0_l b) by lia.
+ apply Z_quot_monotone; lia.
+ }
+ apply Z.le_trans with (m := a).
+ {
+ apply Z_quot_le; tauto.
+ }
+ tauto.
+Qed.
+Lemma Z_quot_neg_pos_bound: forall a b m,
+ m <= a <= 0 -> 1 <= b -> m <= Z.quot a b <= 0.
+ intros.
+ assert (0 <= - (a ÷ b) <= -m).
+ {
+ rewrite <- Z.quot_opp_l by lia.
+ apply Z_quot_pos_pos_bound; lia.
+ }
+ lia.
+Qed.
+
+Lemma Z_quot_signed_pos_bound: forall a b,
+ Int.min_signed <= a <= Int.max_signed -> 1 <= b ->
+ Int.min_signed <= Z.quot a b <= Int.max_signed.
+Proof.
+ intros.
+ destruct (Z_lt_ge_dec a 0).
+ {
+ split.
+ { apply Z_quot_neg_pos_bound; lia. }
+ { eapply Z.le_trans with (m := 0).
+ { apply Z_quot_neg_pos_bound with (m := Int.min_signed); trivial.
+ split. tauto. auto with zarith.
+ }
+ discriminate.
+ }
+ }
+ { split.
+ { eapply Z.le_trans with (m := 0).
+ discriminate.
+ apply Z_quot_pos_pos_bound with (m := Int.max_signed); trivial.
+ split. lia. tauto.
+ }
+ { apply Z_quot_pos_pos_bound; lia.
+ }
+ }
+Qed.
+
+Lemma Z_quot_signed_neg_bound: forall a b,
+ Int.min_signed <= a <= Int.max_signed -> b < -1 ->
+ Int.min_signed <= Z.quot a b <= Int.max_signed.
+Proof.
+ change Int.min_signed with (-2147483648).
+ change Int.max_signed with 2147483647.
+ intros.
+
+ replace b with (-(-b)) by auto with zarith.
+ rewrite Z.quot_opp_r by lia.
+ assert (-2147483647 <= (a ÷ - b) <= 2147483648).
+ 2: lia.
+
+ destruct (Z_lt_ge_dec a 0).
+ {
+ replace a with (-(-a)) by auto with zarith.
+ rewrite Z.quot_opp_l by lia.
+ assert (-2147483648 <= - a ÷ - b <= 2147483647).
+ 2: lia.
+ split.
+ {
+ rewrite Z.quot_opp_l by lia.
+ assert (a ÷ - b <= 2147483648).
+ 2: lia.
+ {
+ apply Z.le_trans with (m := 0).
+ rewrite <- (Z.quot_0_l (-b)) by lia.
+ apply Z_quot_monotone; lia.
+ discriminate.
+ }
+ }
+ assert (- a ÷ - b < -a ).
+ 2: lia.
+ apply Z_quot_lt; lia.
+ }
+ {
+ split.
+ { apply Z.le_trans with (m := 0).
+ discriminate.
+ rewrite <- (Z.quot_0_l (-b)) by lia.
+ apply Z_quot_monotone; lia.
+ }
+ { apply Z.le_trans with (m := a).
+ apply Z_quot_le.
+ all: lia.
+ }
+ }
+Qed.
+
+Lemma sub_add_neg :
+ forall x y, Val.sub x y = Val.add x (Val.neg y).
+Proof.
+ destruct x; destruct y; cbn; trivial.
+ f_equal.
+ apply Int.sub_add_opp.
+Qed.
+
+Lemma neg_mul_distr_r :
+ forall x y, Val.neg (Val.mul x y) = Val.mul x (Val.neg y).
+Proof.
+ destruct x; destruct y; cbn; trivial.
+ f_equal.
+ apply Int.neg_mul_distr_r.
+Qed.
+
+(* pointer diff
+Lemma sub_addl_negl :
+ forall x y, Val.subl x y = Val.addl x (Val.negl y).
+Proof.
+ destruct x; destruct y; cbn; trivial.
+ + f_equal. apply Int64.sub_add_opp.
+ + destruct (Archi.ptr64) eqn:ARCHI64; trivial.
+ f_equal. rewrite Ptrofs.sub_add_opp.
+ pose (Ptrofs.agree64_neg ARCHI64 (Ptrofs.of_int64 i0) i0) as Hagree.
+ unfold Ptrofs.agree64 in Hagree.
+ unfold Ptrofs.add.
+ f_equal. f_equal.
+ rewrite Hagree.
+ pose (Ptrofs.agree64_of_int ARCHI64 (Int64.neg i0)) as Hagree2.
+ rewrite Hagree2.
+ reflexivity.
+ exact (Ptrofs.agree64_of_int ARCHI64 i0).
+ + destruct (Archi.ptr64) eqn:ARCHI64; cbn; trivial.
+ destruct (eq_block _ _); cbn; trivial.
+Qed.
+ *)
+
+Lemma negl_mull_distr_r :
+ forall x y, Val.negl (Val.mull x y) = Val.mull x (Val.negl y).
+Proof.
+ destruct x; destruct y; cbn; trivial.
+ f_equal.
+ apply Int64.neg_mul_distr_r.
+Qed.
+
+Definition addx sh v1 v2 :=
+ Val.add v2 (Val.shl v1 (Vint sh)).
+
+Definition addxl sh v1 v2 :=
+ Val.addl v2 (Val.shll v1 (Vint sh)).
+
+Definition revsubx sh v1 v2 :=
+ Val.sub v2 (Val.shl v1 (Vint sh)).
+
+Definition revsubxl sh v1 v2 :=
+ Val.subl v2 (Val.shll v1 (Vint sh)).
+
+Definition minf v1 v2 :=
+ match v1, v2 with
+ | (Vfloat f1), (Vfloat f2) => Vfloat (ExtFloat.min f1 f2)
+ | _, _ => Vundef
+ end.
+
+Definition maxf v1 v2 :=
+ match v1, v2 with
+ | (Vfloat f1), (Vfloat f2) => Vfloat (ExtFloat.max f1 f2)
+ | _, _ => Vundef
+ end.
+
+Definition minfs v1 v2 :=
+ match v1, v2 with
+ | (Vsingle f1), (Vsingle f2) => Vsingle (ExtFloat32.min f1 f2)
+ | _, _ => Vundef
+ end.
+
+Definition maxfs v1 v2 :=
+ match v1, v2 with
+ | (Vsingle f1), (Vsingle f2) => Vsingle (ExtFloat32.max f1 f2)
+ | _, _ => Vundef
+ end.
+
+Definition invfs v1 :=
+ match v1 with
+ | (Vsingle f1) => Vsingle (ExtFloat32.inv f1)
+ | _ => Vundef
+ end.
+
+Definition triple_op_float f v1 v2 v3 :=
+ match v1, v2, v3 with
+ | (Vfloat f1), (Vfloat f2), (Vfloat f3) => Vfloat (f f1 f2 f3)
+ | _, _, _ => Vundef
+ end.
+
+Definition triple_op_single f v1 v2 v3 :=
+ match v1, v2, v3 with
+ | (Vsingle f1), (Vsingle f2), (Vsingle f3) => Vsingle (f f1 f2 f3)
+ | _, _, _ => Vundef
+ end.
+
+Definition fmaddf := triple_op_float (fun f1 f2 f3 => Float.fma f2 f3 f1).
+Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1).
+
+Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1).
+Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1).
diff --git a/kvx/Machregs.v b/kvx/Machregs.v
new file mode 100644
index 00000000..02fa4e6b
--- /dev/null
+++ b/kvx/Machregs.v
@@ -0,0 +1,245 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import String.
+Require Import Coqlib.
+Require Import Decidableplus.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Op.
+
+(** ** Machine registers *)
+
+(** The following type defines the machine registers that can be referenced
+ as locations. These include:
+- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
+- Floating-point registers that can be allocated to RTL pseudo-registers
+ ([Fxx]).
+
+ The type [mreg] does not include reserved machine registers such as
+ the zero register (x0), the link register (x1), the stack pointer
+ (x2), the global pointer (x3), and the thread pointer (x4).
+ Finally, register x31 is reserved for use as a temporary by the
+ assembly-code generator [Asmgen].
+*)
+
+Inductive mreg: Type :=
+ (* Allocatable General Purpose regs. *)
+ | R0 | R1 | R2 | R3 | R4 | R5 | R6 | R7 | R8 | R9
+ | R10 | R11 (* | R12 | R13 | R14 *) | R15 (* | R16 *) | R17 | R18 | R19
+ | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | R29
+ | R30 | R31 (* | R32 *) | R33 | R34 | R35 | R36 | R37 | R38 | R39
+ | R40 | R41 | R42 | R43 | R44 | R45 | R46 | R47 | R48 | R49
+ | R50 | R51 | R52 | R53 | R54 | R55 | R56 | R57 | R58 | R59
+ | R60 | R61 | R62 | R63.
+
+Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
+Proof. decide equality. Defined.
+Global Opaque mreg_eq.
+
+Definition all_mregs :=
+ R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9
+ :: R10 :: R11 (* :: R12 :: R13 :: R14 *) :: R15 (* :: R16 *) :: R17 :: R18 :: R19
+ :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29
+ :: R30 :: R31 (* :: R32 *) :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39
+ :: R40 :: R41 :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49
+ :: R50 :: R51 :: R52 :: R53 :: R54 :: R55 :: R56 :: R57 :: R58 :: R59
+ :: R60 :: R61 :: R62 :: R63 :: nil.
+
+Lemma all_mregs_complete:
+ forall (r: mreg), In r all_mregs.
+Proof.
+ assert (forall r, proj_sumbool (In_dec mreg_eq r all_mregs) = true) by (destruct r; reflexivity).
+ intros. specialize (H r). InvBooleans. auto.
+Qed.
+
+Instance Decidable_eq_mreg : forall (x y: mreg), Decidable (eq x y) := Decidable_eq mreg_eq.
+
+Instance Finite_mreg : Finite mreg := {
+ Finite_elements := all_mregs;
+ Finite_elements_spec := all_mregs_complete
+}.
+
+Definition mreg_type (r: mreg): typ := Tany64.
+
+Open Scope positive_scope.
+
+Module IndexedMreg <: INDEXED_TYPE.
+ Definition t := mreg.
+ Definition eq := mreg_eq.
+ Definition index (r: mreg): positive :=
+ match r with
+ | R0 => 1 | R1 => 2 | R2 => 3 | R3 => 4 | R4 => 5
+ | R5 => 6 | R6 => 7 | R7 => 8 | R8 => 9 | R9 => 10
+ | R10 => 11 | R11 => 12 (* | R12 => 13 | R13 => 14 | R14 => 15 *)
+ | R15 => 16 (* | R16 => 17 *) | R17 => 18 | R18 => 19 | R19 => 20
+ | R20 => 21 | R21 => 22 | R22 => 23 | R23 => 24 | R24 => 25
+ | R25 => 26 | R26 => 27 | R27 => 28 | R28 => 29 | R29 => 30
+ | R30 => 31 | R31 => 32 (* | R32 => 33 *) | R33 => 34 | R34 => 35
+ | R35 => 36 | R36 => 37 | R37 => 38 | R38 => 39 | R39 => 40
+ | R40 => 41 | R41 => 42 | R42 => 43 | R43 => 44 | R44 => 45
+ | R45 => 46 | R46 => 47 | R47 => 48 | R48 => 49 | R49 => 50
+ | R50 => 51 | R51 => 52 | R52 => 53 | R53 => 54 | R54 => 55
+ | R55 => 56 | R56 => 57 | R57 => 58 | R58 => 59 | R59 => 60
+ | R60 => 61 | R61 => 62 | R62 => 63 | R63 => 64
+ end.
+
+ Lemma index_inj:
+ forall r1 r2, index r1 = index r2 -> r1 = r2.
+ Proof.
+ decide_goal.
+ Qed.
+End IndexedMreg.
+
+Definition is_stack_reg (r: mreg) : bool := false.
+
+(** ** Names of registers *)
+
+Local Open Scope string_scope.
+
+Definition register_names :=
+ ("R0" , R0) :: ("R1" , R1) :: ("R2" , R2) :: ("R3" , R3) :: ("R4" , R4)
+ :: ("R5" , R5) :: ("R6" , R6) :: ("R7" , R7) :: ("R8" , R8) :: ("R9" , R9)
+ :: ("R10", R10) :: ("R11", R11) (* :: ("R12", R12) :: ("R13", R13) :: ("R14", R14) *)
+ :: ("R15", R15) (* :: ("R16", R16) *) :: ("R17", R17) :: ("R18", R18) :: ("R19", R19)
+ :: ("R20", R20) :: ("R21", R21) :: ("R22", R22) :: ("R23", R23) :: ("R24", R24)
+ :: ("R25", R25) :: ("R26", R26) :: ("R27", R27) :: ("R28", R28) :: ("R29", R29)
+ :: ("R30", R30) :: ("R31", R31) (* :: ("R32", R32) *) :: ("R33", R33) :: ("R34", R34)
+ :: ("R35", R35) :: ("R36", R36) :: ("R37", R37) :: ("R38", R38) :: ("R39", R39)
+ :: ("R40", R40) :: ("R41", R41) :: ("R42", R42) :: ("R43", R43) :: ("R44", R44)
+ :: ("R45", R45) :: ("R46", R46) :: ("R47", R47) :: ("R48", R48) :: ("R49", R49)
+ :: ("R50", R50) :: ("R51", R51) :: ("R52", R52) :: ("R53", R53) :: ("R54", R54)
+ :: ("R55", R55) :: ("R56", R56) :: ("R57", R57) :: ("R58", R58) :: ("R59", R59)
+ :: ("R60", R60) :: ("R61", R61) :: ("R62", R62) :: ("R63", R63) :: nil.
+
+Definition register_by_name (s: string) : option mreg :=
+ let fix assoc (l: list (string * mreg)) : option mreg :=
+ match l with
+ | nil => None
+ | (s1, r1) :: l' => if string_dec s s1 then Some r1 else assoc l'
+ end
+ in assoc register_names.
+
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg := nil.
+(*match op with
+ | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle
+ | Olongoffloat | Olonguoffloat | Olongofsingle | Olonguofsingle
+ => F6 :: nil
+ | _ => nil
+ end.
+*)
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil.
+
+Definition destroyed_by_cond (cond: condition): list mreg := nil.
+
+Definition destroyed_by_jumptable: list mreg := R62 :: R63 :: nil.
+
+Fixpoint destroyed_by_clobber (cl: list string): list mreg :=
+ match cl with
+ | nil => nil
+ | c1 :: cl =>
+ match register_by_name c1 with
+ | Some r => r :: destroyed_by_clobber cl
+ | None => destroyed_by_clobber cl
+ end
+ end.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+ match ef with
+ | EF_inline_asm txt sg clob => destroyed_by_clobber clob
+ | EF_memcpy sz al =>
+ if Z.leb sz 15
+ then R62 :: R63 :: R61 :: nil
+ else R62 :: R63 :: R61 :: R60 :: nil
+ | EF_profiling _ _ => R62 :: R63 ::nil
+ | _ => nil
+ end.
+
+Definition destroyed_by_setstack (ty: typ): list mreg := nil.
+
+Definition destroyed_at_function_entry: list mreg := R17 :: nil.
+
+Definition temp_for_parent_frame: mreg := R17. (* Temporary used to store the parent frame, where the arguments are *)
+
+Definition destroyed_at_indirect_call: list mreg := nil.
+ (* R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil. *)
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := (nil, None).
+
+(* FIXME DMonniaux this seems to be the place for preferred registers for arguments *)
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) := (nil, nil).
+
+ (* match ef with
+ | EF_builtin name sg =>
+ if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then
+ (Some R6 :: Some R5 :: nil, Some R5 :: Some R6 :: nil)
+ else
+ (nil, nil)
+ | _ =>
+ (nil, nil)
+ end. *)
+
+Global Opaque
+ destroyed_by_op destroyed_by_load destroyed_by_store
+ destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+ destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame
+ mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations. Return [true] if the first argument and
+ the result must be in the same location *and* are unconstrained
+ by [mregs_for_operation]. There are two: the pseudo [Ocast32signed],
+ because it expands to a no-op owing to the representation of 32-bit
+ integers as their 64-bit sign extension; and [Ocast32unsigned],
+ because it builds on the same magic no-op. *)
+
+Definition two_address_op (op: operation) : bool :=
+ match op with
+ | Ofmaddf | Ofmaddfs
+ | Ofmsubf | Ofmsubfs
+ | Omadd | Omaddimm _
+ | Omaddl | Omaddlimm _
+ | Omsub | Omsubl
+ | Osel _ _ | Oselimm _ _ | Osellimm _ _
+ | Oinsf _ _ | Oinsfl _ _ => true
+ | _ => false
+ end.
+
+(** Constraints on constant propagation for builtins *)
+
+Definition builtin_constraints (ef: external_function) :
+ list builtin_arg_constraint :=
+ match ef with
+ | EF_builtin id sg =>
+ if string_dec id "__builtin_kvx_get" then OK_const :: nil
+ else if string_dec id "__builtin_kvx_set"
+ then OK_const :: OK_default :: nil
+ else if string_dec id "__builtin_kvx_wfxl"
+ then OK_const :: OK_default :: nil
+ else if string_dec id "__builtin_kvx_wfxm"
+ then OK_const :: OK_default :: nil
+ else nil
+ | EF_vload _ => OK_addressing :: nil
+ | EF_vstore _ => OK_addressing :: OK_default :: nil
+ | EF_memcpy _ _ => OK_addrstack :: OK_addrstack :: nil
+ | EF_annot kind txt targs => map (fun _ => OK_all) targs
+ | EF_debug kind txt targs => map (fun _ => OK_all) targs
+ | _ => nil
+ end.
diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml
new file mode 100644
index 00000000..dbb89727
--- /dev/null
+++ b/kvx/Machregsaux.ml
@@ -0,0 +1,35 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Auxiliary functions on machine registers *)
+
+open Camlcoq
+open Machregs
+
+let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
+
+let _ =
+ List.iter
+ (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
+ Machregs.register_names
+
+let is_scratch_register r = false
+
+let class_of_type = function
+ | AST.Tint | AST.Tlong
+ | AST.Tfloat | AST.Tsingle -> 0
+ | AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 59 |]
diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli
new file mode 100644
index 00000000..23ac1c9a
--- /dev/null
+++ b/kvx/Machregsaux.mli
@@ -0,0 +1,20 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(** Auxiliary functions on machine registers *)
+
+val is_scratch_register: string -> bool
+
+val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v
new file mode 100644
index 00000000..4578b4e8
--- /dev/null
+++ b/kvx/NeedOp.v
@@ -0,0 +1,415 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs.
+Require Import Op RTL.
+Require Import NeedDomain.
+Require Import Lia.
+
+(** Neededness analysis for RISC-V operators *)
+
+Definition op1 (nv: nval) := nv :: nil.
+Definition op2 (nv: nval) := nv :: nv :: nil.
+Definition op3 (nv: nval) := nv :: nv :: nv :: nil.
+
+Definition needs_of_condition (cond: condition): list nval := nil.
+Definition needs_of_condition0 (cond0: condition0): list nval := nil.
+
+Definition needs_of_operation (op: operation) (nv: nval): list nval :=
+ match op with
+ | Omove => op1 nv
+ | Ointconst n => nil
+ | Olongconst n => nil
+ | Ofloatconst n => nil
+ | Osingleconst n => nil
+ | Oaddrsymbol id ofs => nil
+ | Oaddrstack ofs => nil
+ | Ocast8signed => op1 (sign_ext 8 nv)
+ | Ocast16signed => op1 (sign_ext 16 nv)
+ | Oadd => op2 (modarith nv)
+ | Oaddimm n => op1 (modarith nv)
+ | Oaddx _ => op2 (default nv)
+ | Oaddximm _ _ => op1 (default nv)
+ | Oneg => op1 (modarith nv)
+ | Osub => op2 (default nv)
+ | Orevsubimm _ => op1 (default nv)
+ | Orevsubx _ => op2 (default nv)
+ | Orevsubximm _ _ => op1 (default nv)
+ | Omul => op2 (modarith nv)
+ | Omulimm _ => op1 (modarith nv)
+ | Omulhs | Omulhu | Odiv | Odivu | Omod | Omodu => op2 (default nv)
+ | Oand => op2 (bitwise nv)
+ | Oandimm n => op1 (andimm nv n)
+ | Onand => op2 (bitwise nv)
+ | Onandimm n => op1 (andimm nv n)
+ | Oor => op2 (bitwise nv)
+ | Oorimm n => op1 (orimm nv n)
+ | Onor => op2 (bitwise nv)
+ | Onorimm n => op1 (orimm nv n)
+ | Oxor => op2 (bitwise nv)
+ | Oxorimm n => op1 (bitwise nv)
+ | Onxor => op2 (bitwise nv)
+ | Onxorimm n => op1 (bitwise nv)
+ | Onot => op1 (bitwise nv)
+ | Oandn => op2 (bitwise nv)
+ | Oandnimm n => op1 (andimm nv n)
+ | Oorn => op2 (bitwise nv)
+ | Oornimm n => op1 (orimm nv n)
+ | Oshl | Oshr | Oshru => op2 (default nv)
+ | Oshlimm n => op1 (shlimm nv n)
+ | Oshrimm n => op1 (shrimm nv n)
+ | Ororimm n => op1 (ror nv n)
+ | Oshruimm n => op1 (shruimm nv n)
+ | Oshrximm n => op1 (default nv)
+ | Omadd => op3 (modarith nv)
+ | Omaddimm n => op2 (modarith nv)
+ | Omsub => op3 (modarith nv)
+ | Omakelong => op2 (default nv)
+ | Olowlong | Ohighlong => op1 (default nv)
+ | Ocast32signed => op1 (default nv)
+ | Ocast32unsigned => op1 (default nv)
+ | Oaddl => op2 (default nv)
+ | Oaddlimm n => op1 (default nv)
+ | Oaddxl _ => op2 (default nv)
+ | Oaddxlimm _ _ => op1 (default nv)
+ | Orevsublimm _ => op1 (default nv)
+ | Orevsubxl _ => op2 (default nv)
+ | Orevsubxlimm _ _ => op1 (default nv)
+ | Onegl => op1 (default nv)
+ | Osubl => op2 (default nv)
+ | Omull => op2 (default nv)
+ | Omullimm _ => op1 (default nv)
+ | Omullhs | Omullhu | Odivl | Odivlu | Omodl | Omodlu => op2 (default nv)
+ | Oandl => op2 (default nv)
+ | Oandlimm n => op1 (default nv)
+ | Onandl => op2 (default nv)
+ | Onandlimm n => op1 (default nv)
+ | Oorl => op2 (default nv)
+ | Oorlimm n => op1 (default nv)
+ | Onorl => op2 (default nv)
+ | Onorlimm n => op1 (default nv)
+ | Oxorl => op2 (default nv)
+ | Oxorlimm n => op1 (default nv)
+ | Onxorl => op2 (default nv)
+ | Onxorlimm n => op1 (default nv)
+ | Onotl => op1 (default nv)
+ | Oandnl => op2 (default nv)
+ | Oandnlimm n => op1 (default nv)
+ | Oornl => op2 (default nv)
+ | Oornlimm n => op1 (default nv)
+ | Oshll | Oshrl | Oshrlu => op2 (default nv)
+ | Oshllimm n => op1 (default nv)
+ | Oshrlimm n => op1 (default nv)
+ | Oshrluimm n => op1 (default nv)
+ | Oshrxlimm n => op1 (default nv)
+ | Omaddl => op3 (default nv)
+ | Omaddlimm n => op2 (default nv)
+ | Omsubl => op3 (default nv)
+ | Onegf | Oabsf => op1 (default nv)
+ | Oaddf | Osubf | Omulf | Odivf | Ominf | Omaxf => op2 (default nv)
+ | Ofmaddf | Ofmsubf => op3 (default nv)
+ | Onegfs | Oabsfs => op1 (default nv)
+ | Oaddfs | Osubfs | Omulfs | Odivfs | Ominfs | Omaxfs => op2 (default nv)
+ | Oinvfs => op1 (default nv)
+ | Ofmaddfs | Ofmsubfs => op3 (default nv)
+ | Ofloatofsingle | Osingleoffloat => op1 (default nv)
+ | Ointoffloat | Ointuoffloat => op1 (default nv)
+ | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv)
+ | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
+ | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv)
+ | Ocmp c => needs_of_condition c
+ | Oextfz _ _ | Oextfs _ _ | Oextfzl _ _ | Oextfsl _ _ => op1 (default nv)
+ | Oinsf _ _ | Oinsfl _ _ => op2 (default nv)
+ | Osel c ty => nv :: nv :: needs_of_condition0 c
+ | Oselimm c imm
+ | Osellimm c imm => nv :: needs_of_condition0 c
+ end.
+
+Definition operation_is_redundant (op: operation) (nv: nval): bool :=
+ match op with
+ | Ocast8signed => sign_ext_redundant 8 nv
+ | Ocast16signed => sign_ext_redundant 16 nv
+ | Oandimm n => andimm_redundant nv n
+ | Oorimm n => orimm_redundant nv n
+ | _ => false
+ end.
+
+Ltac InvAgree :=
+ match goal with
+ | [H: vagree_list nil _ _ |- _ ] => inv H; InvAgree
+ | [H: vagree_list (_::_) _ _ |- _ ] => inv H; InvAgree
+ | _ => idtac
+ end.
+
+Ltac TrivialExists :=
+ match goal with
+ | [ |- exists v, Some ?x = Some v /\ _ ] => exists x; split; auto
+ | _ => idtac
+ end.
+
+Section SOUNDNESS.
+
+Variable ge: genv.
+Variable sp: block.
+Variables m1 m2: mem.
+Hypothesis PERM: forall b ofs k p, Mem.perm m1 b ofs k p -> Mem.perm m2 b ofs k p.
+
+Lemma needs_of_condition_sound:
+ forall cond args b args',
+ eval_condition cond args m1 = Some b ->
+ vagree_list args args' (needs_of_condition cond) ->
+ eval_condition cond args' m2 = Some b.
+Proof.
+ intros. unfold needs_of_condition in H0.
+ eapply default_needs_of_condition_sound; eauto.
+Qed.
+
+Let valid_pointer_inj:
+ forall b1 ofs b2 delta,
+ inject_id b1 = Some(b2, delta) ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+ unfold inject_id; intros. inv H. rewrite Ptrofs.add_zero.
+ rewrite Mem.valid_pointer_nonempty_perm in *. eauto.
+Qed.
+
+Let weak_valid_pointer_inj:
+ forall b1 ofs b2 delta,
+ inject_id b1 = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+ unfold inject_id; intros. inv H. rewrite Ptrofs.add_zero.
+ rewrite Mem.weak_valid_pointer_spec in *.
+ rewrite ! Mem.valid_pointer_nonempty_perm in *.
+ destruct H0; [left|right]; eauto.
+Qed.
+
+Let weak_valid_pointer_no_overflow:
+ forall b1 ofs b2 delta,
+ inject_id b1 = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned.
+Proof.
+ unfold inject_id; intros. inv H. rewrite Z.add_0_r. apply Ptrofs.unsigned_range_2.
+Qed.
+
+Let valid_different_pointers_inj:
+ forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+ b1 <> b2 ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true ->
+ Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true ->
+ inject_id b1 = Some (b1', delta1) ->
+ inject_id b2 = Some (b2', delta2) ->
+ b1' <> b2' \/
+ Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned (Ptrofs.add ofs2 (Ptrofs.repr delta2)).
+Proof.
+ unfold inject_id; intros. left; congruence.
+Qed.
+
+Lemma needs_of_condition0_sound:
+ forall cond arg1 b arg2,
+ eval_condition0 cond arg1 m1 = Some b ->
+ vagree arg1 arg2 All ->
+ eval_condition0 cond arg2 m2 = Some b.
+Proof.
+ intros until arg2.
+ intros Hcond Hagree.
+ apply eval_condition0_inj with (f := inject_id) (m1 := m1) (v1 := arg1); cbn; auto.
+ apply val_inject_lessdef. apply lessdef_vagree. assumption.
+Qed.
+
+Lemma addl_sound:
+ forall v1 w1 v2 w2 x,
+ vagree v1 w1 (default x) -> vagree v2 w2 (default x) ->
+ vagree (Val.addl v1 v2) (Val.addl w1 w2) x.
+Proof.
+ unfold default; intros.
+ destruct x; cbn in *; trivial.
+ - unfold Val.addl.
+ destruct v1; destruct v2; trivial; destruct Archi.ptr64; trivial.
+ - apply Val.addl_lessdef; trivial.
+Qed.
+
+Lemma subl_lessdef:
+ forall v1 v1' v2 v2',
+ Val.lessdef v1 v1' -> Val.lessdef v2 v2' -> Val.lessdef (Val.subl v1 v2) (Val.subl v1' v2').
+Proof.
+ intros. inv H. inv H0. auto. destruct v1'; cbn; auto. cbn; auto.
+Qed.
+
+Lemma subl_sound:
+ forall v1 w1 v2 w2 x,
+ vagree v1 w1 (default x) -> vagree v2 w2 (default x) ->
+ vagree (Val.subl v1 v2) (Val.subl w1 w2) x.
+Proof.
+ unfold default; intros.
+ destruct x; cbn in *; trivial.
+ - unfold Val.subl.
+ destruct v1; destruct v2; trivial; destruct Archi.ptr64; cbn; trivial.
+ destruct (eq_block _ _) ; cbn; trivial.
+ - apply subl_lessdef; trivial.
+Qed.
+
+
+Lemma mull_sound:
+ forall v1 w1 v2 w2 x,
+ vagree v1 w1 (default x) -> vagree v2 w2 (default x) ->
+ vagree (Val.mull v1 v2) (Val.mull w1 w2) x.
+Proof.
+ unfold default; intros.
+ destruct x; cbn in *; trivial.
+ - unfold Val.mull.
+ destruct v1; destruct v2; trivial.
+ - unfold Val.mull.
+ destruct v1; destruct v2; trivial.
+ inv H. inv H0.
+ trivial.
+Qed.
+
+
+Remark default_idem: forall nv, default (default nv) = default nv.
+Proof.
+ destruct nv; cbn; trivial.
+Qed.
+
+Lemma vagree_triple_op_float :
+ forall f a b c x y z nv,
+ (vagree a x (default nv)) ->
+ (vagree b y (default nv)) ->
+ (vagree c z (default nv)) ->
+ (vagree (ExtValues.triple_op_float f a b c)
+ (ExtValues.triple_op_float f x y z) nv).
+Proof.
+ induction nv;
+ intros Hax Hby Hcz.
+ - trivial.
+ - cbn in *. destruct a; cbn; trivial.
+ destruct b; cbn; trivial.
+ destruct c; cbn; trivial.
+ - cbn in *. destruct a; cbn; trivial.
+ destruct b; cbn; trivial.
+ destruct c; cbn; trivial.
+ inv Hax. inv Hby. inv Hcz.
+ cbn.
+ constructor.
+Qed.
+
+Lemma vagree_triple_op_single :
+ forall f a b c x y z nv,
+ (vagree a x (default nv)) ->
+ (vagree b y (default nv)) ->
+ (vagree c z (default nv)) ->
+ (vagree (ExtValues.triple_op_single f a b c)
+ (ExtValues.triple_op_single f x y z) nv).
+Proof.
+ induction nv;
+ intros Hax Hby Hcz.
+ - trivial.
+ - cbn in *. destruct a; cbn; trivial.
+ destruct b; cbn; trivial.
+ destruct c; cbn; trivial.
+ - cbn in *. destruct a; cbn; trivial.
+ destruct b; cbn; trivial.
+ destruct c; cbn; trivial.
+ inv Hax. inv Hby. inv Hcz.
+ cbn.
+ constructor.
+Qed.
+
+Hint Resolve vagree_triple_op_float vagree_triple_op_single : na.
+
+Lemma needs_of_operation_sound:
+ forall op args v nv args',
+ eval_operation ge (Vptr sp Ptrofs.zero) op args m1 = Some v ->
+ vagree_list args args' (needs_of_operation op nv) ->
+ nv <> Nothing ->
+ exists v',
+ eval_operation ge (Vptr sp Ptrofs.zero) op args' m2 = Some v'
+ /\ vagree v v' nv.
+Proof.
+ unfold needs_of_operation; intros; destruct op; try (eapply default_needs_of_operation_sound; eauto; fail);
+ cbn in *; FuncInv; InvAgree; TrivialExists.
+- apply sign_ext_sound; auto. compute; auto.
+- apply sign_ext_sound; auto. compute; auto.
+- apply add_sound; auto.
+- apply add_sound; auto with na.
+- apply neg_sound; auto.
+- apply mul_sound; auto.
+- apply mul_sound; auto with na.
+- apply and_sound; auto.
+- apply andimm_sound; auto.
+- apply notint_sound; apply and_sound; auto.
+- apply notint_sound; apply andimm_sound; auto.
+- apply or_sound; auto.
+- apply orimm_sound; auto.
+- apply notint_sound; apply or_sound; auto.
+- apply notint_sound; apply orimm_sound; auto.
+- apply xor_sound; auto.
+- apply xor_sound; auto with na.
+- apply notint_sound; apply xor_sound; auto.
+- apply notint_sound; apply xor_sound; auto with na.
+- apply notint_sound; auto.
+- apply and_sound; try apply notint_sound; auto with na.
+- apply andimm_sound; try apply notint_sound; auto with na.
+- apply or_sound; try apply notint_sound; auto with na.
+- apply orimm_sound; try apply notint_sound; auto with na.
+- apply shlimm_sound; auto.
+- apply shrimm_sound; auto.
+- apply shruimm_sound; auto.
+- apply ror_sound; auto.
+ (* madd *)
+- apply add_sound; try apply mul_sound; auto with na; rewrite modarith_idem; assumption.
+- apply add_sound; try apply mul_sound; auto with na; rewrite modarith_idem; assumption.
+- repeat rewrite ExtValues.sub_add_neg.
+ apply add_sound; trivial.
+ apply neg_sound; trivial.
+ rewrite modarith_idem.
+ apply mul_sound;
+ rewrite modarith_idem; trivial.
+- destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
+ erewrite needs_of_condition0_sound by eauto.
+ apply select_sound; auto.
+ cbn; auto with na.
+ (* select imm *)
+- destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
+ { erewrite needs_of_condition0_sound by eauto.
+ apply select_sound; auto with na. }
+ cbn; auto with na.
+ (* select long imm *)
+- destruct (eval_condition0 _ _ _) as [b|] eqn:EC.
+ { erewrite needs_of_condition0_sound by eauto.
+ apply select_sound; auto with na. }
+ cbn; auto with na.
+Qed.
+
+Lemma operation_is_redundant_sound:
+ forall op nv arg1 args v arg1' args',
+ operation_is_redundant op nv = true ->
+ eval_operation ge (Vptr sp Ptrofs.zero) op (arg1 :: args) m1 = Some v ->
+ vagree_list (arg1 :: args) (arg1' :: args') (needs_of_operation op nv) ->
+ vagree v arg1' nv.
+Proof.
+ intros. destruct op; cbn in *; try discriminate; inv H1; FuncInv; subst.
+- apply sign_ext_redundant_sound; auto. lia.
+- apply sign_ext_redundant_sound; auto. lia.
+- apply andimm_redundant_sound; auto.
+- apply orimm_redundant_sound; auto.
+Qed.
+
+End SOUNDNESS.
diff --git a/kvx/Op.v b/kvx/Op.v
new file mode 100644
index 00000000..4458adb3
--- /dev/null
+++ b/kvx/Op.v
@@ -0,0 +1,2014 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Operators and addressing modes. The abstract syntax and dynamic
+ semantics for the CminorSel, RTL, LTL and Mach languages depend on the
+ following types, defined in this library:
+- [condition]: boolean conditions for conditional branches;
+- [operation]: arithmetic and logical operations;
+- [addressing]: addressing modes for load and store operations.
+
+ These types are processor-specific and correspond roughly to what the
+ processor can compute in one instruction. In other terms, these
+ types reflect the state of the program after instruction selection.
+ For a processor-independent set of operations, see the abstract
+ syntax and dynamic semantics of the Cminor language.
+*)
+
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values ExtValues Memory Globalenvs Events.
+
+Set Implicit Arguments.
+
+(** Conditions (boolean-valued operators). *)
+
+Inductive condition : Type :=
+ | Ccomp (c: comparison) (**r signed integer comparison *)
+ | Ccompu (c: comparison) (**r unsigned integer comparison *)
+ | Ccompimm (c: comparison) (n: int) (**r signed integer comparison with a constant *)
+ | Ccompuimm (c: comparison) (n: int) (**r unsigned integer comparison with a constant *)
+ | Ccompl (c: comparison) (**r signed 64-bit integer comparison *)
+ | Ccomplu (c: comparison) (**r unsigned 64-bit integer comparison *)
+ | Ccomplimm (c: comparison) (n: int64) (**r signed 64-bit integer comparison with a constant *)
+ | Ccompluimm (c: comparison) (n: int64) (**r unsigned 64-bit integer comparison with a constant *)
+ | Ccompf (c: comparison) (**r 64-bit floating-point comparison *)
+ | Cnotcompf (c: comparison) (**r negation of a floating-point comparison *)
+ | Ccompfs (c: comparison) (**r 32-bit floating-point comparison *)
+ | Cnotcompfs (c: comparison). (**r negation of a floating-point comparison *)
+
+Inductive condition0 : Type :=
+ | Ccomp0 (c: comparison) (**r signed integer comparison with 0 *)
+ | Ccompu0 (c: comparison) (**r unsigned integer comparison with 0 *)
+ | Ccompl0 (c: comparison) (**r signed 64-bit integer comparison with 0 *)
+ | Ccomplu0 (c: comparison). (**r unsigned 64-bit integer comparison with 0 *)
+
+Definition arg_type_of_condition0 (cond: condition0) :=
+ match cond with
+ | Ccomp0 _ | Ccompu0 _ => Tint
+ | Ccompl0 _ | Ccomplu0 _ => Tlong
+ end.
+
+(** Arithmetic and logical operations. In the descriptions, [rd] is the
+ result of the operation and [r1], [r2], etc, are the arguments. *)
+
+Inductive operation : Type :=
+ | Omove (**r [rd = r1] *)
+ | Ointconst (n: int) (**r [rd] is set to the given integer constant *)
+ | Olongconst (n: int64) (**r [rd] is set to the given integer constant *)
+ | Ofloatconst (n: float) (**r [rd] is set to the given float constant *)
+ | Osingleconst (n: float32)(**r [rd] is set to the given float constant *)
+ | Oaddrsymbol (id: ident) (ofs: ptrofs) (**r [rd] is set to the address of the symbol plus the given offset *)
+ | Oaddrstack (ofs: ptrofs) (**r [rd] is set to the stack pointer plus the given offset *)
+(*c 32-bit integer arithmetic: *)
+ | Ocast8signed (**r [rd] is 8-bit sign extension of [r1] *)
+ | Ocast16signed (**r [rd] is 16-bit sign extension of [r1] *)
+ | Oadd (**r [rd = r1 + r2] *)
+ | Oaddimm (n: int) (**r [rd = r1 + n] *)
+ | Oaddx (shift: shift1_4) (**r [rd = r1 << shift + r2] *)
+ | Oaddximm (shift: shift1_4) (n: int) (**r [rd = r1 << shift + n] *)
+ | Oneg (**r [rd = - r1] *)
+ | Osub (**r [rd = r1 - r2] *)
+ | Orevsubimm (n: int) (**r [rd = n - r1] *)
+ | Orevsubx (shift: shift1_4) (**r [rd = r2 -r1 << shift] *)
+ | Orevsubximm (shift: shift1_4) (n: int) (**r [rd = n -r1 << shift] *)
+ | Omul (**r [rd = r1 * r2] *)
+ | Omulimm (n: int) (**r [rd = r1 * n] *)
+ | Omulhs (**r [rd = high part of r1 * r2, signed] *)
+ | Omulhu (**r [rd = high part of r1 * r2, unsigned] *)
+ | Odiv (**r [rd = r1 / r2] (signed) *)
+ | Odivu (**r [rd = r1 / r2] (unsigned) *)
+ | Omod (**r [rd = r1 % r2] (signed) *)
+ | Omodu (**r [rd = r1 % r2] (unsigned) *)
+ | Oand (**r [rd = r1 & r2] *)
+ | Oandimm (n: int) (**r [rd = r1 & n] *)
+ | Onand (**r [rd = ~(r1 & r2)] *)
+ | Onandimm (n: int) (**r [rd = ~(r1 & n)] *)
+ | Oor (**r [rd = r1 | r2] *)
+ | Oorimm (n: int) (**r [rd = r1 | n] *)
+ | Onor (**r [rd = ~(r1 | r2)] *)
+ | Onorimm (n: int) (**r [rd = ~(r1 | n)] *)
+ | Oxor (**r [rd = r1 ^ r2] *)
+ | Oxorimm (n: int) (**r [rd = r1 ^ n] *)
+ | Onxor (**r [rd = ~(r1 ^ r2)] *)
+ | Onxorimm (n: int) (**r [rd = ~(r1 ^ n)] *)
+ | Onot (**r [rd = ~r1] *)
+ | Oandn (**r [rd = (~r1) & r2] *)
+ | Oandnimm (n: int) (**r [rd = (~r1) & n] *)
+ | Oorn (**r [rd = (~r1) | r2] *)
+ | Oornimm (n: int) (**r [rd = (~r1) | n] *)
+ | Oshl (**r [rd = r1 << r2] *)
+ | Oshlimm (n: int) (**r [rd = r1 << n] *)
+ | Oshr (**r [rd = r1 >>s r2] (signed) *)
+ | Oshrimm (n: int) (**r [rd = r1 >>s n] (signed) *)
+ | Oshru (**r [rd = r1 >>u r2] (unsigned) *)
+ | Oshruimm (n: int) (**r [rd = r1 >>x n] (unsigned) *)
+ | Oshrximm (n: int) (**r [rd = r1 / 2^n] (signed) *)
+ | Ororimm (n: int) (**r rotate right immediate *)
+ | Omadd (**r [rd = rd + r1 * r2] *)
+ | Omaddimm (n: int) (**r [rd = rd + r1 * imm] *)
+ | Omsub (**r [rd = rd - r1 * r2] *)
+(*c 64-bit integer arithmetic: *)
+ | Omakelong (**r [rd = r1 << 32 | r2] *)
+ | Olowlong (**r [rd = low-word(r1)] *)
+ | Ohighlong (**r [rd = high-word(r1)] *)
+ | Ocast32signed (**r [rd] is 32-bit sign extension of [r1] *)
+ | Ocast32unsigned (**r [rd] is 32-bit zero extension of [r1] *)
+ | Oaddl (**r [rd = r1 + r2] *)
+ | Oaddlimm (n: int64) (**r [rd = r1 + n] *)
+ | Oaddxl (shift: shift1_4) (**r [rd = r1 << shift + r2] *)
+ | Oaddxlimm (shift: shift1_4) (n: int64) (**r [rd = r1 << shift + n] *)
+ | Orevsublimm (n: int64) (**r [rd = n - r1] *)
+ | Orevsubxl (shift: shift1_4) (**r [rd = r2 -r1 << shift] *)
+ | Orevsubxlimm (shift: shift1_4) (n: int64) (**r [rd = n -r1 << shift] *)
+ | Onegl (**r [rd = - r1] *)
+ | Osubl (**r [rd = r1 - r2] *)
+ | Omull (**r [rd = r1 * r2] *)
+ | Omullimm (n: int64) (**r [rd = r1 * n] *)
+ | Omullhs (**r [rd = high part of r1 * r2, signed] *)
+ | Omullhu (**r [rd = high part of r1 * r2, unsigned] *)
+ | Odivl (**r [rd = r1 / r2] (signed) *)
+ | Odivlu (**r [rd = r1 / r2] (unsigned) *)
+ | Omodl (**r [rd = r1 % r2] (signed) *)
+ | Omodlu (**r [rd = r1 % r2] (unsigned) *)
+ | Oandl (**r [rd = r1 & r2] *)
+ | Oandlimm (n: int64) (**r [rd = r1 & n] *)
+ | Onandl (**r [rd = ~(r1 & r2)] *)
+ | Onandlimm (n: int64) (**r [rd = ~(r1 & n)] *)
+ | Oorl (**r [rd = r1 | r2] *)
+ | Oorlimm (n: int64) (**r [rd = r1 | n] *)
+ | Onorl (**r [rd = ~(r1 | r2)] *)
+ | Onorlimm (n: int64) (**r [rd = ~(r1 | n)] *)
+ | Oxorl (**r [rd = r1 ^ r2] *)
+ | Oxorlimm (n: int64) (**r [rd = r1 ^ n] *)
+ | Onxorl (**r [rd = ~(r1 ^ r2)] *)
+ | Onxorlimm (n: int64) (**r [rd = ~(r1 ^ n)] *)
+ | Onotl (**r [rd = ~r1] *)
+ | Oandnl (**r [rd = (~r1) & r2] *)
+ | Oandnlimm (n: int64) (**r [rd = (~r1) & n] *)
+ | Oornl (**r [rd = (~r1) | r2] *)
+ | Oornlimm (n: int64) (**r [rd = (~r1) | n] *)
+ | Oshll (**r [rd = r1 << r2] *)
+ | Oshllimm (n: int) (**r [rd = r1 << n] *)
+ | Oshrl (**r [rd = r1 >> r2] (signed) *)
+ | Oshrlimm (n: int) (**r [rd = r1 >> n] (signed) *)
+ | Oshrlu (**r [rd = r1 >> r2] (unsigned) *)
+ | Oshrluimm (n: int) (**r [rd = r1 >> n] (unsigned) *)
+ | Oshrxlimm (n: int) (**r [rd = r1 / 2^n] (signed) *)
+ | Omaddl (**r [rd = rd + r1 * r2] *)
+ | Omaddlimm (n: int64) (**r [rd = rd + r1 * imm] *)
+ | Omsubl (**r [rd = rd - r1 * r2] *)
+(*c Floating-point arithmetic: *)
+ | Onegf (**r [rd = - r1] *)
+ | Oabsf (**r [rd = abs(r1)] *)
+ | Oaddf (**r [rd = r1 + r2] *)
+ | Osubf (**r [rd = r1 - r2] *)
+ | Omulf (**r [rd = r1 * r2] *)
+ | Odivf (**r [rd = r1 / r2] *)
+ | Ominf
+ | Omaxf
+ | Ofmaddf
+ | Ofmsubf
+ | Onegfs (**r [rd = - r1] *)
+ | Oabsfs (**r [rd = abs(r1)] *)
+ | Oaddfs (**r [rd = r1 + r2] *)
+ | Osubfs (**r [rd = r1 - r2] *)
+ | Omulfs (**r [rd = r1 * r2] *)
+ | Odivfs (**r [rd = r1 / r2] *)
+ | Ominfs
+ | Omaxfs
+ | Oinvfs
+ | Ofmaddfs
+ | Ofmsubfs
+ | Osingleoffloat (**r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (**r [rd] is [r1] extended to double-precision float *)
+(*c Conversions between int and float: *)
+ | Ointoffloat (**r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (**r [rd = unsigned_int_of_float64(r1)] *)
+ | Ointofsingle (**r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (**r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (**r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (**r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (**r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (**r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (**r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (**r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (**r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (**r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (**r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (**r [rd = float32_of_unsigned_int(r1)] *)
+(*c Boolean tests: *)
+ | Ocmp (cond: condition) (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
+ | Oextfz (stop : Z) (start : Z)
+ | Oextfs (stop : Z) (start : Z)
+ | Oextfzl (stop : Z) (start : Z)
+ | Oextfsl (stop : Z) (start : Z)
+ | Oinsf (stop : Z) (start : Z)
+ | Oinsfl (stop : Z) (start : Z)
+ | Osel (c0 : condition0) (ty : typ)
+ | Oselimm (c0 : condition0) (imm: int)
+ | Osellimm (c0 : condition0) (imm: int64).
+
+(** Addressing modes. [r1], [r2], etc, are the arguments to the
+ addressing. *)
+
+Inductive addressing: Type :=
+ | Aindexed2XS (scale : Z) : addressing (**r Address is [r1 + r2 << scale] *)
+ | Aindexed2 : addressing (**r Address is [r1 + r2] *)
+ | Aindexed: ptrofs -> addressing (**r Address is [r1 + offset] *)
+ | Aglobal: ident -> ptrofs -> addressing (**r Address is global plus offset *)
+ | Ainstack: ptrofs -> addressing. (**r Address is [stack_pointer + offset] *)
+
+(** Comparison functions (used in modules [CSE] and [Allocation]). *)
+
+Definition eq_condition (x y: condition) : {x=y} + {x<>y}.
+Proof.
+ generalize Int.eq_dec Int64.eq_dec; intro.
+ assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
+ decide equality.
+Defined.
+
+Definition eq_condition0 (x y: condition0) : {x=y} + {x<>y}.
+Proof.
+ generalize Int.eq_dec Int64.eq_dec; intro.
+ assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
+ decide equality.
+Defined.
+
+Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}.
+Proof.
+ generalize ident_eq Ptrofs.eq_dec Z.eq_dec; intros.
+ decide equality.
+Defined.
+
+Definition eq_shift1_4 (x y : shift1_4): {x=y} + {x<>y}.
+Proof.
+ decide equality.
+Defined.
+
+Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Proof.
+ generalize typ_eq Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition eq_condition0 Z.eq_dec eq_shift1_4; intros.
+ decide equality.
+Defined.
+
+(* Alternate definition:
+Definition beq_operation: forall (x y: operation), bool.
+Proof.
+ generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition; boolean_equality.
+Defined.
+
+Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}.
+Proof.
+ decidable_equality_from beq_operation.
+Defined.
+*)
+
+Global Opaque eq_condition eq_addressing eq_operation.
+
+(** * Evaluation functions *)
+
+(** Evaluation of conditions, operators and addressing modes applied
+ to lists of values. Return [None] when the computation can trigger an
+ error, e.g. integer division by zero. [eval_condition] returns a boolean,
+ [eval_operation] and [eval_addressing] return a value. *)
+
+Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool :=
+ match cond, vl with
+ | Ccomp c, v1 :: v2 :: nil => Val.cmp_bool c v1 v2
+ | Ccompu c, v1 :: v2 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 v2
+ | Ccompimm c n, v1 :: nil => Val.cmp_bool c v1 (Vint n)
+ | Ccompuimm c n, v1 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint n)
+ | Ccompl c, v1 :: v2 :: nil => Val.cmpl_bool c v1 v2
+ | Ccomplu c, v1 :: v2 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 v2
+ | Ccomplimm c n, v1 :: nil => Val.cmpl_bool c v1 (Vlong n)
+ | Ccompluimm c n, v1 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong n)
+ | Ccompf c, v1 :: v2 :: nil => Val.cmpf_bool c v1 v2
+ | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
+ | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+ | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
+ | _, _ => None
+ end.
+
+Definition eval_condition0 (cond: condition0) (v1: val) (m: mem): option bool :=
+ match cond with
+ | Ccomp0 c => Val.cmp_bool c v1 (Vint Int.zero)
+ | Ccompu0 c => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint Int.zero)
+ | Ccompl0 c => Val.cmpl_bool c v1 (Vlong Int64.zero)
+ | Ccomplu0 c => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong Int64.zero)
+ end.
+
+Definition negate_condition0 (cond0 : condition0) : condition0 :=
+ match cond0 with
+ | Ccomp0 c => Ccomp0 (negate_comparison c)
+ | Ccompu0 c => Ccompu0 (negate_comparison c)
+ | Ccompl0 c => Ccompl0 (negate_comparison c)
+ | Ccomplu0 c => Ccomplu0 (negate_comparison c)
+ end.
+
+Definition eval_operation
+ (F V: Type) (genv: Genv.t F V) (sp: val)
+ (op: operation) (vl: list val) (m: mem): option val :=
+ match op, vl with
+ | Omove, v1::nil => Some v1
+ | Ointconst n, nil => Some (Vint n)
+ | Olongconst n, nil => Some (Vlong n)
+ | Ofloatconst n, nil => Some (Vfloat n)
+ | Osingleconst n, nil => Some (Vsingle n)
+ | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs)
+ | Oaddrstack ofs, nil => Some (Val.offset_ptr sp ofs)
+ | Ocast8signed, v1 :: nil => Some (Val.sign_ext 8 v1)
+ | Ocast16signed, v1 :: nil => Some (Val.sign_ext 16 v1)
+ | Oadd, v1 :: v2 :: nil => Some (Val.add v1 v2)
+ | Oaddimm n, v1 :: nil => Some (Val.add v1 (Vint n))
+ | Oaddx s14, v1 :: v2 :: nil => Some (addx (int_of_shift1_4 s14) v1 v2)
+ | Oaddximm s14 n, v1 :: nil => Some (addx (int_of_shift1_4 s14) v1 (Vint n))
+ | Oneg, v1 :: nil => Some (Val.neg v1)
+ | Osub, v1 :: v2 :: nil => Some (Val.sub v1 v2)
+ | Orevsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1)
+ | Orevsubx shift, v1 :: v2 :: nil => Some (ExtValues.revsubx (int_of_shift1_4 shift) v1 v2)
+ | Orevsubximm shift n, v1 :: nil => Some (ExtValues.revsubx (int_of_shift1_4 shift) v1 (Vint n))
+ | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
+ | Omulimm n, v1 :: nil => Some (Val.mul v1 (Vint n))
+ | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2)
+ | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2)
+ | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
+ | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+ | Omod, v1 :: v2 :: nil => Val.mods v1 v2
+ | Omodu, v1 :: v2 :: nil => Val.modu v1 v2
+ | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
+ | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
+ | Onand, v1 :: v2 :: nil => Some (Val.notint (Val.and v1 v2))
+ | Onandimm n, v1 :: nil => Some (Val.notint (Val.and v1 (Vint n)))
+ | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2)
+ | Oorimm n, v1 :: nil => Some (Val.or v1 (Vint n))
+ | Onor, v1 :: v2 :: nil => Some (Val.notint (Val.or v1 v2))
+ | Onorimm n, v1 :: nil => Some (Val.notint (Val.or v1 (Vint n)))
+ | Oxor, v1 :: v2 :: nil => Some (Val.xor v1 v2)
+ | Oxorimm n, v1 :: nil => Some (Val.xor v1 (Vint n))
+ | Onxor, v1 :: v2 :: nil => Some (Val.notint (Val.xor v1 v2))
+ | Onxorimm n, v1 :: nil => Some (Val.notint (Val.xor v1 (Vint n)))
+ | Onot, v1 :: nil => Some (Val.notint v1)
+ | Oandn, v1 :: v2 :: nil => Some (Val.and (Val.notint v1) v2)
+ | Oandnimm n, v1 :: nil => Some (Val.and (Val.notint v1) (Vint n))
+ | Oorn, v1 :: v2 :: nil => Some (Val.or (Val.notint v1) v2)
+ | Oornimm n, v1 :: nil => Some (Val.or (Val.notint v1) (Vint n))
+ | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
+ | Oshlimm n, v1 :: nil => Some (Val.shl v1 (Vint n))
+ | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
+ | Oshrimm n, v1 :: nil => Some (Val.shr v1 (Vint n))
+ | Ororimm n, v1 :: nil => Some (Val.ror v1 (Vint n))
+ | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
+ | Oshruimm n, v1 :: nil => Some (Val.shru v1 (Vint n))
+ | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
+ | Omadd, v1::v2::v3::nil => Some (Val.add v1 (Val.mul v2 v3))
+ | (Omaddimm n), v1::v2::nil => Some (Val.add v1 (Val.mul v2 (Vint n)))
+ | Omsub, v1::v2::v3::nil => Some (Val.sub v1 (Val.mul v2 v3))
+
+ | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2)
+ | Olowlong, v1::nil => Some (Val.loword v1)
+ | Ohighlong, v1::nil => Some (Val.hiword v1)
+ | Ocast32signed, v1 :: nil => Some (Val.longofint v1)
+ | Ocast32unsigned, v1 :: nil => Some (Val.longofintu v1)
+ | Oaddl, v1 :: v2 :: nil => Some (Val.addl v1 v2)
+ | Oaddlimm n, v1::nil => Some (Val.addl v1 (Vlong n))
+ | Oaddxl s14, v1 :: v2 :: nil => Some (addxl (int_of_shift1_4 s14) v1 v2)
+ | Oaddxlimm s14 n, v1 :: nil => Some (addxl (int_of_shift1_4 s14) v1 (Vlong n))
+ | Onegl, v1::nil => Some (Val.negl v1)
+ | Osubl, v1::v2::nil => Some (Val.subl v1 v2)
+ | Orevsublimm n, v1 :: nil => Some (Val.subl (Vlong n) v1)
+ | Orevsubxl shift, v1 :: v2 :: nil => Some (ExtValues.revsubxl (int_of_shift1_4 shift) v1 v2)
+ | Orevsubxlimm shift n, v1 :: nil => Some (ExtValues.revsubxl (int_of_shift1_4 shift) v1 (Vlong n))
+ | Omull, v1::v2::nil => Some (Val.mull v1 v2)
+ | Omullimm n, v1::nil => Some (Val.mull v1 (Vlong n))
+ | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
+ | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
+ | Odivl, v1::v2::nil => Val.divls v1 v2
+ | Odivlu, v1::v2::nil => Val.divlu v1 v2
+ | Omodl, v1::v2::nil => Val.modls v1 v2
+ | Omodlu, v1::v2::nil => Val.modlu v1 v2
+ | Oandl, v1::v2::nil => Some(Val.andl v1 v2)
+ | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n))
+ | Onandl, v1::v2::nil => Some(Val.notl (Val.andl v1 v2))
+ | Onandlimm n, v1::nil => Some(Val.notl (Val.andl v1 (Vlong n)))
+ | Oorl, v1::v2::nil => Some(Val.orl v1 v2)
+ | Oorlimm n, v1::nil => Some (Val.orl v1 (Vlong n))
+ | Onorl, v1::v2::nil => Some(Val.notl (Val.orl v1 v2))
+ | Onorlimm n, v1::nil => Some(Val.notl (Val.orl v1 (Vlong n)))
+ | Oxorl, v1::v2::nil => Some(Val.xorl v1 v2)
+ | Oxorlimm n, v1::nil => Some (Val.xorl v1 (Vlong n))
+ | Onxorl, v1::v2::nil => Some(Val.notl (Val.xorl v1 v2))
+ | Onxorlimm n, v1::nil => Some(Val.notl (Val.xorl v1 (Vlong n)))
+ | Onotl, v1 :: nil => Some (Val.notl v1)
+ | Oandnl, v1 :: v2 :: nil => Some (Val.andl (Val.notl v1) v2)
+ | Oandnlimm n, v1 :: nil => Some (Val.andl (Val.notl v1) (Vlong n))
+ | Oornl, v1 :: v2 :: nil => Some (Val.orl (Val.notl v1) v2)
+ | Oornlimm n, v1 :: nil => Some (Val.orl (Val.notl v1) (Vlong n))
+ | Oshll, v1::v2::nil => Some (Val.shll v1 v2)
+ | Oshllimm n, v1::nil => Some (Val.shll v1 (Vint n))
+ | Oshrl, v1::v2::nil => Some (Val.shrl v1 v2)
+ | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n))
+ | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2)
+ | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n))
+ | Oshrxlimm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
+ | Omaddl, v1::v2::v3::nil => Some (Val.addl v1 (Val.mull v2 v3))
+ | (Omaddlimm n), v1::v2::nil => Some (Val.addl v1 (Val.mull v2 (Vlong n)))
+ | Omsubl, v1::v2::v3::nil => Some (Val.subl v1 (Val.mull v2 v3))
+
+ | Onegf, v1::nil => Some (Val.negf v1)
+ | Oabsf, v1::nil => Some (Val.absf v1)
+ | Oaddf, v1::v2::nil => Some (Val.addf v1 v2)
+ | Osubf, v1::v2::nil => Some (Val.subf v1 v2)
+ | Omulf, v1::v2::nil => Some (Val.mulf v1 v2)
+ | Odivf, v1::v2::nil => Some (Val.divf v1 v2)
+ | Ominf, v1::v2::nil => Some (ExtValues.minf v1 v2)
+ | Omaxf, v1::v2::nil => Some (ExtValues.maxf v1 v2)
+ | Ofmaddf, v1::v2::v3::nil => Some (ExtValues.fmaddf v1 v2 v3)
+ | Ofmsubf, v1::v2::v3::nil => Some (ExtValues.fmsubf v1 v2 v3)
+
+ | Onegfs, v1::nil => Some (Val.negfs v1)
+ | Oabsfs, v1::nil => Some (Val.absfs v1)
+ | Oaddfs, v1::v2::nil => Some (Val.addfs v1 v2)
+ | Osubfs, v1::v2::nil => Some (Val.subfs v1 v2)
+ | Omulfs, v1::v2::nil => Some (Val.mulfs v1 v2)
+ | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2)
+ | Ominfs, v1::v2::nil => Some (ExtValues.minfs v1 v2)
+ | Omaxfs, v1::v2::nil => Some (ExtValues.maxfs v1 v2)
+ | Oinvfs, v1::nil => Some (ExtValues.invfs v1)
+ | Ofmaddfs, v1::v2::v3::nil => Some (ExtValues.fmaddfs v1 v2 v3)
+ | Ofmsubfs, v1::v2::v3::nil => Some (ExtValues.fmsubfs v1 v2 v3)
+
+ | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
+ | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
+ | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+ | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+ | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+ | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+ | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+ | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+ | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+ | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+ | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+ | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+ | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+ | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+ | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+ | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
+ | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
+ | (Oextfz stop start), v0::nil => Some (extfz stop start v0)
+ | (Oextfs stop start), v0::nil => Some (extfs stop start v0)
+ | (Oextfzl stop start), v0::nil => Some (extfzl stop start v0)
+ | (Oextfsl stop start), v0::nil => Some (extfsl stop start v0)
+ | (Oinsf stop start), v0::v1::nil => Some (insf stop start v0 v1)
+ | (Oinsfl stop start), v0::v1::nil => Some (insfl stop start v0 v1)
+ | Osel c ty, v1::v2::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 v2 ty)
+ | Oselimm c imm, v1::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 (Vint imm) Tint)
+ | Osellimm c imm, v1::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 (Vlong imm) Tlong)
+ | _, _ => None
+ end.
+
+Definition eval_addressing
+ (F V: Type) (genv: Genv.t F V) (sp: val)
+ (addr: addressing) (vl: list val) : option val :=
+ match addr, vl with
+ | Aindexed2XS scale, v1 :: v2 :: nil => Some (Val.addl v1 (Val.shll v2 (Vint (Int.repr scale))))
+ | Aindexed2, v1 :: v2 :: nil => Some (Val.addl v1 v2)
+ | Aindexed n, v1 :: nil => Some (Val.offset_ptr v1 n)
+ | Aglobal s ofs, nil => Some (Genv.symbol_address genv s ofs)
+ | Ainstack n, nil => Some (Val.offset_ptr sp n)
+ | _, _ => None
+ end.
+
+Remark eval_addressing_Ainstack:
+ forall (F V: Type) (genv: Genv.t F V) sp ofs,
+ eval_addressing genv sp (Ainstack ofs) nil = Some (Val.offset_ptr sp ofs).
+Proof.
+ intros. reflexivity.
+Qed.
+
+Remark eval_addressing_Ainstack_inv:
+ forall (F V: Type) (genv: Genv.t F V) sp ofs vl v,
+ eval_addressing genv sp (Ainstack ofs) vl = Some v -> vl = nil /\ v = Val.offset_ptr sp ofs.
+Proof.
+ unfold eval_addressing; intros; destruct vl; inv H; auto.
+Qed.
+
+Ltac FuncInv :=
+ match goal with
+ | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
+ destruct x; cbn in H; FuncInv
+ | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
+ destruct v; cbn in H; FuncInv
+ | H: (if Archi.ptr64 then _ else _) = Some _ |- _ =>
+ destruct Archi.ptr64 eqn:?; FuncInv
+ | H: (Some _ = Some _) |- _ =>
+ injection H; intros; clear H; FuncInv
+ | H: (None = Some _) |- _ =>
+ discriminate H
+ | _ =>
+ idtac
+ end.
+
+(** * Static typing of conditions, operators and addressing modes. *)
+
+Definition type_of_condition (c: condition) : list typ :=
+ match c with
+ | Ccomp _ => Tint :: Tint :: nil
+ | Ccompu _ => Tint :: Tint :: nil
+ | Ccompimm _ _ => Tint :: nil
+ | Ccompuimm _ _ => Tint :: nil
+ | Ccompl _ => Tlong :: Tlong :: nil
+ | Ccomplu _ => Tlong :: Tlong :: nil
+ | Ccomplimm _ _ => Tlong :: nil
+ | Ccompluimm _ _ => Tlong :: nil
+ | Ccompf _ => Tfloat :: Tfloat :: nil
+ | Cnotcompf _ => Tfloat :: Tfloat :: nil
+ | Ccompfs _ => Tsingle :: Tsingle :: nil
+ | Cnotcompfs _ => Tsingle :: Tsingle :: nil
+ end.
+
+Definition type_of_operation (op: operation) : list typ * typ :=
+ match op with
+ | Omove => (nil, Tint) (* treated specially *)
+ | Ointconst _ => (nil, Tint)
+ | Olongconst _ => (nil, Tlong)
+ | Ofloatconst f => (nil, Tfloat)
+ | Osingleconst f => (nil, Tsingle)
+ | Oaddrsymbol _ _ => (nil, Tptr)
+ | Oaddrstack _ => (nil, Tptr)
+ | Ocast8signed => (Tint :: nil, Tint)
+ | Ocast16signed => (Tint :: nil, Tint)
+ | Oadd => (Tint :: Tint :: nil, Tint)
+ | Oaddimm _ => (Tint :: nil, Tint)
+ | Oaddx _ => (Tint :: Tint :: nil, Tint)
+ | Oaddximm _ _ => (Tint :: nil, Tint)
+ | Oneg => (Tint :: nil, Tint)
+ | Osub => (Tint :: Tint :: nil, Tint)
+ | Orevsubimm _ => (Tint :: nil, Tint)
+ | Orevsubx _ => (Tint :: Tint :: nil, Tint)
+ | Orevsubximm _ _ => (Tint :: nil, Tint)
+ | Omul => (Tint :: Tint :: nil, Tint)
+ | Omulimm _ => (Tint :: nil, Tint)
+ | Omulhs => (Tint :: Tint :: nil, Tint)
+ | Omulhu => (Tint :: Tint :: nil, Tint)
+ | Odiv => (Tint :: Tint :: nil, Tint)
+ | Odivu => (Tint :: Tint :: nil, Tint)
+ | Omod => (Tint :: Tint :: nil, Tint)
+ | Omodu => (Tint :: Tint :: nil, Tint)
+ | Oand => (Tint :: Tint :: nil, Tint)
+ | Oandimm _ => (Tint :: nil, Tint)
+ | Onand => (Tint :: Tint :: nil, Tint)
+ | Onandimm _ => (Tint :: nil, Tint)
+ | Oor => (Tint :: Tint :: nil, Tint)
+ | Oorimm _ => (Tint :: nil, Tint)
+ | Onor => (Tint :: Tint :: nil, Tint)
+ | Onorimm _ => (Tint :: nil, Tint)
+ | Oxor => (Tint :: Tint :: nil, Tint)
+ | Oxorimm _ => (Tint :: nil, Tint)
+ | Onxor => (Tint :: Tint :: nil, Tint)
+ | Onxorimm _ => (Tint :: nil, Tint)
+ | Onot => (Tint :: nil, Tint)
+ | Oandn => (Tint :: Tint :: nil, Tint)
+ | Oandnimm _ => (Tint :: nil, Tint)
+ | Oorn => (Tint :: Tint :: nil, Tint)
+ | Oornimm _ => (Tint :: nil, Tint)
+ | Oshl => (Tint :: Tint :: nil, Tint)
+ | Oshlimm _ => (Tint :: nil, Tint)
+ | Oshr => (Tint :: Tint :: nil, Tint)
+ | Oshrimm _ => (Tint :: nil, Tint)
+ | Oshru => (Tint :: Tint :: nil, Tint)
+ | Oshruimm _ => (Tint :: nil, Tint)
+ | Oshrximm _ => (Tint :: nil, Tint)
+ | Ororimm _ => (Tint :: nil, Tint)
+ | Omadd => (Tint :: Tint :: Tint :: nil, Tint)
+ | Omaddimm _ => (Tint :: Tint :: nil, Tint)
+ | Omsub => (Tint :: Tint :: Tint :: nil, Tint)
+
+ | Omakelong => (Tint :: Tint :: nil, Tlong)
+ | Olowlong => (Tlong :: nil, Tint)
+ | Ohighlong => (Tlong :: nil, Tint)
+ | Ocast32signed => (Tint :: nil, Tlong)
+ | Ocast32unsigned => (Tint :: nil, Tlong)
+ | Oaddl => (Tlong :: Tlong :: nil, Tlong)
+ | Oaddlimm _ => (Tlong :: nil, Tlong)
+ | Oaddxl _ => (Tlong :: Tlong :: nil, Tlong)
+ | Oaddxlimm _ _ => (Tlong :: nil, Tlong)
+ | Orevsublimm _ => (Tlong :: nil, Tlong)
+ | Orevsubxl _ => (Tlong :: Tlong :: nil, Tlong)
+ | Orevsubxlimm _ _ => (Tlong :: nil, Tlong)
+ | Onegl => (Tlong :: nil, Tlong)
+ | Osubl => (Tlong :: Tlong :: nil, Tlong)
+ | Omull => (Tlong :: Tlong :: nil, Tlong)
+ | Omullimm _ => (Tlong :: nil, Tlong)
+ | Omullhs => (Tlong :: Tlong :: nil, Tlong)
+ | Omullhu => (Tlong :: Tlong :: nil, Tlong)
+ | Odivl => (Tlong :: Tlong :: nil, Tlong)
+ | Odivlu => (Tlong :: Tlong :: nil, Tlong)
+ | Omodl => (Tlong :: Tlong :: nil, Tlong)
+ | Omodlu => (Tlong :: Tlong :: nil, Tlong)
+ | Oandl => (Tlong :: Tlong :: nil, Tlong)
+ | Oandlimm _ => (Tlong :: nil, Tlong)
+ | Onandl => (Tlong :: Tlong :: nil, Tlong)
+ | Onandlimm _ => (Tlong :: nil, Tlong)
+ | Oorl => (Tlong :: Tlong :: nil, Tlong)
+ | Oorlimm _ => (Tlong :: nil, Tlong)
+ | Onorl => (Tlong :: Tlong :: nil, Tlong)
+ | Onorlimm _ => (Tlong :: nil, Tlong)
+ | Oxorl => (Tlong :: Tlong :: nil, Tlong)
+ | Oxorlimm _ => (Tlong :: nil, Tlong)
+ | Onxorl => (Tlong :: Tlong :: nil, Tlong)
+ | Onxorlimm _ => (Tlong :: nil, Tlong)
+ | Onotl => (Tlong :: nil, Tlong)
+ | Oandnl => (Tlong :: Tlong :: nil, Tlong)
+ | Oandnlimm _ => (Tlong :: nil, Tlong)
+ | Oornl => (Tlong :: Tlong :: nil, Tlong)
+ | Oornlimm _ => (Tlong :: nil, Tlong)
+ | Oshll => (Tlong :: Tint :: nil, Tlong)
+ | Oshllimm _ => (Tlong :: nil, Tlong)
+ | Oshrl => (Tlong :: Tint :: nil, Tlong)
+ | Oshrlimm _ => (Tlong :: nil, Tlong)
+ | Oshrlu => (Tlong :: Tint :: nil, Tlong)
+ | Oshrluimm _ => (Tlong :: nil, Tlong)
+ | Oshrxlimm _ => (Tlong :: nil, Tlong)
+ | Omaddl => (Tlong :: Tlong :: Tlong :: nil, Tlong)
+ | Omaddlimm _ => (Tlong :: Tlong :: nil, Tlong)
+ | Omsubl => (Tlong :: Tlong :: Tlong :: nil, Tlong)
+
+ | Onegf => (Tfloat :: nil, Tfloat)
+ | Oabsf => (Tfloat :: nil, Tfloat)
+ | Oaddf
+ | Osubf
+ | Omulf
+ | Odivf
+ | Ominf
+ | Omaxf => (Tfloat :: Tfloat :: nil, Tfloat)
+ | Ofmaddf | Ofmsubf => (Tfloat :: Tfloat :: Tfloat :: nil, Tfloat)
+
+ | Onegfs => (Tsingle :: nil, Tsingle)
+ | Oabsfs => (Tsingle :: nil, Tsingle)
+ | Oaddfs
+ | Osubfs
+ | Omulfs
+ | Odivfs
+ | Ominfs
+ | Omaxfs => (Tsingle :: Tsingle :: nil, Tsingle)
+ | Oinvfs => (Tsingle :: nil, Tsingle)
+ | Ofmaddfs | Ofmsubfs => (Tsingle :: Tsingle :: Tsingle :: nil, Tsingle)
+
+ | Osingleoffloat => (Tfloat :: nil, Tsingle)
+ | Ofloatofsingle => (Tsingle :: nil, Tfloat)
+ | Ointoffloat => (Tfloat :: nil, Tint)
+ | Ointuoffloat => (Tfloat :: nil, Tint)
+ | Ointofsingle => (Tsingle :: nil, Tint)
+ | Ointuofsingle => (Tsingle :: nil, Tint)
+ | Osingleofint => (Tint :: nil, Tsingle)
+ | Osingleofintu => (Tint :: nil, Tsingle)
+ | Olongoffloat => (Tfloat :: nil, Tlong)
+ | Olonguoffloat => (Tfloat :: nil, Tlong)
+ | Ofloatoflong => (Tlong :: nil, Tfloat)
+ | Ofloatoflongu => (Tlong :: nil, Tfloat)
+ | Olongofsingle => (Tsingle :: nil, Tlong)
+ | Olonguofsingle => (Tsingle :: nil, Tlong)
+ | Osingleoflong => (Tlong :: nil, Tsingle)
+ | Osingleoflongu => (Tlong :: nil, Tsingle)
+ | Ocmp c => (type_of_condition c, Tint)
+ | Oextfz _ _ | Oextfs _ _ => (Tint :: nil, Tint)
+ | Oextfzl _ _ | Oextfsl _ _ => (Tlong :: nil, Tlong)
+ | Oinsf _ _ => (Tint :: Tint :: nil, Tint)
+ | Oinsfl _ _ => (Tlong :: Tlong :: nil, Tlong)
+ | Osel c ty => (ty :: ty :: arg_type_of_condition0 c :: nil, ty)
+ | Oselimm c ty => (Tint :: arg_type_of_condition0 c :: nil, Tint)
+ | Osellimm c ty => (Tlong :: arg_type_of_condition0 c :: nil, Tlong)
+ end.
+
+(* FIXME: two Tptr ?! *)
+Definition type_of_addressing (addr: addressing) : list typ :=
+ match addr with
+ | Aindexed2XS _ => Tptr :: Tptr :: nil
+ | Aindexed2 => Tptr :: Tptr :: nil
+ | Aindexed _ => Tptr :: nil
+ | Aglobal _ _ => nil
+ | Ainstack _ => nil
+ end.
+
+(** Weak type soundness results for [eval_operation]:
+ the result values, when defined, are always of the type predicted
+ by [type_of_operation]. *)
+
+Section SOUNDNESS.
+
+Variable A V: Type.
+Variable genv: Genv.t A V.
+
+Remark type_add:
+ forall v1 v2, Val.has_type (Val.add v1 v2) Tint.
+Proof.
+ intros. unfold Val.has_type, Val.add. destruct Archi.ptr64, v1, v2; auto.
+Qed.
+
+Remark type_addl:
+ forall v1 v2, Val.has_type (Val.addl v1 v2) Tlong.
+Proof.
+ intros. unfold Val.has_type, Val.addl. destruct Archi.ptr64, v1, v2; auto.
+Qed.
+
+Remark type_sub:
+ forall v1 v2, Val.has_type (Val.sub v1 v2) Tint.
+Proof.
+ intros. unfold Val.has_type, Val.sub. destruct Archi.ptr64, v1, v2; cbn; auto.
+ destruct (eq_block _ _); auto.
+Qed.
+
+Remark type_subl:
+ forall v1 v2, Val.has_type (Val.subl v1 v2) Tlong.
+Proof.
+ intros. unfold Val.has_type, Val.subl. destruct Archi.ptr64, v1, v2; cbn; auto.
+ destruct (eq_block _ _); auto.
+Qed.
+
+Remark type_shl:
+ forall v1 v2, Val.has_type (Val.shl v1 v2) Tint.
+Proof.
+ destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial.
+Qed.
+
+Remark type_shll:
+ forall v1 v2, Val.has_type (Val.shll v1 v2) Tlong.
+Proof.
+ destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial.
+Qed.
+
+Lemma type_of_operation_sound:
+ forall op vl sp v m,
+ op <> Omove ->
+ eval_operation genv sp op vl m = Some v ->
+ Val.has_type v (snd (type_of_operation op)).
+Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
+ intros.
+ destruct op; cbn; cbn in H0; FuncInv; subst; cbn.
+ (* move *)
+ - congruence.
+ (* intconst, longconst, floatconst, singleconst *)
+ - exact I.
+ - exact I.
+ - exact I.
+ - exact I.
+ (* addrsymbol *)
+ - unfold Genv.symbol_address. destruct (Genv.find_symbol genv id)...
+ (* addrstack *)
+ - destruct sp...
+ (* castsigned *)
+ - destruct v0...
+ - destruct v0...
+ (* add, addimm *)
+ - apply type_add.
+ - apply type_add.
+ (* addx, addximm *)
+ - apply type_add.
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ (* neg, sub *)
+ - destruct v0...
+ - apply type_sub.
+ (* revsubimm, revsubx, revsubximm *)
+ - destruct v0...
+ - apply type_sub.
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ (* mul, mulimm, mulhs, mulhu *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* div, divu *)
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (_ || _); inv H2...
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (Int.eq i0 Int.zero); inv H2...
+ (* mod, modu *)
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (_ || _); inv H2...
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (Int.eq i0 Int.zero); inv H2...
+ (* and, andimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* nand, nandimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* or, orimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* nor, norimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* xor, xorimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* nxor, nxorimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* not *)
+ - destruct v0...
+ (* andn, andnimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* orn, ornimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* shl, shlimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+ - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
+ (* shr, shrimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+ - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
+ (* shru, shruimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)...
+ - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)...
+ (* shrx *)
+ - destruct v0; cbn... destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
+ (* shrimm *)
+ - destruct v0; cbn...
+ (* madd *)
+ - apply type_add.
+ - apply type_add.
+ (* msub *)
+ - apply type_sub.
+ (* makelong, lowlong, highlong *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ - destruct v0...
+ (* cast32 *)
+ - destruct v0...
+ - destruct v0...
+ (* addl, addlimm *)
+ - apply type_addl.
+ - apply type_addl.
+ (* addxl addxlimm *)
+ - apply type_addl.
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ (* negl, subl *)
+ - destruct v0...
+ - apply type_subl.
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ - destruct v0...
+ - apply type_subl.
+ (* mull, mullhs, mullhu *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* divl, divlu *)
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (_ || _); inv H2...
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (Int64.eq i0 Int64.zero); inv H2...
+ (* modl, modlu *)
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (_ || _); inv H2...
+ - destruct v0; destruct v1; cbn in *; inv H0.
+ destruct (Int64.eq i0 Int64.zero); inv H2...
+ (* andl, andlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* nandl, nandlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* orl, orlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* norl, norlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* xorl, xorlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* nxorl, nxorlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* notl *)
+ - destruct v0...
+ (* andnl, andnlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* ornl, ornlimm *)
+ - destruct v0; destruct v1...
+ - destruct v0...
+ (* shll, shllimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+ - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
+ (* shr, shrimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+ - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
+ (* shru, shruimm *)
+ - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')...
+ - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')...
+ (* shrxl *)
+ - destruct v0; cbn... destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
+ (* maddl, maddlim *)
+ - apply type_addl.
+ - apply type_addl.
+ (* msubl *)
+ - apply type_subl.
+ (* negf, absf *)
+ - destruct v0...
+ - destruct v0...
+ (* addf, subf *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* mulf, divf *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* minf, maxf *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* fmaddf, fmsubf *)
+ - destruct v0; destruct v1; destruct v2...
+ - destruct v0; destruct v1; destruct v2...
+ (* negfs, absfs *)
+ - destruct v0...
+ - destruct v0...
+ (* addfs, subfs *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* mulfs, divfs *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* minfs, maxfs *)
+ - destruct v0; destruct v1...
+ - destruct v0; destruct v1...
+ (* invfs *)
+ - destruct v0...
+ (* fmaddfs, fmsubfs *)
+ - destruct v0; destruct v1; destruct v2...
+ - destruct v0; destruct v1; destruct v2...
+ (* singleoffloat, floatofsingle *)
+ - destruct v0...
+ - destruct v0...
+ (* intoffloat, intuoffloat *)
+ - destruct v0; cbn... destruct (Float.to_int f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float.to_intu f); cbn; trivial.
+ (* intofsingle, intuofsingle *)
+ - destruct v0; cbn... destruct (Float32.to_int f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float32.to_intu f); cbn; trivial.
+ (* singleofint, singleofintu *)
+ - destruct v0; cbn...
+ - destruct v0; cbn...
+ (* longoffloat, longuoffloat *)
+ - destruct v0; cbn... destruct (Float.to_long f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float.to_longu f); cbn; trivial.
+ (* floatoflong, floatoflongu *)
+ - destruct v0; cbn...
+ - destruct v0; cbn...
+ (* longofsingle, longuofsingle *)
+ - destruct v0; cbn... destruct (Float32.to_long f); cbn; trivial.
+ - destruct v0; cbn... destruct (Float32.to_longu f); cbn; trivial.
+ (* singleoflong, singleoflongu *)
+ - destruct v0; cbn...
+ - destruct v0; cbn...
+ (* cmp *)
+ - destruct (eval_condition cond vl m)... destruct b...
+ (* extfz *)
+ - unfold extfz.
+ destruct (is_bitfield _ _).
+ + destruct v0; cbn; trivial.
+ + constructor.
+ (* extfs *)
+ - unfold extfs.
+ destruct (is_bitfield _ _).
+ + destruct v0; cbn; trivial.
+ + constructor.
+ (* extfzl *)
+ - unfold extfzl.
+ destruct (is_bitfieldl _ _).
+ + destruct v0; cbn; trivial.
+ + constructor.
+ (* extfsl *)
+ - unfold extfsl.
+ destruct (is_bitfieldl _ _).
+ + destruct v0; cbn; trivial.
+ + constructor.
+ (* insf *)
+ - unfold insf, bitfield_mask.
+ destruct (is_bitfield _ _).
+ + destruct v0; destruct v1; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ + constructor.
+ (* insf *)
+ - unfold insfl, bitfield_mask.
+ destruct (is_bitfieldl _ _).
+ + destruct v0; destruct v1; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ + constructor.
+ (* Osel *)
+ - unfold Val.select. destruct (eval_condition0 _ _ m).
+ + apply Val.normalize_type.
+ + constructor.
+ (* Oselimm *)
+ - unfold Val.select. destruct (eval_condition0 _ _ m).
+ + apply Val.normalize_type.
+ + constructor.
+ (* Osellimm *)
+ - unfold Val.select. destruct (eval_condition0 _ _ m).
+ + apply Val.normalize_type.
+ + constructor.
+Qed.
+
+Definition is_trapping_op (op : operation) :=
+ match op with
+ | Odiv | Odivl | Odivu | Odivlu
+ | Omod | Omodl | Omodu | Omodlu => true
+ | _ => false
+ end.
+
+Definition args_of_operation op :=
+ if eq_operation op Omove
+ then 1%nat
+ else List.length (fst (type_of_operation op)).
+
+Lemma is_trapping_op_sound:
+ forall op vl sp m,
+ is_trapping_op op = false ->
+ (List.length vl) = args_of_operation op ->
+ eval_operation genv sp op vl m <> None.
+Proof.
+ unfold args_of_operation.
+ destruct op; destruct eq_operation; intros; cbn in *; try congruence.
+ all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+ all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+ all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+ all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
+End SOUNDNESS.
+
+(** * Manipulating and transforming operations *)
+
+(** Recognition of move operations. *)
+
+Definition is_move_operation
+ (A: Type) (op: operation) (args: list A) : option A :=
+ match op, args with
+ | Omove, arg :: nil => Some arg
+ | _, _ => None
+ end.
+
+Lemma is_move_operation_correct:
+ forall (A: Type) (op: operation) (args: list A) (a: A),
+ is_move_operation op args = Some a ->
+ op = Omove /\ args = a :: nil.
+Proof.
+ intros until a. unfold is_move_operation; destruct op;
+ try (intros; discriminate).
+ destruct args. intros; discriminate.
+ destruct args. intros. intuition congruence.
+ intros; discriminate.
+Qed.
+
+(** [negate_condition cond] returns a condition that is logically
+ equivalent to the negation of [cond]. *)
+
+Definition negate_condition (cond: condition): condition :=
+ match cond with
+ | Ccomp c => Ccomp(negate_comparison c)
+ | Ccompu c => Ccompu(negate_comparison c)
+ | Ccompimm c n => Ccompimm (negate_comparison c) n
+ | Ccompuimm c n => Ccompuimm (negate_comparison c) n
+ | Ccompl c => Ccompl(negate_comparison c)
+ | Ccomplu c => Ccomplu(negate_comparison c)
+ | Ccomplimm c n => Ccomplimm (negate_comparison c) n
+ | Ccompluimm c n => Ccompluimm (negate_comparison c) n
+ | Ccompf c => Cnotcompf c
+ | Cnotcompf c => Ccompf c
+ | Ccompfs c => Cnotcompfs c
+ | Cnotcompfs c => Ccompfs c
+ end.
+
+Lemma eval_negate_condition:
+ forall cond vl m,
+ eval_condition (negate_condition cond) vl m = option_map negb (eval_condition cond vl m).
+Proof.
+ intros. destruct cond; cbn.
+ repeat (destruct vl; auto). apply Val.negate_cmp_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmp_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmpl_bool.
+ repeat (destruct vl; auto). apply Val.negate_cmplu_bool.
+ repeat (destruct vl; auto).
+ repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
+ repeat (destruct vl; auto).
+ repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
+Qed.
+
+(** Shifting stack-relative references. This is used in [Stacking]. *)
+
+Definition shift_stack_addressing (delta: Z) (addr: addressing) :=
+ match addr with
+ | Ainstack ofs => Ainstack (Ptrofs.add ofs (Ptrofs.repr delta))
+ | _ => addr
+ end.
+
+Definition shift_stack_operation (delta: Z) (op: operation) :=
+ match op with
+ | Oaddrstack ofs => Oaddrstack (Ptrofs.add ofs (Ptrofs.repr delta))
+ | _ => op
+ end.
+
+Lemma type_shift_stack_addressing:
+ forall delta addr, type_of_addressing (shift_stack_addressing delta addr) = type_of_addressing addr.
+Proof.
+ intros. destruct addr; auto.
+Qed.
+
+Lemma type_shift_stack_operation:
+ forall delta op, type_of_operation (shift_stack_operation delta op) = type_of_operation op.
+Proof.
+ intros. destruct op; auto.
+Qed.
+
+Lemma eval_shift_stack_addressing:
+ forall F V (ge: Genv.t F V) sp addr vl delta,
+ eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl =
+ eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl.
+Proof.
+ intros. destruct addr; cbn; auto. destruct vl; auto.
+ rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
+Qed.
+
+Lemma eval_shift_stack_operation:
+ forall F V (ge: Genv.t F V) sp op vl m delta,
+ eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m =
+ eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m.
+Proof.
+ intros. destruct op; cbn; auto. destruct vl; auto.
+ rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto.
+Qed.
+
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+ it designates the pointer [delta] bytes past the pointer designated
+ by [addr]. May be undefined, in which case [None] is returned. *)
+
+Definition offset_addressing (addr: addressing) (delta: Z) : option addressing :=
+ match addr with
+ | Aindexed2 | Aindexed2XS _ => None
+ | Aindexed n => Some(Aindexed (Ptrofs.add n (Ptrofs.repr delta)))
+ | Aglobal id n => Some(Aglobal id (Ptrofs.add n (Ptrofs.repr delta)))
+ | Ainstack n => Some(Ainstack (Ptrofs.add n (Ptrofs.repr delta)))
+ end.
+
+Lemma eval_offset_addressing:
+ forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+ offset_addressing addr delta = Some addr' ->
+ eval_addressing ge sp addr args = Some v ->
+ Archi.ptr64 = false ->
+ eval_addressing ge sp addr' args = Some(Val.add v (Vint (Int.repr delta))).
+Proof.
+ intros.
+ assert (A: forall x n,
+ Val.offset_ptr x (Ptrofs.add n (Ptrofs.repr delta)) =
+ Val.add (Val.offset_ptr x n) (Vint (Int.repr delta))).
+ { intros; destruct x; cbn; auto. rewrite H1.
+ rewrite Ptrofs.add_assoc. f_equal; f_equal; f_equal. symmetry; auto with ptrofs. }
+ destruct addr; cbn in H; inv H; cbn in *; FuncInv; subst.
+- rewrite A; auto.
+- unfold Genv.symbol_address. destruct (Genv.find_symbol ge i); auto.
+ cbn. rewrite H1. f_equal; f_equal; f_equal. symmetry; auto with ptrofs.
+- rewrite A; auto.
+Qed.
+
+(** Operations that are so cheap to recompute that CSE should not factor them out. *)
+
+Definition is_trivial_op (op: operation) : bool :=
+ match op with
+ | Omove => true
+ | Ointconst n => Int.eq (Int.sign_ext 12 n) n
+ | Olongconst n => Int64.eq (Int64.sign_ext 12 n) n
+ | Oaddrstack _ => true
+ | _ => false
+ end.
+
+(** Operations that depend on the memory state. *)
+
+Definition cond_depends_on_memory (c: condition) : bool :=
+ match c with
+ | Ccompu _ | Ccompuimm _ _ => negb Archi.ptr64
+ | Ccomplu _ | Ccompluimm _ _ => Archi.ptr64
+ | _ => false
+ end.
+
+Lemma cond_depends_on_memory_correct:
+ forall c args m1 m2,
+ cond_depends_on_memory c = false ->
+ eval_condition c args m1 = eval_condition c args m2.
+Proof.
+ intros; destruct c; cbn; discriminate || reflexivity.
+Qed.
+
+Definition op_depends_on_memory (op: operation) : bool :=
+ match op with
+ | Ocmp (Ccompu _ | Ccompuimm _ _) => negb Archi.ptr64
+ | Ocmp (Ccomplu _ | Ccompluimm _ _) => Archi.ptr64
+
+ | Osel (Ccompu0 _) _ | Oselimm (Ccompu0 _) _ | Osellimm (Ccompu0 _) _ => negb Archi.ptr64
+ | Osel (Ccomplu0 _) _ | Oselimm (Ccomplu0 _) _ | Osellimm (Ccomplu0 _) _ => Archi.ptr64
+
+ | _ => false
+ end.
+
+Lemma op_depends_on_memory_correct:
+ forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+ op_depends_on_memory op = false ->
+ eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+ intros until m2. destruct op; cbn; try congruence.
+ - destruct cond; cbn; try congruence;
+ intros SF; auto; rewrite ? negb_false_iff in SF;
+ unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+ - destruct c0; cbn; try congruence;
+ intros SF; auto; rewrite ? negb_false_iff in SF;
+ unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+ - destruct c0; cbn; try congruence;
+ intros SF; auto; rewrite ? negb_false_iff in SF;
+ unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+ - destruct c0; cbn; try congruence;
+ intros SF; auto; rewrite ? negb_false_iff in SF;
+ unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity.
+Qed.
+
+Lemma cond_valid_pointer_eq:
+ forall cond args m1 m2,
+ (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+ eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+ intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+ all: repeat (destruct args; simpl; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+ forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+ (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+ eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+ intros until m2. destruct op; cbn; try congruence.
+ - intros MEM; destruct cond; cbn; try congruence;
+ repeat (destruct args; cbn; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+ - intros MEM; destruct c0; cbn; try congruence;
+ repeat (destruct args; cbn; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+ - intros MEM; destruct c0; cbn; try congruence;
+ repeat (destruct args; cbn; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+ - intros MEM; destruct c0; cbn; try congruence;
+ repeat (destruct args; cbn; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+(** Global variables mentioned in an operation or addressing mode *)
+
+Definition globals_addressing (addr: addressing) : list ident :=
+ match addr with
+ | Aglobal s ofs => s :: nil
+ | _ => nil
+ end.
+
+Definition globals_operation (op: operation) : list ident :=
+ match op with
+ | Oaddrsymbol s ofs => s :: nil
+ | _ => nil
+ end.
+
+(** * Invariance and compatibility properties. *)
+
+(** [eval_operation] and [eval_addressing] depend on a global environment
+ for resolving references to global symbols. We show that they give
+ the same results if a global environment is replaced by another that
+ assigns the same addresses to the same symbols. *)
+
+Section GENV_TRANSF.
+
+Variable F1 F2 V1 V2: Type.
+Variable ge1: Genv.t F1 V1.
+Variable ge2: Genv.t F2 V2.
+Hypothesis agree_on_symbols:
+ forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s.
+
+Lemma eval_addressing_preserved:
+ forall sp addr vl,
+ eval_addressing ge2 sp addr vl = eval_addressing ge1 sp addr vl.
+Proof.
+ intros.
+ unfold eval_addressing; destruct addr; auto. destruct vl; auto.
+ unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
+Qed.
+
+Lemma eval_operation_preserved:
+ forall sp op vl m,
+ eval_operation ge2 sp op vl m = eval_operation ge1 sp op vl m.
+Proof.
+ intros.
+ unfold eval_operation; destruct op; auto. destruct vl; auto.
+ unfold Genv.symbol_address. rewrite agree_on_symbols; auto.
+Qed.
+
+End GENV_TRANSF.
+
+(** Compatibility of the evaluation functions with value injections. *)
+
+Section EVAL_COMPAT.
+
+Variable F1 F2 V1 V2: Type.
+Variable ge1: Genv.t F1 V1.
+Variable ge2: Genv.t F2 V2.
+Variable f: meminj.
+
+Variable m1: mem.
+Variable m2: mem.
+
+Hypothesis valid_pointer_inj:
+ forall b1 ofs b2 delta,
+ f b1 = Some(b2, delta) ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_inj:
+ forall b1 ofs b2 delta,
+ f b1 = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+
+Hypothesis weak_valid_pointer_no_overflow:
+ forall b1 ofs b2 delta,
+ f b1 = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned.
+
+Hypothesis valid_different_pointers_inj:
+ forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+ b1 <> b2 ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true ->
+ Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true ->
+ f b1 = Some (b1', delta1) ->
+ f b2 = Some (b2', delta2) ->
+ b1' <> b2' \/
+ Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned (Ptrofs.add ofs2 (Ptrofs.repr delta2)).
+
+Ltac InvInject :=
+ match goal with
+ | [ H: Val.inject _ (Vint _) _ |- _ ] =>
+ inv H; InvInject
+ | [ H: Val.inject _ (Vfloat _) _ |- _ ] =>
+ inv H; InvInject
+ | [ H: Val.inject _ (Vptr _ _) _ |- _ ] =>
+ inv H; InvInject
+ | [ H: Val.inject_list _ nil _ |- _ ] =>
+ inv H; InvInject
+ | [ H: Val.inject_list _ (_ :: _) _ |- _ ] =>
+ inv H; InvInject
+ | _ => idtac
+ end.
+
+Lemma eval_condition_inj:
+ forall cond vl1 vl2 b,
+ Val.inject_list f vl1 vl2 ->
+ eval_condition cond vl1 m1 = Some b ->
+ eval_condition cond vl2 m2 = Some b.
+Proof.
+ intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; cbn in H0; inv H0; auto.
+- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; cbn in H0; inv H0; auto.
+- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+- inv H3; inv H2; cbn in H0; inv H0; auto.
+Qed.
+
+Lemma eval_condition0_inj:
+ forall cond v1 v2 b,
+ Val.inject f v1 v2 ->
+ eval_condition0 cond v1 m1 = Some b ->
+ eval_condition0 cond v2 m2 = Some b.
+Proof.
+ intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto.
+ - inv H; cbn in *; congruence.
+ - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies.
+ - inv H; cbn in *; congruence.
+ - eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies.
+Qed.
+
+Ltac TrivialExists :=
+ match goal with
+ | [ |- exists v2, Some ?v1 = Some v2 /\ Val.inject _ _ v2 ] =>
+ exists v1; split; auto
+ | _ => idtac
+ end.
+
+Lemma eval_operation_inj:
+ forall op sp1 vl1 sp2 vl2 v1,
+ (forall id ofs,
+ In id (globals_operation op) ->
+ Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+ Val.inject f sp1 sp2 ->
+ Val.inject_list f vl1 vl2 ->
+ eval_operation ge1 sp1 op vl1 m1 = Some v1 ->
+ exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2.
+Proof.
+ intros until v1; intros GL; intros. destruct op; cbn in H1; cbn; FuncInv; InvInject; TrivialExists.
+ (* addrsymbol *)
+ - apply GL; cbn; auto.
+ (* addrstack *)
+ - apply Val.offset_ptr_inject; auto.
+ (* castsigned *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* add, addimm *)
+ - apply Val.add_inject; auto.
+ - apply Val.add_inject; auto.
+ (* addx, addximm *)
+ - apply Val.add_inject; trivial.
+ inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ (* neg, sub *)
+ - inv H4; cbn; auto.
+ - apply Val.sub_inject; auto.
+ (* revsubimm, revsubx, revsubximm *)
+ - inv H4; cbn; trivial.
+ - apply Val.sub_inject; trivial.
+ inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto.
+ - inv H4; cbn; try destruct (Int.ltu _ _); cbn; auto.
+ (* mul, mulimm, mulhs, mulhu *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* div, divu *)
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (_ || _); inv H2.
+ TrivialExists.
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+ (* mod, modu *)
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (_ || _); inv H2.
+ TrivialExists.
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+ (* and, andimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* nand, nandimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* or, orimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* nor, norimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* xor, xorimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* nxor, nxorimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* not *)
+ - inv H4; cbn; auto.
+ (* andn, andnimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* orn, ornimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* shl, shlimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
+ (* shr, shrimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
+ (* shru, shruimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto.
+ (* shrx *)
+ - inv H4; cbn; auto.
+ destruct (Int.ltu n (Int.repr 31)); inv H; cbn; auto.
+ (* rorimm *)
+ - inv H4; cbn; auto.
+ (* madd, maddim *)
+ - inv H2; inv H3; inv H4; cbn; auto.
+ - inv H2; inv H4; cbn; auto.
+ (* msub *)
+ - apply Val.sub_inject; auto.
+ inv H3; inv H2; cbn; auto.
+ (* makelong, highlong, lowlong *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* cast32 *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* addl, addlimm *)
+ - apply Val.addl_inject; auto.
+ - apply Val.addl_inject; auto.
+ (* addxl, addxlimm *)
+ - apply Val.addl_inject; auto.
+ inv H4; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu _ _); cbn; trivial.
+ (* negl, subl *)
+ - inv H4; cbn; auto.
+ - apply Val.subl_inject; auto.
+ inv H4; inv H2; cbn; trivial;
+ destruct (Int.ltu _ _); cbn; trivial.
+ - inv H4; cbn; trivial;
+ destruct (Int.ltu _ _); cbn; trivial.
+ - inv H4; cbn; auto.
+ - apply Val.subl_inject; auto.
+ (* mull, mullhs, mullhu *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* divl, divlu *)
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (_ || _); inv H2.
+ TrivialExists.
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+ (* modl, modlu *)
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (_ || _); inv H2.
+ TrivialExists.
+ - inv H4; inv H3; cbn in H1; inv H1. cbn.
+ destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+ (* andl, andlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* nandl, nandlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* orl, orlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* norl, norlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* xorl, xorlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* nxorl, nxorlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* notl *)
+ - inv H4; cbn; auto.
+ (* andnl, andnlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* ornl, ornlimm *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; cbn; auto.
+ (* shll, shllimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+ (* shr, shrimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+ (* shru, shruimm *)
+ - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
+ - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto.
+ (* shrx *)
+ - inv H4; cbn; auto.
+ destruct (Int.ltu n (Int.repr 63)); cbn; auto.
+
+ (* maddl, maddlimm *)
+ - apply Val.addl_inject; auto.
+ inv H2; inv H3; inv H4; cbn; auto.
+ - apply Val.addl_inject; auto.
+ inv H4; inv H2; cbn; auto.
+ (* msubl, msublimm *)
+ - apply Val.subl_inject; auto.
+ inv H2; inv H3; inv H4; cbn; auto.
+
+ (* negf, absf *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* addf, subf *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* mulf, divf *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* minf, maxf *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* fmaddf, fmsubf *)
+ - inv H4; inv H3; inv H2; cbn; auto.
+ - inv H4; inv H3; inv H2; cbn; auto.
+ (* negfs, absfs *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* addfs, subfs *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* mulfs, divfs *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* minfs, maxfs *)
+ - inv H4; inv H2; cbn; auto.
+ - inv H4; inv H2; cbn; auto.
+ (* invfs *)
+ - inv H4; cbn; auto.
+ (* fmaddfs, fmsubfs *)
+ - inv H4; inv H3; inv H2; cbn; auto.
+ - inv H4; inv H3; inv H2; cbn; auto.
+ (* singleoffloat, floatofsingle *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* intoffloat, intuoffloat *)
+ - inv H4; cbn; auto. destruct (Float.to_int f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float.to_intu f0); cbn; auto.
+ (* intofsingle, intuofsingle *)
+ - inv H4; cbn; auto. destruct (Float32.to_int f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float32.to_intu f0); cbn; auto.
+ (* singleofint, singleofintu *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* longoffloat, longuoffloat *)
+ - inv H4; cbn; auto. destruct (Float.to_long f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float.to_longu f0); cbn; auto.
+ (* floatoflong, floatoflongu *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* longofsingle, longuofsingle *)
+ - inv H4; cbn; auto. destruct (Float32.to_long f0); cbn; auto.
+ - inv H4; cbn; auto. destruct (Float32.to_longu f0); cbn; auto.
+ (* singleoflong, singleoflongu *)
+ - inv H4; cbn; auto.
+ - inv H4; cbn; auto.
+ (* cmp *)
+ - subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
+ exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
+ destruct b; cbn; constructor.
+ cbn; constructor.
+
+ (* extfz *)
+ - unfold extfz.
+ destruct (is_bitfield _ _).
+ + inv H4; trivial.
+ + trivial.
+
+ (* extfs *)
+ - unfold extfs.
+ destruct (is_bitfield _ _).
+ + inv H4; trivial.
+ + trivial.
+
+ (* extfzl *)
+ - unfold extfzl.
+ destruct (is_bitfieldl _ _).
+ + inv H4; trivial.
+ + trivial.
+
+ (* extfsl *)
+ - unfold extfsl.
+ destruct (is_bitfieldl _ _).
+ + inv H4; trivial.
+ + trivial.
+
+ (* insf *)
+ - unfold insf.
+ destruct (is_bitfield _ _).
+ + inv H4; inv H2; trivial.
+ cbn. destruct (Int.ltu _ _); trivial.
+ cbn. trivial.
+ + trivial.
+
+ (* insfl *)
+ - unfold insfl.
+ destruct (is_bitfieldl _ _).
+ + inv H4; inv H2; trivial.
+ cbn. destruct (Int.ltu _ _); trivial.
+ cbn. trivial.
+ + trivial.
+
+ (* Osel *)
+ - apply Val.select_inject; trivial.
+ destruct (eval_condition0 c0 v2 m1) eqn:Hcond.
+ + right.
+ symmetry.
+ eapply eval_condition0_inj; eassumption.
+ + left. trivial.
+
+ (* Oselimm *)
+ - apply Val.select_inject; trivial.
+ destruct (eval_condition0 _ _ _) eqn:Hcond.
+ + right.
+ symmetry.
+ eapply eval_condition0_inj; eassumption.
+ + left. trivial.
+
+ (* Osellimm *)
+ - apply Val.select_inject; trivial.
+ destruct (eval_condition0 _ _ _) eqn:Hcond.
+ + right.
+ symmetry.
+ eapply eval_condition0_inj; eassumption.
+ + left. trivial.
+Qed.
+
+Lemma eval_addressing_inj:
+ forall addr sp1 vl1 sp2 vl2 v1,
+ (forall id ofs,
+ In id (globals_addressing addr) ->
+ Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+ Val.inject f sp1 sp2 ->
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing ge1 sp1 addr vl1 = Some v1 ->
+ exists v2, eval_addressing ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2.
+Proof.
+ intros. destruct addr; cbn in H2; cbn; FuncInv; InvInject; TrivialExists.
+ - apply Val.addl_inject; trivial.
+ destruct v0; destruct v'0; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial; inv H3.
+ apply Val.inject_long.
+ - apply Val.addl_inject; auto.
+ - apply Val.offset_ptr_inject; auto.
+ - apply H; cbn; auto.
+ - apply Val.offset_ptr_inject; auto.
+Qed.
+
+Lemma eval_addressing_inj_none:
+ forall addr sp1 vl1 sp2 vl2,
+ (forall id ofs,
+ In id (globals_addressing addr) ->
+ Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+ Val.inject f sp1 sp2 ->
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing ge1 sp1 addr vl1 = None ->
+ eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+ intros until vl2. intros Hglobal Hinjsp Hinjvl.
+ destruct addr; cbn in *.
+ 1,2: inv Hinjvl; trivial;
+ inv H0; trivial;
+ inv H2; trivial;
+ discriminate.
+ 2,3: inv Hinjvl; trivial; discriminate.
+ inv Hinjvl; trivial; inv H0; trivial;
+ inv H; trivial; discriminate.
+Qed.
+
+End EVAL_COMPAT.
+
+(** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
+
+Section EVAL_LESSDEF.
+
+Variable F V: Type.
+Variable genv: Genv.t F V.
+
+Remark valid_pointer_extends:
+ forall m1 m2, Mem.extends m1 m2 ->
+ forall b1 ofs b2 delta,
+ Some(b1, 0) = Some(b2, delta) ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+ intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.valid_pointer_extends; eauto.
+Qed.
+
+Remark weak_valid_pointer_extends:
+ forall m1 m2, Mem.extends m1 m2 ->
+ forall b1 ofs b2 delta,
+ Some(b1, 0) = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true.
+Proof.
+ intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.weak_valid_pointer_extends; eauto.
+Qed.
+
+Remark weak_valid_pointer_no_overflow_extends:
+ forall m1 b1 ofs b2 delta,
+ Some(b1, 0) = Some(b2, delta) ->
+ Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true ->
+ 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned.
+Proof.
+ intros. inv H. rewrite Z.add_0_r. apply Ptrofs.unsigned_range_2.
+Qed.
+
+Remark valid_different_pointers_extends:
+ forall m1 b1 ofs1 b2 ofs2 b1' delta1 b2' delta2,
+ b1 <> b2 ->
+ Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true ->
+ Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true ->
+ Some(b1, 0) = Some (b1', delta1) ->
+ Some(b2, 0) = Some (b2', delta2) ->
+ b1' <> b2' \/
+ Ptrofs.unsigned(Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned(Ptrofs.add ofs2 (Ptrofs.repr delta2)).
+Proof.
+ intros. inv H2; inv H3. auto.
+Qed.
+
+Lemma eval_condition_lessdef:
+ forall cond vl1 vl2 b m1 m2,
+ Val.lessdef_list vl1 vl2 ->
+ Mem.extends m1 m2 ->
+ eval_condition cond vl1 m1 = Some b ->
+ eval_condition cond vl2 m2 = Some b.
+Proof.
+ intros. eapply eval_condition_inj with (f := fun b => Some(b, 0)) (m1 := m1).
+ apply valid_pointer_extends; auto.
+ apply weak_valid_pointer_extends; auto.
+ apply weak_valid_pointer_no_overflow_extends.
+ apply valid_different_pointers_extends; auto.
+ rewrite <- val_inject_list_lessdef. eauto. auto.
+Qed.
+
+Lemma eval_operation_lessdef:
+ forall sp op vl1 vl2 v1 m1 m2,
+ Val.lessdef_list vl1 vl2 ->
+ Mem.extends m1 m2 ->
+ eval_operation genv sp op vl1 m1 = Some v1 ->
+ exists v2, eval_operation genv sp op vl2 m2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+ intros. rewrite val_inject_list_lessdef in H.
+ assert (exists v2 : val,
+ eval_operation genv sp op vl2 m2 = Some v2
+ /\ Val.inject (fun b => Some(b, 0)) v1 v2).
+ eapply eval_operation_inj with (m1 := m1) (sp1 := sp).
+ apply valid_pointer_extends; auto.
+ apply weak_valid_pointer_extends; auto.
+ apply weak_valid_pointer_no_overflow_extends.
+ apply valid_different_pointers_extends; auto.
+ intros. apply val_inject_lessdef. auto.
+ apply val_inject_lessdef; auto.
+ eauto.
+ auto.
+ destruct H2 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
+Qed.
+
+Lemma eval_addressing_lessdef:
+ forall sp addr vl1 vl2 v1,
+ Val.lessdef_list vl1 vl2 ->
+ eval_addressing genv sp addr vl1 = Some v1 ->
+ exists v2, eval_addressing genv sp addr vl2 = Some v2 /\ Val.lessdef v1 v2.
+Proof.
+ intros. rewrite val_inject_list_lessdef in H.
+ assert (exists v2 : val,
+ eval_addressing genv sp addr vl2 = Some v2
+ /\ Val.inject (fun b => Some(b, 0)) v1 v2).
+ eapply eval_addressing_inj with (sp1 := sp).
+ intros. rewrite <- val_inject_lessdef; auto.
+ rewrite <- val_inject_lessdef; auto.
+ eauto. auto.
+ destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
+Qed.
+
+
+Lemma eval_addressing_lessdef_none:
+ forall sp addr vl1 vl2,
+ Val.lessdef_list vl1 vl2 ->
+ eval_addressing genv sp addr vl1 = None ->
+ eval_addressing genv sp addr vl2 = None.
+Proof.
+ intros until vl2. intros Hlessdef Heval1.
+ destruct addr; cbn in *.
+ 1, 2, 4, 5: inv Hlessdef; trivial;
+ inv H0; trivial;
+ inv H2; trivial;
+ discriminate.
+ inv Hlessdef; trivial.
+ inv H0; trivial.
+ discriminate.
+Qed.
+
+End EVAL_LESSDEF.
+
+(** Compatibility of the evaluation functions with memory injections. *)
+
+Section EVAL_INJECT.
+
+Variable F V: Type.
+Variable genv: Genv.t F V.
+Variable f: meminj.
+Hypothesis globals: meminj_preserves_globals genv f.
+Variable sp1: block.
+Variable sp2: block.
+Variable delta: Z.
+Hypothesis sp_inj: f sp1 = Some(sp2, delta).
+
+Remark symbol_address_inject:
+ forall id ofs, Val.inject f (Genv.symbol_address genv id ofs) (Genv.symbol_address genv id ofs).
+Proof.
+ intros. unfold Genv.symbol_address. destruct (Genv.find_symbol genv id) eqn:?; auto.
+ exploit (proj1 globals); eauto. intros.
+ econstructor; eauto. rewrite Ptrofs.add_zero; auto.
+Qed.
+
+Lemma eval_condition_inject:
+ forall cond vl1 vl2 b m1 m2,
+ Val.inject_list f vl1 vl2 ->
+ Mem.inject f m1 m2 ->
+ eval_condition cond vl1 m1 = Some b ->
+ eval_condition cond vl2 m2 = Some b.
+Proof.
+ intros. eapply eval_condition_inj with (f := f) (m1 := m1); eauto.
+ intros; eapply Mem.valid_pointer_inject_val; eauto.
+ intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+ intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+ intros; eapply Mem.different_pointers_inject; eauto.
+Qed.
+
+Lemma eval_addressing_inject:
+ forall addr vl1 vl2 v1,
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = Some v1 ->
+ exists v2,
+ eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = Some v2
+ /\ Val.inject f v1 v2.
+Proof.
+ intros.
+ rewrite eval_shift_stack_addressing.
+ eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+ intros. apply symbol_address_inject.
+ econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
+Lemma eval_addressing_inject_none:
+ forall addr vl1 vl2,
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+ eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+ intros.
+ rewrite eval_shift_stack_addressing.
+ eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+ intros. apply symbol_address_inject.
+ econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
+Lemma eval_operation_inject:
+ forall op vl1 vl2 v1 m1 m2,
+ Val.inject_list f vl1 vl2 ->
+ Mem.inject f m1 m2 ->
+ eval_operation genv (Vptr sp1 Ptrofs.zero) op vl1 m1 = Some v1 ->
+ exists v2,
+ eval_operation genv (Vptr sp2 Ptrofs.zero) (shift_stack_operation delta op) vl2 m2 = Some v2
+ /\ Val.inject f v1 v2.
+Proof.
+ intros.
+ rewrite eval_shift_stack_operation. cbn.
+ eapply eval_operation_inj with (sp1 := Vptr sp1 Ptrofs.zero) (m1 := m1); eauto.
+ intros; eapply Mem.valid_pointer_inject_val; eauto.
+ intros; eapply Mem.weak_valid_pointer_inject_val; eauto.
+ intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto.
+ intros; eapply Mem.different_pointers_inject; eauto.
+ intros. apply symbol_address_inject.
+ econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
+End EVAL_INJECT.
+
+(** * Handling of builtin arguments *)
+
+Definition builtin_arg_ok_1
+ (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) :=
+ match c, ba with
+ | OK_all, _ => true
+ | OK_const, (BA_int _ | BA_long _ | BA_float _ | BA_single _) => true
+ | OK_addrstack, BA_addrstack _ => true
+ | OK_addressing, BA_addrstack _ => true
+ | OK_addressing, BA_addptr (BA _) (BA_int _) => true
+ | OK_addressing, BA_addptr (BA _) (BA_long _) => true
+ | _, _ => false
+ end.
+
+Definition builtin_arg_ok
+ (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) :=
+ match ba with
+ | (BA _ | BA_splitlong (BA _) (BA _)) => true
+ | _ => builtin_arg_ok_1 ba c
+ end.
diff --git a/kvx/OpWeights.ml b/kvx/OpWeights.ml
new file mode 100644
index 00000000..23c2e5d3
--- /dev/null
+++ b/kvx/OpWeights.ml
@@ -0,0 +1,115 @@
+open Op;;
+open PostpassSchedulingOracle;;
+open PrepassSchedulingOracleDeps;;
+
+module KV3 =
+ struct
+let resource_bounds = PostpassSchedulingOracle.resource_bounds;;
+let nr_non_pipelined_units = 0;;
+
+let rec nlist_rec x l = function
+ | 0 -> l
+ | n when n > 0 -> nlist_rec x (x :: l) (n-1)
+ | _ -> failwith "nlist_rec";;
+let nlist x n = nlist_rec x [] n;;
+
+let bogus_register = Machregs.R0;;
+let bogus_inputs n = nlist bogus_register n;;
+
+let insns_of_op (op : operation) (nargs : int) =
+ match Asmblockgen.transl_op op
+ (bogus_inputs nargs) bogus_register [] with
+ | Errors.Error msg -> failwith "OpWeights.insns_of_op"
+ | Errors.OK insns -> insns;;
+
+let insn_of_op op nargs =
+ match insns_of_op op nargs with
+ | [] -> failwith "OpWeights.insn_of_op"
+ | h::_ -> h;;
+
+let insns_of_cond (cond : condition) (nargs : int) =
+ match Asmblockgen.transl_cond_op cond
+ Asmvliw.GPR0 (bogus_inputs nargs) [] with
+ | Errors.Error msg -> failwith "OpWeights.insns_of_cond"
+ | Errors.OK insns -> insns;;
+
+let insn_of_cond cond nargs =
+ match insns_of_cond cond nargs with
+ | [] -> failwith "OpWeights.insn_of_cond"
+ | h::_ -> h;;
+
+let insns_of_load trap chunk addressing (nargs : int) =
+ match Asmblockgen.transl_load trap chunk addressing
+ (bogus_inputs nargs) bogus_register [] with
+ | Errors.Error msg -> failwith "OpWeights.insns_of_load"
+ | Errors.OK insns -> insns;;
+
+let insn_of_load trap chunk addressing nargs =
+ match insns_of_load trap chunk addressing nargs with
+ | [] -> failwith "OpWeights.insn_of_load"
+ | h::_ -> h;;
+
+let insns_of_store chunk addressing (nargs : int) =
+ match Asmblockgen.transl_store chunk addressing
+ (bogus_inputs nargs) bogus_register [] with
+ | Errors.Error msg -> failwith "OpWeights.insns_of_store"
+ | Errors.OK insns -> insns;;
+
+let insn_of_store chunk addressing nargs =
+ match insns_of_store chunk addressing nargs with
+ | [] -> failwith "OpWeights.insn_of_store"
+ | h::_ -> h;;
+
+let latency_of_op (op : operation) (nargs : int) =
+ let insn = insn_of_op op nargs in
+ let record = basic_rec insn in
+ let latency = real_inst_to_latency record.inst in
+ latency;;
+
+let resources_of_op (op : operation) (nargs : int) =
+ let insn = insn_of_op op nargs in
+ let record = basic_rec insn in
+ rec_to_usage record;;
+
+let non_pipelined_resources_of_op (op : operation) (nargs : int) = [| |]
+
+let resources_of_cond (cond : condition) (nargs : int) =
+ let insn = insn_of_cond cond nargs in
+ let record = basic_rec insn in
+ rec_to_usage record;;
+
+let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+let latency_of_call _ _ = 6;;
+
+let resources_of_load trap chunk addressing nargs =
+ let insn = insn_of_load trap chunk addressing nargs in
+ let record = basic_rec insn in
+ rec_to_usage record;;
+
+let resources_of_store chunk addressing nargs =
+ let insn = insn_of_store chunk addressing nargs in
+ let record = basic_rec insn in
+ rec_to_usage record;;
+
+let resources_of_call _ _ = resource_bounds;;
+let resources_of_builtin _ = resource_bounds;;
+ end;;
+
+let get_opweights () : opweights =
+ match !Clflags.option_mtune with
+ | "kv3" | "" ->
+ {
+ pipelined_resource_bounds = KV3.resource_bounds;
+ nr_non_pipelined_units = KV3.nr_non_pipelined_units;
+ latency_of_op = KV3.latency_of_op;
+ resources_of_op = KV3.resources_of_op;
+ non_pipelined_resources_of_op = KV3.non_pipelined_resources_of_op;
+ latency_of_load = KV3.latency_of_load;
+ resources_of_load = KV3.resources_of_load;
+ resources_of_store = KV3.resources_of_store;
+ resources_of_cond = KV3.resources_of_cond;
+ latency_of_call = KV3.latency_of_call;
+ resources_of_call = KV3.resources_of_call;
+ resources_of_builtin = KV3.resources_of_builtin
+ }
+ | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/kvx/Peephole.v b/kvx/Peephole.v
new file mode 100644
index 00000000..5adb823b
--- /dev/null
+++ b/kvx/Peephole.v
@@ -0,0 +1,158 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib.
+Require Import Asmvliw.
+Require Import Values.
+Require Import Integers.
+Require Import AST.
+Require Compopts.
+
+Definition gpreg_q_list : list gpreg_q :=
+R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9
+:: R10R11 :: R12R13 :: R14R15 :: R16R17 :: R18R19
+:: R20R21 :: R22R23 :: R24R25 :: R26R27 :: R28R29
+:: R30R31 :: R32R33 :: R34R35 :: R36R37 :: R38R39
+:: R40R41 :: R42R43 :: R44R45 :: R46R47 :: R48R49
+:: R50R51 :: R52R53 :: R54R55 :: R56R57 :: R58R59
+:: R60R61 :: R62R63 :: nil.
+
+Definition gpreg_o_list : list gpreg_o :=
+R0R1R2R3 :: R4R5R6R7 :: R8R9R10R11 :: R12R13R14R15
+:: R16R17R18R19 :: R20R21R22R23 :: R24R25R26R27 :: R28R29R30R31
+:: R32R33R34R35 :: R36R37R38R39 :: R40R41R42R43 :: R44R45R46R47
+:: R48R49R50R51 :: R52R53R54R55 :: R56R57R58R59 :: R60R61R62R63 :: nil.
+
+Fixpoint gpreg_q_search_rec r0 r1 l :=
+ match l with
+ | h :: t =>
+ let (s0, s1) := gpreg_q_expand h in
+ if (gpreg_eq r0 s0) && (gpreg_eq r1 s1)
+ then Some h
+ else gpreg_q_search_rec r0 r1 t
+ | nil => None
+ end.
+
+Fixpoint gpreg_o_search_rec r0 r1 r2 r3 l :=
+ match l with
+ | h :: t =>
+ match gpreg_o_expand h with
+ | (((s0, s1), s2), s3) =>
+ if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) &&
+ (gpreg_eq r2 s2) && (gpreg_eq r3 s3)
+ then Some h
+ else gpreg_o_search_rec r0 r1 r2 r3 t
+ end
+ | nil => None
+ end.
+
+Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q :=
+ gpreg_q_search_rec r0 r1 gpreg_q_list.
+
+Definition gpreg_o_search r0 r1 r2 r3 : option gpreg_o :=
+ gpreg_o_search_rec r0 r1 r2 r3 gpreg_o_list.
+
+Parameter print_found_store: forall A, Z -> A -> A.
+
+Definition coalesce_octuples := true.
+
+Fixpoint coalesce_mem (insns : list basic) : list basic :=
+ match insns with
+ | nil => nil
+ | h0 :: t0 =>
+ match t0 with
+ | h1 :: t1 =>
+ match h0, h1 with
+ | (PStoreRRO Psd_a rs0 ra0 ofs0),
+ (PStoreRRO Psd_a rs1 ra1 ofs1) =>
+ match gpreg_q_search rs0 rs1 with
+ | Some rs0rs1 =>
+ let zofs0 := Ptrofs.signed ofs0 in
+ let zofs1 := Ptrofs.signed ofs1 in
+ if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1)
+ then
+ if coalesce_octuples
+ then
+ match t1 with
+ | (PStoreRRO Psd_a rs2 ra2 ofs2) ::
+ (PStoreRRO Psd_a rs3 ra3 ofs3) :: t3 =>
+ match gpreg_o_search rs0 rs1 rs2 rs3 with
+ | Some octuple =>
+ let zofs2 := Ptrofs.signed ofs2 in
+ let zofs3 := Ptrofs.signed ofs3 in
+ if (zofs2 =? zofs0 + 16) && (ireg_eq ra0 ra2) &&
+ (zofs3 =? zofs0 + 24) && (ireg_eq ra0 ra3)
+ then (PStore (PStoreORRO octuple ra0 ofs0)) :: Pnop :: Pnop :: Pnop :: (coalesce_mem t3)
+ else (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ | None => (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ end
+ | _ => (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ end
+ else (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ else h0 :: (coalesce_mem t0)
+ | None => h0 :: (coalesce_mem t0)
+ end
+
+ | (PLoad (PLoadRRO TRAP Pld_a rd0 ra0 ofs0)),
+ (PLoad (PLoadRRO TRAP Pld_a rd1 ra1 ofs1)) =>
+ match gpreg_q_search rd0 rd1 with
+ | Some rd0rd1 =>
+ let zofs0 := Ptrofs.signed ofs0 in
+ let zofs1 := Ptrofs.signed ofs1 in
+ if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) && negb (ireg_eq ra0 rd0)
+ then
+ if coalesce_octuples
+ then
+ match t1 with
+ | (PLoad (PLoadRRO TRAP Pld_a rd2 ra2 ofs2)) ::
+ (PLoad (PLoadRRO TRAP Pld_a rd3 ra3 ofs3)) :: t3 =>
+ match gpreg_o_search rd0 rd1 rd2 rd3 with
+ | Some octuple =>
+ let zofs2 := Ptrofs.signed ofs2 in
+ let zofs3 := Ptrofs.signed ofs3 in
+ if (zofs2 =? zofs0 + 16) && (ireg_eq ra0 ra2) &&
+ (zofs3 =? zofs0 + 24) && (ireg_eq ra0 ra3) &&
+ negb (ireg_eq ra0 rd1) && negb (ireg_eq ra0 rd2)
+ then (PLoad (PLoadORRO octuple ra0 ofs0)) :: Pnop :: Pnop :: Pnop :: (coalesce_mem t3)
+ else (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ | None => (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ end
+ | _ => (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ end
+ else (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ else h0 :: (coalesce_mem t0)
+ | None => h0 :: (coalesce_mem t0)
+ end
+ | _, _ => h0 :: (coalesce_mem t0)
+ end
+ | nil => h0 :: nil
+ end
+ end.
+
+Definition optimize_body (insns : list basic) :=
+ if Compopts.optim_coalesce_mem tt
+ then coalesce_mem insns
+ else insns.
+
+Program Definition optimize_bblock (bb : bblock) :=
+ let optimized := optimize_body (body bb) in
+ let wf_ok := wf_bblockb optimized (exit bb) in
+ {| header := header bb;
+ body := if wf_ok then optimized else (body bb);
+ exit := exit bb |}.
+Next Obligation.
+ destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf.
+ - rewrite Rwf. cbn. trivial.
+ - exact (correct bb).
+Qed.
diff --git a/kvx/PostpassScheduling.v b/kvx/PostpassScheduling.v
new file mode 100644
index 00000000..08e640c6
--- /dev/null
+++ b/kvx/PostpassScheduling.v
@@ -0,0 +1,526 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Implementation (and basic properties) of the verified postpass scheduler *)
+
+Require Import Coqlib Errors AST Integers.
+Require Import Asmblock Axioms Memory Globalenvs.
+Require Import Asmblockdeps Asmblockgenproof0 Asmblockprops.
+Require Peephole.
+Require Import Lia.
+
+Local Open Scope error_monad_scope.
+
+(** * Oracle taking as input a basic block,
+ returns a scheduled list of bundles *)
+Axiom schedule: bblock -> (list (list basic)) * option control.
+
+Extract Constant schedule => "PostpassSchedulingOracle.schedule".
+
+(** * Concat all bundles into one big basic block *)
+
+(* Lemmas necessary for defining concat_all *)
+Lemma app_nonil {A: Type} (l l': list A) : l <> nil -> l ++ l' <> nil.
+Proof.
+ intros. destruct l; simpl.
+ - contradiction.
+ - discriminate.
+Qed.
+
+Lemma app_nonil2 {A: Type} : forall (l l': list A), l' <> nil -> l ++ l' <> nil.
+Proof.
+ destruct l.
+ - intros. simpl; auto.
+ - intros. rewrite <- app_comm_cons. discriminate.
+Qed.
+
+Definition check_size bb :=
+ if zlt Ptrofs.max_unsigned (size bb)
+ then Error (msg "PostpassSchedulingproof.check_size")
+ else OK tt.
+
+Program Definition concat2 (bb bb': bblock) : res bblock :=
+ do ch <- check_size bb;
+ do ch' <- check_size bb';
+ match (exit bb) with
+ | None =>
+ match (header bb') with
+ | nil =>
+ match (exit bb') with
+ | Some (PExpand (Pbuiltin _ _ _)) => Error (msg "PostpassSchedulingproof.concat2: builtin not alone")
+ | _ => OK {| header := header bb; body := body bb ++ body bb'; exit := exit bb' |}
+ end
+ | _ => Error (msg "PostpassSchedulingproof.concat2")
+ end
+ | _ => Error (msg "PostpassSchedulingproof.concat2")
+ end.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ - destruct bb' as [hd' bdy' ex' WF']. destruct bb as [hd bdy ex WF]. simpl in *.
+ apply wf_bblock_refl in WF'. apply wf_bblock_refl in WF.
+ inversion_clear WF'. inversion_clear WF. clear H1 H3.
+ inversion H2; inversion H0.
+ + left. apply app_nonil. auto.
+ + right. auto.
+ + left. apply app_nonil2. auto.
+ + right. auto.
+ - unfold builtin_alone. intros. rewrite H0 in H.
+ assert (Some (PExpand (Pbuiltin ef args res)) <> Some (PExpand (Pbuiltin ef args res))).
+ apply (H ef args res). contradict H1. auto.
+Defined.
+
+Lemma concat2_zlt_size:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ size a <= Ptrofs.max_unsigned
+ /\ size b <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H.
+ split.
+ - unfold check_size in EQ. destruct (zlt Ptrofs.max_unsigned (size a)); monadInv EQ. lia.
+ - unfold check_size in EQ1. destruct (zlt Ptrofs.max_unsigned (size b)); monadInv EQ1. lia.
+Qed.
+
+Lemma concat2_noexit:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ exit a = None.
+Proof.
+ intros. destruct a as [hd bdy ex WF]; simpl in *.
+ destruct ex as [e|]; simpl in *; auto.
+ unfold concat2 in H. simpl in H. monadInv H.
+Qed.
+
+Lemma concat2_decomp:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ body bb = body a ++ body b
+ /\ exit bb = exit b.
+Proof.
+ intros. exploit concat2_noexit; eauto. intros.
+ destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bd ex WF]; simpl in *.
+ subst exa.
+ unfold concat2 in H; simpl in H.
+ destruct hdb.
+ - destruct exb.
+ + destruct c.
+ * destruct i; monadInv H; split; auto.
+ * monadInv H. split; auto.
+ + monadInv H. split; auto.
+ - monadInv H.
+Qed.
+
+Lemma concat2_size:
+ forall a b bb, concat2 a b = OK bb -> size bb = size a + size b.
+Proof.
+ intros. unfold concat2 in H.
+ destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bdy ex WF]; simpl in *.
+ destruct exa; monadInv H. destruct hdb; try (monadInv EQ2). destruct exb; try (monadInv EQ2).
+ - destruct c.
+ + destruct i; monadInv EQ2;
+ unfold size; simpl; rewrite app_length; rewrite Nat.add_0_r; rewrite <- Nat2Z.inj_add; rewrite Nat.add_assoc; reflexivity.
+ + monadInv EQ2. unfold size; simpl. rewrite app_length. rewrite Nat.add_0_r. rewrite <- Nat2Z.inj_add. rewrite Nat.add_assoc. reflexivity.
+ - unfold size; simpl. rewrite app_length. repeat (rewrite Nat.add_0_r). rewrite <- Nat2Z.inj_add. reflexivity.
+Qed.
+
+Lemma concat2_header:
+ forall bb bb' tbb,
+ concat2 bb bb' = OK tbb -> header bb = header tbb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *.
+ unfold concat2 in H. simpl in H. monadInv H.
+ destruct ex; try discriminate. destruct hd'; try discriminate. destruct ex'.
+ - destruct c.
+ + destruct i; try discriminate; congruence.
+ + congruence.
+ - congruence.
+Qed.
+
+Lemma concat2_no_header_in_middle:
+ forall bb bb' tbb,
+ concat2 bb bb' = OK tbb ->
+ header bb' = nil.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *.
+ unfold concat2 in H. simpl in H. monadInv H.
+ destruct ex; try discriminate. destruct hd'; try discriminate. reflexivity.
+Qed.
+
+
+
+Fixpoint concat_all (lbb: list bblock) : res bblock :=
+ match lbb with
+ | nil => Error (msg "PostpassSchedulingproof.concatenate: empty list")
+ | bb::nil => OK bb
+ | bb::lbb =>
+ do bb' <- concat_all lbb;
+ concat2 bb bb'
+ end.
+
+Lemma concat_all_size :
+ forall lbb a bb bb',
+ concat_all (a :: lbb) = OK bb ->
+ concat_all lbb = OK bb' ->
+ size bb = size a + size bb'.
+Proof.
+ intros. unfold concat_all in H. fold concat_all in H.
+ destruct lbb; try discriminate.
+ monadInv H. rewrite H0 in EQ. inv EQ.
+ apply concat2_size. assumption.
+Qed.
+
+Lemma concat_all_header:
+ forall lbb bb tbb,
+ concat_all (bb::lbb) = OK tbb -> header bb = header tbb.
+Proof.
+ destruct lbb.
+ - intros. simpl in H. congruence.
+ - intros. simpl in H. destruct lbb.
+ + inv H. eapply concat2_header; eassumption.
+ + monadInv H. eapply concat2_header; eassumption.
+Qed.
+
+Lemma concat_all_no_header_in_middle:
+ forall lbb tbb,
+ concat_all lbb = OK tbb ->
+ Forall (fun b => header b = nil) (tail lbb).
+Proof.
+ induction lbb; intros; try constructor.
+ simpl. simpl in H. destruct lbb.
+ - constructor.
+ - monadInv H. simpl tl in IHlbb. constructor.
+ + apply concat2_no_header_in_middle in EQ0. apply concat_all_header in EQ. congruence.
+ + apply IHlbb in EQ. assumption.
+Qed.
+
+Inductive is_concat : bblock -> list bblock -> Prop :=
+ | mk_is_concat: forall tbb lbb, concat_all lbb = OK tbb -> is_concat tbb lbb.
+
+(** * Remainder of the verified scheduler *)
+
+Definition verify_schedule (bb bb' : bblock) : res unit :=
+ match bblock_simub bb bb' with
+ | true => OK tt
+ | false => Error (msg "PostpassScheduling.verify_schedule")
+ end.
+
+
+Definition verify_size bb lbb := if (Z.eqb (size bb) (size_blocks lbb)) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size").
+
+Lemma verify_size_size:
+ forall bb lbb, verify_size bb lbb = OK tt -> size bb = size_blocks lbb.
+Proof.
+ intros. unfold verify_size in H. destruct (size bb =? size_blocks lbb) eqn:SIZE; try discriminate.
+ apply Z.eqb_eq. assumption.
+Qed.
+
+Lemma verify_schedule_no_header:
+ forall bb bb',
+ verify_schedule (no_header bb) bb' = verify_schedule bb bb'.
+Proof.
+ intros. unfold verify_schedule. unfold bblock_simub. unfold pure_bblock_simu_test, bblock_simu_test. rewrite trans_block_noheader_inv.
+ reflexivity.
+Qed.
+
+
+Lemma stick_header_verify_schedule:
+ forall hd bb' hbb' bb,
+ stick_header hd bb' = hbb' ->
+ verify_schedule bb bb' = verify_schedule bb hbb'.
+Proof.
+ intros. unfold verify_schedule. unfold bblock_simub, pure_bblock_simu_test, bblock_simu_test.
+ rewrite <- H. rewrite trans_block_header_inv. reflexivity.
+Qed.
+
+Lemma check_size_stick_header:
+ forall bb hd,
+ check_size bb = check_size (stick_header hd bb).
+Proof.
+ intros. unfold check_size. rewrite stick_header_size. reflexivity.
+Qed.
+
+Lemma stick_header_concat2:
+ forall bb bb' hd tbb,
+ concat2 bb bb' = OK tbb ->
+ concat2 (stick_header hd bb) bb' = OK (stick_header hd tbb).
+Proof.
+ intros. monadInv H. erewrite check_size_stick_header in EQ.
+ unfold concat2. rewrite EQ. rewrite EQ1. simpl.
+ destruct bb as [hdr bdy ex COR]; destruct bb' as [hdr' bdy' ex' COR']; simpl in *.
+ destruct ex; try discriminate. destruct hdr'; try discriminate. destruct ex'.
+ - destruct c.
+ + destruct i; try discriminate; inv EQ2; unfold stick_header; simpl; reflexivity.
+ + inv EQ2. unfold stick_header; simpl. reflexivity.
+ - inv EQ2. unfold stick_header; simpl. reflexivity.
+Qed.
+
+Lemma stick_header_concat_all:
+ forall bb c tbb hd,
+ concat_all (bb :: c) = OK tbb ->
+ concat_all (stick_header hd bb :: c) = OK (stick_header hd tbb).
+Proof.
+ intros. simpl in *. destruct c; try congruence.
+ monadInv H. rewrite EQ. simpl.
+ apply stick_header_concat2. assumption.
+Qed.
+
+
+
+Definition stick_header_code (h : list label) (lbb : list bblock) :=
+ match (head lbb) with
+ | None => Error (msg "PostpassScheduling.stick_header: empty schedule")
+ | Some fst => OK ((stick_header h fst) :: tail lbb)
+ end.
+
+Lemma stick_header_code_no_header:
+ forall bb c,
+ stick_header_code (header bb) (no_header bb :: c) = OK (bb :: c).
+Proof.
+ intros. unfold stick_header_code. simpl. rewrite stick_header_no_header. reflexivity.
+Qed.
+
+Lemma hd_tl_size:
+ forall lbb bb, hd_error lbb = Some bb -> size_blocks lbb = size bb + size_blocks (tl lbb).
+Proof.
+ destruct lbb.
+ - intros. simpl in H. discriminate.
+ - intros. simpl in H. inv H. simpl. reflexivity.
+Qed.
+
+Lemma stick_header_code_size:
+ forall h lbb lbb', stick_header_code h lbb = OK lbb' -> size_blocks lbb = size_blocks lbb'.
+Proof.
+ intros. unfold stick_header_code in H. destruct (hd_error lbb) eqn:HD; try discriminate.
+ inv H. simpl. rewrite stick_header_size. erewrite hd_tl_size; eauto.
+Qed.
+
+Lemma stick_header_code_no_header_in_middle:
+ forall c h lbb,
+ stick_header_code h c = OK lbb ->
+ Forall (fun b => header b = nil) (tl c) ->
+ Forall (fun b => header b = nil) (tl lbb).
+Proof.
+ destruct c; intros.
+ - unfold stick_header_code in H. simpl in H. discriminate.
+ - unfold stick_header_code in H. simpl in H. inv H. simpl in H0.
+ simpl. assumption.
+Qed.
+
+Lemma stick_header_code_concat_all:
+ forall hd lbb hlbb tbb,
+ stick_header_code hd lbb = OK hlbb ->
+ concat_all lbb = OK tbb ->
+ exists htbb,
+ concat_all hlbb = OK htbb
+ /\ stick_header hd tbb = htbb.
+Proof.
+ intros. exists (stick_header hd tbb). split; auto.
+ destruct lbb.
+ - unfold stick_header_code in H. simpl in H. discriminate.
+ - unfold stick_header_code in H. simpl in H. inv H.
+ apply stick_header_concat_all. assumption.
+Qed.
+
+Program Definition make_bblock_from_basics lb :=
+ match lb with
+ | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+ | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+ end.
+
+Fixpoint schedule_to_bblocks_nocontrol llb :=
+ match llb with
+ | nil => OK nil
+ | lb :: llb => do bb <- make_bblock_from_basics lb;
+ do lbb <- schedule_to_bblocks_nocontrol llb;
+ OK (bb :: lbb)
+ end.
+
+Program Definition make_bblock_from_basics_and_control lb c :=
+ match c with
+ | PExpand (Pbuiltin _ _ _) => Error (msg "PostpassScheduling.make_bblock_from_basics_and_control")
+ | PCtlFlow cf => OK {| header := nil; body := lb; exit := Some (PCtlFlow cf) |}
+ end.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ - right. discriminate.
+ - discriminate.
+Qed.
+
+Fixpoint schedule_to_bblocks_wcontrol llb c :=
+ match llb with
+ | nil => OK ((bblock_single_inst (PControl c)) :: nil)
+ | lb :: nil => do bb <- make_bblock_from_basics_and_control lb c; OK (bb :: nil)
+ | lb :: llb => do bb <- make_bblock_from_basics lb;
+ do lbb <- schedule_to_bblocks_wcontrol llb c;
+ OK (bb :: lbb)
+ end.
+
+Definition schedule_to_bblocks (llb: list (list basic)) (oc: option control) : res (list bblock) :=
+ match oc with
+ | None => schedule_to_bblocks_nocontrol llb
+ | Some c => schedule_to_bblocks_wcontrol llb c
+ end.
+
+Definition do_schedule (bb: bblock) : res (list bblock) :=
+ if (Z.eqb (size bb) 1) then OK (bb::nil)
+ else match (schedule bb) with (llb, oc) => schedule_to_bblocks llb oc end.
+
+Definition verify_par_bblock (bb: bblock) : res unit :=
+ if (bblock_para_check bb) then OK tt else Error (msg "PostpassScheduling.verify_par_bblock").
+
+Fixpoint verify_par (lbb: list bblock) :=
+ match lbb with
+ | nil => OK tt
+ | bb :: lbb => do res <- verify_par_bblock bb; verify_par lbb
+ end.
+
+Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
+ let bb' := no_header bb in
+ let bb'' := Peephole.optimize_bblock bb' in
+ do lbb <- do_schedule bb'';
+ do tbb <- concat_all lbb;
+ do sizecheck <- verify_size bb lbb;
+ do schedcheck <- verify_schedule bb' tbb;
+ do res <- stick_header_code (header bb) lbb;
+ do parcheck <- verify_par res;
+ OK res.
+
+Lemma verified_schedule_nob_size:
+ forall bb lbb, verified_schedule_nob bb = OK lbb -> size bb = size_blocks lbb.
+Proof.
+ intros. monadInv H. erewrite <- stick_header_code_size; eauto.
+ apply verify_size_size.
+ destruct x1; try discriminate. assumption.
+Qed.
+
+Lemma verified_schedule_nob_no_header_in_middle:
+ forall lbb bb,
+ verified_schedule_nob bb = OK lbb ->
+ Forall (fun b => header b = nil) (tail lbb).
+Proof.
+ intros. monadInv H. eapply stick_header_code_no_header_in_middle; eauto.
+ eapply concat_all_no_header_in_middle. eassumption.
+Qed.
+
+Lemma verified_schedule_nob_header:
+ forall bb tbb lbb,
+ verified_schedule_nob bb = OK (tbb :: lbb) ->
+ header bb = header tbb
+ /\ Forall (fun b => header b = nil) lbb.
+Proof.
+ intros. split.
+ - monadInv H. unfold stick_header_code in EQ3. destruct (hd_error _); try discriminate. inv EQ3.
+ simpl. reflexivity.
+ - apply verified_schedule_nob_no_header_in_middle in H. assumption.
+Qed.
+
+
+Definition verified_schedule (bb : bblock) : res (list bblock) :=
+ match exit bb with
+ | Some (PExpand (Pbuiltin ef args res)) => OK (bb::nil) (* Special case for ensuring the lemma verified_schedule_builtin_idem *)
+ | _ => verified_schedule_nob bb
+ end.
+
+Lemma verified_schedule_size:
+ forall bb lbb, verified_schedule bb = OK lbb -> size bb = size_blocks lbb.
+Proof.
+ intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
+ all: try (apply verified_schedule_nob_size; auto; fail).
+ inv H. simpl. lia.
+Qed.
+
+Lemma verified_schedule_no_header_in_middle:
+ forall lbb bb,
+ verified_schedule bb = OK lbb ->
+ Forall (fun b => header b = nil) (tail lbb).
+Proof.
+ intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
+ all: try (eapply verified_schedule_nob_no_header_in_middle; eauto; fail).
+ inv H. simpl. auto.
+Qed.
+
+Lemma verified_schedule_header:
+ forall bb tbb lbb,
+ verified_schedule bb = OK (tbb :: lbb) ->
+ header bb = header tbb
+ /\ Forall (fun b => header b = nil) lbb.
+Proof.
+ intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
+ all: try (eapply verified_schedule_nob_header; eauto; fail).
+ inv H. split; simpl; auto.
+Qed.
+
+
+Lemma verified_schedule_nob_correct:
+ forall ge f bb lbb,
+ verified_schedule_nob bb = OK lbb ->
+ exists tbb,
+ is_concat tbb lbb
+ /\ bblock_simu ge f bb tbb.
+Proof.
+ intros. monadInv H.
+ exploit stick_header_code_concat_all; eauto.
+ intros (tbb & CONC & STH).
+ exists tbb. split; auto. constructor; auto.
+ rewrite verify_schedule_no_header in EQ2. erewrite stick_header_verify_schedule in EQ2; eauto.
+ eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ2.
+ destruct (bblock_simub _ _); auto; try discriminate.
+Qed.
+
+Theorem verified_schedule_correct:
+ forall ge f bb lbb,
+ verified_schedule bb = OK lbb ->
+ exists tbb,
+ is_concat tbb lbb
+ /\ bblock_simu ge f bb tbb.
+Proof.
+ intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
+ all: try (eapply verified_schedule_nob_correct; eauto; fail).
+ inv H. eexists. split; simpl; auto. constructor; auto. simpl; auto. constructor; auto.
+Qed.
+
+Lemma verified_schedule_builtin_idem:
+ forall bb ef args res lbb,
+ exit bb = Some (PExpand (Pbuiltin ef args res)) ->
+ verified_schedule bb = OK lbb ->
+ lbb = bb :: nil.
+Proof.
+ intros. unfold verified_schedule in H0. rewrite H in H0. inv H0. reflexivity.
+Qed.
+
+
+Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) :=
+ match lbb with
+ | nil => OK nil
+ | (cons bb lbb) =>
+ do tlbb <- transf_blocks lbb;
+ do tbb <- verified_schedule bb;
+ OK (tbb ++ tlbb)
+ end.
+
+Definition transl_function (f: function) : res function :=
+ do lb <- transf_blocks (fn_blocks f);
+ OK (mkfunction (fn_sig f) lb).
+
+Definition transf_function (f: function) : res function :=
+ do tf <- transl_function f;
+ if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+ then Error (msg "code size exceeded")
+ else OK tf.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+ transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program :=
+ transform_partial_program transf_fundef p.
diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml
new file mode 100644
index 00000000..3f4520a6
--- /dev/null
+++ b/kvx/PostpassSchedulingOracle.ml
@@ -0,0 +1,1036 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Asmvliw
+open Asmblock
+open Printf
+open Camlcoq
+open InstructionScheduler
+open TargetPrinter.Target
+
+let debug = false
+
+(**
+ * Extracting infos from Asmvliw instructions
+ *)
+
+type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset
+
+type location = Reg of preg | Mem
+
+type real_instruction =
+ (* ALU *)
+ | Addw | Andw | Compw | Mulw | Orw | Sbfw | Sbfxw | Sraw | Srlw | Sllw | Srsw | Rorw | Xorw
+ | Addd | Andd | Compd | Muld | Ord | Sbfd | Sbfxd | Srad | Srld | Slld | Srsd | Xord
+ | Nandw | Norw | Nxorw | Nandd | Nord | Nxord | Andnw | Ornw | Andnd | Ornd
+ | Maddw | Maddd | Msbfw | Msbfd | Cmoved
+ | Make | Nop | Extfz | Extfs | Insf
+ | Addxw | Addxd
+ (* LSU *)
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo
+ | Sb | Sh | Sw | Sd | Sq | So
+ (* BCU *)
+ | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set
+ (* FPU *)
+ | Fabsd | Fabsw | Fnegw | Fnegd
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
+ | Fmind | Fminw | Fmaxd | Fmaxw | Finvw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd
+ | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
+ | Fcompw | Fcompd
+
+type ab_inst_rec = {
+ inst: real_instruction;
+ write_locs : location list;
+ read_locs : location list;
+ read_at_id : location list; (* Must be contained in read_locs *)
+ read_at_e1 : location list; (* idem *)
+ imm : immediate option;
+ is_control : bool;
+}
+
+(** Asmvliw constructor to real instructions *)
+
+exception OpaqueInstruction
+
+let arith_rr_real = function
+ | Pcvtl2w -> Addw
+ | Pmv -> Addd
+ | Pnegw -> Sbfw
+ | Pnegl -> Sbfd
+ | Psxwd -> Extfs
+ | Pzxwd -> Extfz
+ | Pextfz(_,_) -> Extfz
+ | Pextfs(_,_) -> Extfs
+ | Pextfzl(_,_) -> Extfz
+ | Pextfsl(_,_) -> Extfs
+ | Pfabsw -> Fabsw
+ | Pfabsd -> Fabsd
+ | Pfnegw -> Fnegw
+ | Pfnegd -> Fnegd
+ | Pfinvw -> Finvw
+ | Pfnarrowdw -> Fnarrowdw
+ | Pfwidenlwd -> Fwidenlwd
+ | Pfloatwrnsz -> Floatwz
+ | Pfloatuwrnsz -> Floatuwz
+ | Pfloatudrnsz -> Floatudz
+ | Pfloatdrnsz -> Floatdz
+ | Pfixedwrzz -> Fixedwz
+ | Pfixeduwrzz -> Fixeduwz
+ | Pfixeddrzz -> Fixeddz
+ | Pfixedudrzz -> Fixedudz
+ | Pfixeddrzz_i32 -> Fixeddz
+ | Pfixedudrzz_i32 -> Fixedudz
+
+let arith_rrr_real = function
+ | Pcompw it -> Compw
+ | Pcompl it -> Compd
+ | Pfcompw ft -> Fcompw
+ | Pfcompl ft -> Fcompd
+ | Paddw -> Addw
+ | Paddxw _ -> Addxw
+ | Psubw -> Sbfw
+ | Prevsubxw _ -> Sbfxw
+ | Pmulw -> Mulw
+ | Pandw -> Andw
+ | Pnandw -> Nandw
+ | Porw -> Orw
+ | Pnorw -> Norw
+ | Pxorw -> Xorw
+ | Pnxorw -> Nxorw
+ | Pandnw -> Andnw
+ | Pornw -> Ornw
+ | Psraw -> Sraw
+ | Psrlw -> Srlw
+ | Psrxw -> Srsw
+ | Psllw -> Sllw
+ | Paddl -> Addd
+ | Paddxl _ -> Addxd
+ | Psubl -> Sbfd
+ | Prevsubxl _ -> Sbfxd
+ | Pandl -> Andd
+ | Pnandl -> Nandd
+ | Porl -> Ord
+ | Pnorl -> Nord
+ | Pxorl -> Xord
+ | Pnxorl -> Nxord
+ | Pandnl -> Andnd
+ | Pornl -> Ornd
+ | Pmull -> Muld
+ | Pslll -> Slld
+ | Psrll -> Srld
+ | Psrxl -> Srsd
+ | Psral -> Srad
+ | Pfaddd -> Faddd
+ | Pfaddw -> Faddw
+ | Pfsbfd -> Fsbfd
+ | Pfsbfw -> Fsbfw
+ | Pfmuld -> Fmuld
+ | Pfmulw -> Fmulw
+ | Pfmind -> Fmind
+ | Pfminw -> Fminw
+ | Pfmaxd -> Fmaxd
+ | Pfmaxw -> Fmaxw
+
+let arith_rri32_real = function
+ | Pcompiw it -> Compw
+ | Paddiw -> Addw
+ | Paddxiw _ -> Addxw
+ | Prevsubiw -> Sbfw
+ | Prevsubxiw _ -> Sbfxw
+ | Pmuliw -> Mulw
+ | Pandiw -> Andw
+ | Pnandiw -> Nandw
+ | Poriw -> Orw
+ | Pnoriw -> Norw
+ | Pxoriw -> Xorw
+ | Pnxoriw -> Nxorw
+ | Pandniw -> Andnw
+ | Porniw -> Ornw
+ | Psraiw -> Sraw
+ | Psrxiw -> Srsw
+ | Psrliw -> Srlw
+ | Pslliw -> Sllw
+ | Proriw -> Rorw
+ | Psllil -> Slld
+ | Psrlil -> Srld
+ | Psrail -> Srad
+ | Psrxil -> Srsd
+
+let arith_rri64_real = function
+ | Pcompil it -> Compd
+ | Paddil -> Addd
+ | Prevsubil -> Sbfd
+ | Paddxil _ -> Addxd
+ | Prevsubxil _ -> Sbfxd
+ | Pmulil -> Muld
+ | Pandil -> Andd
+ | Pnandil -> Nandd
+ | Poril -> Ord
+ | Pnoril -> Nord
+ | Pxoril -> Xord
+ | Pnxoril -> Nxord
+ | Pandnil -> Andnd
+ | Pornil -> Ornd
+
+
+let arith_arr_real = function
+ | Pinsf (_, _) -> Insf
+ | Pinsfl (_, _) -> Insf
+
+let arith_arrr_real = function
+ | Pfmaddfw -> Ffmaw
+ | Pfmaddfl -> Ffmad
+ | Pfmsubfw -> Ffmsw
+ | Pfmsubfl -> Ffmsd
+ | Pmaddw -> Maddw
+ | Pmaddl -> Maddd
+ | Pmsubw -> Msbfw
+ | Pmsubl -> Msbfd
+ | Pcmove _ -> Cmoved
+ | Pcmoveu _ -> Cmoved
+
+let arith_arri32_real = function
+ | Pmaddiw -> Maddw
+ | Pcmoveiw _ -> Cmoved
+ | Pcmoveuiw _ -> Cmoved
+
+let arith_arri64_real = function
+ | Pmaddil -> Maddd
+ | Pcmoveil _ -> Cmoved
+ | Pcmoveuil _ -> Cmoved
+
+let arith_ri32_real = Make
+
+let arith_ri64_real = Make
+
+let arith_rf32_real = Make
+
+let arith_rf64_real = Make
+
+let store_real = function
+ | Psb -> Sb
+ | Psh -> Sh
+ | Psw -> Sw
+ | Psw_a -> Sw
+ | Psd -> Sd
+ | Psd_a -> Sd
+ | Pfss -> Sw
+ | Pfsd -> Sd
+
+let load_real = function
+ | Plb -> Lbs
+ | Plbu -> Lbz
+ | Plh -> Lhs
+ | Plhu -> Lhz
+ | Plw -> Lws
+ | Plw_a -> Lws
+ | Pld -> Ld
+ | Pld_a -> Ld
+ | Pfls -> Lws
+ | Pfld -> Ld
+
+let set_real = Set
+let get_real = Get
+let nop_real = Nop
+let loadsymbol_real = Make
+let loadqrro_real = Lq
+let loadorro_real = Lo
+let storeqrro_real = Sq
+let storeorro_real = So
+
+let ret_real = Ret
+let call_real = Call
+let icall_real = Icall
+let goto_real = Goto
+let igoto_real = Igoto
+let jl_real = Goto
+let cb_real = Cb
+let cbu_real = Cb
+
+let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_arri32_rec i rd rs imm32 =
+ let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []
+ in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;
+ read_at_id = [] ; read_at_e1 = rae1 }
+
+let arith_arri64_rec i rd rs imm64 =
+ let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []
+ in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
+
+let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_arrr_rec i rd rs1 rs2 =
+ let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []
+ in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
+
+let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_r_rec i rd = match i with
+ (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)
+ | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);
+ is_control = false; read_at_id = []; read_at_e1 = [] }
+
+let arith_rec i =
+ match i with
+ | PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
+ | PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
+ | PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2)
+ | PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs)
+ (* Seems like single constant constructor types are elided *)
+ | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
+ | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
+ | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)
+ | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];
+ imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
+ | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];
+ imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
+ | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)
+ | PArithR (i, rd) -> arith_r_rec i (IR rd)
+
+let load_rec i = match i with
+ | PLoadRRO (trap, i, rs1, rs2, imm) ->
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PLoadQRRO(rs, ra, imm) ->
+ let (rs0, rs1) = gpreg_q_expand rs in
+ { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PLoadORRO(rs, ra, imm) ->
+ let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
+ { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
+ imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
+ | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let store_rec i = match i with
+ | PStoreRRO (i, rs, ra, imm) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}
+ | PStoreQRRO (rs, ra, imm) ->
+ let (rs0, rs1) = gpreg_q_expand rs in
+ { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}
+ | PStoreORRO (rs, ra, imm) ->
+ let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
+ { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];
+ imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}
+ | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;
+ read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}
+
+let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+
+let basic_rec i =
+ match i with
+ | PArith i -> arith_rec i
+ | PLoad i -> load_rec i
+ | PStore i -> store_rec i
+ | Pallocframe (_, _) -> raise OpaqueInstruction
+ | Pfreeframe (_, _) -> raise OpaqueInstruction
+ | Pget (rd, rs) -> get_rec rd rs
+ | Pset (rd, rs) -> set_rec rd rs
+ | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}
+
+let expand_rec = function
+ | Pbuiltin _ -> raise OpaqueInstruction
+
+let ctl_flow_rec = function
+ | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}
+ | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+ | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+ | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)
+
+let control_rec i =
+ match i with
+ | PExpand i -> expand_rec i
+ | PCtlFlow i -> ctl_flow_rec i
+
+let rec basic_recs body = match body with
+ | [] -> []
+ | bi :: body -> (basic_rec bi) :: (basic_recs body)
+
+let exit_rec exit = match exit with
+ | None -> []
+ | Some ex -> [control_rec ex]
+
+let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)
+
+(**
+ * Providing informations relative to the real instructions
+ *)
+
+(** Abstraction providing all the necessary informations for solving the scheduling problem *)
+type inst_info = {
+ write_locs : location list;
+ read_locs : location list;
+ reads_at_id : bool;
+ reads_at_e1 : bool;
+ is_control : bool;
+ usage: int array; (* resources consumed by the instruction *)
+ latency: int;
+}
+
+(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)
+type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10
+
+let rec pow a = function
+ | 0 -> Int64.one
+ | 1 -> Int64.of_int a
+ | n -> let b = pow a (n/2) in
+ Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))
+
+let signed_interval n : (int64 * int64) = begin
+ assert (n > 0);
+ let min = Int64.neg @@ pow 2 (n-1)
+ and max = Int64.sub (pow 2 (n-1)) Int64.one
+ in (min, max)
+end
+
+let within i interv = match interv with (min, max) -> (i >= min && i <= max)
+
+let signed_length (i:int64) =
+ let rec f (i:int64) n =
+ let interv = signed_interval n
+ in if (within i interv) then n else f i (n+1)
+ in f i 1
+
+let unsigned_length (i:int64) = (signed_length i) - 1
+
+let encode_imm (imm:int64) =
+ if (Int64.compare imm Int64.zero < 0) then
+ let length = signed_length imm
+ in if length <= 10 then S10
+ else if length <= 32 then U27L5
+ else if length <= 37 then U27L10
+ else if length <= 64 then E27U27L10
+ else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
+ else
+ let length = unsigned_length imm
+ in if length <= 6 then U6
+ else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)
+ else if length <= 32 then U27L5
+ else if length <= 37 then U27L10
+ else if length <= 64 then E27U27L10
+ else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
+
+(** Resources *)
+type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop
+
+let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]
+
+let rec find_index elt l =
+ match l with
+ | [] -> raise Not_found
+ | e::l -> if (e == elt) then 0
+ else 1 + find_index elt l
+
+let resource_id resource : int = find_index resource resource_names
+
+let resource_bound resource : int =
+ match resource with
+ | Rissue -> 8
+ | Rtiny -> 4
+ | Rlite -> 2
+ | Rfull -> 1
+ | Rlsu -> 1
+ | Rmau -> 1
+ | Rbcu -> 1
+ | Rtca -> 1
+ | Rauxr -> 1
+ | Rauxw -> 1
+ | Rcrrp -> 1
+ | Rcrwl -> 1
+ | Rcrwh -> 1
+ | Rnop -> 4
+
+let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)
+
+(** Reservation tables *)
+let alu_full : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_nop : int array = let resmap = fun r -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let bcu : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rbcu -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+(** Real instructions *)
+
+exception InvalidEncoding
+
+let rec_to_usage r =
+ let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)
+ | Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)
+
+ in match r.inst with
+ | Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw
+ | Nxorw | Andnw | Ornw ->
+ (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | _ -> raise InvalidEncoding)
+ | Sbfxw | Sbfxd ->
+ (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord
+ | Nxord | Andnd | Ornd ->
+ (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y)
+ |Cmoved ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | Some E27U27L10 -> alu_lite_y)
+ | Addxw ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Addxd ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | Some E27U27L10 -> alu_lite_y)
+ | Compw -> (match encoding with None -> alu_tiny
+ | Some U6 | Some S10 | Some U27L5 -> alu_tiny_x
+ | _ -> raise InvalidEncoding)
+ | Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y)
+ | Fcompw -> (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Fcompd -> (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Make -> (match encoding with Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y
+ | _ -> raise InvalidEncoding)
+ | Maddw | Msbfw -> (match encoding with None -> mau_auxr
+ | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x
+ | _ -> raise InvalidEncoding)
+ | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr
+ | Some U27L5 | Some U27L10 -> mau_auxr_x
+ | Some E27U27L10 -> mau_auxr_y)
+ | Mulw -> (match encoding with None -> mau
+ | Some U6 | Some S10 | Some U27L5 -> mau_x
+ | _ -> raise InvalidEncoding)
+ | Muld -> (match encoding with None | Some U6 | Some S10 -> mau
+ | Some U27L5 | Some U27L10 -> mau_x
+ | Some E27U27L10 -> mau_y)
+ | Nop -> alu_nop
+ | Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)
+ (* TODO: check *)
+ | Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)
+ | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)
+ | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo ->
+ (match encoding with None | Some U6 | Some S10 -> lsu_auxw
+ | Some U27L5 | Some U27L10 -> lsu_auxw_x
+ | Some E27U27L10 -> lsu_auxw_y)
+ | Sb | Sh | Sw | Sd | Sq | So ->
+ (match encoding with None | Some U6 | Some S10 -> lsu_auxr
+ | Some U27L5 | Some U27L10 -> lsu_auxr_x
+ | Some E27U27L10 -> lsu_auxr_y)
+ | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu
+ | Get -> bcu_tiny_tiny_mau_xnop
+ | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
+ | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite
+ | Fnarrowdw -> alu_full
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+ | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau
+
+
+let inst_info_to_dlatency i =
+ begin
+ assert (not (i.reads_at_id && i.reads_at_e1));
+ match i.reads_at_id with
+ | true -> +1
+ | false -> (match i.reads_at_e1 with
+ | true -> -1
+ | false -> 0)
+ end
+
+let real_inst_to_latency = function
+ | Nop -> 0 (* Only goes through ID *)
+ | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw
+ (* TODO check rorw *)
+ | Rorw | Nandw | Norw | Nxorw | Ornw | Andnw
+ | Nandd | Nord | Nxord | Ornd | Andnd
+ | Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make
+ | Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd
+ | Fmind | Fmaxd | Fminw | Fmaxw
+ -> 1
+ | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4
+ | Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3
+ | Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)
+ | Get -> 1
+ | Set -> 4 (* According to the manual should be 3, but I measured 4 *)
+ | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
+ | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4
+ | Finvw -> 15
+
+let rec empty_inter la = function
+ | [] -> true
+ | b::lb -> if (List.mem b la) then false else empty_inter la lb
+
+let rec_to_info r : inst_info =
+ let usage = rec_to_usage r
+ and latency = real_inst_to_latency r.inst
+ and reads_at_id = not (empty_inter r.read_locs r.read_at_id)
+ and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)
+ in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;
+ reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }
+
+let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
+
+let instruction_usages bb =
+ let usages = List.map (fun info -> info.usage) (instruction_infos bb)
+ in Array.of_list usages
+
+(**
+ * Latency constraints building
+ *)
+
+(* type access = { inst: int; loc: location } *)
+
+let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr
+
+let loc2int = function
+ | Mem -> 1
+ | Reg pr -> preg2int pr
+
+(* module HashedLoc = struct
+ type t = { loc: location; key: int }
+ let equal l1 l2 = (l1.key = l2.key)
+ let hash l = l.key
+ let create (l:location) : t = { loc=l; key = loc2int l }
+end *)
+
+(* module LocHash = Hashtbl.Make(HashedLoc) *)
+module LocHash = Hashtbl
+
+(* Hash table : location => list of instruction ids *)
+
+let rec intlist n =
+ if n < 0 then failwith "intlist: n < 0"
+ else if n = 0 then []
+ else (n-1) :: (intlist (n-1))
+
+let find_in_hash hashloc loc =
+ match LocHash.find_opt hashloc loc with
+ | Some idl -> idl
+ | None -> []
+
+(* Returns a list of instruction ids *)
+let rec get_accesses hashloc (ll: location list) = match ll with
+ | [] -> []
+ | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)
+
+let compute_latency (ifrom: inst_info) (ito: inst_info) =
+ let dlat = inst_info_to_dlatency ito
+ in let lat = ifrom.latency + dlat
+ in assert (lat >= 0); if (lat == 0) then 1 else lat
+
+let latency_constraints bb =
+ let written = LocHash.create 70
+ and read = LocHash.create 70
+ and count = ref 0
+ and constraints = ref []
+ and instr_infos = instruction_infos bb
+ in let step (i: inst_info) =
+ let raw = get_accesses written i.read_locs
+ and waw = get_accesses written i.write_locs
+ and war = get_accesses read i.write_locs
+ in begin
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;
+ if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);
+ (* Updating "read" and "written" hashmaps *)
+ List.iter (fun loc ->
+ begin
+ LocHash.replace written loc [!count];
+ LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *)
+ end) i.write_locs;
+ List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs;
+ count := !count + 1
+ end
+ in (List.iter step instr_infos; !constraints)
+
+(**
+ * Using the InstructionScheduler
+ *)
+
+let build_problem bb =
+{ max_latency = -1;
+ resource_bounds = resource_bounds;
+ instruction_usages = instruction_usages bb;
+ latency_constraints = latency_constraints bb;
+ live_regs_entry = Registers.Regset.empty; (* unused here *)
+ typing = (fun x -> AST.Tint); (* unused here *)
+ reference_counting = None
+}
+
+let rec find_min_opt (l: int option list) =
+ match l with
+ | [] -> None
+ | e :: l ->
+ begin match find_min_opt l with
+ | None -> e
+ | Some m ->
+ begin match e with
+ | None -> Some m
+ | Some n -> if n < m then Some n else Some m
+ end
+ end
+
+let rec filter_indexes predicate = function
+ | [] -> []
+ | e :: l -> if (predicate e) then e :: (filter_indexes predicate l) else filter_indexes predicate l
+
+let get_from_indexes indexes l = List.map (List.nth l) indexes
+
+let is_basic = function PBasic _ -> true | _ -> false
+let is_control = function PControl _ -> true | _ -> false
+let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found"
+let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found"
+
+let bundlize li hd =
+ let last = List.nth li (List.length li - 1)
+ in if is_control last then
+ let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1)
+ in let bli = List.map to_basic cut_li
+ in { header = hd; body = bli; exit = Some (to_control last) }
+ else
+ let bli = List.map to_basic li
+ in { header = hd; body = bli; exit = None }
+
+let apply_pbasic b = PBasic b
+let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found"
+
+let rec find_min = function
+ | [] -> None
+ | e :: l ->
+ match find_min l with
+ | None -> Some e
+ | Some m -> if (e < m) then Some e else Some m
+
+let rec remove_all m = function
+ | [] -> []
+ | e :: l -> if m=e then remove_all m l
+ else e :: (remove_all m l)
+
+let rec find_mins l = match find_min l with
+ | None -> []
+ | Some m -> m :: find_mins (remove_all m l)
+
+let find_all_indices m l =
+ let rec find m off = function
+ | [] -> []
+ | e :: l -> if m=e then off :: find m (off+1) l
+ else find m (off+1) l
+ in find m 0 l
+
+module TimeHash = Hashtbl
+
+(* Hash table : time => list of instruction ids *)
+
+let hashtbl2list h maxint =
+ let rec f i = match TimeHash.find_opt h i with
+ | None -> if (i > maxint) then [] else (f (i+1))
+ | Some bund -> bund :: (f (i+1))
+ in f 0
+
+let find_max l =
+ let rec f = function
+ | [] -> None
+ | e :: l -> match f l with
+ | None -> Some e
+ | Some m -> if (e > m) then Some e else Some m
+ in match (f l) with
+ | None -> raise Not_found
+ | Some m -> m
+
+(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)
+let minpack_list (l: int list) =
+ let timehash = TimeHash.create (List.length l)
+ in let rec f i = function
+ | [] -> ()
+ | t::l -> begin
+ (match TimeHash.find_opt timehash t with
+ | None -> TimeHash.add timehash t [i]
+ | Some bund -> TimeHash.replace timehash t (bund @ [i]));
+ f (i+1) l
+ end
+ in begin
+ f 0 l;
+ hashtbl2list timehash (find_max l)
+ end;;
+
+(* let minpack_list l =
+ let mins = find_mins l
+ in List.map (fun m -> find_all_indices m l) mins
+ *)
+
+let bb_to_instrs bb = (List.map apply_pbasic bb.body) @ (match bb.exit with None -> [] | Some e -> [PControl e])
+
+let bundlize_solution bb sol =
+ let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1))
+ in let packs = minpack_list tmp
+ and instrs = bb_to_instrs bb
+ in let rec bund hd = function
+ | [] -> []
+ | pack :: packs -> bundlize (get_from_indexes pack instrs) hd :: (bund [] packs)
+ in bund bb.header packs
+
+let print_inst oc = function
+ | Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n"
+ | Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n"
+ | Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n"
+ | Asm.Pcvtl2w(rd, rs) -> fprintf oc " Pcvtl2w %a = %a\n" ireg rd ireg rs
+ | i -> print_instruction oc i
+
+let print_bb oc bb =
+ let asm_instructions = Asm.unfold_bblock bb
+ in List.iter (print_inst oc) asm_instructions
+
+let print_schedule sched =
+ print_string "[ ";
+ Array.iter (fun x -> Printf.printf "%d; " x) sched;
+ print_endline "]";;
+
+let do_schedule bb =
+ let problem = build_problem bb in
+ (if debug then print_problem stdout problem);
+ let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem
+ in match solution with
+ | None -> failwith "Could not find a valid schedule"
+ | Some sol ->
+ ((if debug then print_schedule sol);
+ let bundles = bundlize_solution bb sol in
+ (if debug then
+ begin
+ Printf.eprintf "Scheduling the following group of instructions:\n";
+ print_bb stderr bb;
+ Printf.eprintf "Gave the following solution:\n";
+ List.iter (print_bb stderr) bundles;
+ Printf.eprintf "--------------------------------\n"
+ end;
+ bundles))
+
+(**
+ * Dumb schedule if the above doesn't work
+ *)
+
+let bundlize_label l =
+ match l with
+ | [] -> []
+ | l -> [{ header = l; body = []; exit = None }]
+
+let rec bundlize_basic l =
+ match l with
+ | [] -> []
+ | b :: l -> { header = []; body = [b]; exit = None } :: bundlize_basic l
+
+let bundlize_exit e =
+ match e with
+ | Some e -> [{ header = []; body = []; exit = Some e }]
+ | None -> []
+
+let dumb_schedule (bb : bblock) : bblock list = bundlize_label bb.header @ bundlize_basic bb.body @ bundlize_exit bb.exit
+
+(**
+ * Separates the opaque instructions such as Pfreeframe and Pallocframe
+ *)
+
+let is_opaque = function
+ | PBasic (Pallocframe _) | PBasic (Pfreeframe _) | PControl (PExpand (Pbuiltin _)) -> true
+ | _ -> false
+
+(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *)
+let rec biggest_wo_opaque = function
+ | [] -> ([], [], None)
+ | i :: li -> if is_opaque i then ([], li, Some i)
+ else let big, rem, opaque = biggest_wo_opaque li in (i :: big, rem, opaque);;
+
+let separate_opaque bb =
+ let instrs = bb_to_instrs bb
+ in let rec f hd li =
+ match li with
+ | [] -> []
+ | li -> let big, rem, opaque = biggest_wo_opaque li in
+ match opaque with
+ | Some i ->
+ (match big with
+ | [] -> (bundlize [i] hd) :: (f [] rem)
+ | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem)
+ )
+ | None -> (bundlize big hd) :: (f [] rem)
+ in f bb.header instrs
+
+let smart_schedule bb =
+ let lbb = separate_opaque bb
+ in let rec f = function
+ | [] -> []
+ | bb :: lbb ->
+ let bundles =
+ try do_schedule bb
+ with OpaqueInstruction -> dumb_schedule bb
+ | e ->
+ let msg = Printexc.to_string e
+ and stack = Printexc.get_backtrace ()
+ in begin
+ Printf.eprintf "In regards to this group of instructions:\n";
+ print_bb stderr bb;
+ Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;
+ failwith "Invalid schedule"
+ (*
+ Printf.eprintf "Issuing one instruction per bundle instead\n\n";
+ dumb_schedule bb
+ *)
+ end
+ in bundles @ (f lbb)
+ in f lbb
+
+let bblock_to_bundles bb =
+ if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);
+ (* print_problem (build_problem bb); *)
+ if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb
+
+(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *)
+
+let rec bundles_to_coq_schedule = function
+ | [] -> ([], None)
+ | bb :: [] -> ([bb.body], bb.exit)
+ | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc)
+
+(** Called schedule function from Coq *)
+
+let schedule_notime bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto
+let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) schedule_notime bb
diff --git a/kvx/PostpassSchedulingproof.v b/kvx/PostpassSchedulingproof.v
new file mode 100644
index 00000000..937b3be6
--- /dev/null
+++ b/kvx/PostpassSchedulingproof.v
@@ -0,0 +1,690 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockgenproof0 Asmblockprops.
+Require Import PostpassScheduling.
+Require Import Asmblockgenproof.
+Require Import Axioms.
+Require Import Lia.
+
+Local Open Scope error_monad_scope.
+
+Definition match_prog (p tp: Asmvliw.program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Lemma regset_double_set_id:
+ forall r (rs: regset) v1 v2,
+ (rs # r <- v1 # r <- v2) = (rs # r <- v2).
+Proof.
+ intros. apply functional_extensionality. intros. destruct (preg_eq r x).
+ - subst r. repeat (rewrite Pregmap.gss; auto).
+ - repeat (rewrite Pregmap.gso); auto.
+Qed.
+
+Lemma exec_body_pc_var:
+ forall l ge rs m rs' m' v,
+ exec_body ge l rs m = Next rs' m' ->
+ exec_body ge l (rs # PC <- v) m = Next (rs' # PC <- v) m'.
+Proof.
+ induction l.
+ - intros. simpl. simpl in H. inv H. auto.
+ - intros. simpl in *.
+ destruct (exec_basic_instr ge a rs m) eqn:EXEBI; try discriminate.
+ erewrite exec_basic_instr_pc_var; eauto.
+Qed.
+
+Lemma pc_set_add:
+ forall rs v r x y,
+ 0 <= x <= Ptrofs.max_unsigned ->
+ 0 <= y <= Ptrofs.max_unsigned ->
+ rs # r <- (Val.offset_ptr v (Ptrofs.repr (x + y))) = rs # r <- (Val.offset_ptr (rs # r <- (Val.offset_ptr v (Ptrofs.repr x)) r) (Ptrofs.repr y)).
+Proof.
+ intros. apply functional_extensionality. intros r0. destruct (preg_eq r r0).
+ - subst. repeat (rewrite Pregmap.gss); auto.
+ destruct v; simpl; auto.
+ rewrite Ptrofs.add_assoc.
+ enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto.
+ unfold Ptrofs.add.
+ enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto.
+ repeat (rewrite Ptrofs.unsigned_repr); auto.
+ - repeat (rewrite Pregmap.gso; auto).
+Qed.
+
+Lemma concat2_straight:
+ forall a b bb rs m rs'' m'' f ge,
+ concat2 a b = OK bb ->
+ exec_bblock ge f bb rs m = Next rs'' m'' ->
+ exists rs' m',
+ exec_bblock ge f a rs m = Next rs' m'
+ /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a))
+ /\ exec_bblock ge f b rs' m' = Next rs'' m''.
+Proof.
+ intros until ge. intros CONC2 EXEB.
+ exploit concat2_zlt_size; eauto. intros (LTA & LTB).
+ exploit concat2_noexit; eauto. intros EXA.
+ exploit concat2_decomp; eauto. intros. inv H.
+ unfold exec_bblock in EXEB. destruct (exec_body ge (body bb) rs m) eqn:EXEB'; try discriminate.
+ rewrite H0 in EXEB'. apply exec_body_app in EXEB'. destruct EXEB' as (rs1 & m1 & EXEB1 & EXEB2).
+ eexists; eexists. split.
+ unfold exec_bblock. rewrite EXEB1. rewrite EXA. simpl. eauto.
+ split.
+ exploit exec_body_pc. eapply EXEB1. intros. rewrite <- H. auto.
+ unfold exec_bblock. unfold nextblock, incrPC. rewrite regset_same_assign. erewrite exec_body_pc_var; eauto.
+ rewrite <- H1. unfold nextblock in EXEB. rewrite regset_double_set_id.
+ assert (size bb = size a + size b).
+ { unfold size. rewrite H0. rewrite H1. rewrite app_length. rewrite EXA. simpl. rewrite Nat.add_0_r.
+ repeat (rewrite Nat2Z.inj_add). lia. }
+ clear EXA H0 H1. rewrite H in EXEB.
+ assert (rs1 PC = rs0 PC). { apply exec_body_pc in EXEB2. auto. }
+ rewrite H0. rewrite <- pc_set_add; auto.
+ exploit size_positive. instantiate (1 := a). intro. lia.
+ exploit size_positive. instantiate (1 := b). intro. lia.
+Qed.
+
+Lemma concat_all_exec_bblock (ge: Genv.t fundef unit) (f: function) :
+ forall a bb rs m lbb rs'' m'',
+ lbb <> nil ->
+ concat_all (a :: lbb) = OK bb ->
+ exec_bblock ge f bb rs m = Next rs'' m'' ->
+ exists bb' rs' m',
+ concat_all lbb = OK bb'
+ /\ exec_bblock ge f a rs m = Next rs' m'
+ /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a))
+ /\ exec_bblock ge f bb' rs' m' = Next rs'' m''.
+Proof.
+ intros until m''. intros Hnonil CONC EXEB.
+ simpl in CONC.
+ destruct lbb as [|b lbb]; try contradiction. clear Hnonil.
+ monadInv CONC. exploit concat2_straight; eauto. intros (rs' & m' & EXEB1 & PCeq & EXEB2).
+ exists x. repeat econstructor. all: eauto.
+Qed.
+
+Lemma ptrofs_add_repr :
+ forall a b,
+ Ptrofs.unsigned (Ptrofs.add (Ptrofs.repr a) (Ptrofs.repr b)) = Ptrofs.unsigned (Ptrofs.repr (a + b)).
+Proof.
+ intros a b.
+ rewrite Ptrofs.add_unsigned. repeat (rewrite Ptrofs.unsigned_repr_eq).
+ rewrite <- Zplus_mod. auto.
+Qed.
+
+Section PRESERVATION_ASMBLOCK.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma transf_function_no_overflow:
+ forall f tf,
+ transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+ lia.
+Qed.
+
+Lemma symbols_preserved:
+ forall id,
+ Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_match TRANSL).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSL).
+
+Lemma functions_translated:
+ forall v f,
+ Genv.find_funct ge v = Some f ->
+ exists tf,
+ Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_transf_partial TRANSL).
+
+Lemma function_ptr_translated:
+ forall v f,
+ Genv.find_funct_ptr ge v = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSL).
+
+Lemma functions_transl:
+ forall fb f tf,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transf_function f = OK tf ->
+ Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+ intros. exploit function_ptr_translated; eauto.
+ intros (tf' & A & B). monadInv B. rewrite H0 in EQ. inv EQ. auto.
+Qed.
+
+Inductive match_states: state -> state -> Prop :=
+ | match_states_intro:
+ forall s1 s2, s1 = s2 -> match_states s1 s2.
+
+Lemma prog_main_preserved:
+ prog_main tprog = prog_main prog.
+Proof (match_program_main TRANSL).
+
+Lemma prog_main_address_preserved:
+ (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) =
+ (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero).
+Proof.
+ unfold Genv.symbol_address. rewrite symbols_preserved.
+ rewrite prog_main_preserved. auto.
+Qed.
+
+Lemma transf_initial_states:
+ forall st1, initial_state prog st1 ->
+ exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+ intros. inv H.
+ econstructor; split.
+ - eapply initial_state_intro.
+ eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+ - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto.
+Qed.
+
+Lemma transf_final_states:
+ forall st1 st2 r,
+ match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+ intros. inv H0. inv H. econstructor; eauto.
+Qed.
+
+Lemma tail_find_bblock:
+ forall lbb pos bb,
+ find_bblock pos lbb = Some bb ->
+ exists c, code_tail pos lbb (bb::c).
+Proof.
+ induction lbb.
+ - intros. simpl in H. inv H.
+ - intros. simpl in H.
+ destruct (zlt pos 0); try (inv H; fail).
+ destruct (zeq pos 0).
+ + inv H. exists lbb. constructor; auto.
+ + apply IHlbb in H. destruct H as (c & TAIL). exists c.
+ enough (pos = pos - size a + size a) as ->.
+ apply code_tail_S; auto.
+ lia.
+Qed.
+
+Lemma code_tail_head_app:
+ forall l pos c1 c2,
+ code_tail pos c1 c2 ->
+ code_tail (pos + size_blocks l) (l++c1) c2.
+Proof.
+ induction l.
+ - intros. simpl. rewrite Z.add_0_r. auto.
+ - intros. apply IHl in H. simpl. rewrite (Z.add_comm (size a)). rewrite Z.add_assoc. apply code_tail_S. assumption.
+Qed.
+
+Lemma transf_blocks_verified:
+ forall c tc pos bb c',
+ transf_blocks c = OK tc ->
+ code_tail pos c (bb::c') ->
+ exists lbb,
+ verified_schedule bb = OK lbb
+ /\ exists tc', code_tail pos tc (lbb ++ tc').
+Proof.
+ induction c; intros.
+ - simpl in H. inv H. inv H0.
+ - inv H0.
+ + monadInv H. exists x0.
+ split; simpl; auto. eexists; eauto. econstructor; eauto.
+ + unfold transf_blocks in H. fold transf_blocks in H. monadInv H.
+ exploit IHc; eauto.
+ intros (lbb & TRANS & tc' & TAIL).
+(* monadInv TRANS. *)
+ repeat eexists; eauto.
+ erewrite verified_schedule_size; eauto.
+ apply code_tail_head_app.
+ eauto.
+Qed.
+
+Lemma transf_find_bblock:
+ forall ofs f bb tf,
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb ->
+ transf_function f = OK tf ->
+ exists lbb,
+ verified_schedule bb = OK lbb
+ /\ exists c, code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c).
+Proof.
+ intros.
+ monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0.
+ monadInv EQ. apply tail_find_bblock in H. destruct H as (c & TAIL).
+ eapply transf_blocks_verified; eauto.
+Qed.
+
+Lemma symbol_address_preserved:
+ forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs.
+Proof.
+ intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity.
+Qed.
+
+Lemma head_tail {A: Type}:
+ forall (l: list A) hd, hd::l = hd :: (tail (hd::l)).
+Proof.
+ intros. simpl. auto.
+Qed.
+
+Lemma verified_schedule_not_empty:
+ forall bb lbb,
+ verified_schedule bb = OK lbb -> lbb <> nil.
+Proof.
+ intros. apply verified_schedule_size in H.
+ pose (size_positive bb). assert (size_blocks lbb > 0) by lia. clear H g.
+ destruct lbb; simpl in *; discriminate.
+Qed.
+
+Lemma header_nil_label_pos_none:
+ forall lbb l p,
+ Forall (fun b => header b = nil) lbb -> label_pos l p lbb = None.
+Proof.
+ induction lbb.
+ - intros. simpl. auto.
+ - intros. inv H. simpl. unfold is_label. rewrite H2. destruct (in_dec l nil). { inv i. }
+ auto.
+Qed.
+
+Lemma verified_schedule_label:
+ forall bb tbb lbb l,
+ verified_schedule bb = OK (tbb :: lbb) ->
+ is_label l bb = is_label l tbb
+ /\ label_pos l 0 lbb = None.
+Proof.
+ intros. exploit verified_schedule_header; eauto.
+ intros (HdrEq & HdrNil).
+ split.
+ - unfold is_label. rewrite HdrEq. reflexivity.
+ - apply header_nil_label_pos_none. assumption.
+Qed.
+
+Lemma label_pos_app_none:
+ forall c c' l p p',
+ label_pos l p c = None ->
+ label_pos l (p' + size_blocks c) c' = label_pos l p' (c ++ c').
+Proof.
+ induction c.
+ - intros. simpl in *. rewrite Z.add_0_r. reflexivity.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLABEL.
+ + discriminate.
+ + eapply IHc in H. rewrite Z.add_assoc. eauto.
+Qed.
+
+Remark label_pos_pvar_none_add:
+ forall tc l p p' k,
+ label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None.
+Proof.
+ induction tc.
+ - intros. simpl. auto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + discriminate.
+ + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_none:
+ forall tc l p p',
+ label_pos l p tc = None -> label_pos l p' tc = None.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_none_add; eauto.
+Qed.
+
+Remark label_pos_pvar_some_add_add:
+ forall tc l p p' k k',
+ label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k).
+Proof.
+ induction tc.
+ - intros. simpl in H. discriminate.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + inv H. assert (k = k') by lia. subst. reflexivity.
+ + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_some_add:
+ forall tc l p p' k,
+ label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k).
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_some_add_add; eauto.
+Qed.
+
+Remark label_pos_pvar_add:
+ forall c tc l p p' k,
+ label_pos l (p+k) c = label_pos l p tc ->
+ label_pos l (p'+k) c = label_pos l p' tc.
+Proof.
+ induction c.
+ - intros. simpl in *.
+ exploit label_pos_pvar_none; eauto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + exploit label_pos_pvar_some_add; eauto.
+ + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar:
+ forall c tc l p p',
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p' c = label_pos l p' tc.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_add; eauto.
+Qed.
+
+Lemma label_pos_head_app:
+ forall c bb lbb l tc p,
+ verified_schedule bb = OK lbb ->
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p (bb :: c) = label_pos l p (lbb ++ tc).
+Proof.
+ intros. simpl. destruct lbb as [|tbb lbb].
+ - apply verified_schedule_not_empty in H. contradiction.
+ - simpl. exploit verified_schedule_label; eauto. intros (ISLBL & LBLPOS).
+ rewrite ISLBL.
+ destruct (is_label l tbb) eqn:ISLBL'; simpl; auto.
+ eapply label_pos_pvar in H0. erewrite H0.
+ erewrite verified_schedule_size; eauto. simpl size_blocks. rewrite Z.add_assoc.
+ erewrite label_pos_app_none; eauto.
+Qed.
+
+Lemma label_pos_preserved:
+ forall c tc l,
+ transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc.
+Proof.
+ induction c.
+ - intros. simpl in *. inv H. reflexivity.
+ - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H. eapply IHc in EQ.
+ eapply label_pos_head_app; eauto.
+Qed.
+
+Lemma label_pos_preserved_blocks:
+ forall l f tf,
+ transf_function f = OK tf ->
+ label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf).
+Proof.
+ intros. monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned _); try discriminate.
+ monadInv EQ0. simpl. eapply label_pos_preserved; eauto.
+Qed.
+
+Lemma transf_exec_control:
+ forall f tf ex rs m,
+ transf_function f = OK tf ->
+ exec_control ge f ex rs m = exec_control tge tf ex rs m.
+Proof.
+ intros. destruct ex; simpl; auto.
+ assert (ge = Genv.globalenv prog). auto.
+ assert (tge = Genv.globalenv tprog). auto.
+ pose symbol_address_preserved.
+ exploreInst; simpl; auto; try congruence;
+ unfold par_goto_label; unfold par_eval_branch; unfold par_goto_label; erewrite label_pos_preserved_blocks; eauto.
+Qed.
+
+Lemma transf_exec_basic_instr:
+ forall i rs m, exec_basic_instr ge i rs m = exec_basic_instr tge i rs m.
+Proof.
+ intros. pose symbol_address_preserved.
+ unfold exec_basic_instr. unfold bstep. exploreInst; simpl; auto; try congruence.
+ unfold parexec_arith_instr; unfold arith_eval_r; exploreInst; simpl; auto; try congruence.
+Qed.
+
+Lemma transf_exec_body:
+ forall bdy rs m, exec_body ge bdy rs m = exec_body tge bdy rs m.
+Proof.
+ induction bdy; intros.
+ - simpl. reflexivity.
+ - simpl. rewrite transf_exec_basic_instr.
+ destruct (exec_basic_instr _ _ _); auto.
+Qed.
+
+Lemma transf_exec_bblock:
+ forall f tf bb rs m,
+ transf_function f = OK tf ->
+ exec_bblock ge f bb rs m = exec_bblock tge tf bb rs m.
+Proof.
+ intros. unfold exec_bblock. rewrite transf_exec_body. destruct (exec_body _ _ _ _); auto.
+ eapply transf_exec_control; eauto.
+Qed.
+
+Lemma transf_step_simu:
+ forall tf b lbb ofs c tbb rs m rs' m',
+ Genv.find_funct_ptr tge b = Some (Internal tf) ->
+ size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned ->
+ rs PC = Vptr b ofs ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c) ->
+ concat_all lbb = OK tbb ->
+ exec_bblock tge tf tbb rs m = Next rs' m' ->
+ plus step tge (State rs m) E0 (State rs' m').
+Proof.
+ induction lbb.
+ - intros until m'. simpl. intros. discriminate.
+ - intros until m'. intros GFIND SIZE PCeq TAIL CONC EXEB.
+ destruct lbb.
+ + simpl in *. clear IHlbb. inv CONC. eapply plus_one. econstructor; eauto. eapply find_bblock_tail; eauto.
+ + exploit concat_all_exec_bblock; eauto; try discriminate.
+ intros (tbb0 & rs0 & m0 & CONC0 & EXEB0 & PCeq' & EXEB1).
+ eapply plus_left.
+ econstructor.
+ 3: eapply find_bblock_tail. rewrite <- app_comm_cons in TAIL. 3: eauto.
+ all: eauto.
+ eapply plus_star. eapply IHlbb; eauto. rewrite PCeq in PCeq'. simpl in PCeq'. all: eauto.
+ eapply code_tail_next_int; eauto.
+Qed.
+
+Theorem transf_step_correct:
+ forall s1 t s2, step ge s1 t s2 ->
+ forall s1' (MS: match_states s1 s1'),
+ (exists s2', plus step tge s1' t s2' /\ match_states s2 s2').
+Proof.
+ induction 1; intros; inv MS.
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
+ exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ). inv CONC. rename H3 into CONC.
+ assert (NOOV: size_blocks x.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+
+ erewrite transf_exec_bblock in H2; eauto.
+ unfold bblock_simu in BBEQ. rewrite BBEQ in H2; try congruence.
+ exists (State rs' m'). split; try (constructor; auto).
+ eapply transf_step_simu; eauto.
+
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
+ exploit verified_schedule_builtin_idem; eauto. intros. subst lbb.
+
+ remember (State (nextblock _ _) _) as s'. exists s'.
+ split; try constructor; auto.
+ eapply plus_one. subst s'.
+ eapply exec_step_builtin.
+ 3: eapply find_bblock_tail. simpl in TAIL. 3: eauto.
+ all: eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge). exact symbols_preserved. eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ remember (State _ m') as s'. exists s'. split; try constructor; auto.
+ subst s'. eapply plus_one. eapply exec_step_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Theorem transf_program_correct_Asmblock:
+ forward_simulation (Asmblock.semantics prog) (Asmblock.semantics tprog).
+Proof.
+ eapply forward_simulation_plus.
+ - apply senv_preserved.
+ - apply transf_initial_states.
+ - apply transf_final_states.
+ - apply transf_step_correct.
+Qed.
+
+End PRESERVATION_ASMBLOCK.
+
+Require Import Asmvliw.
+
+Lemma verified_par_checks_alls_bundles lb x: forall bundle,
+ verify_par lb = OK x ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lb; simpl; try tauto.
+ intros bundle H; monadInv H.
+ destruct 1; subst; eauto.
+ destruct x0; auto.
+Qed.
+
+Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
+ verified_schedule_nob bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule_nob. intros H;
+ monadInv H. destruct x4.
+ intros; eapply verified_par_checks_alls_bundles; eauto.
+Qed.
+
+Lemma verify_par_bblock_PExpand bb i:
+ exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt.
+Proof.
+ destruct bb as [h bdy ext H]; simpl.
+ intros; subst. destruct i.
+ generalize H.
+ rewrite <- wf_bblock_refl in H.
+ destruct H as [H H0].
+ unfold builtin_alone in H0. erewrite H0; eauto.
+Qed.
+
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
+
+Lemma verified_schedule_checks_alls_bundles bb lb bundle:
+ verified_schedule bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule. remember (exit bb) as exb.
+ destruct exb as [c|]; eauto.
+ destruct c as [i|]; eauto.
+ destruct i; intros H. inversion_clear H; simpl.
+ intuition subst.
+ intros; eapply verify_par_bblock_PExpand; eauto.
+Qed.
+
+Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle,
+ transf_blocks lbb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lbb; simpl.
+ - intros lb bundle H; inversion_clear H. simpl; try tauto.
+ - intros lb bundle H0.
+ monadInv H0.
+ rewrite in_app. destruct 1; eauto.
+ eapply verified_schedule_checks_alls_bundles; eauto.
+Qed.
+
+Lemma find_bblock_Some_in lb:
+ forall ofs b, find_bblock ofs lb = Some b -> List.In b lb.
+Proof.
+ induction lb; simpl; try congruence.
+ intros ofs b.
+ destruct (zlt ofs 0); try congruence.
+ destruct (zeq ofs 0); eauto.
+ intros X; inversion X; eauto.
+Qed.
+
+Section PRESERVATION_ASMVLIW.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma all_bundles_are_checked b ofs f bundle:
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock ofs (fn_blocks f) = Some bundle ->
+ verify_par_bblock bundle = OK tt.
+Proof.
+ unfold match_prog, match_program in TRANSL.
+ unfold Genv.find_funct_ptr; simpl; intros X.
+ destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+ destruct y as [tf0|]; try congruence.
+ inversion X as [H1]. subst. clear X.
+ remember (@Gfun fundef unit (Internal f)) as f2.
+ destruct H as [ctx' f1 f2 H0|]; try congruence.
+ inversion Heqf2 as [H2]. subst; clear Heqf2.
+ unfold transf_fundef, transf_partial_fundef in H.
+ destruct f1 as [f1|f1]; try congruence.
+ unfold transf_function, transl_function in H.
+ monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence.
+ injection EQ1; intros; subst.
+ monadInv EQ0. simpl in * |-.
+ intros; exploit transf_blocks_checks_all_bundles; eauto.
+ intros; eapply find_bblock_Some_in; eauto.
+Qed.
+
+Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m':
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ verify_par_bblock bundle = OK tt ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0.
+ simpl in H.
+ eapply Asmblockdeps.bblock_para_check_correct; eauto.
+Qed.
+
+Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m':
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros; eapply checked_bundles_are_parexec_equiv; eauto.
+ eapply all_bundles_are_checked; eauto.
+Qed.
+
+Theorem transf_program_correct_Asmvliw:
+ forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog).
+Proof.
+ eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto.
+ - intros; subst; auto.
+ - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto.
+ + eapply exec_step_internal; eauto.
+ intros; eapply seqexec_parexec_equiv; eauto.
+ + eapply exec_step_builtin; eauto.
+ + eapply exec_step_external; eauto.
+Qed.
+
+End PRESERVATION_ASMVLIW.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+ forward_simulation (Asmblock.semantics prog) (Asmvliw.semantics tprog).
+Proof.
+ eapply compose_forward_simulations.
+ eapply transf_program_correct_Asmblock; eauto.
+ eapply transf_program_correct_Asmvliw; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
new file mode 120000
index 00000000..912e9ffa
--- /dev/null
+++ b/kvx/PrepassSchedulingOracle.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
new file mode 120000
index 00000000..1e955b85
--- /dev/null
+++ b/kvx/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1 @@
+../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file
diff --git a/kvx/PrintOp.ml b/kvx/PrintOp.ml
new file mode 100644
index 00000000..da7d6c32
--- /dev/null
+++ b/kvx/PrintOp.ml
@@ -0,0 +1,229 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Pretty-printing of operators, conditions, addressing modes *)
+
+open Printf
+open Camlcoq
+open Integers
+open Op
+open ExtValues
+
+let comparison_name = function
+ | Ceq -> "=="
+ | Cne -> "!="
+ | Clt -> "<"
+ | Cle -> "<="
+ | Cgt -> ">"
+ | Cge -> ">="
+
+let print_condition reg pp = function
+ | (Ccomp c, [r1;r2]) ->
+ fprintf pp "%a %ss %a" reg r1 (comparison_name c) reg r2
+ | (Ccompu c, [r1;r2]) ->
+ fprintf pp "%a %su %a" reg r1 (comparison_name c) reg r2
+ | (Ccompimm(c, n), [r1]) ->
+ fprintf pp "%a %ss %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
+ | (Ccompuimm(c, n), [r1]) ->
+ fprintf pp "%a %su %ld" reg r1 (comparison_name c) (camlint_of_coqint n)
+ | (Ccompf c, [r1;r2]) ->
+ fprintf pp "%a %sf %a" reg r1 (comparison_name c) reg r2
+ | (Ccompl c, [r1;r2]) ->
+ fprintf pp "%a %sls %a" reg r1 (comparison_name c) reg r2
+ | (Ccomplu c, [r1;r2]) ->
+ fprintf pp "%a %slu %a" reg r1 (comparison_name c) reg r2
+ | (Ccomplimm(c, n), [r1]) ->
+ fprintf pp "%a %sls %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n)
+ | (Ccompluimm(c, n), [r1]) ->
+ fprintf pp "%a %slu %Lu" reg r1 (comparison_name c) (camlint64_of_coqint n)
+ | (Cnotcompf c, [r1;r2]) ->
+ fprintf pp "%a not(%sf) %a" reg r1 (comparison_name c) reg r2
+ | (Ccompfs c, [r1;r2]) ->
+ fprintf pp "%a %sfs %a" reg r1 (comparison_name c) reg r2
+ | (Cnotcompfs c, [r1;r2]) ->
+ fprintf pp "%a not(%sfs) %a" reg r1 (comparison_name c) reg r2
+ | _ ->
+ fprintf pp "<bad condition>"
+
+let print_condition0 reg pp cond0 rc =
+ match cond0 with
+ | Ccomp0 c -> fprintf pp "%a %ss 0" reg rc (comparison_name c)
+ | Ccompu0 c -> fprintf pp "%a %su 0" reg rc (comparison_name c)
+ | Ccompl0 c -> fprintf pp "%a %ss 0" reg rc (comparison_name c)
+ | Ccomplu0 c -> fprintf pp "%a %su 0" reg rc (comparison_name c)
+
+let int_of_s14 = function
+ | SHIFT1 -> 1
+ | SHIFT2 -> 2
+ | SHIFT3 -> 3
+ | SHIFT4 -> 4
+
+let print_operation reg pp op = match op with
+ | Omove, [r1] -> reg pp r1
+ | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
+ | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
+ | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n)
+ | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n)
+ | Oaddrsymbol(id, ofs), [] ->
+ fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+ | Oaddrstack ofs, [] ->
+ fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
+ | Ocast8signed, [r1] -> fprintf pp "int8signed(%a)" reg r1
+ | Ocast16signed, [r1] -> fprintf pp "int16signed(%a)" reg r1
+ | Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+ | Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n)
+ | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2
+ | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm)
+ | Oneg, [r1] -> fprintf pp "-(%a)" reg r1
+ | Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2
+ | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1
+ | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14)
+ | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14)
+ | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
+ | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm)
+ | Omulhs, [r1;r2] -> fprintf pp "%a *hs %a" reg r1 reg r2
+ | Omulhu, [r1;r2] -> fprintf pp "%a *hu %a" reg r1 reg r2
+ | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
+ | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2
+ | Omod, [r1;r2] -> fprintf pp "%a %%s %a" reg r1 reg r2
+ | Omodu, [r1;r2] -> fprintf pp "%a %%u %a" reg r1 reg r2
+ | Oand, [r1;r2] -> fprintf pp "%a & %a" reg r1 reg r2
+ | Oandimm n, [r1] -> fprintf pp "%a & %ld" reg r1 (camlint_of_coqint n)
+ | Oor, [r1;r2] -> fprintf pp "%a | %a" reg r1 reg r2
+ | Oorimm n, [r1] -> fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n)
+ | Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2
+ | Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n)
+ | Onxor, [r1;r2] -> fprintf pp "~(%a ^ %a)" reg r1 reg r2
+ | Onxorimm n, [r1] -> fprintf pp "~(%a ^ %ld)" reg r1 (camlint_of_coqint n)
+ | Onot, [r1] -> fprintf pp "~%a" reg r1
+ | Oandn, [r1; r2] -> fprintf pp "(~%a) & %a" reg r1 reg r2
+ | Oandnimm n, [r1] -> fprintf pp "(~%a) & %ld" reg r1 (camlint_of_coqint n)
+ | Oorn, [r1;r2] -> fprintf pp "(~%a) | %a" reg r1 reg r2
+ | Oornimm n, [r1] -> fprintf pp "(~%a) | %ld" reg r1 (camlint_of_coqint n)
+ | Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2
+ | Oshlimm n, [r1] -> fprintf pp "%a << %ld" reg r1 (camlint_of_coqint n)
+ | Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2
+ | Oshrimm n, [r1] -> fprintf pp "%a >>s %ld" reg r1 (camlint_of_coqint n)
+ | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
+ | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
+ | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
+ | Ororimm n, [r1] -> fprintf pp "(%a ror %ld)" reg r1 (camlint_of_coqint n)
+ | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3
+ | Omaddimm imm, [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm)
+ | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3
+
+ | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+ | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+ | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
+ | Ocast32signed, [r1] -> fprintf pp "long32signed(%a)" reg r1
+ | Ocast32unsigned, [r1] -> fprintf pp "long32unsigned(%a)" reg r1
+ | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
+ | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2
+ | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm)
+ | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1
+ | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1 (int_of_s14 s14)
+ | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14)
+ | Onegl, [r1] -> fprintf pp "-l (%a)" reg r1
+ | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
+ | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
+ | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm)
+ | Omullhs, [r1;r2] -> fprintf pp "%a *lhs %a" reg r1 reg r2
+ | Omullhu, [r1;r2] -> fprintf pp "%a *lhu %a" reg r1 reg r2
+ | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
+ | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2
+ | Omodl, [r1;r2] -> fprintf pp "%a %%ls %a" reg r1 reg r2
+ | Omodlu, [r1;r2] -> fprintf pp "%a %%lu %a" reg r1 reg r2
+ | Oandl, [r1;r2] -> fprintf pp "%a &l %a" reg r1 reg r2
+ | Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2
+ | Oorlimm n, [r1] -> fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
+ | Onorl, [r1; r2] -> fprintf pp "~(%a |l %a)" reg r1 reg r2
+ | Onorlimm n, [r1] -> fprintf pp "~(%a |l %Ld)" reg r1 (camlint64_of_coqint n)
+ | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
+ | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
+ | Onxorl, [r1;r2] -> fprintf pp "~(%a ^l %a)" reg r1 reg r2
+ | Onxorlimm n, [r1] -> fprintf pp "~(%a ^l %Ld)" reg r1 (camlint64_of_coqint n)
+ | Onotl, [r1] -> fprintf pp "~%a" reg r1
+ | Oandnl, [r1;r2] -> fprintf pp "(~%a) &l %a" reg r1 reg r2
+ | Oandnlimm n, [r1] -> fprintf pp "(~%a) &l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oornl, [r1;r2] -> fprintf pp "(~%a) |l %a" reg r1 reg r2
+ | Oornlimm n, [r1;r2] -> fprintf pp "(~%a) |l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
+ | Oshllimm n, [r1] -> fprintf pp "%a <<l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
+ | Oshrlimm n, [r1] -> fprintf pp "%a >>ls %ld" reg r1 (camlint_of_coqint n)
+ | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
+ | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n)
+ | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
+ | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3
+ | Omaddlimm imm, [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm)
+ | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3
+
+ | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
+ | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
+ | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2
+ | Osubf, [r1;r2] -> fprintf pp "%a -f %a" reg r1 reg r2
+ | Omulf, [r1;r2] -> fprintf pp "%a *f %a" reg r1 reg r2
+ | Odivf, [r1;r2] -> fprintf pp "%a /f %a" reg r1 reg r2
+ | Onegfs, [r1] -> fprintf pp "negfs(%a)" reg r1
+ | Oabsfs, [r1] -> fprintf pp "absfs(%a)" reg r1
+ | Oaddfs, [r1;r2] -> fprintf pp "%a +fs %a" reg r1 reg r2
+ | Osubfs, [r1;r2] -> fprintf pp "%a -fs %a" reg r1 reg r2
+ | Omulfs, [r1;r2] -> fprintf pp "%a *fs %a" reg r1 reg r2
+ | Odivfs, [r1;r2] -> fprintf pp "%a /fs %a" reg r1 reg r2
+ | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
+ | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
+ | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
+ | Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1
+ | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
+ | Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1
+ | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
+ | Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1
+ | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+ | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
+ | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
+ | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
+ | Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1
+ | Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1
+ | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
+ | Osingleoflongu, [r1] -> fprintf pp "singleoflongu(%a)" reg r1
+ | Ocmp c, args -> print_condition reg pp (c, args)
+
+ | Oextfz(stop, start), [r1] -> fprintf pp "extfz(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1
+ | Oextfs(stop, start), [r1] -> fprintf pp "extfs(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1
+ | Oextfzl(stop, start), [r1] -> fprintf pp "extfzl(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1
+ | Oextfsl(stop, start), [r1] -> fprintf pp "extfsl(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1
+ | Oinsf(stop, start), [r1; r2] -> fprintf pp "insf(%ld, %ld, %a, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 reg r2
+ | Oinsfl(stop, start), [r1; r2] -> fprintf pp "insfl(%ld, %ld, %a, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 reg r2
+ | Osel(cond0, ty), [r1; r2; rc] ->
+ print_condition0 reg pp cond0 rc;
+ fprintf pp " ? %a : %a" reg r1 reg r2
+ | Oselimm(cond0, imm), [r1; rc] ->
+ print_condition0 reg pp cond0 rc;
+ fprintf pp " ? %a : %ld" reg r1 (camlint_of_coqint imm)
+ | Osellimm(cond0, imm), [r1; rc] ->
+ print_condition0 reg pp cond0 rc;
+ fprintf pp " ? %a :l %Ld" reg r1 (camlint64_of_coqint imm)
+ | _, _ -> fprintf pp "<bad operator>"
+
+let print_addressing reg pp = function
+ | Aindexed2XS scale, [r1;r2] -> fprintf pp "%a + (%a << %ld)" reg r1 reg r2 (camlint_of_coqint scale)
+ | Aindexed2, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+ | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
+ | Aglobal(id, ofs), [] ->
+ fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
+ | Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
+ | _ -> fprintf pp "<bad addressing>"
diff --git a/kvx/SelectLong.vp b/kvx/SelectLong.vp
new file mode 100644
index 00000000..b3638eca
--- /dev/null
+++ b/kvx/SelectLong.vp
@@ -0,0 +1,463 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Instruction selection for 64-bit integer operations *)
+
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST Integers Floats.
+Require Import Op CminorSel.
+Require Import OpHelpers.
+Require Import SelectOp SplitLong.
+Require Import ExtValues.
+Require Import DecBoolOps.
+
+Local Open Scope cminorsel_scope.
+Local Open Scope string_scope.
+
+Section SELECT.
+
+Context {hf: helper_functions}.
+
+Definition longconst (n: int64) : expr :=
+ if Archi.splitlong then SplitLong.longconst n else Eop (Olongconst n) Enil.
+
+Definition is_longconst (e: expr) :=
+ if Archi.splitlong then SplitLong.is_longconst e else
+ match e with
+ | Eop (Olongconst n) Enil => Some n
+ | _ => None
+ end.
+
+Definition intoflong (e: expr) :=
+ if Archi.splitlong then SplitLong.intoflong e else
+ match is_longconst e with
+ | Some n => Eop (Ointconst (Int.repr (Int64.unsigned n))) Enil
+ | None => Eop Olowlong (e ::: Enil)
+ end.
+
+Definition longofint (e: expr) :=
+ if Archi.splitlong then SplitLong.longofint e else
+ match is_intconst e with
+ | Some n => longconst (Int64.repr (Int.signed n))
+ | None => Eop Ocast32signed (e ::: Enil)
+ end.
+
+Definition longofintu (e: expr) :=
+ if Archi.splitlong then SplitLong.longofintu e else
+ match is_intconst e with
+ | Some n => longconst (Int64.repr (Int.unsigned n))
+ | None => Eop Ocast32unsigned (e ::: Enil)
+ end.
+
+(** ** Integer addition and pointer addition *)
+
+Definition addlimm_shllimm sh k2 e1 :=
+ if Compopts.optim_addx tt
+ then
+ match shift1_4_of_z (Int.unsigned sh) with
+ | Some s14 => Eop (Oaddxlimm s14 k2) (e1:::Enil)
+ | None => Eop (Oaddlimm k2) ((Eop (Oshllimm sh) (e1:::Enil)):::Enil)
+ end
+ else Eop (Oaddlimm k2) ((Eop (Oshllimm sh) (e1:::Enil)):::Enil).
+
+Nondetfunction addlimm (n: int64) (e: expr) :=
+ if Int64.eq n Int64.zero then e else
+ match e with
+ | Eop (Olongconst m) Enil => longconst (Int64.add n m)
+ | Eop (Oaddrsymbol s m) Enil =>
+ (if Compopts.optim_globaladdroffset tt
+ then Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+ else Eop (Oaddlimm n) (e ::: Enil))
+ | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil
+ | Eop (Oaddlimm m) (t ::: Enil) => Eop (Oaddlimm(Int64.add n m)) (t ::: Enil)
+ | Eop (Oaddxlimm sh m) (t ::: Enil) => Eop (Oaddxlimm sh (Int64.add n m)) (t ::: Enil)
+ | Eop (Oshllimm sh) (t1:::Enil) => addlimm_shllimm sh n t1
+ | _ => Eop (Oaddlimm n) (e ::: Enil)
+ end.
+
+Definition addl_shllimm n e1 e2 :=
+ if Compopts.optim_addx tt
+ then
+ match shift1_4_of_z (Int.unsigned n) with
+ | Some s14 => Eop (Oaddxl s14) (e1:::e2:::Enil)
+ | None => Eop Oaddl (e2:::(Eop (Oshllimm n) (e1:::Enil)):::Enil)
+ end
+ else Eop Oaddl (e2:::(Eop (Oshllimm n) (e1:::Enil)):::Enil).
+
+Nondetfunction addl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.addl e1 e2 else
+ match e1, e2 with
+ | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2
+ | t1, Eop (Olongconst n2) Enil => addlimm n2 t1
+ | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+ addlimm (Int64.add n1 n2) (Eop Oaddl (t1:::t2:::Enil))
+ | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil =>
+ Eop Oaddl (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n1) n2)) Enil ::: t1 ::: Enil)
+ | Eop (Oaddrstack n1) Enil, Eop (Oaddlimm n2) (t2:::Enil) =>
+ Eop Oaddl (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int64 n2))) Enil ::: t2 ::: Enil)
+ | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+ addlimm n1 (Eop Oaddl (t1:::t2:::Enil))
+ | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+ addlimm n2 (Eop Oaddl (t1:::t2:::Enil))
+ | t1, (Eop Omull (t2:::t3:::Enil)) =>
+ Eop Omaddl (t1:::t2:::t3:::Enil)
+ | (Eop Omull (t2:::t3:::Enil)), t1 =>
+ Eop Omaddl (t1:::t2:::t3:::Enil)
+ | t1, (Eop (Omullimm n) (t2:::Enil)) =>
+ Eop (Omaddlimm n) (t1:::t2:::Enil)
+ | (Eop (Omullimm n) (t2:::Enil)), t1 =>
+ Eop (Omaddlimm n) (t1:::t2:::Enil)
+ | (Eop (Oshllimm n) (t1:::Enil)), t2 =>
+ addl_shllimm n t1 t2
+ | t2, (Eop (Oshllimm n) (t1:::Enil)) =>
+ addl_shllimm n t1 t2
+ | _, _ => Eop Oaddl (e1:::e2:::Enil)
+ end.
+
+(** ** Integer and pointer subtraction *)
+
+Nondetfunction subl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.subl e1 e2 else
+ match e1, e2 with
+ | t1, Eop (Olongconst n2) Enil =>
+ addlimm (Int64.neg n2) t1
+ | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) =>
+ addlimm (Int64.sub n1 n2) (Eop Osubl (t1:::t2:::Enil))
+ | Eop (Oaddlimm n1) (t1:::Enil), t2 =>
+ addlimm n1 (Eop Osubl (t1:::t2:::Enil))
+ | t1, Eop (Oaddlimm n2) (t2:::Enil) =>
+ addlimm (Int64.neg n2) (Eop Osubl (t1:::t2:::Enil))
+ | t1, (Eop Omull (t2:::t3:::Enil)) =>
+ Eop Omsubl (t1:::t2:::t3:::Enil)
+ | t1, (Eop (Omullimm n) (t2:::Enil)) =>
+ Eop (Omaddlimm (Int64.neg n)) (t1:::t2:::Enil)
+ | _, _ => Eop Osubl (e1:::e2:::Enil)
+ end.
+
+Definition negl (e: expr) :=
+ if Archi.splitlong then SplitLong.negl e else
+ match is_longconst e with
+ | Some n => longconst (Int64.neg n)
+ | None => Eop Onegl (e ::: Enil)
+ end.
+
+(** ** Immediate shifts *)
+
+Nondetfunction shllimm (e1: expr) (n: int) :=
+ if Archi.splitlong then SplitLong.shllimm e1 n else
+ if Int.eq n Int.zero then
+ e1
+ else if negb (Int.ltu n Int64.iwordsize') then
+ Eop Oshll (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+ else match e1 with
+ | Eop (Olongconst n1) Enil =>
+ longconst (Int64.shl' n1 n)
+ | Eop (Oshllimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int64.iwordsize'
+ then Eop (Oshllimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshllimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshllimm n) (e1:::Enil)
+ end.
+
+Nondetfunction shrluimm (e1: expr) (n: int) :=
+ if Archi.splitlong then SplitLong.shrluimm e1 n else
+ if Int.eq n Int.zero then e1 else
+ if negb (Int.ltu n Int64.iwordsize') then
+ Eop Oshrlu (e1:::Eop (Ointconst n) Enil:::Enil)
+ else
+ match e1 with
+ | Eop (Olongconst n1) Enil =>
+ longconst (Int64.shru' n1 n)
+ | Eop (Oshrluimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int64.iwordsize'
+ then Eop (Oshrluimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshrluimm n) (e1:::Enil)
+ | Eop (Oshllimm n1) (t1:::Enil) =>
+ let stop := Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one) in
+ let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int64.zwordsize in
+ if is_bitfieldl stop start
+ then Eop (Oextfzl stop start) (t1:::Enil)
+ else Eop (Oshrluimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshrluimm n) (e1:::Enil)
+ end.
+
+Nondetfunction shrlimm (e1: expr) (n: int) :=
+ if Archi.splitlong then SplitLong.shrlimm e1 n else
+ if Int.eq n Int.zero then e1 else
+ if negb (Int.ltu n Int64.iwordsize') then
+ Eop Oshrl (e1:::Eop (Ointconst n) Enil:::Enil)
+ else
+ match e1 with
+ | Eop (Olongconst n1) Enil =>
+ longconst (Int64.shr' n1 n)
+ | Eop (Oshrlimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int64.iwordsize'
+ then Eop (Oshrlimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshrlimm n) (e1:::Enil)
+ | Eop (Oshllimm n1) (t1:::Enil) =>
+ let stop := Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one) in
+ let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int64.zwordsize in
+ if is_bitfieldl stop start
+ then Eop (Oextfsl stop start) (t1:::Enil)
+ else Eop (Oshrlimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshrlimm n) (e1:::Enil)
+ end.
+
+(** ** General shifts *)
+
+Definition shll (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.shll e1 e2 else
+ match is_intconst e2 with
+ | Some n2 => shllimm e1 n2
+ | None => Eop Oshll (e1:::e2:::Enil)
+ end.
+
+Definition shrl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.shrl e1 e2 else
+ match is_intconst e2 with
+ | Some n2 => shrlimm e1 n2
+ | None => Eop Oshrl (e1:::e2:::Enil)
+ end.
+
+Definition shrlu (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.shrlu e1 e2 else
+ match is_intconst e2 with
+ | Some n2 => shrluimm e1 n2
+ | _ => Eop Oshrlu (e1:::e2:::Enil)
+ end.
+
+(** ** Integer multiply *)
+
+Definition mullimm_base (n1: int64) (e2: expr) :=
+ match Int64.one_bits' n1 with
+ | i :: nil =>
+ shllimm e2 i
+ | i :: j :: nil =>
+ Elet e2 (addl (shllimm (Eletvar 0) i) (shllimm (Eletvar 0) j))
+ | _ =>
+ Eop (Omullimm n1) (e2 ::: Enil)
+ end.
+
+Nondetfunction mullimm (n1: int64) (e2: expr) :=
+ if Archi.splitlong then SplitLong.mullimm n1 e2
+ else if Int64.eq n1 Int64.zero then longconst Int64.zero
+ else if Int64.eq n1 Int64.one then e2
+ else match e2 with
+ | Eop (Olongconst n2) Enil => longconst (Int64.mul n1 n2)
+ | Eop (Oaddlimm n2) (t2:::Enil) => addlimm (Int64.mul n1 n2) (mullimm_base n1 t2)
+ | _ => mullimm_base n1 e2
+ end.
+
+Nondetfunction mull (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.mull e1 e2 else
+ match e1, e2 with
+ | Eop (Olongconst n1) Enil, t2 => mullimm n1 t2
+ | t1, Eop (Olongconst n2) Enil => mullimm n2 t1
+ | _, _ => Eop Omull (e1:::e2:::Enil)
+ end.
+
+Definition mullhu (e1: expr) (n2: int64) :=
+ if Archi.splitlong then SplitLong.mullhu e1 n2 else
+ Eop Omullhu (e1 ::: longconst n2 ::: Enil).
+
+Definition mullhs (e1: expr) (n2: int64) :=
+ if Archi.splitlong then SplitLong.mullhs e1 n2 else
+ Eop Omullhs (e1 ::: longconst n2 ::: Enil).
+
+(** ** Bitwise and, or, xor *)
+
+Nondetfunction andlimm (n1: int64) (e2: expr) :=
+ if Int64.eq n1 Int64.zero then longconst Int64.zero else
+ if Int64.eq n1 Int64.mone then e2 else
+ match e2 with
+ | Eop (Olongconst n2) Enil =>
+ longconst (Int64.and n1 n2)
+ | Eop (Oandlimm n2) (t2:::Enil) =>
+ Eop (Oandlimm (Int64.and n1 n2)) (t2:::Enil)
+ | Eop Onotl (t2:::Enil) => Eop (Oandnlimm n1) (t2:::Enil)
+ | _ =>
+ Eop (Oandlimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction andl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.andl e1 e2 else
+ match e1, e2 with
+ | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2
+ | t1, Eop (Olongconst n2) Enil => andlimm n2 t1
+ | (Eop Onotl (t1:::Enil)), t2 => Eop Oandnl (t1:::t2:::Enil)
+ | t1, (Eop Onotl (t2:::Enil)) => Eop Oandnl (t2:::t1:::Enil)
+ | _, _ => Eop Oandl (e1:::e2:::Enil)
+ end.
+
+Nondetfunction orlimm (n1: int64) (e2: expr) :=
+ if Int64.eq n1 Int64.zero then e2 else
+ if Int64.eq n1 Int64.mone then longconst Int64.mone else
+ match e2 with
+ | Eop (Olongconst n2) Enil => longconst (Int64.or n1 n2)
+ | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil)
+ | Eop Onotl (t2:::Enil) => Eop (Oornlimm n1) (t2:::Enil)
+ | _ => Eop (Oorlimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction orl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.orl e1 e2 else
+ match e1, e2 with
+ | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2
+ | t1, Eop (Olongconst n2) Enil => orlimm n2 t1
+ | (Eop Onotl (t1:::Enil)), t2 => Eop Oornl (t1:::t2:::Enil)
+ | t1, (Eop Onotl (t2:::Enil)) => Eop Oornl (t2:::t1:::Enil)
+ | (Eop (Oandlimm nmask) (prev:::Enil)),
+ (Eop (Oandlimm mask)
+ ((Eop (Oshllimm start) (fld:::Enil)):::Enil)) =>
+ let zstart := Int.unsigned start in
+ let zstop := int64_highest_bit mask in
+ if is_bitfieldl zstop zstart
+ then
+ let mask' := Int64.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int64.eq_dec mask mask')
+ (Int64.eq_dec nmask (Int64.not mask'))
+ then Eop (Oinsfl zstop zstart) (prev:::fld:::Enil)
+ else Eop Oorl (e1:::e2:::Enil)
+ else Eop Oorl (e1:::e2:::Enil)
+ | (Eop (Oandlimm nmask) (prev:::Enil)),
+ (Eop (Oandlimm mask) (fld:::Enil)) =>
+ let zstart := 0 in
+ let zstop := int64_highest_bit mask in
+ if is_bitfieldl zstop zstart
+ then
+ let mask' := Int64.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int64.eq_dec mask mask')
+ (Int64.eq_dec nmask (Int64.not mask'))
+ then Eop (Oinsfl zstop zstart) (prev:::fld:::Enil)
+ else Eop Oorl (e1:::e2:::Enil)
+ else Eop Oorl (e1:::e2:::Enil)
+ | _, _ => Eop Oorl (e1:::e2:::Enil)
+ end.
+
+Nondetfunction xorlimm (n1: int64) (e2: expr) :=
+ if Int64.eq n1 Int64.zero then e2 else
+ if Int64.eq n1 Int64.mone
+ then Eop Onotl (e2:::Enil)
+ else
+ match e2 with
+ | Eop (Olongconst n2) Enil => longconst (Int64.xor n1 n2)
+ | Eop (Oxorlimm n2) (t2:::Enil) =>
+ let n := Int64.xor n1 n2 in
+ if Int64.eq n Int64.zero then t2 else Eop (Oxorlimm n) (t2:::Enil)
+ | _ => Eop (Oxorlimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction xorl (e1: expr) (e2: expr) :=
+ if Archi.splitlong then SplitLong.xorl e1 e2 else
+ match e1, e2 with
+ | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2
+ | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1
+ | _, _ => Eop Oxorl (e1:::e2:::Enil)
+ end.
+
+(** ** Integer logical negation *)
+
+Nondetfunction notl (e: expr) :=
+ match e with
+ | Eop Oandl (e1:::e2:::Enil) => Eop Onandl (e1:::e2:::Enil)
+ | Eop (Oandlimm n) (e1:::Enil) => Eop (Onandlimm n) (e1:::Enil)
+ | Eop Oorl (e1:::e2:::Enil) => Eop Onorl (e1:::e2:::Enil)
+ | Eop (Oorlimm n) (e1:::Enil) => Eop (Onorlimm n) (e1:::Enil)
+ | Eop Oxorl (e1:::e2:::Enil) => Eop Onxorl (e1:::e2:::Enil)
+ | Eop (Oxorlimm n) (e1:::Enil) => Eop (Onxorlimm n) (e1:::Enil)
+ | Eop Onandl (e1:::e2:::Enil) => Eop Oandl (e1:::e2:::Enil)
+ | Eop (Onandlimm n) (e1:::Enil) => Eop (Oandlimm n) (e1:::Enil)
+ | Eop Onorl (e1:::e2:::Enil) => Eop Oorl (e1:::e2:::Enil)
+ | Eop (Onorlimm n) (e1:::Enil) => Eop (Oorlimm n) (e1:::Enil)
+ | Eop Onxorl (e1:::e2:::Enil) => Eop Oxorl (e1:::e2:::Enil)
+ | Eop (Onxorlimm n) (e1:::Enil) => Eop (Oxorlimm n) (e1:::Enil)
+ | Eop Oandnl (e1:::e2:::Enil) => Eop Oornl (e2:::e1:::Enil)
+ | Eop (Oandnlimm n) (e1:::Enil) => Eop (Oorlimm (Int64.not n)) (e1:::Enil)
+ | Eop Oornl (e1:::e2:::Enil) => Eop Oandnl (e2:::e1:::Enil)
+ | Eop (Oornlimm n) (e1:::Enil) => Eop (Oandlimm (Int64.not n)) (e1:::Enil)
+ | Eop Onotl (e1:::Enil) => e1
+ | Eop (Olongconst k) Enil => Eop (Olongconst (Int64.not k)) Enil
+ | _ => Eop Onotl (e:::Enil)
+ end.
+(* old: if Archi.splitlong then SplitLong.notl e else xorlimm Int64.mone e. *)
+
+(** ** Integer division and modulus *)
+
+Definition divlu_base (e1: expr) (e2: expr) := SplitLong.divlu_base e1 e2.
+Definition modlu_base (e1: expr) (e2: expr) := SplitLong.modlu_base e1 e2.
+Definition divls_base (e1: expr) (e2: expr) := SplitLong.divls_base e1 e2.
+Definition modls_base (e1: expr) (e2: expr) := SplitLong.modls_base e1 e2.
+
+Definition shrxlimm (e: expr) (n: int) :=
+ if Archi.splitlong then SplitLong.shrxlimm e n else
+ if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil).
+
+(** ** Comparisons *)
+
+Definition cmplu (c: comparison) (e1 e2: expr) :=
+ if Archi.splitlong then SplitLong.cmplu c e1 e2 else
+ match is_longconst e1, is_longconst e2 with
+ | Some n1, Some n2 =>
+ Eop (Ointconst (if Int64.cmpu c n1 n2 then Int.one else Int.zero)) Enil
+ | Some n1, None => Eop (Ocmp (Ccompluimm (swap_comparison c) n1)) (e2:::Enil)
+ | None, Some n2 => Eop (Ocmp (Ccompluimm c n2)) (e1:::Enil)
+ | None, None => Eop (Ocmp (Ccomplu c)) (e1:::e2:::Enil)
+ end.
+
+Definition cmpl (c: comparison) (e1 e2: expr) :=
+ if Archi.splitlong then SplitLong.cmpl c e1 e2 else
+ match is_longconst e1, is_longconst e2 with
+ | Some n1, Some n2 =>
+ Eop (Ointconst (if Int64.cmp c n1 n2 then Int.one else Int.zero)) Enil
+ | Some n1, None => Eop (Ocmp (Ccomplimm (swap_comparison c) n1)) (e2:::Enil)
+ | None, Some n2 => Eop (Ocmp (Ccomplimm c n2)) (e1:::Enil)
+ | None, None => Eop (Ocmp (Ccompl c)) (e1:::e2:::Enil)
+ end.
+
+(** ** Floating-point conversions *)
+
+Definition longoffloat (e: expr) :=
+ if Archi.splitlong then SplitLong.longoffloat e else
+ Eop Olongoffloat (e:::Enil).
+
+Definition longuoffloat (e: expr) :=
+ if Archi.splitlong then SplitLong.longuoffloat e else
+ Eop Olonguoffloat (e:::Enil).
+
+Definition floatoflong (e: expr) :=
+ if Archi.splitlong then SplitLong.floatoflong e else
+ Eop Ofloatoflong (e:::Enil).
+
+Definition floatoflongu (e: expr) :=
+ if Archi.splitlong then SplitLong.floatoflongu e else
+ Eop Ofloatoflongu (e:::Enil).
+
+Definition longofsingle (e: expr) := longoffloat (floatofsingle e).
+
+Definition longuofsingle (e: expr) := longuoffloat (floatofsingle e).
+
+Definition singleoflong (e: expr) := SplitLong.singleoflong e.
+
+Definition singleoflongu (e: expr) := SplitLong.singleoflongu e.
+
+End SELECT.
+
+(* Local Variables: *)
+(* mode: coq *)
+(* End: *)
diff --git a/kvx/SelectLongproof.v b/kvx/SelectLongproof.v
new file mode 100644
index 00000000..c3abdbc7
--- /dev/null
+++ b/kvx/SelectLongproof.v
@@ -0,0 +1,951 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Correctness of instruction selection for 64-bit integer operations *)
+
+Require Import String Coqlib Maps Integers Floats Errors.
+Require Archi.
+Require Import AST Values ExtValues Memory Globalenvs Events.
+Require Import Cminor Op CminorSel.
+Require Import OpHelpers OpHelpersproof.
+Require Import SelectOp SelectOpproof SplitLong SplitLongproof.
+Require Import SelectLong.
+Require Import DecBoolOps.
+Require Import Lia.
+
+Local Open Scope cminorsel_scope.
+Local Open Scope string_scope.
+
+(** * Correctness of the instruction selection functions for 64-bit operators *)
+
+Section CMCONSTR.
+
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+ forall le a x,
+ eval_expr ge sp e m le a x ->
+ exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+ forall le a x b y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+Definition partial_unary_constructor_sound (cstr: expr -> expr) (sem: val -> option val) : Prop :=
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ sem x = Some y ->
+ exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef y v.
+
+Definition partial_binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> option val) : Prop :=
+ forall le a x b y z,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ sem x y = Some z ->
+ exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef z v.
+
+Theorem eval_longconst:
+ forall le n, eval_expr ge sp e m le (longconst n) (Vlong n).
+Proof.
+ unfold longconst; intros; destruct Archi.splitlong.
+ apply SplitLongproof.eval_longconst.
+ EvalOp.
+Qed.
+
+Lemma is_longconst_sound:
+ forall v a n le,
+ is_longconst a = Some n -> eval_expr ge sp e m le a v -> v = Vlong n.
+Proof with (try discriminate).
+ intros. unfold is_longconst in *. destruct Archi.splitlong.
+ eapply SplitLongproof.is_longconst_sound; eauto.
+ assert (a = Eop (Olongconst n) Enil).
+ { destruct a... destruct o... destruct e0... congruence. }
+ subst a. InvEval. auto.
+Qed.
+
+Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword.
+Proof.
+ unfold intoflong; destruct Archi.splitlong. apply SplitLongproof.eval_intoflong.
+ red; intros. destruct (is_longconst a) as [n|] eqn:C.
+- TrivialExists. simpl. erewrite (is_longconst_sound x) by eauto. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu.
+Proof.
+ unfold longofintu; destruct Archi.splitlong. apply SplitLongproof.eval_longofintu.
+ red; intros. destruct (is_intconst a) as [n|] eqn:C.
+- econstructor; split. apply eval_longconst.
+ exploit is_intconst_sound; eauto. intros; subst x. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_longofint: unary_constructor_sound longofint Val.longofint.
+Proof.
+ unfold longofint; destruct Archi.splitlong. apply SplitLongproof.eval_longofint.
+ red; intros. destruct (is_intconst a) as [n|] eqn:C.
+- econstructor; split. apply eval_longconst.
+ exploit is_intconst_sound; eauto. intros; subst x. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_negl: unary_constructor_sound negl Val.negl.
+Proof.
+ unfold negl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_negl; auto.
+ red; intros. destruct (is_longconst a) as [n|] eqn:C.
+- exploit is_longconst_sound; eauto. intros EQ; subst x.
+ econstructor; split. apply eval_longconst. auto.
+- TrivialExists.
+Qed.
+
+
+Theorem eval_addlimm_shllimm:
+ forall sh k2, unary_constructor_sound (addlimm_shllimm sh k2) (fun x => ExtValues.addxl sh x (Vlong k2)).
+Proof.
+ red; unfold addlimm_shllimm; intros.
+ destruct (Compopts.optim_addx tt).
+ {
+ destruct (shift1_4_of_z (Int.unsigned sh)) as [s14 |] eqn:SHIFT.
+ - TrivialExists. simpl.
+ f_equal.
+ unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *.
+ destruct (Z.eq_dec _ _) as [e1|].
+ { replace s14 with SHIFT1 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e1.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e1.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e2|].
+ { replace s14 with SHIFT2 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e2.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e2.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e3|].
+ { replace s14 with SHIFT3 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e3.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e3.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e4|].
+ { replace s14 with SHIFT4 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e4.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e4.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ discriminate.
+ - unfold addxl. rewrite Val.addl_commut.
+ TrivialExists.
+ repeat (try eassumption; try econstructor).
+ simpl.
+ reflexivity.
+ }
+ { unfold addxl. rewrite Val.addl_commut.
+ TrivialExists.
+ repeat (try eassumption; try econstructor).
+ simpl.
+ reflexivity.
+ }
+Qed.
+
+Theorem eval_addlimm: forall n, unary_constructor_sound (addlimm n) (fun v => Val.addl v (Vlong n)).
+Proof.
+ unfold addlimm; intros; red; intros.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero.
+ subst. exists x; split; auto.
+ destruct x; simpl; rewrite ?Int64.add_zero, ?Ptrofs.add_zero; auto.
+ destruct (addlimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. rewrite Int64.add_commut; auto.
+- destruct (Compopts.optim_globaladdroffset _).
+ + econstructor; split. EvalOp. simpl; eauto.
+ unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto.
+ destruct Archi.ptr64; auto. rewrite Ptrofs.add_commut; auto.
+ + TrivialExists. repeat econstructor. simpl. trivial.
+- econstructor; split. EvalOp. simpl; eauto.
+ destruct sp; simpl; auto. destruct Archi.ptr64; auto.
+ rewrite Ptrofs.add_assoc, (Ptrofs.add_commut m0). auto.
+- subst x. rewrite Val.addl_assoc. rewrite Int64.add_commut. TrivialExists.
+- TrivialExists; simpl. subst x.
+ destruct v1; simpl; trivial.
+ destruct (Int.ltu _ _); simpl; trivial.
+ rewrite Int64.add_assoc. rewrite Int64.add_commut.
+ reflexivity.
+- pose proof eval_addlimm_shllimm as ADDXL.
+ unfold unary_constructor_sound in ADDXL.
+ unfold addxl in ADDXL.
+ rewrite Val.addl_commut.
+ subst x.
+ apply ADDXL; assumption.
+- TrivialExists.
+Qed.
+
+Lemma eval_addxl: forall n, binary_constructor_sound (addl_shllimm n) (ExtValues.addxl n).
+Proof.
+ red.
+ intros.
+ unfold addl_shllimm.
+ destruct (Compopts.optim_addx tt).
+ {
+ destruct (shift1_4_of_z (Int.unsigned n)) as [s14 |] eqn:SHIFT.
+ - TrivialExists.
+ simpl.
+ f_equal. f_equal.
+ unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *.
+ destruct (Z.eq_dec _ _) as [e1|].
+ { replace s14 with SHIFT1 by congruence.
+ rewrite <- e1.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e2|].
+ { replace s14 with SHIFT2 by congruence.
+ rewrite <- e2.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e3|].
+ { replace s14 with SHIFT3 by congruence.
+ rewrite <- e3.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e4|].
+ { replace s14 with SHIFT4 by congruence.
+ rewrite <- e4.
+ apply Int.repr_unsigned. }
+ discriminate.
+ (* Oaddxl *)
+ - TrivialExists;
+ repeat econstructor; eassumption.
+ }
+ { TrivialExists;
+ repeat econstructor; eassumption.
+ }
+Qed.
+
+Theorem eval_addl: binary_constructor_sound addl Val.addl.
+Proof.
+ unfold addl. destruct Archi.splitlong eqn:SL.
+ apply SplitLongproof.eval_addl. apply Archi.splitlong_ptr32; auto.
+(*
+ assert (SF: Archi.ptr64 = true).
+ { Local Transparent Archi.splitlong. unfold Archi.splitlong in SL.
+ destruct Archi.ptr64; simpl in *; congruence. }
+*)
+(*
+ assert (B: forall id ofs n,
+ Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) =
+ Val.addl (Genv.symbol_address ge id ofs) (Vlong (Int64.repr n))).
+ { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int64 (Int64.repr n)) by auto with ptrofs.
+ apply Genv.shift_symbol_address_64; auto. }
+
+*)
+ red; intros until y.
+ case (addl_match a b); intros; InvEval.
+ - rewrite Val.addl_commut. apply eval_addlimm; auto.
+ - apply eval_addlimm; auto.
+ - subst.
+ replace (Val.addl (Val.addl v1 (Vlong n1)) (Val.addl v0 (Vlong n2)))
+ with (Val.addl (Val.addl v1 v0) (Val.addl (Vlong n1) (Vlong n2))).
+ apply eval_addlimm. EvalOp.
+ repeat rewrite Val.addl_assoc. decEq. apply Val.addl_permut.
+ - subst. econstructor; split.
+ EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+ rewrite Val.addl_commut. destruct sp; simpl; auto.
+ destruct v1; simpl; auto.
+ destruct Archi.ptr64 eqn:SF; auto.
+ apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal.
+ rewrite (Ptrofs.add_commut (Ptrofs.of_int64 n1)), Ptrofs.add_assoc. f_equal. auto with ptrofs.
+ - subst. econstructor; split.
+ EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+ destruct sp; simpl; auto.
+ destruct v1; simpl; auto.
+ destruct Archi.ptr64 eqn:SF; auto.
+ apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. f_equal.
+ rewrite Ptrofs.add_commut. auto with ptrofs.
+ - subst.
+ replace (Val.addl (Val.addl v1 (Vlong n1)) y)
+ with (Val.addl (Val.addl v1 y) (Vlong n1)).
+ apply eval_addlimm. EvalOp.
+ repeat rewrite Val.addl_assoc. decEq. apply Val.addl_commut.
+ - subst.
+ replace (Val.addl x (Val.addl v1 (Vlong n2)))
+ with (Val.addl (Val.addl x v1) (Vlong n2)).
+ apply eval_addlimm. EvalOp.
+ repeat rewrite Val.addl_assoc. reflexivity.
+ - subst. TrivialExists.
+ - subst. rewrite Val.addl_commut. TrivialExists.
+ - subst. TrivialExists.
+ - subst. rewrite Val.addl_commut. TrivialExists.
+ - subst. pose proof eval_addxl as ADDXL.
+ unfold binary_constructor_sound in ADDXL.
+ rewrite Val.addl_commut.
+ apply ADDXL; assumption.
+ (* Oaddxl *)
+ - subst. pose proof eval_addxl as ADDXL.
+ unfold binary_constructor_sound in ADDXL.
+ apply ADDXL; assumption.
+ - TrivialExists.
+Qed.
+
+Theorem eval_subl: binary_constructor_sound subl Val.subl.
+Proof.
+ unfold subl. destruct Archi.splitlong eqn:SL.
+ apply SplitLongproof.eval_subl. apply Archi.splitlong_ptr32; auto.
+ red; intros; destruct (subl_match a b); InvEval.
+- rewrite Val.subl_addl_opp. apply eval_addlimm; auto.
+- subst. rewrite Val.subl_addl_l. rewrite Val.subl_addl_r.
+ rewrite Val.addl_assoc. simpl. rewrite Int64.add_commut. rewrite <- Int64.sub_add_opp.
+ apply eval_addlimm; EvalOp.
+- subst. rewrite Val.subl_addl_l. apply eval_addlimm; EvalOp.
+- subst. rewrite Val.subl_addl_r.
+ apply eval_addlimm; EvalOp.
+- TrivialExists. simpl. subst. reflexivity.
+- TrivialExists. simpl. subst.
+ destruct v1; destruct x; simpl; trivial.
+ + f_equal. f_equal.
+ rewrite <- Int64.neg_mul_distr_r.
+ rewrite Int64.sub_add_opp.
+ reflexivity.
+ + destruct (Archi.ptr64) eqn:ARCHI64; simpl; trivial.
+ f_equal. f_equal.
+ rewrite <- Int64.neg_mul_distr_r.
+ rewrite Ptrofs.sub_add_opp.
+ unfold Ptrofs.add.
+ f_equal. f_equal.
+ rewrite (Ptrofs.agree64_neg ARCHI64 (Ptrofs.of_int64 (Int64.mul i n)) (Int64.mul i n)).
+ rewrite (Ptrofs.agree64_of_int ARCHI64 (Int64.neg (Int64.mul i n))).
+ reflexivity.
+ apply (Ptrofs.agree64_of_int ARCHI64).
+- TrivialExists.
+Qed.
+
+Theorem eval_shllimm: forall n, unary_constructor_sound (fun e => shllimm e n) (fun v => Val.shll v (Vint n)).
+Proof.
+ intros; unfold shllimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shllimm; auto.
+ red; intros.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ exists x; split; auto. subst n; destruct x; simpl; auto.
+ destruct (Int.ltu Int.zero Int64.iwordsize'); auto.
+ change (Int64.shl' i Int.zero) with (Int64.shl i Int64.zero). rewrite Int64.shl_zero; auto.
+ destruct (Int.ltu n Int64.iwordsize') eqn:LT; simpl.
+ assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshllimm n) (a:::Enil)) v
+ /\ Val.lessdef (Val.shll x (Vint n)) v) by TrivialExists.
+ destruct (shllimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
+- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
+ subst. econstructor; split. EvalOp. simpl; eauto.
+ destruct v1; simpl; auto. rewrite LT'.
+ destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto.
+ simpl; rewrite LT. rewrite Int.add_commut, Int64.shl'_shl'; auto. rewrite Int.add_commut; auto.
+- apply DEFAULT.
+- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
+Qed.
+
+Theorem eval_shrluimm: forall n, unary_constructor_sound (fun e => shrluimm e n) (fun v => Val.shrlu v (Vint n)).
+Proof.
+ intros; unfold shrluimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrluimm; auto.
+ red; intros.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ exists x; split; auto. subst n; destruct x; simpl; auto.
+ destruct (Int.ltu Int.zero Int64.iwordsize'); auto.
+ change (Int64.shru' i Int.zero) with (Int64.shru i Int64.zero). rewrite Int64.shru_zero; auto.
+ destruct (Int.ltu n Int64.iwordsize') eqn:LT.
+ assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrluimm n) (a:::Enil)) v
+ /\ Val.lessdef (Val.shrlu x (Vint n)) v) by TrivialExists.
+ destruct (shrluimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
+- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
+ subst. econstructor; split. EvalOp. simpl; eauto.
+ destruct v1; simpl; auto. rewrite LT'.
+ destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto.
+ simpl; rewrite LT. rewrite Int.add_commut, Int64.shru'_shru'; auto. rewrite Int.add_commut; auto.
+- subst x.
+ simpl negb.
+ cbn iota.
+ destruct (is_bitfieldl _ _) eqn:BOUNDS.
+ + exists (extfzl (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int64.zwordsize) v1).
+ split.
+ ++ EvalOp.
+ ++ unfold extfzl.
+ rewrite BOUNDS.
+ destruct v1; try (simpl; apply Val.lessdef_undef).
+ replace (Z.sub Int64.zwordsize
+ (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia.
+ replace (Z.sub Int64.zwordsize
+ (Z.sub
+ (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int64.zwordsize))) with (Int.unsigned n) by lia.
+ simpl.
+ destruct (Int.ltu n1 Int64.iwordsize') eqn:Hltu_n1; simpl; trivial.
+ destruct (Int.ltu n Int64.iwordsize') eqn:Hltu_n; simpl; trivial.
+ rewrite Int.repr_unsigned.
+ rewrite Int.repr_unsigned.
+ constructor.
+ + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity.
+- apply DEFAULT.
+- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
+Qed.
+
+Theorem eval_shrlimm: forall n, unary_constructor_sound (fun e => shrlimm e n) (fun v => Val.shrl v (Vint n)).
+Proof.
+ intros; unfold shrlimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlimm; auto.
+ red; intros.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ exists x; split; auto. subst n; destruct x; simpl; auto.
+ destruct (Int.ltu Int.zero Int64.iwordsize'); auto.
+ change (Int64.shr' i Int.zero) with (Int64.shr i Int64.zero). rewrite Int64.shr_zero; auto.
+ destruct (Int.ltu n Int64.iwordsize') eqn:LT.
+ assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrlimm n) (a:::Enil)) v
+ /\ Val.lessdef (Val.shrl x (Vint n)) v) by TrivialExists.
+ destruct (shrlimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto.
+- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto.
+ subst. econstructor; split. EvalOp. simpl; eauto.
+ destruct v1; simpl; auto. rewrite LT'.
+ destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto.
+ simpl; rewrite LT. rewrite Int.add_commut, Int64.shr'_shr'; auto. rewrite Int.add_commut; auto.
+- subst x.
+ simpl negb.
+ cbn iota.
+ destruct (is_bitfieldl _ _) eqn:BOUNDS.
+ + exists (extfsl (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int64.zwordsize) v1).
+ split.
+ ++ EvalOp.
+ ++ unfold extfsl.
+ rewrite BOUNDS.
+ destruct v1; try (simpl; apply Val.lessdef_undef).
+ replace (Z.sub Int64.zwordsize
+ (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia.
+ replace (Z.sub Int64.zwordsize
+ (Z.sub
+ (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int64.zwordsize))) with (Int.unsigned n) by lia.
+ simpl.
+ destruct (Int.ltu n1 Int64.iwordsize') eqn:Hltu_n1; simpl; trivial.
+ destruct (Int.ltu n Int64.iwordsize') eqn:Hltu_n; simpl; trivial.
+ rewrite Int.repr_unsigned.
+ rewrite Int.repr_unsigned.
+ constructor.
+ + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity.
+- apply DEFAULT.
+- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
+Qed.
+
+Theorem eval_shll: binary_constructor_sound shll Val.shll.
+Proof.
+ unfold shll. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shll; auto.
+ red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shllimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_shrlu: binary_constructor_sound shrlu Val.shrlu.
+Proof.
+ unfold shrlu. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlu; auto.
+ red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrluimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_shrl: binary_constructor_sound shrl Val.shrl.
+Proof.
+ unfold shrl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrl; auto.
+ red; intros. destruct (is_intconst b) as [n2|] eqn:C.
+- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mullimm_base: forall n, unary_constructor_sound (mullimm_base n) (fun v => Val.mull v (Vlong n)).
+Proof.
+ intros; unfold mullimm_base. red; intros.
+ assert (DEFAULT: exists v,
+ eval_expr ge sp e m le (Eop Omull (a ::: longconst n ::: Enil)) v
+ /\ Val.lessdef (Val.mull x (Vlong n)) v).
+ { econstructor; split. EvalOp. constructor. eauto. constructor. apply eval_longconst. constructor. simpl; eauto.
+ auto. }
+ generalize (Int64.one_bits'_decomp n); intros D.
+ destruct (Int64.one_bits' n) as [ | i [ | j [ | ? ? ]]] eqn:B.
+- TrivialExists.
+- replace (Val.mull x (Vlong n)) with (Val.shll x (Vint i)).
+ apply eval_shllimm; auto.
+ simpl in D. rewrite D, Int64.add_zero. destruct x; simpl; auto.
+ rewrite (Int64.one_bits'_range n) by (rewrite B; auto with coqlib).
+ rewrite Int64.shl'_mul; auto.
+- set (le' := x :: le).
+ assert (A0: eval_expr ge sp e m le' (Eletvar O) x) by (constructor; reflexivity).
+ exploit (eval_shllimm i). eexact A0. intros (v1 & A1 & B1).
+ exploit (eval_shllimm j). eexact A0. intros (v2 & A2 & B2).
+ exploit (eval_addl). eexact A1. eexact A2. intros (v3 & A3 & B3).
+ exists v3; split. econstructor; eauto.
+ rewrite D. simpl. rewrite Int64.add_zero. destruct x; auto.
+ simpl in *.
+ rewrite (Int64.one_bits'_range n) in B1 by (rewrite B; auto with coqlib).
+ rewrite (Int64.one_bits'_range n) in B2 by (rewrite B; auto with coqlib).
+ inv B1; inv B2. simpl in B3; inv B3.
+ rewrite Int64.mul_add_distr_r. rewrite <- ! Int64.shl'_mul. auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mullimm: forall n, unary_constructor_sound (mullimm n) (fun v => Val.mull v (Vlong n)).
+Proof.
+ unfold mullimm. intros; red; intros.
+ destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_mullimm; eauto.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero.
+ exists (Vlong Int64.zero); split. apply eval_longconst.
+ destruct x; simpl; auto. subst n; rewrite Int64.mul_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.one.
+ exists x; split; auto.
+ destruct x; simpl; auto. subst n; rewrite Int64.mul_one; auto.
+ destruct (mullimm_match a); InvEval.
+- econstructor; split. apply eval_longconst. rewrite Int64.mul_commut; auto.
+- exploit (eval_mullimm_base n); eauto. intros (v2 & A2 & B2).
+ exploit (eval_addlimm (Int64.mul n n2)). eexact A2. intros (v3 & A3 & B3).
+ exists v3; split; auto.
+ subst x. destruct v1; simpl; auto.
+ simpl in B2; inv B2. simpl in B3; inv B3. rewrite Int64.mul_add_distr_l.
+ rewrite (Int64.mul_commut n). auto.
+- apply eval_mullimm_base; auto.
+Qed.
+
+Theorem eval_mull: binary_constructor_sound mull Val.mull.
+Proof.
+ unfold mull. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mull; auto.
+ red; intros; destruct (mull_match a b); InvEval.
+- rewrite Val.mull_commut. apply eval_mullimm; auto.
+- apply eval_mullimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_mullhu:
+ forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
+Proof.
+ unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto.
+ red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
+Theorem eval_mullhs:
+ forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
+Proof.
+ unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto.
+ red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
+Theorem eval_andlimm: forall n, unary_constructor_sound (andlimm n) (fun v => Val.andl v (Vlong n)).
+Proof.
+ unfold andlimm; intros; red; intros.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero.
+ exists (Vlong Int64.zero); split. apply eval_longconst.
+ subst. destruct x; simpl; auto. rewrite Int64.and_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.mone.
+ exists x; split. assumption.
+ subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto.
+ destruct (andlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.and_commut; auto.
+- TrivialExists. simpl. rewrite Val.andl_assoc. rewrite Int64.and_commut; auto.
+- TrivialExists.
+- TrivialExists.
+Qed.
+
+Lemma int64_eq_commut: forall x y : int64,
+ (Int64.eq x y) = (Int64.eq y x).
+Proof.
+ intros.
+ predSpec Int64.eq Int64.eq_spec x y;
+ predSpec Int64.eq Int64.eq_spec y x;
+ congruence.
+Qed.
+
+Theorem eval_andl: binary_constructor_sound andl Val.andl.
+Proof.
+ unfold andl; destruct Archi.splitlong. apply SplitLongproof.eval_andl.
+ red; intros. destruct (andl_match a b).
+- InvEval. rewrite Val.andl_commut. apply eval_andlimm; auto.
+- InvEval. apply eval_andlimm; auto.
+- (*andn*) InvEval. TrivialExists. simpl. congruence.
+- (*andn reverse*) InvEval. rewrite Val.andl_commut. TrivialExists; simpl. congruence.
+ (*
+- (* selectl *)
+ InvEval.
+ predSpec Int64.eq Int64.eq_spec zero1 Int64.zero; simpl; TrivialExists.
+ + constructor. econstructor; constructor.
+ constructor; try constructor; try constructor; try eassumption.
+ + simpl in *. f_equal. inv H6.
+ unfold selectl.
+ simpl.
+ destruct v3; simpl; trivial.
+ rewrite int64_eq_commut.
+ destruct (Int64.eq i Int64.zero); simpl.
+ * replace (Int64.repr (Int.signed (Int.neg Int.zero))) with Int64.zero by Int64.bit_solve.
+ destruct y; simpl; trivial.
+ * replace (Int64.repr (Int.signed (Int.neg Int.one))) with Int64.mone by Int64.bit_solve.
+ destruct y; simpl; trivial.
+ rewrite Int64.and_commut. rewrite Int64.and_mone. reflexivity.
+ + constructor. econstructor. constructor. econstructor. constructor. econstructor. constructor. eassumption. constructor. simpl. f_equal. constructor. simpl. f_equal. constructor. simpl. f_equal. constructor. eassumption. constructor.
+ + simpl in *. congruence. *)
+- TrivialExists.
+Qed.
+
+Theorem eval_orlimm: forall n, unary_constructor_sound (orlimm n) (fun v => Val.orl v (Vlong n)).
+Proof.
+ unfold orlimm; intros; red; intros.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero.
+ exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.or_zero; auto.
+ predSpec Int64.eq Int64.eq_spec n Int64.mone.
+ econstructor; split. apply eval_longconst. subst. destruct x; simpl; auto. rewrite Int64.or_mone; auto.
+ destruct (orlimm_match a); InvEval; subst.
+- econstructor; split. apply eval_longconst. simpl. rewrite Int64.or_commut; auto.
+- TrivialExists. simpl. rewrite Val.orl_assoc. rewrite Int64.or_commut; auto.
+- InvEval. TrivialExists.
+- TrivialExists.
+Qed.
+
+
+Theorem eval_orl: binary_constructor_sound orl Val.orl.
+Proof.
+ unfold orl; destruct Archi.splitlong. apply SplitLongproof.eval_orl.
+ red; intros.
+ destruct (orl_match a b).
+- InvEval. rewrite Val.orl_commut. apply eval_orlimm; auto.
+- InvEval. apply eval_orlimm; auto.
+- (*orn*) InvEval. TrivialExists; simpl; congruence.
+- (*orn reversed*) InvEval. rewrite Val.orl_commut. TrivialExists; simpl; congruence.
+
+ - (*insfl first case*)
+ destruct (is_bitfieldl _ _) eqn:Risbitfield.
+ + destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ * rewrite Rnmask in *.
+ inv H. inv H0. inv H4. inv H3. inv H9. inv H8.
+ simpl in H6, H7.
+ inv H6. inv H7.
+ inv H4. inv H3. inv H7.
+ simpl in H6.
+ inv H6.
+ set (zstop := (int64_highest_bit mask)) in *.
+ set (zstart := (Int.unsigned start)) in *.
+
+ TrivialExists.
+ simpl. f_equal.
+
+ unfold insfl.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ simpl.
+ unfold bitfield_maskl.
+ subst zstart.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ * TrivialExists.
+ + TrivialExists.
+ - destruct (is_bitfieldl _ _) eqn:Risbitfield.
+ + destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ * rewrite Rnmask in *.
+ inv H. inv H0. inv H4. inv H6. inv H8. inv H3. inv H8.
+ inv H0. simpl in H7. inv H7.
+ set (zstop := (int64_highest_bit mask)) in *.
+ set (zstart := 0) in *.
+
+ TrivialExists. simpl. f_equal.
+ unfold insfl.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ simpl.
+ subst zstart.
+ f_equal.
+ destruct v0; simpl; trivial.
+ unfold Int.ltu, Int64.iwordsize', Int64.zwordsize, Int64.wordsize.
+ rewrite Int.unsigned_repr.
+ ** rewrite Int.unsigned_repr.
+ *** simpl.
+ rewrite Int64.shl'_zero.
+ reflexivity.
+ *** simpl. unfold Int.max_unsigned. unfold Int.modulus.
+ simpl. lia.
+ ** unfold Int.max_unsigned. unfold Int.modulus.
+ simpl. lia.
+ * TrivialExists.
+ + TrivialExists.
+- TrivialExists.
+Qed.
+
+Theorem eval_xorlimm: forall n, unary_constructor_sound (xorlimm n) (fun v => Val.xorl v (Vlong n)).
+Proof.
+ unfold xorlimm; intros; red; intros.
+ predSpec Int64.eq Int64.eq_spec n Int64.zero.
+ - exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.xor_zero; auto.
+ - predSpec Int64.eq Int64.eq_spec n Int64.mone.
+ -- subst n. intros. rewrite <- Val.notl_xorl. TrivialExists.
+ -- destruct (xorlimm_match a); InvEval; subst.
+ + econstructor; split. apply eval_longconst. simpl. rewrite Int64.xor_commut; auto.
+ + rewrite Val.xorl_assoc. simpl. rewrite (Int64.xor_commut n2).
+ predSpec Int64.eq Int64.eq_spec (Int64.xor n n2) Int64.zero.
+ * rewrite H. exists v1; split; auto. destruct v1; simpl; auto. rewrite Int64.xor_zero; auto.
+ * TrivialExists.
+ + TrivialExists.
+Qed.
+
+Theorem eval_xorl: binary_constructor_sound xorl Val.xorl.
+Proof.
+ unfold xorl; destruct Archi.splitlong. apply SplitLongproof.eval_xorl.
+ red; intros. destruct (xorl_match a b).
+- InvEval. rewrite Val.xorl_commut. apply eval_xorlimm; auto.
+- InvEval. apply eval_xorlimm; auto.
+- TrivialExists.
+Qed.
+
+Theorem eval_notl: unary_constructor_sound notl Val.notl.
+Proof.
+ assert (forall v, Val.lessdef (Val.notl (Val.notl v)) v).
+ destruct v; simpl; auto. rewrite Int64.not_involutive; auto.
+ unfold notl; red; intros until x; case (notl_match a); intros; InvEval.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - subst x. exists (Val.andl v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.andl v1 (Vlong n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ - subst x. exists (Val.orl v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.orl v1 (Vlong n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ - subst x. exists (Val.xorl v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.xorl v1 (Vlong n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ (* andn *)
+ - subst x. TrivialExists. simpl.
+ destruct v0; destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int64.not_and_or_not.
+ rewrite Int64.not_involutive.
+ apply Int64.or_commut.
+ - subst x. TrivialExists. simpl.
+ destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int64.not_and_or_not.
+ rewrite Int64.not_involutive.
+ reflexivity.
+ (* orn *)
+ - subst x. TrivialExists. simpl.
+ destruct v0; destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int64.not_or_and_not.
+ rewrite Int64.not_involutive.
+ apply Int64.and_commut.
+ - subst x. TrivialExists. simpl.
+ destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int64.not_or_and_not.
+ rewrite Int64.not_involutive.
+ reflexivity.
+ - subst x. exists v1; split; trivial.
+ - TrivialExists.
+ - TrivialExists.
+Qed.
+
+Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
+Proof.
+ unfold divls_base; red; intros.
+ eapply SplitLongproof.eval_divls_base; eauto.
+Qed.
+
+Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
+Proof.
+ unfold modls_base; red; intros.
+ eapply SplitLongproof.eval_modls_base; eauto.
+Qed.
+
+Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
+Proof.
+ unfold divlu_base; red; intros.
+ eapply SplitLongproof.eval_divlu_base; eauto.
+Qed.
+
+Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
+Proof.
+ unfold modlu_base; red; intros.
+ eapply SplitLongproof.eval_modlu_base; eauto.
+Qed.
+
+Theorem eval_shrxlimm:
+ forall le a n x z,
+ eval_expr ge sp e m le a x ->
+ Val.shrxl x (Vint n) = Some z ->
+ exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v.
+Proof.
+ unfold shrxlimm; intros. destruct Archi.splitlong eqn:SL.
++ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32.
++ predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto.
+ change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto.
+- TrivialExists. simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_cmplu:
+ forall c le a x b y v,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.cmplu (Mem.valid_pointer m) c x y = Some v ->
+ eval_expr ge sp e m le (cmplu c a b) v.
+Proof.
+ unfold cmplu; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_cmplu; eauto using Archi.splitlong_ptr32.
+ unfold Val.cmplu in H1.
+ destruct (Val.cmplu_bool (Mem.valid_pointer m) c x y) as [vb|] eqn:C; simpl in H1; inv H1.
+ destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2;
+ try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto));
+ try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto));
+ subst.
+- simpl in C; inv C. EvalOp. destruct (Int64.cmpu c n1 n2); reflexivity.
+- EvalOp. simpl. rewrite Val.swap_cmplu_bool. rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+Qed.
+
+Theorem eval_cmpl:
+ forall c le a x b y v,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.cmpl c x y = Some v ->
+ eval_expr ge sp e m le (cmpl c a b) v.
+Proof.
+ unfold cmpl; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_cmpl; eauto.
+ unfold Val.cmpl in H1.
+ destruct (Val.cmpl_bool c x y) as [vb|] eqn:C; simpl in H1; inv H1.
+ destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2;
+ try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto));
+ try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto));
+ subst.
+- simpl in C; inv C. EvalOp. destruct (Int64.cmp c n1 n2); reflexivity.
+- EvalOp. simpl. rewrite Val.swap_cmpl_bool. rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+- EvalOp. simpl; rewrite C; auto.
+Qed.
+
+Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
+Proof.
+ unfold longoffloat; red; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_longoffloat; eauto.
+ TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
+Proof.
+ unfold longuoffloat; red; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_longuoffloat; eauto.
+ TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
+Proof.
+ unfold floatoflong; red; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_floatoflong; eauto.
+ TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
+Proof.
+ unfold floatoflongu; red; intros. destruct Archi.splitlong eqn:SL.
+ eapply SplitLongproof.eval_floatoflongu; eauto.
+ TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
+Proof.
+ unfold longofsingle; red; intros.
+ destruct x; simpl in H0; inv H0. destruct (Float32.to_long f) as [n|] eqn:EQ; simpl in H2; inv H2.
+ exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+ apply Float32.to_long_double in EQ.
+ eapply eval_longoffloat; eauto. simpl.
+ change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
+Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
+Proof.
+ unfold longuofsingle; red; intros. (* destruct Archi.splitlong eqn:SL. *)
+ destruct x; simpl in H0; inv H0. destruct (Float32.to_longu f) as [n|] eqn:EQ; simpl in H2; inv H2.
+ exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+ apply Float32.to_longu_double in EQ.
+ eapply eval_longuoffloat; eauto. simpl.
+ change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
+Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
+Proof.
+ unfold singleoflong; red; intros. (* destruct Archi.splitlong eqn:SL. *)
+ eapply SplitLongproof.eval_singleoflong; eauto.
+(* TrivialExists. *)
+Qed.
+
+Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
+Proof.
+ unfold singleoflongu; red; intros. (* destruct Archi.splitlong eqn:SL. *)
+ eapply SplitLongproof.eval_singleoflongu; eauto.
+(* TrivialExists. *)
+Qed.
+
+End CMCONSTR.
diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp
new file mode 100644
index 00000000..4e1087f9
--- /dev/null
+++ b/kvx/SelectOp.vp
@@ -0,0 +1,758 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+
+(** Instruction selection for operators *)
+
+(** The instruction selection pass recognizes opportunities for using
+ combined arithmetic and logical operations and addressing modes
+ offered by the target processor. For instance, the expression [x + 1]
+ can take advantage of the "immediate add" instruction of the processor,
+ and on the PowerPC, the expression [(x >> 6) & 0xFF] can be turned
+ into a "rotate and mask" instruction.
+
+ This file defines functions for building CminorSel expressions and
+ statements, especially expressions consisting of operator
+ applications. These functions examine their arguments to choose
+ cheaper forms of operators whenever possible.
+
+ For instance, [add e1 e2] will return a CminorSel expression semantically
+ equivalent to [Eop Oadd (e1 ::: e2 ::: Enil)], but will use a
+ [Oaddimm] operator if one of the arguments is an integer constant,
+ or suppress the addition altogether if one of the arguments is the
+ null integer. In passing, we perform operator reassociation
+ ([(e + c1) * c2] becomes [(e * c2) + (c1 * c2)]) and a small amount
+ of constant propagation.
+
+ On top of the "smart constructor" functions defined below,
+ module [Selection] implements the actual instruction selection pass.
+*)
+
+Require Archi.
+Require Import Coqlib.
+Require Import Compopts.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Op.
+Require Import CminorSel.
+Require Import OpHelpers.
+Require Import ExtValues ExtFloats.
+Require Import DecBoolOps.
+Require Import Chunks.
+Require Import Builtins.
+Require Compopts.
+
+Local Open Scope cminorsel_scope.
+
+Local Open Scope string_scope.
+Local Open Scope error_monad_scope.
+
+Section SELECT.
+
+Context {hf: helper_functions}.
+
+Inductive to_cond0 :=
+| Cond0_some : condition0 -> expr -> to_cond0
+| Cond0_none : to_cond0
+| Cond0_true : to_cond0
+| Cond0_false : to_cond0.
+
+Definition compu0 c e1 :=
+ match c with
+ | Clt => Cond0_false
+ | Cge => Cond0_true
+ | _ => Cond0_some (Ccompu0 c) e1
+ end.
+
+Definition complu0 c e1 :=
+ match c with
+ | Clt => Cond0_false
+ | Cge => Cond0_true
+ | _ => Cond0_some (Ccomplu0 c) e1
+ end.
+
+Nondetfunction cond_to_condition0 (cond : condition) (args : exprlist) :=
+ match cond, args with
+ | (Ccompimm c x), (e1 ::: Enil) =>
+ if Int.eq_dec x Int.zero
+ then Cond0_some (Ccomp0 c) e1
+ else Cond0_none
+
+ | (Ccompuimm c x), (e1 ::: Enil) =>
+ if Int.eq_dec x Int.zero
+ then compu0 c e1
+ else Cond0_none
+
+ | (Ccomplimm c x), (e1 ::: Enil) =>
+ if Int64.eq_dec x Int64.zero
+ then Cond0_some (Ccompl0 c) e1
+ else Cond0_none
+
+ | (Ccompluimm c x), (e1 ::: Enil) =>
+ if Int64.eq_dec x Int64.zero
+ then complu0 c e1
+ else Cond0_none
+
+ | _, _ => Cond0_none
+ end.
+
+(** Ternary operator *)
+Nondetfunction select0 (ty : typ) (cond0 : condition0) (e1 e2 e3: expr) :=
+ match ty, cond0, e1, e2, e3 with
+ | Tint, cond0, e1, (Eop (Ointconst imm) Enil), e3 =>
+ (Eop (Oselimm cond0 imm) (e1 ::: e3 ::: Enil))
+ | Tint, cond0, (Eop (Ointconst imm) Enil), e2, e3 =>
+ (Eop (Oselimm (negate_condition0 cond0) imm) (e2 ::: e3 ::: Enil))
+ | Tlong, cond0, e1, (Eop (Olongconst imm) Enil), e3 =>
+ (Eop (Osellimm cond0 imm) (e1 ::: e3 ::: Enil))
+ | Tlong, cond0, (Eop (Olongconst imm) Enil), e2, e3 =>
+ (Eop (Osellimm (negate_condition0 cond0) imm) (e2 ::: e3 ::: Enil))
+ | _, _, _ => (Eop (Osel cond0 ty) (e1 ::: e2 ::: e3 ::: Enil))
+ end.
+
+Definition same_expr_pure (e1 e2: expr) :=
+ match e1, e2 with
+ | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false
+ | _, _ => false
+ end.
+
+Definition select (ty : typ) (cond : condition) (args : exprlist) (e1 e2: expr) : option expr :=
+ Some (if same_expr_pure e1 e2 then e1 else
+ match cond_to_condition0 cond args with
+ | Cond0_none => select0 ty (Ccomp0 Cne) e1 e2 (Eop (Ocmp cond) args)
+ | Cond0_some cond0 ec => select0 ty cond0 e1 e2 ec
+ | Cond0_true => e1
+ | Cond0_false => e2
+ end).
+
+
+(** ** Constants **)
+
+Definition addrsymbol (id: ident) (ofs: ptrofs) :=
+ Eop (Oaddrsymbol id ofs) Enil.
+
+Definition addrstack (ofs: ptrofs) :=
+ Eop (Oaddrstack ofs) Enil.
+
+(** ** Integer addition and pointer addition *)
+
+Definition addimm_shlimm sh k2 e1 :=
+ if Compopts.optim_addx tt
+ then
+ match shift1_4_of_z (Int.unsigned sh) with
+ | Some s14 => Eop (Oaddximm s14 k2) (e1:::Enil)
+ | None => Eop (Oaddimm k2) ((Eop (Oshlimm sh) (e1:::Enil)):::Enil)
+ end
+ else Eop (Oaddimm k2) ((Eop (Oshlimm sh) (e1:::Enil)):::Enil).
+
+Nondetfunction addimm (n: int) (e: expr) :=
+ if Int.eq n Int.zero then e else
+ match e with
+ | Eop (Ointconst m) Enil => Eop (Ointconst (Int.add n m)) Enil
+ | Eop (Oaddrsymbol s m) Enil => Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int n) m)) Enil
+ | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n) m)) Enil
+ | Eop (Oaddimm m) (t ::: Enil) => Eop (Oaddimm(Int.add n m)) (t ::: Enil)
+ | Eop (Oaddximm sh m) (t ::: Enil) => Eop (Oaddximm sh (Int.add n m)) (t ::: Enil)
+ | Eop (Oshlimm sh) (t1:::Enil) => addimm_shlimm sh n t1
+ | _ => Eop (Oaddimm n) (e ::: Enil)
+ end.
+
+Definition add_shlimm n e1 e2 :=
+ if Compopts.optim_addx tt
+ then
+ match shift1_4_of_z (Int.unsigned n) with
+ | Some s14 => Eop (Oaddx s14) (e1:::e2:::Enil)
+ | None => Eop Oadd (e2:::(Eop (Oshlimm n) (e1:::Enil)):::Enil)
+ end
+ else Eop Oadd (e2:::(Eop (Oshlimm n) (e1:::Enil)):::Enil).
+
+Nondetfunction add (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 => addimm n1 t2
+ | t1, Eop (Ointconst n2) Enil => addimm n2 t1
+ | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+ addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil))
+ | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil =>
+ Eop Oadd (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n1) n2)) Enil ::: t1 ::: Enil)
+ | Eop (Oaddrstack n1) Enil, Eop (Oaddimm n2) (t2:::Enil) =>
+ Eop Oadd (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int n2))) Enil ::: t2 ::: Enil)
+ | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+ addimm n1 (Eop Oadd (t1:::t2:::Enil))
+ | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+ addimm n2 (Eop Oadd (t1:::t2:::Enil))
+ | t1, (Eop Omul (t2:::t3:::Enil)) =>
+ if Compopts.optim_madd tt
+ then Eop Omadd (t1:::t2:::t3:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
+ | (Eop Omul (t2:::t3:::Enil)), t1 =>
+ if Compopts.optim_madd tt
+ then Eop Omadd (t1:::t2:::t3:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
+ | t1, (Eop (Omulimm n) (t2:::Enil)) =>
+ if Compopts.optim_madd tt
+ then Eop (Omaddimm n) (t1:::t2:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
+ | (Eop (Omulimm n) (t2:::Enil)), t1 =>
+ if Compopts.optim_madd tt
+ then Eop (Omaddimm n) (t1:::t2:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
+ | (Eop (Oshlimm n) (t1:::Enil)), t2 =>
+ add_shlimm n t1 t2
+ | t2, (Eop (Oshlimm n) (t1:::Enil)) =>
+ add_shlimm n t1 t2
+ | _, _ => Eop Oadd (e1:::e2:::Enil)
+ end.
+
+(** ** Integer and pointer subtraction *)
+
+Nondetfunction sub (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | t1, Eop (Ointconst n2) Enil =>
+ addimm (Int.neg n2) t1
+ | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) =>
+ addimm (Int.sub n1 n2) (Eop Osub (t1:::t2:::Enil))
+ | Eop (Oaddimm n1) (t1:::Enil), t2 =>
+ addimm n1 (Eop Osub (t1:::t2:::Enil))
+ | t1, Eop (Oaddimm n2) (t2:::Enil) =>
+ addimm (Int.neg n2) (Eop Osub (t1:::t2:::Enil))
+ | t1, (Eop Omul (t2:::t3:::Enil)) =>
+ Eop Omsub (t1:::t2:::t3:::Enil)
+ | t1, (Eop (Omulimm n) (t2:::Enil)) =>
+ if Compopts.optim_madd tt
+ then Eop (Omaddimm (Int.neg n)) (t1:::t2:::Enil)
+ else Eop Osub (e1:::e2:::Enil)
+ | _, _ => Eop Osub (e1:::e2:::Enil)
+ end.
+
+Nondetfunction negint (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => Eop (Ointconst (Int.neg n)) Enil
+ | _ => Eop Oneg (e ::: Enil)
+ end.
+
+(** ** Immediate shifts *)
+
+Nondetfunction shlimm (e1: expr) (n: int) :=
+ if Int.eq n Int.zero then
+ e1
+ else if negb (Int.ltu n Int.iwordsize) then
+ Eop Oshl (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+ else match e1 with
+ | Eop (Ointconst n1) Enil =>
+ Eop (Ointconst (Int.shl n1 n)) Enil
+ | Eop (Oshlimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int.iwordsize
+ then Eop (Oshlimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshlimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshlimm n) (e1:::Enil)
+ end.
+
+Nondetfunction shruimm (e1: expr) (n: int) :=
+ if Int.eq n Int.zero then
+ e1
+ else if negb (Int.ltu n Int.iwordsize) then
+ Eop Oshru (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+ else match e1 with
+ | Eop (Ointconst n1) Enil =>
+ Eop (Ointconst (Int.shru n1 n)) Enil
+ | Eop (Oshruimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int.iwordsize
+ then Eop (Oshruimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshruimm n) (e1:::Enil)
+ | Eop (Oshlimm n1) (t1:::Enil) =>
+ let stop := Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one) in
+ let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int.zwordsize in
+ if is_bitfield stop start
+ then Eop (Oextfz stop start) (t1:::Enil)
+ else Eop (Oshruimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshruimm n) (e1:::Enil)
+ end.
+
+Nondetfunction shrimm (e1: expr) (n: int) :=
+ if Int.eq n Int.zero then
+ e1
+ else if negb (Int.ltu n Int.iwordsize) then
+ Eop Oshr (e1 ::: Eop (Ointconst n) Enil ::: Enil)
+ else match e1 with
+ | Eop (Ointconst n1) Enil =>
+ Eop (Ointconst (Int.shr n1 n)) Enil
+ | Eop (Oshrimm n1) (t1:::Enil) =>
+ if Int.ltu (Int.add n n1) Int.iwordsize
+ then Eop (Oshrimm (Int.add n n1)) (t1:::Enil)
+ else Eop (Oshrimm n) (e1:::Enil)
+ | Eop (Oshlimm n1) (t1:::Enil) =>
+ let stop := Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one) in
+ let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int.zwordsize in
+ if is_bitfield stop start
+ then Eop (Oextfs stop start) (t1:::Enil)
+ else Eop (Oshrimm n) (e1:::Enil)
+ | _ =>
+ Eop (Oshrimm n) (e1:::Enil)
+ end.
+
+(** ** Integer multiply *)
+
+Definition mulimm_base (n1: int) (e2: expr) :=
+ match Int.one_bits n1 with
+ | i :: nil =>
+ shlimm e2 i
+ | i :: j :: nil =>
+ Elet e2 (add (shlimm (Eletvar 0) i) (shlimm (Eletvar 0) j))
+ | _ =>
+ Eop (Omulimm n1) (e2 ::: Enil)
+ end.
+
+Nondetfunction mulimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil
+ else if Int.eq n1 Int.one then e2
+ else match e2 with
+ | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.mul n1 n2)) Enil
+ | Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.mul n1 n2) (mulimm_base n1 t2)
+ | _ => mulimm_base n1 e2
+ end.
+
+Nondetfunction mul (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 => mulimm n1 t2
+ | t1, Eop (Ointconst n2) Enil => mulimm n2 t1
+ | _, _ => Eop Omul (e1:::e2:::Enil)
+ end.
+
+Definition mulhs (e1: expr) (e2: expr) :=
+ if Archi.ptr64 then
+ Eop Olowlong
+ (Eop (Oshrlimm (Int.repr 32))
+ (Eop Omull (Eop Ocast32signed (e1 ::: Enil) :::
+ Eop Ocast32signed (e2 ::: Enil) ::: Enil) ::: Enil)
+ ::: Enil)
+ else
+ Eop Omulhs (e1 ::: e2 ::: Enil).
+
+Definition mulhu (e1: expr) (e2: expr) :=
+ if Archi.ptr64 then
+ Eop Olowlong
+ (Eop (Oshrluimm (Int.repr 32))
+ (Eop Omull (Eop Ocast32unsigned (e1 ::: Enil) :::
+ Eop Ocast32unsigned (e2 ::: Enil) ::: Enil) ::: Enil)
+ ::: Enil)
+ else
+ Eop Omulhu (e1 ::: e2 ::: Enil).
+
+(** ** Bitwise and, or, xor *)
+
+Nondetfunction andimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil
+ else if Int.eq n1 Int.mone then e2
+ else match e2 with
+ | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil
+ | Eop (Oandimm n2) (t2:::Enil) => Eop (Oandimm (Int.and n1 n2)) (t2:::Enil)
+ | Eop Onot (t2:::Enil) => Eop (Oandnimm n1) (t2:::Enil)
+ | _ => Eop (Oandimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction and (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 => andimm n1 t2
+ | t1, Eop (Ointconst n2) Enil => andimm n2 t1
+ | (Eop Onot (t1:::Enil)), t2 => Eop Oandn (t1:::t2:::Enil)
+ | t1, (Eop Onot (t2:::Enil)) => Eop Oandn (t2:::t1:::Enil)
+ | _, _ => Eop Oand (e1:::e2:::Enil)
+ end.
+
+Nondetfunction orimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then e2
+ else if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil
+ else match e2 with
+ | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
+ | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
+ | Eop Onot (t2:::Enil) => Eop (Oornimm n1) (t2:::Enil)
+ | _ => Eop (Oorimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction or (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
+ | t1, Eop (Ointconst n2) Enil => orimm n2 t1
+ | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) =>
+ if Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2
+ then Eop (Ororimm n2) (t1:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) =>
+ if Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2
+ then Eop (Ororimm n2) (t1:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ | (Eop Onot (t1:::Enil)), t2 => Eop Oorn (t1:::t2:::Enil)
+ | t1, (Eop Onot (t2:::Enil)) => Eop Oorn (t2:::t1:::Enil)
+ | (Eop (Oandimm nmask) (prev:::Enil)),
+ (Eop (Oandimm mask)
+ ((Eop (Oshlimm start) (fld:::Enil)):::Enil)) =>
+ let zstart := Int.unsigned start in
+ let zstop := int_highest_bit mask in
+ if is_bitfield zstop zstart
+ then
+ let mask' := Int.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int.eq_dec mask mask')
+ (Int.eq_dec nmask (Int.not mask'))
+ then Eop (Oinsf zstop zstart) (prev:::fld:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ | (Eop (Oandimm mask)
+ ((Eop (Oshlimm start) (fld:::Enil)):::Enil)),
+ (Eop (Oandimm nmask) (prev:::Enil)) =>
+ let zstart := Int.unsigned start in
+ let zstop := int_highest_bit mask in
+ if is_bitfield zstop zstart
+ then
+ let mask' := Int.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int.eq_dec mask mask')
+ (Int.eq_dec nmask (Int.not mask'))
+ then Eop (Oinsf zstop zstart) (prev:::fld:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ | (Eop (Oandimm nmask) (prev:::Enil)),
+ (Eop (Oandimm mask) (fld:::Enil)) =>
+ let zstart := 0 in
+ let zstop := int_highest_bit mask in
+ if is_bitfield zstop zstart
+ then
+ let mask' := Int.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int.eq_dec mask mask')
+ (Int.eq_dec nmask (Int.not mask'))
+ then Eop (Oinsf zstop zstart) (prev:::fld:::Enil)
+ else let zstart := 0 in
+ let zstop := int_highest_bit nmask in
+ if is_bitfield zstop zstart
+ then
+ let mask' := Int.repr (zbitfield_mask zstop zstart) in
+ if and_dec (Int.eq_dec nmask mask')
+ (Int.eq_dec mask (Int.not mask'))
+ then Eop (Oinsf zstop zstart) (fld:::prev:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ else Eop Oor (e1:::e2:::Enil)
+ | _, _ => Eop Oor (e1:::e2:::Enil)
+ end.
+
+Nondetfunction xorimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero
+ then e2
+ else
+ if Int.eq n1 Int.mone
+ then Eop Onot (e2:::Enil)
+ else
+ match e2 with
+ | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil
+ | Eop (Oxorimm n2) (t2:::Enil) =>
+ let n := Int.xor n1 n2 in
+ if Int.eq n Int.zero then t2 else Eop (Oxorimm n) (t2:::Enil)
+ | _ => Eop (Oxorimm n1) (e2:::Enil)
+ end.
+
+Nondetfunction xor (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 => xorimm n1 t2
+ | t1, Eop (Ointconst n2) Enil => xorimm n2 t1
+ | _, _ => Eop Oxor (e1:::e2:::Enil)
+ end.
+
+(** ** Integer logical negation *)
+
+Nondetfunction notint (e: expr) :=
+ match e with
+ | Eop Oand (e1:::e2:::Enil) => Eop Onand (e1:::e2:::Enil)
+ | Eop (Oandimm n) (e1:::Enil) => Eop (Onandimm n) (e1:::Enil)
+ | Eop Oor (e1:::e2:::Enil) => Eop Onor (e1:::e2:::Enil)
+ | Eop (Oorimm n) (e1:::Enil) => Eop (Onorimm n) (e1:::Enil)
+ | Eop Oxor (e1:::e2:::Enil) => Eop Onxor (e1:::e2:::Enil)
+ | Eop (Oxorimm n) (e1:::Enil) => Eop (Onxorimm n) (e1:::Enil)
+ | Eop Onand (e1:::e2:::Enil) => Eop Oand (e1:::e2:::Enil)
+ | Eop (Onandimm n) (e1:::Enil) => Eop (Oandimm n) (e1:::Enil)
+ | Eop Onor (e1:::e2:::Enil) => Eop Oor (e1:::e2:::Enil)
+ | Eop (Onorimm n) (e1:::Enil) => Eop (Oorimm n) (e1:::Enil)
+ | Eop Onxor (e1:::e2:::Enil) => Eop Oxor (e1:::e2:::Enil)
+ | Eop (Onxorimm n) (e1:::Enil) => Eop (Oxorimm n) (e1:::Enil)
+ | Eop Oandn (e1:::e2:::Enil) => Eop Oorn (e2:::e1:::Enil)
+ | Eop (Oandnimm n) (e1:::Enil) => Eop (Oorimm (Int.not n)) (e1:::Enil)
+ | Eop Oorn (e1:::e2:::Enil) => Eop Oandn (e2:::e1:::Enil)
+ | Eop (Oornimm n) (e1:::Enil) => Eop (Oandimm (Int.not n)) (e1:::Enil)
+ | Eop Onot (e1:::Enil) => e1
+ | Eop (Ointconst k) Enil => Eop (Ointconst (Int.not k)) Enil
+ | _ => Eop Onot (e:::Enil)
+ end.
+
+(** ** Integer division and modulus *)
+
+Definition divs_base (e1: expr) (e2: expr) :=
+ Eexternal i32_sdiv sig_ii_i (e1 ::: e2 ::: Enil).
+
+Definition mods_base (e1: expr) (e2: expr) :=
+ Eexternal i32_smod sig_ii_i (e1 ::: e2 ::: Enil).
+
+Definition divu_base (e1: expr) (e2: expr) :=
+ Eexternal i32_udiv sig_ii_i (e1 ::: e2 ::: Enil).
+
+Definition modu_base (e1: expr) (e2: expr) :=
+ Eexternal i32_umod sig_ii_i (e1 ::: e2 ::: Enil).
+
+Definition shrximm (e1: expr) (n2: int) :=
+ if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil).
+
+(* Alternate definition, not convenient for strength reduction during constant propagation *)
+(*
+(* n2 will be less than 31. *)
+
+Definition shrximm_inner (e1: expr) (n2: int) :=
+ Eop (Oshruimm (Int.sub Int.iwordsize n2))
+ ((Eop (Oshrimm (Int.repr (Int.zwordsize - 1)))
+ (e1 ::: Enil))
+ ::: Enil).
+
+Definition shrximm (e1: expr) (n2: int) :=
+ if Int.eq n2 Int.zero then e1
+ else Eop (Oshrimm n2)
+ ((Eop Oadd (e1 ::: shrximm_inner e1 n2 ::: Enil))
+ ::: Enil).
+*)
+
+(** ** General shifts *)
+
+Nondetfunction shl (e1: expr) (e2: expr) :=
+ match e2 with
+ | Eop (Ointconst n2) Enil => shlimm e1 n2
+ | _ => Eop Oshl (e1:::e2:::Enil)
+ end.
+
+Nondetfunction shr (e1: expr) (e2: expr) :=
+ match e2 with
+ | Eop (Ointconst n2) Enil => shrimm e1 n2
+ | _ => Eop Oshr (e1:::e2:::Enil)
+ end.
+
+Nondetfunction shru (e1: expr) (e2: expr) :=
+ match e2 with
+ | Eop (Ointconst n2) Enil => shruimm e1 n2
+ | _ => Eop Oshru (e1:::e2:::Enil)
+ end.
+
+(** ** Floating-point arithmetic *)
+
+Definition negf (e: expr) := Eop Onegf (e ::: Enil).
+Definition absf (e: expr) := Eop Oabsf (e ::: Enil).
+Definition addf (e1 e2: expr) := Eop Oaddf (e1 ::: e2 ::: Enil).
+Definition subf (e1 e2: expr) := Eop Osubf (e1 ::: e2 ::: Enil).
+Definition mulf (e1 e2: expr) := Eop Omulf (e1 ::: e2 ::: Enil).
+
+Definition negfs (e: expr) := Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) := Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) := Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) := Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) := Eop Omulfs (e1 ::: e2 ::: Enil).
+
+(** ** Comparisons *)
+
+Nondetfunction compimm (default: comparison -> int -> condition)
+ (sem: comparison -> int -> int -> bool)
+ (c: comparison) (e1: expr) (n2: int) :=
+ match c, e1 with
+ | c, Eop (Ointconst n1) Enil =>
+ Eop (Ointconst (if sem c n1 n2 then Int.one else Int.zero)) Enil
+ | Ceq, Eop (Ocmp c) el =>
+ if Int.eq_dec n2 Int.zero then
+ Eop (Ocmp (negate_condition c)) el
+ else if Int.eq_dec n2 Int.one then
+ Eop (Ocmp c) el
+ else
+ Eop (Ointconst Int.zero) Enil
+ | Cne, Eop (Ocmp c) el =>
+ if Int.eq_dec n2 Int.zero then
+ Eop (Ocmp c) el
+ else if Int.eq_dec n2 Int.one then
+ Eop (Ocmp (negate_condition c)) el
+ else
+ Eop (Ointconst Int.one) Enil
+ | _, _ =>
+ Eop (Ocmp (default c n2)) (e1 ::: Enil)
+ end.
+
+Nondetfunction comp (c: comparison) (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 =>
+ compimm Ccompimm Int.cmp (swap_comparison c) t2 n1
+ | t1, Eop (Ointconst n2) Enil =>
+ compimm Ccompimm Int.cmp c t1 n2
+ | _, _ =>
+ Eop (Ocmp (Ccomp c)) (e1 ::: e2 ::: Enil)
+ end.
+
+Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
+ match e1, e2 with
+ | Eop (Ointconst n1) Enil, t2 =>
+ compimm Ccompuimm Int.cmpu (swap_comparison c) t2 n1
+ | t1, Eop (Ointconst n2) Enil =>
+ compimm Ccompuimm Int.cmpu c t1 n2
+ | _, _ =>
+ Eop (Ocmp (Ccompu c)) (e1 ::: e2 ::: Enil)
+ end.
+
+Definition compf (c: comparison) (e1: expr) (e2: expr) :=
+ Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
+
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+ Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil).
+
+(** ** Integer conversions *)
+
+Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e.
+
+Nondetfunction cast8signed (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 8 n)) Enil
+ | _ => Eop Ocast8signed (e ::: Enil)
+ end.
+
+Definition cast16unsigned (e: expr) := andimm (Int.repr 65535) e.
+
+Nondetfunction cast16signed (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 16 n)) Enil
+ | _ => Eop Ocast16signed (e ::: Enil)
+ end.
+
+(** ** Floating-point conversions *)
+
+Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
+Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
+
+Nondetfunction floatofintu (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
+ | _ => Eop Ofloatoflongu ((Eop Ocast32unsigned (e ::: Enil)) ::: Enil)
+ end.
+
+Nondetfunction floatofint (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
+ | _ => Eop Ofloatoflong ((Eop Ocast32signed (e ::: Enil)) ::: Enil)
+ end.
+
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+Definition singleofint (e: expr) := Eop Osingleofint (e ::: Enil).
+
+Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil).
+Definition singleofintu (e: expr) := Eop Osingleofintu (e ::: Enil).
+
+Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
+(** ** Recognition of addressing modes for load and store operations *)
+
+Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
+ match e with
+ | Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
+ | Eop (Oaddrsymbol id ofs) Enil =>
+ (if (orb (Archi.pic_code tt) (negb (Compopts.optim_globaladdrtmp tt)))
+ then (Aindexed Ptrofs.zero, e:::Enil)
+ else (Aglobal id ofs, Enil))
+ | Eop (Oaddimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int n), e1:::Enil)
+ | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int64 n), e1:::Enil)
+ | Eop Oaddl (e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil) =>
+ (if Compopts.optim_xsaddr tt
+ then let zscale := Int.unsigned scale in
+ if Z.eq_dec zscale (zscale_of_chunk chunk)
+ then (Aindexed2XS zscale, e1:::e2:::Enil)
+ else (Aindexed2, e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil)
+ else (Aindexed2, e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil))
+ | Eop (Oaddxl sh) (e1:::e2:::Enil) =>
+ let zscale := ExtValues.z_of_shift1_4 sh in
+ let scale := Int.repr zscale in
+ (if Compopts.optim_xsaddr tt
+ then if Z.eq_dec zscale (zscale_of_chunk chunk)
+ then (Aindexed2XS zscale, e2:::e1:::Enil)
+ else (Aindexed2, e2:::(Eop (Oshllimm scale) (e1:::Enil)):::Enil)
+ else (Aindexed2, e2:::(Eop (Oshllimm scale) (e1:::Enil)):::Enil))
+ | Eop Oaddl (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil)
+ | _ => (Aindexed Ptrofs.zero, e:::Enil)
+ end.
+
+(** ** Arguments of builtins *)
+
+Nondetfunction builtin_arg (e: expr) :=
+ match e with
+ | Eop (Ointconst n) Enil => BA_int n
+ | Eop (Oaddrsymbol id ofs) Enil => BA_addrglobal id ofs
+ | Eop (Oaddrstack ofs) Enil => BA_addrstack ofs
+ | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) =>
+ BA_long (Int64.ofwords h l)
+ | Eop Omakelong (h ::: l ::: Enil) => BA_splitlong (BA h) (BA l)
+ | Eload chunk (Ainstack ofs) Enil => BA_loadstack chunk ofs
+ | Eop (Oaddimm n) (e1:::Enil) =>
+ if Archi.ptr64 then BA e else BA_addptr (BA e1) (BA_int n)
+ | Eop (Oaddlimm n) (e1:::Enil) =>
+ if Archi.ptr64 then BA_addptr (BA e1) (BA_long n) else BA e
+ | _ => BA e
+ end.
+
+(* float division *)
+
+Definition divf_base (e1: expr) (e2: expr) :=
+ (* Eop Odivf (e1 ::: e2 ::: Enil). *)
+ Eexternal f64_div sig_ff_f (e1 ::: e2 ::: Enil).
+
+Definition divfs_base1 (e2 : expr) :=
+ Eop Oinvfs (e2 ::: Enil).
+Definition divfs_baseX (e1 : expr) (e2 : expr) :=
+ (* Eop Odivf (e1 ::: e2 ::: Enil). *)
+ Eexternal f32_div sig_ss_s (e1 ::: e2 ::: Enil).
+
+Nondetfunction divfs_base (e1: expr) :=
+ match e1 with
+ | Eop (Osingleconst f) Enil =>
+ (if Float32.eq_dec f ExtFloat32.one
+ then divfs_base1
+ else divfs_baseX e1)
+ | _ => divfs_baseX e1
+ end.
+
+Nondetfunction gen_fma args :=
+ match args with
+ | (Eop Onegf (e1:::Enil)):::e2:::e3:::Enil => Some (Eop Ofmsubf (e3:::e1:::e2:::Enil))
+ | e1:::e2:::e3:::Enil => Some (Eop Ofmaddf (e3:::e1:::e2:::Enil))
+ | _ => None
+ end.
+
+Nondetfunction gen_fmaf args :=
+ match args with
+ | (Eop Onegfs (e1:::Enil)):::e2:::e3:::Enil => Some (Eop Ofmsubfs (e3:::e1:::e2:::Enil))
+ | e1:::e2:::e3:::Enil => Some (Eop Ofmaddfs (e3:::e1:::e2:::Enil))
+ | _ => None
+ end.
+
+Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
+ match b with
+ | BI_fmin => Some (Eop Ominf args)
+ | BI_fmax => Some (Eop Omaxf args)
+ | BI_fminf => Some (Eop Ominfs args)
+ | BI_fmaxf => Some (Eop Omaxfs args)
+ | BI_fma => gen_fma args
+ | BI_fmaf => gen_fmaf args
+ end.
+End SELECT.
+
+(* Local Variables: *)
+(* mode: coq *)
+(* End: *)
diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v
new file mode 100644
index 00000000..0ede1e2d
--- /dev/null
+++ b/kvx/SelectOpproof.v
@@ -0,0 +1,1901 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Correctness of instruction selection for operators *)
+
+Require Import Builtins.
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import ExtValues.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Cminor.
+Require Import Op.
+Require Import CminorSel.
+Require Import Builtins1.
+Require Import SelectOp.
+Require Import Events.
+Require Import OpHelpers.
+Require Import OpHelpersproof.
+Require Import DecBoolOps.
+Require Import Lia.
+
+Local Open Scope cminorsel_scope.
+Local Open Scope string_scope.
+
+
+(** * Useful lemmas and tactics *)
+
+(** The following are trivial lemmas and custom tactics that help
+ perform backward (inversion) and forward reasoning over the evaluation
+ of operator applications. *)
+
+Ltac EvalOp := eapply eval_Eop; eauto with evalexpr.
+
+Ltac InvEval1 :=
+ match goal with
+ | [ H: (eval_expr _ _ _ _ _ (Eop _ Enil) _) |- _ ] =>
+ inv H; InvEval1
+ | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: Enil)) _) |- _ ] =>
+ inv H; InvEval1
+ | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: _ ::: Enil)) _) |- _ ] =>
+ inv H; InvEval1
+ | [ H: (eval_exprlist _ _ _ _ _ Enil _) |- _ ] =>
+ inv H; InvEval1
+ | [ H: (eval_exprlist _ _ _ _ _ (_ ::: _) _) |- _ ] =>
+ inv H; InvEval1
+ | _ =>
+ idtac
+ end.
+
+Ltac InvEval2 :=
+ match goal with
+ | [ H: (eval_operation _ _ _ nil _ = Some _) |- _ ] =>
+ simpl in H; inv H
+ | [ H: (eval_operation _ _ _ (_ :: nil) _ = Some _) |- _ ] =>
+ simpl in H; FuncInv
+ | [ H: (eval_operation _ _ _ (_ :: _ :: nil) _ = Some _) |- _ ] =>
+ simpl in H; FuncInv
+ | [ H: (eval_operation _ _ _ (_ :: _ :: _ :: nil) _ = Some _) |- _ ] =>
+ simpl in H; FuncInv
+ | _ =>
+ idtac
+ end.
+
+Ltac InvEval := InvEval1; InvEval2; InvEval2.
+
+Ltac TrivialExists :=
+ match goal with
+ | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto]
+ end.
+
+(** * Correctness of the smart constructors *)
+
+Section CMCONSTR.
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
+(* Helper lemmas - from SplitLongproof.v *)
+
+Ltac UseHelper := decompose [Logic.and] arith_helpers_correct; eauto.
+Ltac DeclHelper := red in HELPERS; decompose [Logic.and] HELPERS; eauto.
+
+Lemma eval_helper:
+ forall le id name sg args vargs vres,
+ eval_exprlist ge sp e m le args vargs ->
+ helper_declared prog id name sg ->
+ external_implements name sg vargs vres ->
+ eval_expr ge sp e m le (Eexternal id sg args) vres.
+Proof.
+ intros.
+ red in H0. apply Genv.find_def_symbol in H0. destruct H0 as (b & P & Q).
+ rewrite <- Genv.find_funct_ptr_iff in Q.
+ econstructor; eauto.
+Qed.
+
+Corollary eval_helper_1:
+ forall le id name sg arg1 varg1 vres,
+ eval_expr ge sp e m le arg1 varg1 ->
+ helper_declared prog id name sg ->
+ external_implements name sg (varg1::nil) vres ->
+ eval_expr ge sp e m le (Eexternal id sg (arg1 ::: Enil)) vres.
+Proof.
+ intros. eapply eval_helper; eauto. constructor; auto. constructor.
+Qed.
+
+Corollary eval_helper_2:
+ forall le id name sg arg1 arg2 varg1 varg2 vres,
+ eval_expr ge sp e m le arg1 varg1 ->
+ eval_expr ge sp e m le arg2 varg2 ->
+ helper_declared prog id name sg ->
+ external_implements name sg (varg1::varg2::nil) vres ->
+ eval_expr ge sp e m le (Eexternal id sg (arg1 ::: arg2 ::: Enil)) vres.
+Proof.
+ intros. eapply eval_helper; eauto. constructor; auto. constructor; auto. constructor.
+Qed.
+
+(** We now show that the code generated by "smart constructor" functions
+ such as [Selection.notint] behaves as expected. Continuing the
+ [notint] example, we show that if the expression [e]
+ evaluates to some integer value [Vint n], then [Selection.notint e]
+ evaluates to a value [Vint (Int.not n)] which is indeed the integer
+ negation of the value of [e].
+
+ All proofs follow a common pattern:
+- Reasoning by case over the result of the classification functions
+ (such as [add_match] for integer addition), gathering additional
+ information on the shape of the argument expressions in the non-default
+ cases.
+- Inversion of the evaluations of the arguments, exploiting the additional
+ information thus gathered.
+- Equational reasoning over the arithmetic operations performed,
+ using the lemmas from the [Int] and [Float] modules.
+- Construction of an evaluation derivation for the expression returned
+ by the smart constructor.
+*)
+
+Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop :=
+ forall le a x,
+ eval_expr ge sp e m le a x ->
+ exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v.
+
+Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop :=
+ forall le a x b y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v.
+
+Theorem eval_addrsymbol:
+ forall le id ofs,
+ exists v, eval_expr ge sp e m le (addrsymbol id ofs) v /\ Val.lessdef (Genv.symbol_address ge id ofs) v.
+Proof.
+ intros. unfold addrsymbol. econstructor; split.
+ EvalOp. simpl; eauto.
+ auto.
+Qed.
+
+Theorem eval_addrstack:
+ forall le ofs,
+ exists v, eval_expr ge sp e m le (addrstack ofs) v /\ Val.lessdef (Val.offset_ptr sp ofs) v.
+Proof.
+ intros. unfold addrstack. econstructor; split.
+ EvalOp. simpl; eauto.
+ auto.
+Qed.
+
+Theorem eval_addimm_shlimm:
+ forall sh k2, unary_constructor_sound (addimm_shlimm sh k2) (fun x => ExtValues.addx sh x (Vint k2)).
+Proof.
+ red; unfold addimm_shlimm; intros.
+ destruct (Compopts.optim_addx tt).
+ {
+ destruct (shift1_4_of_z (Int.unsigned sh)) as [s14 |] eqn:SHIFT.
+ - TrivialExists. simpl.
+ f_equal.
+ unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *.
+ destruct (Z.eq_dec _ _) as [e1|].
+ { replace s14 with SHIFT1 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e1.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e1.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e2|].
+ { replace s14 with SHIFT2 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e2.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e2.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e3|].
+ { replace s14 with SHIFT3 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e3.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e3.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ destruct (Z.eq_dec _ _) as [e4|].
+ { replace s14 with SHIFT4 by congruence.
+ destruct x; simpl; trivial.
+ replace (Int.ltu _ _) with true by reflexivity.
+ unfold Int.ltu.
+ rewrite e4.
+ replace (if zlt _ _ then true else false) with true by reflexivity.
+ rewrite <- e4.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ }
+ discriminate.
+ - unfold addx. rewrite Val.add_commut.
+ TrivialExists.
+ repeat (try eassumption; try econstructor).
+ simpl.
+ reflexivity.
+ }
+ { unfold addx. rewrite Val.add_commut.
+ TrivialExists.
+ repeat (try eassumption; try econstructor).
+ simpl.
+ reflexivity.
+ }
+Qed.
+
+Theorem eval_addimm:
+ forall n, unary_constructor_sound (addimm n) (fun x => Val.add x (Vint n)).
+Proof.
+ red; unfold addimm; intros until x.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ - subst n. intros. exists x; split; auto.
+ destruct x; simpl; auto.
+ rewrite Int.add_zero; auto.
+ - case (addimm_match a); intros; InvEval; simpl.
+ + TrivialExists; simpl. rewrite Int.add_commut. auto.
+ + econstructor; split. EvalOp. simpl; eauto.
+ unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto.
+ + econstructor; split. EvalOp. simpl; eauto.
+ destruct sp; simpl; auto.
+ + TrivialExists; simpl. subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
+ + TrivialExists; simpl. subst x.
+ destruct v1; simpl; trivial.
+ destruct (Int.ltu _ _); simpl; trivial.
+ rewrite Int.add_assoc. rewrite Int.add_commut.
+ reflexivity.
+ + pose proof eval_addimm_shlimm as ADDX.
+ unfold unary_constructor_sound in ADDX.
+ unfold addx in ADDX.
+ rewrite Val.add_commut.
+ subst x.
+ apply ADDX; assumption.
+ + TrivialExists.
+Qed.
+
+Lemma eval_addx: forall n, binary_constructor_sound (add_shlimm n) (ExtValues.addx n).
+Proof.
+ red.
+ intros.
+ unfold add_shlimm.
+ destruct (Compopts.optim_addx tt).
+ {
+ destruct (shift1_4_of_z (Int.unsigned n)) as [s14 |] eqn:SHIFT.
+ - TrivialExists.
+ simpl.
+ f_equal. f_equal.
+ unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *.
+ destruct (Z.eq_dec _ _) as [e1|].
+ { replace s14 with SHIFT1 by congruence.
+ rewrite <- e1.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e2|].
+ { replace s14 with SHIFT2 by congruence.
+ rewrite <- e2.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e3|].
+ { replace s14 with SHIFT3 by congruence.
+ rewrite <- e3.
+ apply Int.repr_unsigned. }
+ destruct (Z.eq_dec _ _) as [e4|].
+ { replace s14 with SHIFT4 by congruence.
+ rewrite <- e4.
+ apply Int.repr_unsigned. }
+ discriminate.
+ - TrivialExists;
+ repeat econstructor; eassumption.
+ }
+ { TrivialExists;
+ repeat econstructor; eassumption.
+ }
+Qed.
+
+Theorem eval_add: binary_constructor_sound add Val.add.
+Proof.
+ red; intros until y.
+ unfold add; case (add_match a b); intros; InvEval.
+ - rewrite Val.add_commut. apply eval_addimm; auto.
+ - apply eval_addimm; auto.
+ - subst.
+ replace (Val.add (Val.add v1 (Vint n1)) (Val.add v0 (Vint n2)))
+ with (Val.add (Val.add v1 v0) (Val.add (Vint n1) (Vint n2))).
+ apply eval_addimm. EvalOp.
+ repeat rewrite Val.add_assoc. decEq. apply Val.add_permut.
+ - subst. econstructor; split.
+ EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+ rewrite Val.add_commut. destruct sp; simpl; auto.
+ destruct v1; simpl; auto.
+ - subst. econstructor; split.
+ EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto.
+ destruct sp; simpl; auto.
+ destruct v1; simpl; auto.
+ - subst.
+ replace (Val.add (Val.add v1 (Vint n1)) y)
+ with (Val.add (Val.add v1 y) (Vint n1)).
+ apply eval_addimm. EvalOp.
+ repeat rewrite Val.add_assoc. decEq. apply Val.add_commut.
+ - subst.
+ replace (Val.add x (Val.add v1 (Vint n2)))
+ with (Val.add (Val.add x v1) (Vint n2)).
+ apply eval_addimm. EvalOp.
+ repeat rewrite Val.add_assoc. reflexivity.
+ - (* Omadd *)
+ subst. destruct (Compopts.optim_madd tt); TrivialExists;
+ repeat (eauto; econstructor; simpl).
+ - (* Omadd rev *)
+ subst. destruct (Compopts.optim_madd tt); TrivialExists;
+ repeat (eauto; econstructor; simpl).
+ simpl. rewrite Val.add_commut. reflexivity.
+ - (* Omaddimm *)
+ subst. destruct (Compopts.optim_madd tt); TrivialExists;
+ repeat (eauto; econstructor; simpl).
+ - (* Omaddimm rev *)
+ subst. destruct (Compopts.optim_madd tt); TrivialExists;
+ repeat (eauto; econstructor; simpl).
+ simpl. rewrite Val.add_commut. reflexivity.
+ (* Oaddx *)
+ - subst. pose proof eval_addx as ADDX.
+ unfold binary_constructor_sound in ADDX.
+ rewrite Val.add_commut.
+ apply ADDX; assumption.
+ (* Oaddx *)
+ - subst. pose proof eval_addx as ADDX.
+ unfold binary_constructor_sound in ADDX.
+ apply ADDX; assumption.
+ - TrivialExists.
+Qed.
+
+Theorem eval_sub: binary_constructor_sound sub Val.sub.
+Proof.
+ red; intros until y.
+ unfold sub; case (sub_match a b); intros; InvEval.
+ - rewrite Val.sub_add_opp. apply eval_addimm; auto.
+ - subst. rewrite Val.sub_add_l. rewrite Val.sub_add_r.
+ rewrite Val.add_assoc. simpl. rewrite Int.add_commut. rewrite <- Int.sub_add_opp.
+ apply eval_addimm; EvalOp.
+ - subst. rewrite Val.sub_add_l. apply eval_addimm; EvalOp.
+ - subst. rewrite Val.sub_add_r. apply eval_addimm; EvalOp.
+ - TrivialExists. simpl. subst. reflexivity.
+ - destruct (Compopts.optim_madd tt).
+ + TrivialExists. simpl. subst.
+ rewrite sub_add_neg.
+ rewrite neg_mul_distr_r.
+ unfold Val.neg.
+ reflexivity.
+ + TrivialExists. repeat (eauto; econstructor).
+ simpl. subst. reflexivity.
+ - TrivialExists.
+Qed.
+
+Theorem eval_negint: unary_constructor_sound negint (fun v => Val.sub Vzero v).
+Proof.
+ red; intros until x. unfold negint. case (negint_match a); intros; InvEval.
+ TrivialExists.
+ TrivialExists.
+Qed.
+
+Theorem eval_shlimm:
+ forall n, unary_constructor_sound (fun a => shlimm a n)
+ (fun x => Val.shl x (Vint n)).
+Proof.
+ red; intros until x. unfold shlimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shl_zero; auto.
+
+ destruct (Int.ltu n Int.iwordsize) eqn:LT; simpl.
+ destruct (shlimm_match a); intros; InvEval.
+ - exists (Vint (Int.shl n1 n)); split. EvalOp.
+ simpl. rewrite LT. auto.
+ - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+ + exists (Val.shl v1 (Vint (Int.add n n1))); split. EvalOp.
+ subst. destruct v1; simpl; auto.
+ rewrite Heqb.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+ destruct (Int.ltu n Int.iwordsize) eqn:?; simpl; auto.
+ rewrite Int.add_commut. rewrite Int.shl_shl; auto. rewrite Int.add_commut; auto.
+ + subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+ simpl. auto.
+ - TrivialExists.
+ - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+ auto.
+Qed.
+
+Theorem eval_shruimm:
+ forall n, unary_constructor_sound (fun a => shruimm a n)
+ (fun x => Val.shru x (Vint n)).
+Proof.
+ red; intros until x. unfold shruimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shru_zero; auto.
+
+ destruct (Int.ltu n Int.iwordsize) eqn:LT.
+ destruct (shruimm_match a); intros; InvEval.
+ - exists (Vint (Int.shru n1 n)); split. EvalOp.
+ simpl. rewrite LT; auto.
+ - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+ exists (Val.shru v1 (Vint (Int.add n n1))); split. EvalOp.
+ subst. destruct v1; simpl; auto.
+ rewrite Heqb.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+ rewrite LT. rewrite Int.add_commut. rewrite Int.shru_shru; auto. rewrite Int.add_commut; auto.
+ subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+ simpl. auto.
+ - subst x.
+ simpl negb.
+ cbn iota.
+ destruct (is_bitfield _ _) eqn:BOUNDS.
+ + exists (extfz (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int.zwordsize) v1).
+ split.
+ ++ EvalOp.
+ ++ unfold extfz.
+ rewrite BOUNDS.
+ destruct v1; try (simpl; apply Val.lessdef_undef).
+ replace (Z.sub Int.zwordsize
+ (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia.
+ replace (Z.sub Int.zwordsize
+ (Z.sub
+ (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int.zwordsize))) with (Int.unsigned n) by lia.
+ rewrite Int.repr_unsigned.
+ rewrite Int.repr_unsigned.
+ simpl.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:Hltu_n1; simpl; trivial.
+ simpl.
+ destruct (Int.ltu n Int.iwordsize) eqn:Hltu_n; simpl; trivial.
+ + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity.
+ - TrivialExists.
+ - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+ auto.
+Qed.
+
+Theorem eval_shrimm:
+ forall n, unary_constructor_sound (fun a => shrimm a n)
+ (fun x => Val.shr x (Vint n)).
+Proof.
+ red; intros until x. unfold shrimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shr_zero; auto.
+
+ destruct (Int.ltu n Int.iwordsize) eqn:LT.
+ destruct (shrimm_match a); intros; InvEval.
+ - exists (Vint (Int.shr n1 n)); split. EvalOp.
+ simpl. rewrite LT; auto.
+ - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?.
+ exists (Val.shr v1 (Vint (Int.add n n1))); split. EvalOp.
+ subst. destruct v1; simpl; auto.
+ rewrite Heqb.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto.
+ rewrite LT.
+ rewrite Int.add_commut. rewrite Int.shr_shr; auto. rewrite Int.add_commut; auto.
+ subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor.
+ simpl. auto.
+ - subst x.
+ simpl negb.
+ cbn iota.
+ destruct (is_bitfield _ _) eqn:BOUNDS.
+ + exists (extfs (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int.zwordsize) v1).
+ split.
+ ++ EvalOp.
+ ++ unfold extfs.
+ rewrite BOUNDS.
+ destruct v1; try (simpl; apply Val.lessdef_undef).
+ replace (Z.sub Int.zwordsize
+ (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia.
+ replace (Z.sub Int.zwordsize
+ (Z.sub
+ (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)
+ (Z.sub
+ (Z.add
+ (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)))
+ Z.one) Int.zwordsize))) with (Int.unsigned n) by lia.
+ rewrite Int.repr_unsigned.
+ rewrite Int.repr_unsigned.
+ simpl.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:Hltu_n1; simpl; trivial.
+ simpl.
+ destruct (Int.ltu n Int.iwordsize) eqn:Hltu_n; simpl; trivial.
+ + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity.
+ - TrivialExists.
+ - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
+ auto.
+Qed.
+
+Lemma eval_mulimm_base:
+ forall n, unary_constructor_sound (mulimm_base n) (fun x => Val.mul x (Vint n)).
+Proof.
+ intros; red; intros; unfold mulimm_base.
+
+ assert (DFL: exists v, eval_expr ge sp e m le (Eop Omul (Eop (Ointconst n) Enil ::: a ::: Enil)) v /\ Val.lessdef (Val.mul x (Vint n)) v).
+ TrivialExists. econstructor. EvalOp. simpl; eauto. econstructor. eauto. constructor.
+ rewrite Val.mul_commut. auto.
+
+ generalize (Int.one_bits_decomp n).
+ generalize (Int.one_bits_range n).
+ destruct (Int.one_bits n).
+ - intros. TrivialExists.
+ - destruct l.
+ + intros. rewrite H1. simpl.
+ rewrite Int.add_zero.
+ replace (Vint (Int.shl Int.one i)) with (Val.shl Vone (Vint i)). rewrite Val.shl_mul.
+ apply eval_shlimm. auto. simpl. rewrite H0; auto with coqlib.
+ + destruct l.
+ intros. rewrite H1. simpl.
+ exploit (eval_shlimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]].
+ exploit (eval_shlimm i0 (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]].
+ exploit (eval_add (x :: le)). eexact A1. eexact A2. intros [v [A B]].
+ exists v; split. econstructor; eauto.
+ rewrite Int.add_zero.
+ replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one i0)))
+ with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint i0))).
+ rewrite Val.mul_add_distr_r.
+ repeat rewrite Val.shl_mul. eapply Val.lessdef_trans. 2: eauto. apply Val.add_lessdef; auto.
+ simpl. repeat rewrite H0; auto with coqlib.
+ intros. TrivialExists.
+Qed.
+
+Theorem eval_mulimm:
+ forall n, unary_constructor_sound (mulimm n) (fun x => Val.mul x (Vint n)).
+Proof.
+ intros; red; intros until x; unfold mulimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. exists (Vint Int.zero); split. EvalOp.
+ destruct x; simpl; auto. subst n. rewrite Int.mul_zero. auto.
+
+ predSpec Int.eq Int.eq_spec n Int.one.
+ intros. exists x; split; auto.
+ destruct x; simpl; auto. subst n. rewrite Int.mul_one. auto.
+
+ case (mulimm_match a); intros; InvEval.
+ - TrivialExists. simpl. rewrite Int.mul_commut; auto.
+ - subst. rewrite Val.mul_add_distr_l.
+ exploit eval_mulimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]].
+ exploit (eval_addimm (Int.mul n n2) le (mulimm_base n t2) v'). auto. intros [v'' [A2 B2]].
+ exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.add_lessdef; eauto.
+ rewrite Val.mul_commut; auto.
+ - apply eval_mulimm_base; auto.
+Qed.
+
+Theorem eval_mul: binary_constructor_sound mul Val.mul.
+Proof.
+ red; intros until y.
+ unfold mul; case (mul_match a b); intros; InvEval.
+ rewrite Val.mul_commut. apply eval_mulimm. auto.
+ apply eval_mulimm. auto.
+ TrivialExists.
+Qed.
+
+Theorem eval_mulhs: binary_constructor_sound mulhs Val.mulhs.
+Proof.
+ red; intros. unfold mulhs; destruct Archi.ptr64 eqn:SF.
+- econstructor; split.
+ EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto.
+ constructor. EvalOp. simpl; eauto. constructor.
+ simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto.
+ destruct x; simpl; auto. destruct y; simpl; auto.
+ change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+ apply Val.lessdef_same. f_equal.
+ transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)).
+ unfold Int.mulhs; f_equal. rewrite Zbits.Zshiftr_div_two_p by lia. reflexivity.
+ apply Int.same_bits_eq; intros n N.
+ change Int.zwordsize with 32 in *.
+ assert (N1: 0 <= n < 64) by lia.
+ rewrite Int64.bits_loword by auto.
+ rewrite Int64.bits_shr' by auto.
+ change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+ rewrite zlt_true by lia.
+ rewrite Int.testbit_repr by auto.
+ unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
+ transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)).
+ rewrite Z.shiftr_spec by lia. auto.
+ apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr.
+ change Int64.zwordsize with 64; lia.
+- TrivialExists.
+Qed.
+
+Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu.
+Proof.
+ red; intros. unfold mulhu; destruct Archi.ptr64 eqn:SF.
+- econstructor; split.
+ EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto.
+ constructor. EvalOp. simpl; eauto. constructor.
+ simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto.
+ destruct x; simpl; auto. destruct y; simpl; auto.
+ change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl.
+ apply Val.lessdef_same. f_equal.
+ transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)).
+ unfold Int.mulhu; f_equal. rewrite Zbits.Zshiftr_div_two_p by lia. reflexivity.
+ apply Int.same_bits_eq; intros n N.
+ change Int.zwordsize with 32 in *.
+ assert (N1: 0 <= n < 64) by lia.
+ rewrite Int64.bits_loword by auto.
+ rewrite Int64.bits_shru' by auto.
+ change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64.
+ rewrite zlt_true by lia.
+ rewrite Int.testbit_repr by auto.
+ unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia).
+ transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)).
+ rewrite Z.shiftr_spec by lia. auto.
+ apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr.
+ change Int64.zwordsize with 64; lia.
+- TrivialExists.
+Qed.
+
+Theorem eval_andimm:
+ forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)).
+Proof.
+ intros; red; intros until x. unfold andimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. exists (Vint Int.zero); split. EvalOp.
+ destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto.
+
+ predSpec Int.eq Int.eq_spec n Int.mone.
+ intros. exists x; split; auto.
+ subst. destruct x; simpl; auto. rewrite Int.and_mone; auto.
+
+ case (andimm_match a); intros.
+ - InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto.
+ - InvEval. subst. rewrite Val.and_assoc. simpl. rewrite Int.and_commut. TrivialExists.
+ - InvEval. TrivialExists. simpl; congruence.
+ - TrivialExists.
+Qed.
+
+Theorem eval_and: binary_constructor_sound and Val.and.
+Proof.
+ red; intros until y; unfold and; case (and_match a b); intros; InvEval.
+ - rewrite Val.and_commut. apply eval_andimm; auto.
+ - apply eval_andimm; auto.
+ - (*andn*) TrivialExists; simpl; congruence.
+ - (*andn reverse*) rewrite Val.and_commut. TrivialExists; simpl; congruence.
+ - TrivialExists.
+Qed.
+
+Theorem eval_orimm:
+ forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)).
+Proof.
+ intros; red; intros until x. unfold orimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. subst. exists x; split; auto.
+ destruct x; simpl; auto. rewrite Int.or_zero; auto.
+
+ predSpec Int.eq Int.eq_spec n Int.mone.
+ intros. exists (Vint Int.mone); split. EvalOp.
+ destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto.
+
+ destruct (orimm_match a); intros; InvEval.
+ - TrivialExists. simpl. rewrite Int.or_commut; auto.
+ - subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists.
+ - InvEval. TrivialExists. simpl; congruence.
+ - TrivialExists.
+Qed.
+
+
+Remark eval_same_expr:
+ forall a1 a2 le v1 v2,
+ same_expr_pure a1 a2 = true ->
+ eval_expr ge sp e m le a1 v1 ->
+ eval_expr ge sp e m le a2 v2 ->
+ a1 = a2 /\ v1 = v2.
+Proof.
+ intros until v2.
+ destruct a1; simpl; try (intros; discriminate).
+ destruct a2; simpl; try (intros; discriminate).
+ case (ident_eq i i0); intros.
+ subst i0. inversion H0. inversion H1. split. auto. congruence.
+ discriminate.
+Qed.
+
+Lemma int_eq_commut: forall x y : int,
+ (Int.eq x y) = (Int.eq y x).
+Proof.
+ intros.
+ predSpec Int.eq Int.eq_spec x y;
+ predSpec Int.eq Int.eq_spec y x;
+ congruence.
+Qed.
+
+Theorem eval_or: binary_constructor_sound or Val.or.
+Proof.
+ unfold or; red; intros.
+ assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop Oor (a:::b:::Enil)) v /\ Val.lessdef (Val.or x y) v) by TrivialExists.
+ assert (ROR: forall v n1 n2,
+ Int.add n1 n2 = Int.iwordsize ->
+ Val.lessdef (Val.or (Val.shl v (Vint n1)) (Val.shru v (Vint n2)))
+ (Val.ror v (Vint n2))).
+ { intros. destruct v; simpl; auto.
+ destruct (Int.ltu n1 Int.iwordsize) eqn:N1; auto.
+ destruct (Int.ltu n2 Int.iwordsize) eqn:N2; auto.
+ simpl. rewrite <- Int.or_ror; auto. }
+
+ destruct (or_match a b); InvEval.
+
+ - rewrite Val.or_commut. apply eval_orimm; auto.
+ - apply eval_orimm; auto.
+ - predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize; auto.
+ destruct (same_expr_pure t1 t2) eqn:?; auto.
+ InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
+ exists (Val.ror v0 (Vint n2)); split. EvalOp. apply ROR; auto.
+ - predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize; auto.
+ destruct (same_expr_pure t1 t2) eqn:?; auto.
+ InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst.
+ exists (Val.ror v1 (Vint n2)); split. EvalOp. rewrite Val.or_commut. apply ROR; auto.
+ - (*orn*) TrivialExists; simpl; congruence.
+ - (*orn reversed*) rewrite Val.or_commut. TrivialExists; simpl; congruence.
+ - set (zstop := (int_highest_bit mask)).
+ set (zstart := (Int.unsigned start)).
+ destruct (is_bitfield _ _) eqn:Risbitfield.
+ + destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ * simpl in H6.
+ injection H6.
+ clear H6.
+ intro. subst y. subst x.
+ TrivialExists. simpl. f_equal.
+ unfold insf.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ rewrite Rnmask.
+ simpl.
+ unfold bitfield_mask.
+ subst v0.
+ subst zstart.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ * apply DEFAULT.
+ + apply DEFAULT.
+ - set (zstop := (int_highest_bit mask)).
+ set (zstart := (Int.unsigned start)).
+ destruct (is_bitfield _ _) eqn:Risbitfield.
+ + destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ * simpl in H6.
+ injection H6.
+ clear H6.
+ intro. subst y. subst x.
+ TrivialExists.
+ rewrite Val.or_commut.
+ simpl. f_equal.
+ unfold insf.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ rewrite Rnmask.
+ simpl.
+ unfold bitfield_mask.
+ subst v1.
+ subst zstart.
+ rewrite Int.repr_unsigned.
+ reflexivity.
+ * apply DEFAULT.
+ + apply DEFAULT.
+ - set (zstop := (int_highest_bit mask)).
+ set (zstart := 0).
+ destruct (is_bitfield _ _) eqn:Risbitfield.
+ + destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ * subst y. subst x.
+ TrivialExists. simpl. f_equal.
+ unfold insf.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ rewrite Rnmask.
+ simpl.
+ unfold bitfield_mask.
+ subst zstart.
+ rewrite (Val.or_commut (Val.and v1 _)).
+ rewrite (Val.or_commut (Val.and v1 _)).
+ destruct v0; simpl; trivial.
+ unfold Int.ltu, Int.iwordsize, Int.zwordsize.
+ rewrite Int.unsigned_repr.
+ ** rewrite Int.unsigned_repr.
+ *** simpl.
+ rewrite Int.shl_zero.
+ reflexivity.
+ *** simpl.
+ unfold Int.max_unsigned, Int.modulus.
+ simpl.
+ lia.
+ ** unfold Int.max_unsigned, Int.modulus.
+ simpl.
+ lia.
+ * clear Risbitfield. clear o.
+ clear zstop.
+ set (zstop := (int_highest_bit nmask)).
+ destruct (is_bitfield _ _) eqn:Risbitfield.
+ ++ destruct (and_dec _ _) as [[Rmask Rnmask] | ].
+ ** subst y. subst x.
+ TrivialExists. simpl. f_equal.
+ rewrite Val.or_commut.
+ unfold insf.
+ rewrite Risbitfield.
+ rewrite Rmask.
+ rewrite Rnmask.
+ simpl.
+ unfold bitfield_mask.
+ subst zstart.
+ rewrite (Val.or_commut (Val.and v0 _)).
+ rewrite (Val.or_commut (Val.and v0 _)).
+ destruct v1; simpl; trivial.
+ unfold Int.ltu, Int.iwordsize, Int.zwordsize.
+ rewrite Int.unsigned_repr.
+ *** rewrite Int.unsigned_repr.
+ **** simpl.
+ rewrite Int.shl_zero.
+ reflexivity.
+ **** simpl.
+ unfold Int.max_unsigned, Int.modulus.
+ simpl.
+ lia.
+ *** unfold Int.max_unsigned, Int.modulus.
+ simpl.
+ lia.
+ ** apply DEFAULT.
+ ++ apply DEFAULT.
+ + apply DEFAULT.
+ - apply DEFAULT.
+Qed.
+
+Theorem eval_xorimm:
+ forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)).
+Proof.
+ intros; red; intros until x. unfold xorimm.
+
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ - intros. exists x; split. auto.
+ destruct x; simpl; auto. subst n. rewrite Int.xor_zero. auto.
+ - predSpec Int.eq Int.eq_spec n Int.mone.
+ -- subst n. intros. rewrite <- Val.not_xor. TrivialExists.
+ -- intros. destruct (xorimm_match a); intros; InvEval.
+ + TrivialExists. simpl. rewrite Int.xor_commut; auto.
+ + subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut.
+ predSpec Int.eq Int.eq_spec (Int.xor n2 n) Int.zero.
+ * exists v1; split; auto. destruct v1; simpl; auto. rewrite H1, Int.xor_zero; auto.
+ * TrivialExists.
+ + TrivialExists.
+Qed.
+
+Theorem eval_xor: binary_constructor_sound xor Val.xor.
+Proof.
+ red; intros until y; unfold xor; case (xor_match a b); intros; InvEval.
+ - rewrite Val.xor_commut. apply eval_xorimm; auto.
+ - apply eval_xorimm; auto.
+ - TrivialExists.
+Qed.
+
+Theorem eval_notint: unary_constructor_sound notint Val.notint.
+Proof.
+ assert (forall v, Val.lessdef (Val.notint (Val.notint v)) v).
+ destruct v; simpl; auto. rewrite Int.not_involutive; auto.
+ unfold notint; red; intros until x; case (notint_match a); intros; InvEval.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - TrivialExists; simpl; congruence.
+ - subst x. exists (Val.and v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.and v1 (Vint n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ - subst x. exists (Val.or v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.or v1 (Vint n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ - subst x. exists (Val.xor v1 v0); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ eassumption. constructor. simpl. reflexivity.
+ - subst x. exists (Val.xor v1 (Vint n)); split; trivial.
+ econstructor. constructor. eassumption. constructor.
+ simpl. reflexivity.
+ (* andn *)
+ - subst x. TrivialExists. simpl.
+ destruct v0; destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int.not_and_or_not.
+ rewrite Int.not_involutive.
+ apply Int.or_commut.
+ - subst x. TrivialExists. simpl.
+ destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int.not_and_or_not.
+ rewrite Int.not_involutive.
+ reflexivity.
+ (* orn *)
+ - subst x. TrivialExists. simpl.
+ destruct v0; destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int.not_or_and_not.
+ rewrite Int.not_involutive.
+ apply Int.and_commut.
+ - subst x. TrivialExists. simpl.
+ destruct v1; simpl; trivial.
+ f_equal. f_equal.
+ rewrite Int.not_or_and_not.
+ rewrite Int.not_involutive.
+ reflexivity.
+ - subst x. exists v1; split; trivial.
+ - TrivialExists.
+ - TrivialExists.
+Qed.
+
+Theorem eval_divs_base:
+ forall le a b x y z,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.divs x y = Some z ->
+ exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
+Proof.
+ intros; unfold divs_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+Theorem eval_mods_base:
+ forall le a b x y z,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.mods x y = Some z ->
+ exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
+Proof.
+ intros; unfold mods_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+Theorem eval_divu_base:
+ forall le a b x y z,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.divu x y = Some z ->
+ exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
+Proof.
+ intros; unfold divu_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+(* For using 64-bit unsigned division for 32-bit
+
+ intros until z.
+ intros Hax Hby Hdiv. unfold divu_base.
+ pose proof (divu_is_divlu x y) as DIVU.
+ destruct (Val.divlu (Val.longofintu x) (Val.longofintu y))
+ as [ ql | ] eqn:Ediv.
+ { TrivialExists.
+ { econstructor. eapply eval_helper_2; eauto.
+ { econstructor. econstructor. eassumption.
+ constructor. simpl. reflexivity. }
+ { econstructor. econstructor. eassumption.
+ constructor. simpl. reflexivity. }
+ { DeclHelper. }
+ { UseHelper. }
+ constructor. }
+ simpl.
+ congruence.
+ }
+ congruence.
+Qed.
+ *)
+
+Theorem eval_modu_base:
+ forall le a b x y z,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ Val.modu x y = Some z ->
+ exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
+Proof.
+ intros; unfold modu_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+(* for using 64-bit unsigned modulo for 32-bit
+
+ intros until z.
+ intros Hax Hby Hmod. unfold modu_base.
+ pose proof (modu_is_modlu x y) as MODU.
+ destruct (Val.modlu (Val.longofintu x) (Val.longofintu y))
+ as [ ql | ] eqn:Emod.
+ { TrivialExists.
+ { econstructor. eapply eval_helper_2; eauto.
+ { econstructor. econstructor. eassumption.
+ constructor. simpl. reflexivity. }
+ { econstructor. econstructor. eassumption.
+ constructor. simpl. reflexivity. }
+ { DeclHelper. }
+ { UseHelper. }
+ constructor. }
+ simpl.
+ congruence.
+ }
+ congruence.
+Qed.
+ *)
+
+Theorem eval_shrximm:
+ forall le a n x z,
+ eval_expr ge sp e m le a x ->
+ Val.shrx x (Vint n) = Some z ->
+ exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v.
+Proof.
+ intros. unfold shrximm.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ subst n. exists x; split; auto.
+ destruct x; simpl in H0; try discriminate.
+ destruct (Int.ltu Int.zero (Int.repr 31)); inv H0.
+ replace (Int.shrx i Int.zero) with i. auto.
+ unfold Int.shrx, Int.divs. rewrite Int.shl_zero.
+ change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
+ econstructor; split. EvalOp.
+ simpl. rewrite H0. simpl. reflexivity. auto.
+Qed.
+
+Theorem eval_shl: binary_constructor_sound shl Val.shl.
+Proof.
+ red; intros until y; unfold shl; case (shl_match b); intros.
+ InvEval. apply eval_shlimm; auto.
+ TrivialExists.
+Qed.
+
+Theorem eval_shr: binary_constructor_sound shr Val.shr.
+Proof.
+ red; intros until y; unfold shr; case (shr_match b); intros.
+ InvEval. apply eval_shrimm; auto.
+ TrivialExists.
+Qed.
+
+Theorem eval_shru: binary_constructor_sound shru Val.shru.
+Proof.
+ red; intros until y; unfold shru; case (shru_match b); intros.
+ InvEval. apply eval_shruimm; auto.
+ TrivialExists.
+Qed.
+
+Theorem eval_negf: unary_constructor_sound negf Val.negf.
+Proof.
+ red; intros. TrivialExists.
+Qed.
+
+Theorem eval_absf: unary_constructor_sound absf Val.absf.
+Proof.
+ red; intros. TrivialExists.
+Qed.
+
+Theorem eval_addf: binary_constructor_sound addf Val.addf.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Theorem eval_subf: binary_constructor_sound subf Val.subf.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulf: binary_constructor_sound mulf Val.mulf.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+ red; intros. TrivialExists.
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+ red; intros. TrivialExists.
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+ red; intros; TrivialExists.
+Qed.
+
+Section COMP_IMM.
+
+Variable default: comparison -> int -> condition.
+Variable intsem: comparison -> int -> int -> bool.
+Variable sem: comparison -> val -> val -> val.
+
+Hypothesis sem_int: forall c x y, sem c (Vint x) (Vint y) = Val.of_bool (intsem c x y).
+Hypothesis sem_undef: forall c v, sem c Vundef v = Vundef.
+Hypothesis sem_eq: forall x y, sem Ceq (Vint x) (Vint y) = Val.of_bool (Int.eq x y).
+Hypothesis sem_ne: forall x y, sem Cne (Vint x) (Vint y) = Val.of_bool (negb (Int.eq x y)).
+Hypothesis sem_default: forall c v n, sem c v (Vint n) = Val.of_optbool (eval_condition (default c n) (v :: nil) m).
+
+Lemma eval_compimm:
+ forall le c a n2 x,
+ eval_expr ge sp e m le a x ->
+ exists v, eval_expr ge sp e m le (compimm default intsem c a n2) v
+ /\ Val.lessdef (sem c x (Vint n2)) v.
+Proof.
+ intros until x.
+ unfold compimm; case (compimm_match c a); intros.
+(* constant *)
+ - InvEval. rewrite sem_int. TrivialExists. simpl. destruct (intsem c0 n1 n2); auto.
+(* eq cmp *)
+ - InvEval. inv H. simpl in H5. inv H5.
+ destruct (Int.eq_dec n2 Int.zero).
+ + subst n2. TrivialExists.
+ simpl. rewrite eval_negate_condition.
+ destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+ rewrite sem_undef; auto.
+ + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+ simpl. destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto.
+ rewrite sem_undef; auto.
+ exists (Vint Int.zero); split. EvalOp.
+ destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; rewrite sem_eq; rewrite Int.eq_false; auto.
+ rewrite sem_undef; auto.
+(* ne cmp *)
+ - InvEval. inv H. simpl in H5. inv H5.
+ destruct (Int.eq_dec n2 Int.zero).
+ + subst n2. TrivialExists.
+ simpl. destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+ rewrite sem_undef; auto.
+ + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists.
+ simpl. rewrite eval_negate_condition. destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto.
+ rewrite sem_undef; auto.
+ exists (Vint Int.one); split. EvalOp.
+ destruct (eval_condition c0 vl m); simpl.
+ unfold Vtrue, Vfalse. destruct b; rewrite sem_ne; rewrite Int.eq_false; auto.
+ rewrite sem_undef; auto.
+(* default *)
+ - TrivialExists. simpl. rewrite sem_default. auto.
+Qed.
+
+Hypothesis sem_swap:
+ forall c x y, sem (swap_comparison c) x y = sem c y x.
+
+Lemma eval_compimm_swap:
+ forall le c a n2 x,
+ eval_expr ge sp e m le a x ->
+ exists v, eval_expr ge sp e m le (compimm default intsem (swap_comparison c) a n2) v
+ /\ Val.lessdef (sem c (Vint n2) x) v.
+Proof.
+ intros. rewrite <- sem_swap. eapply eval_compimm; eauto.
+Qed.
+
+End COMP_IMM.
+
+Theorem eval_comp:
+ forall c, binary_constructor_sound (comp c) (Val.cmp c).
+Proof.
+ intros; red; intros until y. unfold comp; case (comp_match a b); intros; InvEval.
+ eapply eval_compimm_swap; eauto.
+ intros. unfold Val.cmp. rewrite Val.swap_cmp_bool; auto.
+ eapply eval_compimm; eauto.
+ TrivialExists.
+Qed.
+
+Theorem eval_compu:
+ forall c, binary_constructor_sound (compu c) (Val.cmpu (Mem.valid_pointer m) c).
+Proof.
+ intros; red; intros until y. unfold compu; case (compu_match a b); intros; InvEval.
+ eapply eval_compimm_swap; eauto.
+ intros. unfold Val.cmpu. rewrite Val.swap_cmpu_bool; auto.
+ eapply eval_compimm; eauto.
+ TrivialExists.
+Qed.
+
+Theorem eval_compf:
+ forall c, binary_constructor_sound (compf c) (Val.cmpf c).
+Proof.
+ intros; red; intros. unfold compf. TrivialExists.
+Qed.
+
+Theorem eval_compfs:
+ forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+ intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
+Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
+Proof.
+ red; intros until x. unfold cast8signed. case (cast8signed_match a); intros; InvEval.
+ TrivialExists.
+ TrivialExists.
+Qed.
+
+Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8).
+Proof.
+ red; intros until x. unfold cast8unsigned.
+ rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
+Qed.
+
+Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16).
+Proof.
+ red; intros until x. unfold cast16signed. case (cast16signed_match a); intros; InvEval.
+ TrivialExists.
+ TrivialExists.
+Qed.
+
+Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16).
+Proof.
+ red; intros until x. unfold cast8unsigned.
+ rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate.
+Qed.
+
+Theorem eval_intoffloat:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.intoffloat x = Some y ->
+ exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold intoffloat. TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_intuoffloat:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.intuoffloat x = Some y ->
+ exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold intuoffloat. TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_floatofintu:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.floatofintu x = Some y ->
+ exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
+Proof.
+ intros.
+ unfold Val.floatofintu in *.
+ unfold floatofintu.
+ destruct (floatofintu_match a).
+ - InvEval.
+ TrivialExists.
+ - InvEval.
+ TrivialExists.
+ constructor. econstructor. constructor. eassumption. constructor.
+ simpl. f_equal. constructor.
+ simpl.
+ destruct x; simpl; trivial; try discriminate.
+ f_equal.
+ inv H0.
+ f_equal.
+ rewrite Float.of_intu_of_longu.
+ reflexivity.
+Qed.
+
+Theorem eval_floatofint:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.floatofint x = Some y ->
+ exists v, eval_expr ge sp e m le (floatofint a) v /\ Val.lessdef y v.
+Proof.
+ intros.
+ unfold floatofint.
+ destruct (floatofint_match a).
+ - InvEval.
+ TrivialExists.
+ - InvEval.
+ TrivialExists.
+ constructor. econstructor. constructor. eassumption. constructor.
+ simpl. f_equal. constructor.
+ simpl.
+ destruct x; simpl; trivial; try discriminate.
+ f_equal.
+ inv H0.
+ f_equal.
+ rewrite Float.of_int_of_long.
+ reflexivity.
+Qed.
+
+Theorem eval_intofsingle:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.intofsingle x = Some y ->
+ exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold intofsingle. TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_singleofint:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.singleofint x = Some y ->
+ exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold singleofint; TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_intuofsingle:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.intuofsingle x = Some y ->
+ exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold intuofsingle. TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_singleofintu:
+ forall le a x y,
+ eval_expr ge sp e m le a x ->
+ Val.singleofintu x = Some y ->
+ exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+ intros; unfold intuofsingle. TrivialExists.
+ simpl. rewrite H0. reflexivity.
+Qed.
+
+Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat.
+Proof.
+ red; intros. unfold singleoffloat. TrivialExists.
+Qed.
+
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+ red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
+Theorem eval_addressing:
+ forall le chunk a v b ofs,
+ eval_expr ge sp e m le a v ->
+ v = Vptr b ofs ->
+ match addressing chunk a with (mode, args) =>
+ exists vl,
+ eval_exprlist ge sp e m le args vl /\
+ eval_addressing ge sp mode vl = Some v
+ end.
+Proof.
+ intros until v. unfold addressing; case (addressing_match a); intros; InvEval.
+ - exists (@nil val); split. eauto with evalexpr. simpl. auto.
+ - destruct (orb _ _).
+ + exists (Vptr b ofs0 :: nil); split.
+ constructor. EvalOp. simpl. congruence. constructor. simpl. rewrite Ptrofs.add_zero. congruence.
+ + exists (@nil val); split. constructor. simpl; auto.
+ - exists (v1 :: nil); split. eauto with evalexpr. simpl.
+ destruct v1; simpl in H; try discriminate.
+ - exists (v1 :: nil); split. eauto with evalexpr. simpl.
+ destruct v1; simpl in H; try discriminate. destruct Archi.ptr64 eqn:SF; inv H.
+ simpl. auto.
+ - destruct (Compopts.optim_xsaddr tt).
+ + destruct (Z.eq_dec _ _).
+ * exists (v1 :: v2 :: nil); split.
+ repeat (constructor; auto). simpl. rewrite Int.repr_unsigned. destruct v2; simpl in *; congruence.
+ * exists (v1 :: v0 :: nil); split.
+ repeat (constructor; auto). econstructor.
+ repeat (constructor; auto). eassumption. simpl. congruence.
+ simpl. congruence.
+ + exists (v1 :: v0 :: nil); split.
+ repeat (constructor; auto). econstructor.
+ repeat (constructor; auto). eassumption. simpl. congruence.
+ simpl. congruence.
+ - unfold addxl in *.
+ destruct (Compopts.optim_xsaddr tt).
+ + unfold int_of_shift1_4 in *.
+ destruct (Z.eq_dec _ _).
+ * exists (v0 :: v1 :: nil); split.
+ repeat (constructor; auto). simpl.
+ congruence.
+ * eexists; split.
+ repeat (constructor; auto). eassumption.
+ econstructor.
+ repeat (constructor; auto). eassumption. simpl.
+ reflexivity.
+ simpl. congruence.
+ + eexists; split.
+ repeat (constructor; auto). eassumption.
+ econstructor.
+ repeat (constructor; auto). eassumption. simpl.
+ reflexivity.
+ simpl. unfold int_of_shift1_4 in *. congruence.
+ - exists (v1 :: v0 :: nil); split. repeat (constructor; auto). simpl. congruence.
+ - exists (v :: nil); split. eauto with evalexpr. subst. simpl. rewrite Ptrofs.add_zero; auto.
+Qed.
+
+Theorem eval_builtin_arg:
+ forall a v,
+ eval_expr ge sp e m nil a v ->
+ CminorSel.eval_builtin_arg ge sp e m (builtin_arg a) v.
+Proof.
+ intros until v. unfold builtin_arg; case (builtin_arg_match a); intros.
+- InvEval. constructor.
+- InvEval. constructor.
+- InvEval. constructor.
+- InvEval. simpl in H5. inv H5. constructor.
+- InvEval. subst v. constructor; auto.
+- inv H. InvEval. simpl in H6; inv H6. constructor; auto.
+- destruct Archi.ptr64 eqn:SF.
++ constructor; auto.
++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vint n) else Val.add v1 (Vint n)).
+ repeat constructor; auto.
+ rewrite SF; auto.
+- destruct Archi.ptr64 eqn:SF.
++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vlong n) else Val.add v1 (Vlong n)).
+ repeat constructor; auto.
++ constructor; auto.
+- constructor; auto.
+Qed.
+
+(* ternary *)
+(* does not work due to possible nondeterminism
+Lemma cond_to_condition0_correct :
+ forall cond : condition,
+ forall al : exprlist,
+ match (cond_to_condition0 cond al) with
+ | None => True
+ | Some(cond0, e1) =>
+ forall le vl v1,
+ eval_expr ge sp e m le e1 v1 ->
+ eval_exprlist ge sp e m le al vl ->
+ (eval_condition0 cond0 v1 m) = (eval_condition cond vl m)
+ end.
+Proof.
+ intros.
+ unfold cond_to_condition0.
+ case (cond_to_condition0_match cond al); trivial.
+ {
+ intros.
+ destruct (Int.eq_dec _ _); trivial.
+ intros until v1.
+ intros He1 Hel.
+ InvEval.
+ simpl.
+ f_equal.
+ eapply eval_expr_determ. eassumption.
+ }
+Qed.
+*)
+
+Lemma eval_neg_condition0:
+ forall cond0: condition0,
+ forall v1: val,
+ forall m: mem,
+ (eval_condition0 (negate_condition0 cond0) v1 m) =
+ option_map negb (eval_condition0 cond0 v1 m).
+Proof.
+ intros.
+ destruct cond0; simpl;
+ try rewrite Val.negate_cmp_bool;
+ try rewrite Val.negate_cmpu_bool;
+ try rewrite Val.negate_cmpl_bool;
+ try rewrite Val.negate_cmplu_bool;
+ reflexivity.
+Qed.
+
+Lemma select_neg:
+ forall a b c,
+ Val.select (option_map negb a) b c =
+ Val.select a c b.
+Proof.
+ destruct a; simpl; trivial.
+ destruct b; simpl; trivial.
+Qed.
+
+Lemma eval_select0:
+ forall le ty cond0 ac vc a1 v1 a2 v2,
+ eval_expr ge sp e m le ac vc ->
+ eval_expr ge sp e m le a1 v1 ->
+ eval_expr ge sp e m le a2 v2 ->
+ exists v,
+ eval_expr ge sp e m le (select0 ty cond0 a1 a2 ac) v
+ /\ Val.lessdef (Val.select (eval_condition0 cond0 vc m) v1 v2 ty) v.
+Proof.
+ intros.
+ unfold select0.
+ destruct (select0_match ty cond0 a1 a2 ac).
+ all: InvEval; econstructor; split;
+ try repeat (try econstructor; try eassumption).
+ all: rewrite eval_neg_condition0; rewrite select_neg; constructor.
+Qed.
+
+Lemma bool_cond0_ne:
+ forall ob : option bool,
+ forall m,
+ (eval_condition0 (Ccomp0 Cne) (Val.of_optbool ob) m) = ob.
+Proof.
+ destruct ob; simpl; trivial.
+ intro.
+ destruct b; reflexivity.
+Qed.
+
+Lemma eval_condition_ccomp_swap :
+ forall c x y m,
+ eval_condition (Ccomp (swap_comparison c)) (x :: y :: nil) m=
+ eval_condition (Ccomp c) (y :: x :: nil) m.
+Proof.
+ intros; unfold eval_condition;
+ apply Val.swap_cmp_bool.
+Qed.
+
+Lemma eval_condition_ccompu_swap :
+ forall c x y m,
+ eval_condition (Ccompu (swap_comparison c)) (x :: y :: nil) m=
+ eval_condition (Ccompu c) (y :: x :: nil) m.
+Proof.
+ intros; unfold eval_condition;
+ apply Val.swap_cmpu_bool.
+Qed.
+
+Lemma eval_condition_ccompl_swap :
+ forall c x y m,
+ eval_condition (Ccompl (swap_comparison c)) (x :: y :: nil) m=
+ eval_condition (Ccompl c) (y :: x :: nil) m.
+Proof.
+ intros; unfold eval_condition;
+ apply Val.swap_cmpl_bool.
+Qed.
+
+Lemma eval_condition_ccomplu_swap :
+ forall c x y m,
+ eval_condition (Ccomplu (swap_comparison c)) (x :: y :: nil) m=
+ eval_condition (Ccomplu c) (y :: x :: nil) m.
+Proof.
+ intros; unfold eval_condition;
+ apply Val.swap_cmplu_bool.
+Qed.
+
+Lemma int_ltu_zero : forall i, Int.ltu i Int.zero = false.
+Proof.
+ intro.
+ unfold Int.ltu.
+ apply zlt_false.
+ rewrite Int.unsigned_zero.
+ pose proof (Int.unsigned_range i).
+ lia.
+Qed.
+
+Lemma cmpu_bool_Clt : forall pred v0 b,
+ Val.cmpu_bool pred Clt v0 (Vint Int.zero) = Some b -> b = false.
+Proof.
+ intros until b. intro CMP.
+ destruct v0; cbn in CMP; try discriminate.
+ inv CMP.
+ apply int_ltu_zero.
+Qed.
+
+Lemma cmpu_bool_Cge : forall pred v0 b,
+ Val.cmpu_bool pred Cge v0 (Vint Int.zero) = Some b -> b = true.
+Proof.
+ intros until b. intro CMP.
+ destruct v0; cbn in CMP; try discriminate.
+ inv CMP.
+ rewrite int_ltu_zero.
+ reflexivity.
+Qed.
+
+Lemma int64_ltu_zero : forall i, Int64.ltu i Int64.zero = false.
+Proof.
+ intro.
+ unfold Int64.ltu.
+ apply zlt_false.
+ rewrite Int64.unsigned_zero.
+ pose proof (Int64.unsigned_range i).
+ lia.
+Qed.
+
+Lemma cmplu_bool_Clt : forall pred v0 b,
+ Val.cmplu_bool pred Clt v0 (Vlong Int64.zero) = Some b -> b = false.
+Proof.
+ intros until b. intro CMP.
+ destruct v0; cbn in CMP; try discriminate.
+ { inv CMP.
+ apply int64_ltu_zero.
+ }
+ repeat rewrite if_same in CMP.
+ discriminate.
+Qed.
+
+Lemma cmplu_bool_Cge : forall pred v0 b,
+ Val.cmplu_bool pred Cge v0 (Vlong Int64.zero) = Some b -> b = true.
+Proof.
+ intros until b. intro CMP.
+ destruct v0; cbn in CMP; try discriminate.
+ { inv CMP.
+ rewrite int64_ltu_zero.
+ reflexivity.
+ }
+ repeat rewrite if_same in CMP.
+ discriminate.
+Qed.
+
+Theorem eval_select:
+ forall le ty cond al vl a1 v1 a2 v2 a b,
+ select ty cond al a1 a2 = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ eval_expr ge sp e m le a1 v1 ->
+ eval_expr ge sp e m le a2 v2 ->
+ eval_condition cond vl m = Some b ->
+ exists v,
+ eval_expr ge sp e m le a v
+ /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v.
+Proof.
+ unfold select.
+ intros until b.
+ intro Hop; injection Hop; clear Hop; intro; subst a.
+ intros HeL He1 He2 HeC.
+ destruct same_expr_pure eqn:SAME.
+ {
+ destruct (eval_same_expr a1 a2 le v1 v2 SAME He1 He2) as [EQ1 EQ2].
+ subst a2. subst v2.
+ exists v1; split; trivial.
+ cbn.
+ rewrite if_same.
+ apply Val.lessdef_normalize.
+ }
+ unfold cond_to_condition0.
+ destruct (cond_to_condition0_match cond al).
+ {
+ InvEval.
+ rewrite <- HeC.
+ destruct (Int.eq_dec x Int.zero).
+ { subst x.
+ simpl.
+ change (Val.cmp_bool c v0 (Vint Int.zero))
+ with (eval_condition0 (Ccomp0 c) v0 m).
+ eapply eval_select0; eassumption.
+ }
+ simpl.
+ erewrite <- (bool_cond0_ne (Val.cmp_bool c v0 (Vint x))).
+ eapply eval_select0; repeat (try econstructor; try eassumption).
+ }
+ {
+ InvEval.
+ rewrite <- HeC.
+ destruct (Int.eq_dec x Int.zero).
+ { subst x.
+ simpl.
+ change (Val.cmpu_bool (Mem.valid_pointer m) c v0 (Vint Int.zero))
+ with (eval_condition0 (Ccompu0 c) v0 m).
+ destruct c.
+ all: try (eapply eval_select0; eassumption).
+ all: cbn.
+ all: cbn in HeC.
+ {
+ destruct (Val.cmpu_bool (Mem.valid_pointer m) Clt v0 (Vint Int.zero)) eqn:CMP; try discriminate.
+ inv HeC.
+ rewrite (cmpu_bool_Clt (Mem.valid_pointer m) v0 b CMP).
+ cbn.
+ exists v2.
+ split; auto using Val.lessdef_normalize.
+ }
+ {
+ destruct (Val.cmpu_bool (Mem.valid_pointer m) Cge v0 (Vint Int.zero)) eqn:CMP; try discriminate.
+ inv HeC.
+ rewrite (cmpu_bool_Cge (Mem.valid_pointer m) v0 b CMP).
+ cbn.
+ exists v1.
+ split; auto using Val.lessdef_normalize.
+ }
+ }
+ simpl.
+ erewrite <- (bool_cond0_ne (Val.cmpu_bool (Mem.valid_pointer m) c v0 (Vint x))).
+ eapply eval_select0; repeat (try econstructor; try eassumption).
+ }
+ {
+ InvEval.
+ rewrite <- HeC.
+ destruct (Int64.eq_dec x Int64.zero).
+ { subst x.
+ simpl.
+ change (Val.cmpl_bool c v0 (Vlong Int64.zero))
+ with (eval_condition0 (Ccompl0 c) v0 m).
+ eapply eval_select0; eassumption.
+ }
+ simpl.
+ erewrite <- (bool_cond0_ne (Val.cmpl_bool c v0 (Vlong x))).
+ eapply eval_select0; repeat (try econstructor; try eassumption).
+ }
+ {
+ InvEval.
+ rewrite <- HeC.
+ destruct (Int64.eq_dec x Int64.zero).
+ { subst x.
+ simpl.
+ change (Val.cmplu_bool (Mem.valid_pointer m) c v0 (Vlong Int64.zero))
+ with (eval_condition0 (Ccomplu0 c) v0 m).
+ destruct c.
+ all: try (eapply eval_select0; eassumption).
+ all: cbn.
+ all: cbn in HeC.
+ {
+ destruct (Val.cmplu_bool (Mem.valid_pointer m) Clt v0 (Vlong Int64.zero)) eqn:CMP; try discriminate.
+ inv HeC.
+ rewrite (cmplu_bool_Clt (Mem.valid_pointer m) v0 b CMP).
+ cbn.
+ exists v2.
+ split; auto using Val.lessdef_normalize.
+ }
+ {
+ destruct (Val.cmplu_bool (Mem.valid_pointer m) Cge v0 (Vlong Int64.zero)) eqn:CMP; try discriminate.
+ inv HeC.
+ rewrite (cmplu_bool_Cge (Mem.valid_pointer m) v0 b CMP).
+ cbn.
+ exists v1.
+ split; auto using Val.lessdef_normalize.
+ }
+ }
+ simpl.
+ erewrite <- (bool_cond0_ne (Val.cmplu_bool (Mem.valid_pointer m) c v0 (Vlong x))).
+ eapply eval_select0; repeat (try econstructor; try eassumption).
+ }
+ erewrite <- (bool_cond0_ne (Some b)).
+ eapply eval_select0; repeat (try econstructor; try eassumption).
+ rewrite <- HeC.
+ simpl.
+ reflexivity.
+Qed.
+
+(* floating-point division *)
+Theorem eval_divf_base:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v.
+Proof.
+ intros; unfold divf_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+
+Lemma eval_divfs_base1:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divfs_base1 b) v /\ Val.lessdef (ExtValues.invfs y) v.
+Proof.
+ intros; unfold divfs_base1.
+ econstructor; split.
+ repeat (try econstructor; try eassumption).
+ trivial.
+Qed.
+
+Lemma eval_divfs_baseX:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divfs_baseX a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+ intros; unfold divfs_base.
+ econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto.
+Qed.
+
+Theorem eval_divfs_base:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+ intros; unfold divfs_base.
+ destruct (divfs_base_match _).
+ - destruct (Float32.eq_dec _ _).
+ + exists (Val.divfs x y).
+ split; trivial. repeat (try econstructor; try eassumption).
+ simpl. InvEval. reflexivity.
+ + apply eval_divfs_baseX; assumption.
+ - apply eval_divfs_baseX; assumption.
+Qed.
+
+(** Platform-specific known builtins *)
+
+Lemma eval_fma:
+ forall al a vl v le,
+ gen_fma al = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ platform_builtin_sem BI_fma vl = Some v ->
+ exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+ unfold gen_fma.
+ intros until le.
+ intro Heval.
+ destruct (gen_fma_match _) in *; try discriminate.
+ all: inversion Heval; subst a; clear Heval; intro; InvEval.
+ - subst v1.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v2; simpl; trivial;
+ destruct v3; simpl; trivial.
+ - intro Heval.
+ simpl in Heval.
+ inv Heval.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v1; simpl; trivial;
+ destruct v2; simpl; trivial.
+Qed.
+
+Lemma eval_fmaf:
+ forall al a vl v le,
+ gen_fmaf al = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ platform_builtin_sem BI_fmaf vl = Some v ->
+ exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+ unfold gen_fmaf.
+ intros until le.
+ intro Heval.
+ destruct (gen_fmaf_match _) in *; try discriminate.
+ all: inversion Heval; subst a; clear Heval; intro; InvEval.
+ - subst v1.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v2; simpl; trivial;
+ destruct v3; simpl; trivial.
+ - intro Heval.
+ simpl in Heval.
+ inv Heval.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v1; simpl; trivial;
+ destruct v2; simpl; trivial.
+Qed.
+
+Theorem eval_platform_builtin:
+ forall bf al a vl v le,
+ platform_builtin bf al = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ platform_builtin_sem bf vl = Some v ->
+ exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+ destruct bf; intros until le; intro Heval.
+ all: try (inversion Heval; subst a; clear Heval;
+ exists v; split; trivial;
+ repeat (try econstructor; try eassumption)).
+ - apply eval_fma; assumption.
+ - apply eval_fmaf; assumption.
+Qed.
+
+End CMCONSTR.
diff --git a/kvx/Stacklayout.v b/kvx/Stacklayout.v
new file mode 100644
index 00000000..05cfa1d7
--- /dev/null
+++ b/kvx/Stacklayout.v
@@ -0,0 +1,151 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Machine- and ABI-dependent layout information for activation records. *)
+
+Require Import Coqlib.
+Require Import AST Memory Separation.
+Require Import Bounds.
+Require Import Lia.
+
+Local Open Scope sep_scope.
+
+(** The general shape of activation records is as follows,
+ from bottom (lowest offsets) to top:
+- Space for outgoing arguments to function calls.
+- Back link to parent frame
+- Return address
+- Saved values of callee-save registers used by the function.
+- Local stack slots.
+- Space for the stack-allocated data declared in Cminor.
+
+The stack pointer is kept 16-aligned.
+*)
+
+Definition fe_ofs_arg := 0.
+
+Definition make_env (b: bounds) : frame_env :=
+ let w := if Archi.ptr64 then 8 else 4 in
+ let olink := align (4 * b.(bound_outgoing)) w in (* back link *)
+ let oretaddr := olink + w in (* return address *)
+ let ocs := oretaddr + w in (* callee-saves *)
+ let ol := align (size_callee_save_area b ocs) 8 in (* locals *)
+ let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
+ let sz := align (ostkdata + b.(bound_stack_data)) 16 in
+ {| fe_size := sz;
+ fe_ofs_link := olink;
+ fe_ofs_retaddr := oretaddr;
+ fe_ofs_local := ol;
+ fe_ofs_callee_save := ocs;
+ fe_stack_data := ostkdata;
+ fe_used_callee_save := b.(used_callee_save) |}.
+
+Lemma frame_env_separated:
+ forall b sp m P,
+ let fe := make_env b in
+ m |= range sp 0 (fe_stack_data fe) ** range sp (fe_stack_data fe + bound_stack_data b) (fe_size fe) ** P ->
+ m |= range sp (fe_ofs_local fe) (fe_ofs_local fe + 4 * bound_local b)
+ ** range sp fe_ofs_arg (fe_ofs_arg + 4 * bound_outgoing b)
+ ** range sp (fe_ofs_link fe) (fe_ofs_link fe + size_chunk Mptr)
+ ** range sp (fe_ofs_retaddr fe) (fe_ofs_retaddr fe + size_chunk Mptr)
+ ** range sp (fe_ofs_callee_save fe) (size_callee_save_area b (fe_ofs_callee_save fe))
+ ** P.
+Proof.
+Local Opaque Z.add Z.mul sepconj range.
+ intros; cbn.
+ set (w := if Archi.ptr64 then 8 else 4).
+ set (olink := align (4 * b.(bound_outgoing)) w).
+ set (oretaddr := olink + w).
+ set (ocs := oretaddr + w).
+ set (ol := align (size_callee_save_area b ocs) 8).
+ set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+ replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto).
+ assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
+ generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+ assert (0 <= 4 * b.(bound_outgoing)) by lia.
+ assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+ assert (olink + w <= oretaddr) by (unfold oretaddr; lia).
+ assert (oretaddr + w <= ocs) by (unfold ocs; lia).
+ assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
+ assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
+ assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
+(* Reorder as:
+ outgoing
+ back link
+ retaddr
+ callee-save
+ local *)
+ rewrite sep_swap12.
+ rewrite sep_swap23.
+ rewrite sep_swap34.
+ rewrite sep_swap45.
+(* Apply range_split and range_split2 repeatedly *)
+ unfold fe_ofs_arg.
+ apply range_split_2. fold olink; lia. lia.
+ apply range_split. lia.
+ apply range_split. lia.
+ apply range_split_2. fold ol. lia. lia.
+ apply range_drop_right with ostkdata. lia.
+ eapply sep_drop2. eexact H.
+Qed.
+
+Lemma frame_env_range:
+ forall b,
+ let fe := make_env b in
+ 0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe.
+Proof.
+ intros; cbn.
+ set (w := if Archi.ptr64 then 8 else 4).
+ set (olink := align (4 * b.(bound_outgoing)) w).
+ set (oretaddr := olink + w).
+ set (ocs := oretaddr + w).
+ set (ol := align (size_callee_save_area b ocs) 8).
+ set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+ assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
+ generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+ assert (0 <= 4 * b.(bound_outgoing)) by lia.
+ assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia).
+ assert (olink + w <= oretaddr) by (unfold oretaddr; lia).
+ assert (oretaddr + w <= ocs) by (unfold ocs; lia).
+ assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
+ assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia).
+ assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia).
+ split. lia. apply align_le. lia.
+Qed.
+
+Lemma frame_env_aligned:
+ forall b,
+ let fe := make_env b in
+ (8 | fe_ofs_arg)
+ /\ (8 | fe_ofs_local fe)
+ /\ (8 | fe_stack_data fe)
+ /\ (align_chunk Mptr | fe_ofs_link fe)
+ /\ (align_chunk Mptr | fe_ofs_retaddr fe).
+Proof.
+ intros; cbn.
+ set (w := if Archi.ptr64 then 8 else 4).
+ set (olink := align (4 * b.(bound_outgoing)) w).
+ set (oretaddr := olink + w).
+ set (ocs := oretaddr + w).
+ set (ol := align (size_callee_save_area b ocs) 8).
+ set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
+ assert (0 < w) by (unfold w; destruct Archi.ptr64; lia).
+ replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto).
+ split. apply Z.divide_0_r.
+ split. apply align_divides; lia.
+ split. apply align_divides; lia.
+ split. apply align_divides; lia.
+ apply Z.divide_add_r. apply align_divides; lia. apply Z.divide_refl.
+Qed.
diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml
new file mode 100644
index 00000000..9e2e3776
--- /dev/null
+++ b/kvx/TargetPrinter.ml
@@ -0,0 +1,892 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* Printing RISC-V assembly code in asm syntax *)
+
+open Printf
+open Camlcoq
+open Sections
+open AST
+open Asm
+open PrintAsmaux
+open Fileinfo
+
+(* Module containing the printing functions *)
+
+module Target (*: TARGET*) =
+ struct
+
+(* Basic printing functions *)
+
+ let comment = "#"
+
+ type idiv_function_kind =
+ | Idiv_system
+ | Idiv_stsud
+ | Idiv_fp;;
+
+ let idiv_function_kind = function
+ "stsud" -> Idiv_stsud
+ | "system" -> Idiv_system
+ | "fp" -> Idiv_fp
+ | _ -> failwith "unknown integer division kind";;
+
+ let idiv_function_kind_32bit () = idiv_function_kind !Clflags.option_div_i32;;
+ let idiv_function_kind_64bit () = idiv_function_kind !Clflags.option_div_i64;;
+
+ let subst_symbol = function
+ "__compcert_i64_udiv" ->
+ (match idiv_function_kind_64bit () with
+ | Idiv_system | Idiv_fp -> "__udivdi3"
+ | Idiv_stsud -> "__compcert_i64_udiv_stsud")
+ | "__compcert_i64_sdiv" ->
+ (match idiv_function_kind_64bit() with
+ | Idiv_system | Idiv_fp -> "__divdi3"
+ | Idiv_stsud -> "__compcert_i64_sdiv_stsud")
+ | "__compcert_i64_umod" ->
+ (match idiv_function_kind_64bit() with
+ | Idiv_system | Idiv_fp -> "__umoddi3"
+ | Idiv_stsud -> "__compcert_i64_umod_stsud")
+ | "__compcert_i64_smod" ->
+ (match idiv_function_kind_64bit() with
+ | Idiv_system | Idiv_fp -> "__moddi3"
+ | Idiv_stsud -> "__compcert_i64_smod_stsud")
+ | "__compcert_i32_sdiv" as s ->
+ (match idiv_function_kind_32bit() with
+ | Idiv_system -> s
+ | Idiv_fp -> "__compcert_i32_sdiv_fp"
+ | Idiv_stsud -> "__compcert_i32_sdiv_stsud")
+ | "__compcert_i32_udiv" as s ->
+ (match idiv_function_kind_32bit() with
+ | Idiv_system -> s
+ | Idiv_fp -> "__compcert_i32_udiv_fp"
+ | Idiv_stsud -> "__compcert_i32_udiv_stsud")
+ | "__compcert_i32_smod" as s ->
+ (match idiv_function_kind_32bit() with
+ | Idiv_system -> s
+ | Idiv_fp -> "__compcert_i32_smod_fp"
+ | Idiv_stsud -> "__compcert_i32_smod_stsud")
+ | "__compcert_i32_umod" as s ->
+ (match idiv_function_kind_32bit() with
+ | Idiv_system -> s
+ | Idiv_fp -> "__compcert_i32_umod_fp"
+ | Idiv_stsud -> "__compcert_i32_umod_stsud")
+ | "__compcert_f64_div" -> "__divdf3"
+ | "__compcert_f32_div" -> "__divsf3"
+ | x -> x;;
+
+ let symbol oc symb =
+ fprintf oc "%s" (subst_symbol (extern_atom symb))
+
+ let symbol_offset oc (symb, ofs) =
+ symbol oc symb;
+ let ofs = camlint64_of_ptrofs ofs in
+ if ofs <> 0L then fprintf oc " + %Ld" ofs
+
+ let label = elf_label
+
+ let print_label oc lbl = label oc (transl_label lbl)
+
+ let int_reg_name = let open Asmvliw in function
+
+ | GPR0 -> "$r0" | GPR1 -> "$r1" | GPR2 -> "$r2" | GPR3 -> "$r3"
+ | GPR4 -> "$r4" | GPR5 -> "$r5" | GPR6 -> "$r6" | GPR7 -> "$r7"
+ | GPR8 -> "$r8" | GPR9 -> "$r9" | GPR10 -> "$r10" | GPR11 -> "$r11"
+ | GPR12 -> "$r12" | GPR13 -> "$r13" | GPR14 -> "$r14" | GPR15 -> "$r15"
+ | GPR16 -> "$r16" | GPR17 -> "$r17" | GPR18 -> "$r18" | GPR19 -> "$r19"
+ | GPR20 -> "$r20" | GPR21 -> "$r21" | GPR22 -> "$r22" | GPR23 -> "$r23"
+ | GPR24 -> "$r24" | GPR25 -> "$r25" | GPR26 -> "$r26" | GPR27 -> "$r27"
+ | GPR28 -> "$r28" | GPR29 -> "$r29" | GPR30 -> "$r30" | GPR31 -> "$r31"
+ | GPR32 -> "$r32" | GPR33 -> "$r33" | GPR34 -> "$r34" | GPR35 -> "$r35"
+ | GPR36 -> "$r36" | GPR37 -> "$r37" | GPR38 -> "$r38" | GPR39 -> "$r39"
+ | GPR40 -> "$r40" | GPR41 -> "$r41" | GPR42 -> "$r42" | GPR43 -> "$r43"
+ | GPR44 -> "$r44" | GPR45 -> "$r45" | GPR46 -> "$r46" | GPR47 -> "$r47"
+ | GPR48 -> "$r48" | GPR49 -> "$r49" | GPR50 -> "$r50" | GPR51 -> "$r51"
+ | GPR52 -> "$r52" | GPR53 -> "$r53" | GPR54 -> "$r54" | GPR55 -> "$r55"
+ | GPR56 -> "$r56" | GPR57 -> "$r57" | GPR58 -> "$r58" | GPR59 -> "$r59"
+ | GPR60 -> "$r60" | GPR61 -> "$r61" | GPR62 -> "$r62" | GPR63 -> "$r63"
+
+ let ireg oc r = output_string oc (int_reg_name r)
+
+ let int_gpreg_q_name =
+ let open Asmvliw in
+ function
+ | R0R1 -> "$r0r1"
+ | R2R3 -> "$r2r3"
+ | R4R5 -> "$r4r5"
+ | R6R7 -> "$r6r7"
+ | R8R9 -> "$r8r9"
+ | R10R11 -> "$r10r11"
+ | R12R13 -> "$r12r13"
+ | R14R15 -> "$r14r15"
+ | R16R17 -> "$r16r17"
+ | R18R19 -> "$r18r19"
+ | R20R21 -> "$r20r21"
+ | R22R23 -> "$r22r23"
+ | R24R25 -> "$r24r25"
+ | R26R27 -> "$r26r27"
+ | R28R29 -> "$r28r29"
+ | R30R31 -> "$r30r31"
+ | R32R33 -> "$r32r33"
+ | R34R35 -> "$r34r35"
+ | R36R37 -> "$r36r37"
+ | R38R39 -> "$r38r39"
+ | R40R41 -> "$r40r41"
+ | R42R43 -> "$r42r43"
+ | R44R45 -> "$r44r45"
+ | R46R47 -> "$r46r47"
+ | R48R49 -> "$r48r49"
+ | R50R51 -> "$r50r51"
+ | R52R53 -> "$r52r53"
+ | R54R55 -> "$r54r55"
+ | R56R57 -> "$r56r57"
+ | R58R59 -> "$r58r59"
+ | R60R61 -> "$r60r61"
+ | R62R63 -> "$r62r63"
+
+ let int_gpreg_o_name =
+ let open Asmvliw in
+ function
+ | R0R1R2R3 -> "$r0r1r2r3"
+ | R4R5R6R7 -> "$r4r5r6r7"
+ | R8R9R10R11 -> "$r8r9r10r11"
+ | R12R13R14R15 -> "$r12r13r14r15"
+ | R16R17R18R19 -> "$r16r17r18r19"
+ | R20R21R22R23 -> "$r20r21r22r23"
+ | R24R25R26R27 -> "$r24r25r26r27"
+ | R28R29R30R31 -> "$r28r29r30r31"
+ | R32R33R34R35 -> "$r32r33r34r35"
+ | R36R37R38R39 -> "$r36r37r38r39"
+ | R40R41R42R43 -> "$r40r41r42r43"
+ | R44R45R46R47 -> "$r44r45r46r47"
+ | R48R49R50R51 -> "$r48r49r50r51"
+ | R52R53R54R55 -> "$r52r53r54r55"
+ | R56R57R58R59 -> "$r56r57r58r59"
+ | R60R61R62R63 -> "$r60r61r62r63";;
+
+ let gpreg_q oc r = output_string oc (int_gpreg_q_name r)
+ let gpreg_o oc r = output_string oc (int_gpreg_o_name r)
+
+ let preg oc = let open Asmvliw in function
+ | IR r -> ireg oc r
+ | RA -> output_string oc "$ra"
+ | _ -> assert false
+
+ let preg_asm oc ty = preg oc
+
+ let preg_annot = let open Asmvliw in function
+ | IR r -> int_reg_name r
+ | RA -> "$ra"
+ | _ -> assert false
+
+ let scale_of_shift1_4 = let open ExtValues in function
+ | SHIFT1 -> 2
+ | SHIFT2 -> 4
+ | SHIFT3 -> 8
+ | SHIFT4 -> 16;;
+
+(* Names of sections *)
+
+ let name_of_section = function
+ | Section_text -> ".text"
+ | Section_data(Init, true) ->
+ ".section .tdata,\"awT\",@progbits"
+ | Section_data(Uninit, true) ->
+ ".section .tbss,\"awT\",@nobits"
+ | Section_data(Init_reloc, true) ->
+ failwith "Sylvain does not how to fix this"
+ | Section_data(i, false) | Section_small_data(i) ->
+ variable_section ~sec:".data" ~bss:".bss" i
+ | Section_const i | Section_small_const i ->
+ variable_section ~sec:".section .rodata" i
+ | Section_string -> ".section .rodata"
+ | Section_literal -> ".section .rodata"
+ | Section_jumptable -> ".section .rodata"
+ | Section_debug_info _ -> ".section .debug_info,\"\",%progbits"
+ | Section_debug_loc -> ".section .debug_loc,\"\",%progbits"
+ | Section_debug_abbrev -> ".section .debug_abbrev,\"\",%progbits"
+ | Section_debug_line _ -> ".section .debug_line,\"\",%progbits"
+ | Section_debug_ranges -> ".section .debug_ranges,\"\",%progbits"
+ | Section_debug_str -> ".section .debug_str,\"MS\",%progbits,1"
+ | Section_user(s, wr, ex) ->
+ sprintf ".section \"%s\",\"a%s%s\",%%progbits"
+ s (if wr then "w" else "") (if ex then "x" else "")
+ | Section_ais_annotation -> sprintf ".section \"__compcert_ais_annotations\",\"\",@note"
+
+ let section oc sec =
+ fprintf oc " %s\n" (name_of_section sec)
+
+(* Associate labels to floating-point constants and to symbols. *)
+
+ let print_tbl oc (lbl, tbl) =
+ fprintf oc " .balign 8\n";
+ fprintf oc "%a:\n" label lbl;
+ List.iter
+ (fun l -> fprintf oc " .8byte %a\n"
+ print_label l)
+ tbl
+
+ let emit_constants oc lit =
+ if exists_constants () then begin
+ section oc lit;
+ if Hashtbl.length literal64_labels > 0 then
+ begin
+ fprintf oc " .align 3\n";
+ Hashtbl.iter
+ (fun bf lbl -> fprintf oc "%a: .quad 0x%Lx\n" label lbl bf)
+ literal64_labels
+ end;
+ if Hashtbl.length literal32_labels > 0 then
+ begin
+ fprintf oc " .align 2\n";
+ Hashtbl.iter
+ (fun bf lbl ->
+ fprintf oc "%a: .long 0x%lx\n" label lbl bf)
+ literal32_labels
+ end;
+ reset_literals ()
+ end
+
+(* Generate code to load the address of id + ofs in register r *)
+
+ let loadsymbol oc r id ofs =
+ if Archi.pic_code () then begin
+ assert (ofs = Integers.Ptrofs.zero);
+ if C2C.atom_is_thread_local id then begin
+ (* fprintf oc " addd %a = $r13, @tprel(%s)\n" ireg r (extern_atom id) *)
+ fprintf oc " addd %a = $r13, @tlsle(%s)\n" ireg r (extern_atom id)
+ end else begin
+ fprintf oc " make %a = %s\n" ireg r (extern_atom id)
+ end
+ end else
+ begin
+ if C2C.atom_is_thread_local id then begin
+ (* fprintf oc " addd %a = $r13, @tprel(%a)\n" ireg r symbol_offset (id, ofs) *)
+ fprintf oc " addd %a = $r13, @tlsle(%a)\n" ireg r symbol_offset (id, ofs)
+ end else begin
+ fprintf oc " make %a = %a\n" ireg r symbol_offset (id, ofs)
+ end
+ end
+
+(* Emit .file / .loc debugging directives *)
+
+ let print_file_line oc file line =
+ print_file_line oc comment file line
+
+(*
+ let print_location oc loc =
+ if loc <> Cutil.no_loc then print_file_line oc (fst loc) (snd loc)
+*)
+
+(* Add "w" suffix to 32-bit instructions if we are in 64-bit mode *)
+
+ (*let w oc =
+ if Archi.ptr64 then output_string oc "w"
+ *)
+
+ (* Profiling *)
+
+
+ let kvx_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ fprintf oc " make $r0 = %d\n" nr_items;
+ fprintf oc " make $r1 = %s\n" profiling_id_table_name;
+ fprintf oc " make $r2 = %s\n" profiling_counter_table_name;
+ fprintf oc " goto %s\n" profiling_write_table_helper;
+ fprintf oc " ;;\n";;
+
+ (* Offset part of a load or store *)
+
+ let offset oc n = ptrofs oc n
+
+ let addressing oc = function
+ | AOff ofs -> offset oc ofs
+ | AReg ro | ARegXS ro -> ireg oc ro
+
+ let xscale oc = function
+ | ARegXS _ -> fprintf oc ".xs"
+ | _ -> ()
+
+ let lsvariant oc = function
+ | TRAP -> ()
+ | NOTRAP -> output_string oc ".s"
+
+ let icond_name = let open Asmvliw in function
+ | ITne | ITneu -> "ne"
+ | ITeq | ITequ -> "eq"
+ | ITlt -> "lt"
+ | ITge -> "ge"
+ | ITle -> "le"
+ | ITgt -> "gt"
+ | ITltu -> "ltu"
+ | ITgeu -> "geu"
+ | ITleu -> "leu"
+ | ITgtu -> "gtu"
+
+ let icond oc c = fprintf oc "%s" (icond_name c)
+
+ let fcond_name = let open Asmvliw in function
+ | FTone -> "one"
+ | FTueq -> "ueq"
+ | FToeq -> "oeq"
+ | FTune -> "une"
+ | FTolt -> "olt"
+ | FTuge -> "uge"
+ | FToge -> "oge"
+ | FTult -> "ult"
+
+ let fcond oc c = fprintf oc "%s" (fcond_name c)
+
+ let bcond_name = let open Asmvliw in function
+ | BTwnez -> "wnez"
+ | BTweqz -> "weqz"
+ | BTwltz -> "wltz"
+ | BTwgez -> "wgez"
+ | BTwlez -> "wlez"
+ | BTwgtz -> "wgtz"
+ | BTdnez -> "dnez"
+ | BTdeqz -> "deqz"
+ | BTdltz -> "dltz"
+ | BTdgez -> "dgez"
+ | BTdlez -> "dlez"
+ | BTdgtz -> "dgtz"
+
+ let bcond oc c = fprintf oc "%s" (bcond_name c)
+
+(* Printing of instructions *)
+ exception ShouldBeExpanded
+
+ let print_instruction oc = function
+ (* Pseudo-instructions expanded in Asmexpand *)
+ | Pallocframe(sz, ofs) -> assert false
+ | Pfreeframe(sz, ofs) -> assert false
+
+ (* Pseudo-instructions that remain *)
+ | Plabel lbl ->
+ fprintf oc "%a:\n" print_label lbl
+ | Ploadsymbol(rd, id, ofs) ->
+ loadsymbol oc rd id ofs
+ | Pbuiltin(ef, args, res) ->
+ begin match ef with
+ | EF_annot(kind,txt, targs) ->
+ begin match (P.to_int kind) with
+ | 1 -> let annot = annot_text preg_annot "x2" (camlstring_of_coqstring txt) args in
+ fprintf oc "%s annotation: %S\n" comment annot
+ (*| 2 -> let lbl = new_label () in
+ fprintf oc "%a: " label lbl;
+ add_ais_annot lbl preg_annot "x2" (camlstring_of_coqstring txt) args
+ *)| _ -> assert false
+ end
+ | EF_debug(kind, txt, targs) ->
+ print_debug_info comment print_file_line preg_annot "sp" oc
+ (P.to_int kind) (extern_atom txt) args
+ | EF_inline_asm(txt, sg, clob) ->
+ fprintf oc "%s begin inline assembly\n\t" comment;
+ print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
+ fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling(id, coq_kind) ->
+ let kind = Z.to_int coq_kind in
+ assert (kind >= 0);
+ assert (kind <= 1);
+ fprintf oc "%s profiling %a %d\n" comment
+ Profilingaux.pp_id id kind;
+ fprintf oc " make $r63 = %s\n" profiling_counter_table_name;
+ fprintf oc " make $r62 = 1\n";
+ fprintf oc " ;;\n";
+ fprintf oc " afaddd %d[$r63] = $r62\n"
+ (profiling_offset id kind);
+ fprintf oc " ;;\n"
+ | _ ->
+ assert false
+ end
+ | Pnop -> (* FIXME fprintf oc " nop\n" *) ()
+ | Psemi -> fprintf oc ";;\n"
+
+ | Pclzll (rd, rs) -> fprintf oc " clzd %a = %a\n" ireg rd ireg rs
+ | Pclzw (rd, rs) -> fprintf oc " clzw %a = %a\n" ireg rd ireg rs
+ | Pctzll (rd, rs) -> fprintf oc " ctzd %a = %a\n" ireg rd ireg rs
+ | Pctzw (rd, rs) -> fprintf oc " ctzw %a = %a\n" ireg rd ireg rs
+ | Pstsud (rd, rs1, rs2) -> fprintf oc " stsud %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+
+
+ (* Control flow instructions *)
+ | Pget (rd, rs) ->
+ fprintf oc " get %a = %a\n" ireg rd preg rs
+ | Pset (rd, rs) ->
+ fprintf oc " set %a = %a\n" preg rd ireg rs
+ | Pret ->
+ fprintf oc " ret \n"
+ | Pcall(s) ->
+ fprintf oc " call %a\n" symbol s
+ | Picall(rs) ->
+ fprintf oc " icall %a\n" ireg rs
+ | Pgoto(s) ->
+ fprintf oc " goto %a\n" symbol s
+ | Pigoto(rs) ->
+ fprintf oc " igoto %a\n" ireg rs
+ | Pj_l(s) ->
+ fprintf oc " goto %a\n" print_label s
+ | Pcb (bt, r, lbl) | Pcbu (bt, r, lbl) ->
+ fprintf oc " cb.%a %a? %a\n" bcond bt ireg r print_label lbl
+
+ (* For builtins *)
+ | Ploopdo (r, lbl) ->
+ fprintf oc " loopdo %a, %a\n" ireg r print_label lbl
+ | Pgetn(n, dst) ->
+ fprintf oc " get %a = $s%ld\n" ireg dst (camlint_of_coqint n)
+ | Psetn(n, dst) ->
+ fprintf oc " set $s%ld = %a\n" (camlint_of_coqint n) ireg dst
+ | Pwfxl(n, dst) ->
+ fprintf oc " wfxl $s%ld = %a\n" (camlint_of_coqint n) ireg dst
+ | Pwfxm(n, dst) ->
+ fprintf oc " wfxm $s%ld = %a\n" (camlint_of_coqint n) ireg dst
+ | Pldu(dst, addr) ->
+ fprintf oc " ld.u %a = 0[%a]\n" ireg dst ireg addr
+ | Plbzu(dst, addr) ->
+ fprintf oc " lbz.u %a = 0[%a]\n" ireg dst ireg addr
+ | Plhzu(dst, addr) ->
+ fprintf oc " lhz.u %a = 0[%a]\n" ireg dst ireg addr
+ | Plwzu(dst, addr) ->
+ fprintf oc " lwz.u %a = 0[%a]\n" ireg dst ireg addr
+ | Pawait ->
+ fprintf oc " await\n"
+ | Psleep ->
+ fprintf oc " sleep\n"
+ | Pstop ->
+ fprintf oc " stop\n"
+ | Pbarrier ->
+ fprintf oc " barrier\n"
+ | Pfence ->
+ fprintf oc " fence\n"
+ | Pdinval ->
+ fprintf oc " dinval\n"
+ | Pdinvall addr ->
+ fprintf oc " dinvall 0[%a]\n" ireg addr
+ | Pdtouchl addr ->
+ fprintf oc " dtouchl 0[%a]\n" ireg addr
+ | Piinval ->
+ fprintf oc " iinval\n"
+ | Piinvals addr ->
+ fprintf oc " iinvals 0[%a]\n" ireg addr
+ | Pitouchl addr ->
+ fprintf oc " itouchl 0[%a]\n" ireg addr
+ | Pdzerol addr ->
+ fprintf oc " dzerol 0[%a]\n" ireg addr
+(* | Pafaddd(addr, incr_res) ->
+ fprintfoc " afaddd 0[%a] = %a\n" ireg addr ireg incr_res
+ | Pafaddw(addr, incr_res) ->
+ fprintfoc " afaddw 0[%a] = %a\n" ireg addr ireg incr_res *) (* see #157 *)
+ | Palclrd(res, addr) ->
+ fprintf oc " alclrd %a = 0[%a]\n" ireg res ireg addr
+ | Palclrw(res, addr) ->
+ fprintf oc " alclrw %a = 0[%a]\n" ireg res ireg addr
+ | Pjumptable (idx_reg, tbl) ->
+ let lbl = new_label() in
+ (* jumptables := (lbl, tbl) :: !jumptables; *)
+ let base_reg = if idx_reg=Asmvliw.GPR63 then Asmvliw.GPR62 else Asmvliw.GPR63 in
+ fprintf oc "%s jumptable [ " comment;
+ List.iter (fun l -> fprintf oc "%a " print_label l) tbl;
+ fprintf oc "]\n";
+ fprintf oc " make %a = %a\n ;;\n" ireg base_reg label lbl;
+ fprintf oc " ld.xs %a = %a[%a]\n ;;\n" ireg base_reg ireg idx_reg ireg base_reg;
+ fprintf oc " igoto %a\n ;;\n" ireg base_reg;
+ section oc Section_jumptable;
+ print_tbl oc (lbl, tbl);
+ section oc Section_text
+
+ (* Load/Store instructions *)
+ | Plb(trap, rd, ra, adr) ->
+ fprintf oc " lbs%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Plbu(trap, rd, ra, adr) ->
+ fprintf oc " lbz%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Plh(trap, rd, ra, adr) ->
+ fprintf oc " lhs%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Plhu(trap, rd, ra, adr) ->
+ fprintf oc " lhz%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Plw(trap, rd, ra, adr) | Plw_a(trap, rd, ra, adr) | Pfls(trap, rd, ra, adr) ->
+ fprintf oc " lws%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Pld(trap, rd, ra, adr) | Pfld(trap, rd, ra, adr) | Pld_a(trap, rd, ra, adr) -> assert Archi.ptr64;
+ fprintf oc " ld%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra
+ | Plq(rd, ra, adr) ->
+ fprintf oc " lq%a %a = %a[%a]\n" xscale adr gpreg_q rd addressing adr ireg ra
+ | Plo(rd, ra, adr) ->
+ fprintf oc " lo%a %a = %a[%a]\n" xscale adr gpreg_o rd addressing adr ireg ra
+
+ | Psb(rd, ra, adr) ->
+ fprintf oc " sb%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
+ | Psh(rd, ra, adr) ->
+ fprintf oc " sh%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
+ | Psw(rd, ra, adr) | Psw_a(rd, ra, adr) | Pfss(rd, ra, adr) ->
+ fprintf oc " sw%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
+ | Psd(rd, ra, adr) | Psd_a(rd, ra, adr) | Pfsd(rd, ra, adr) -> assert Archi.ptr64;
+ fprintf oc " sd%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
+ | Psq(rd, ra, adr) ->
+ fprintf oc " sq%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_q rd
+ | Pso(rd, ra, adr) ->
+ fprintf oc " so%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_o rd
+
+ (* Arith R instructions *)
+
+ (* Arith RR instructions *)
+ | Pmv(rd, rs) ->
+ fprintf oc " addd %a = %a, 0\n" ireg rd ireg rs
+ | Pcvtl2w(rd, rs) -> assert false
+ | Pnegl(rd, rs) -> assert Archi.ptr64;
+ fprintf oc " negd %a = %a\n" ireg rd ireg rs
+ | Pnegw(rd, rs) ->
+ fprintf oc " negw %a = %a\n" ireg rd ireg rs
+ | Psxwd(rd, rs) ->
+ fprintf oc " sxwd %a = %a\n" ireg rd ireg rs
+ | Pzxwd(rd, rs) ->
+ fprintf oc " zxwd %a = %a\n" ireg rd ireg rs
+ | Pextfz(rd, rs, stop, start) | Pextfzl(rd, rs, stop, start) ->
+ fprintf oc " extfz %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start)
+ | Pextfs(rd, rs, stop, start) | Pextfsl(rd, rs, stop, start) ->
+ fprintf oc " extfs %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start)
+ | Pinsf(rd, rs, stop, start) | Pinsfl(rd, rs, stop, start) ->
+ fprintf oc " insf %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start)
+ | Pfabsd(rd, rs) ->
+ fprintf oc " fabsd %a = %a\n" ireg rd ireg rs
+ | Pfabsw(rd, rs) ->
+ fprintf oc " fabsw %a = %a\n" ireg rd ireg rs
+ | Pfnegd(rd, rs) ->
+ fprintf oc " fnegd %a = %a\n" ireg rd ireg rs
+ | Pfnegw(rd, rs) ->
+ fprintf oc " fnegw %a = %a\n" ireg rd ireg rs
+ | Pfnarrowdw(rd, rs) ->
+ fprintf oc " fnarrowdw %a = %a\n" ireg rd ireg rs
+ | Pfwidenlwd(rd, rs) ->
+ fprintf oc " fwidenlwd %a = %a\n" ireg rd ireg rs
+ | Pfloatuwrnsz(rd, rs) ->
+ fprintf oc " floatuw.rn.s %a = %a, 0\n" ireg rd ireg rs
+ | Pfloatwrnsz(rd, rs) ->
+ fprintf oc " floatw.rn.s %a = %a, 0\n" ireg rd ireg rs
+ | Pfloatudrnsz(rd, rs) ->
+ fprintf oc " floatud.rn.s %a = %a, 0\n" ireg rd ireg rs
+ | Pfloatdrnsz(rd, rs) ->
+ fprintf oc " floatd.rn.s %a = %a, 0\n" ireg rd ireg rs
+ | Pfixedwrzz(rd, rs) ->
+ fprintf oc " fixedw.rz %a = %a, 0\n" ireg rd ireg rs
+ | Pfixeduwrzz(rd, rs) ->
+ fprintf oc " fixeduw.rz %a = %a, 0\n" ireg rd ireg rs
+ | Pfixeddrzz(rd, rs) | Pfixeddrzz_i32(rd, rs) ->
+ fprintf oc " fixedd.rz %a = %a, 0\n" ireg rd ireg rs
+ | Pfixedudrzz(rd, rs) | Pfixedudrzz_i32(rd, rs) ->
+ fprintf oc " fixedud.rz %a = %a, 0\n" ireg rd ireg rs
+
+ (* Arith RI32 instructions *)
+ | Pmake (rd, imm) ->
+ fprintf oc " make %a, %a\n" ireg rd coqint imm
+
+ (* Arith RI64 instructions *)
+ | Pmakel (rd, imm) ->
+ fprintf oc " make %a, %a\n" ireg rd coqint64 imm
+
+ (* Arith RF32 instructions *)
+ | Pmakefs (rd, f) ->
+ let d = Floats.Float32.to_bits f in
+ fprintf oc " make %a, %a %s %.18g\n"
+ ireg rd coqint d comment (camlfloat_of_coqfloat32 f)
+
+ (* Arith RF64 instructions *)
+ | Pmakef (rd, f) ->
+ let d = Floats.Float.to_bits f in
+ fprintf oc " make %a, %a %s %.18g\n"
+ ireg rd coqint64 d comment (camlfloat_of_coqfloat f)
+
+ (* Arith RRR instructions *)
+ | Pcompw (it, rd, rs1, rs2) ->
+ fprintf oc " compw.%a %a = %a, %a\n" icond it ireg rd ireg rs1 ireg rs2
+ | Pcompl (it, rd, rs1, rs2) ->
+ fprintf oc " compd.%a %a = %a, %a\n" icond it ireg rd ireg rs1 ireg rs2
+
+ | Pfcompw (ft, rd, rs1, rs2) ->
+ fprintf oc " fcompw.%a %a = %a, %a\n" fcond ft ireg rd ireg rs1 ireg rs2
+ | Pfcompl (ft, rd, rs1, rs2) ->
+ fprintf oc " fcompd.%a %a = %a, %a\n" fcond ft ireg rd ireg rs1 ireg rs2
+
+ | Paddw (rd, rs1, rs2) ->
+ fprintf oc " addw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Paddxw (s14, rd, rs1, rs2) ->
+ fprintf oc " addx%dw %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs1 ireg rs2
+ | Psubw (rd, rs1, rs2) ->
+ fprintf oc " sbfw %a = %a, %a\n" ireg rd ireg rs2 ireg rs1
+ | Prevsubxw (s14, rd, rs1, rs2) ->
+ fprintf oc " sbfx%dw %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs1 ireg rs2
+ | Pmulw (rd, rs1, rs2) ->
+ fprintf oc " mulw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pandw (rd, rs1, rs2) ->
+ fprintf oc " andw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnandw (rd, rs1, rs2) ->
+ fprintf oc " nandw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Porw (rd, rs1, rs2) ->
+ fprintf oc " orw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnorw (rd, rs1, rs2) ->
+ fprintf oc " norw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pxorw (rd, rs1, rs2) ->
+ fprintf oc " xorw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnxorw (rd, rs1, rs2) ->
+ fprintf oc " nxorw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pandnw (rd, rs1, rs2) ->
+ fprintf oc " andnw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pornw (rd, rs1, rs2) ->
+ fprintf oc " ornw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psraw (rd, rs1, rs2) ->
+ fprintf oc " sraw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psrxw (rd, rs1, rs2) ->
+ fprintf oc " srsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psrlw (rd, rs1, rs2) ->
+ fprintf oc " srlw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psllw (rd, rs1, rs2) ->
+ fprintf oc " sllw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pmaddw (rd, rs1, rs2) ->
+ fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pmsubw (rd, rs1, rs2) ->
+ fprintf oc " msbfw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaddfw (rd, rs1, rs2) ->
+ fprintf oc " ffmaw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmsubfw (rd, rs1, rs2) ->
+ fprintf oc " ffmsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+
+ | Paddl (rd, rs1, rs2) ->
+ fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Paddxl (s14, rd, rs1, rs2) ->
+ fprintf oc " addx%dd %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs1 ireg rs2
+ | Psubl (rd, rs1, rs2) ->
+ fprintf oc " sbfd %a = %a, %a\n" ireg rd ireg rs2 ireg rs1
+ | Prevsubxl (s14, rd, rs1, rs2) ->
+ fprintf oc " sbfx%dd %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs1 ireg rs2
+ | Pandl (rd, rs1, rs2) ->
+ fprintf oc " andd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnandl (rd, rs1, rs2) ->
+ fprintf oc " nandd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Porl (rd, rs1, rs2) ->
+ fprintf oc " ord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnorl (rd, rs1, rs2) ->
+ fprintf oc " nord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pxorl (rd, rs1, rs2) ->
+ fprintf oc " xord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pnxorl (rd, rs1, rs2) ->
+ fprintf oc " nxord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pandnl (rd, rs1, rs2) ->
+ fprintf oc " andnd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pornl (rd, rs1, rs2) ->
+ fprintf oc " ornd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pmull (rd, rs1, rs2) ->
+ fprintf oc " muld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pslll (rd, rs1, rs2) ->
+ fprintf oc " slld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psrll (rd, rs1, rs2) ->
+ fprintf oc " srld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psrxl (rd, rs1, rs2) ->
+ fprintf oc " srsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Psral (rd, rs1, rs2) ->
+ fprintf oc " srad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pmaddl (rd, rs1, rs2) ->
+ fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pmsubl (rd, rs1, rs2) ->
+ fprintf oc " msbfd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaddfl (rd, rs1, rs2) ->
+ fprintf oc " ffmad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmsubfl (rd, rs1, rs2) ->
+ fprintf oc " ffmsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+
+ | Pfaddd (rd, rs1, rs2) ->
+ fprintf oc " faddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfaddw (rd, rs1, rs2) ->
+ fprintf oc " faddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfsbfd (rd, rs1, rs2) ->
+ fprintf oc " fsbfd %a = %a, %a\n" ireg rd ireg rs2 ireg rs1
+ | Pfsbfw (rd, rs1, rs2) ->
+ fprintf oc " fsbfw %a = %a, %a\n" ireg rd ireg rs2 ireg rs1
+ | Pfmuld (rd, rs1, rs2) ->
+ fprintf oc " fmuld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmulw (rd, rs1, rs2) ->
+ fprintf oc " fmulw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmind (rd, rs1, rs2) ->
+ fprintf oc " fmind %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfminw (rd, rs1, rs2) ->
+ fprintf oc " fminw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaxd (rd, rs1, rs2) ->
+ fprintf oc " fmaxd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaxw (rd, rs1, rs2) ->
+ fprintf oc " fmaxw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfinvw (rd, rs1) ->
+ fprintf oc " finvw %a = %a\n" ireg rd ireg rs1
+
+ (* Arith RRI32 instructions *)
+ | Pcompiw (it, rd, rs, imm) ->
+ fprintf oc " compw.%a %a = %a, %a\n" icond it ireg rd ireg rs coqint imm
+ | Paddiw (rd, rs, imm) ->
+ fprintf oc " addw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Paddxiw (s14, rd, rs, imm) ->
+ fprintf oc " addx%dw %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs coqint imm
+ | Prevsubiw (rd, rs, imm) ->
+ fprintf oc " sbfw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Prevsubxiw (s14, rd, rs, imm) ->
+ fprintf oc " sbfx%dw %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs coqint imm
+ | Pmuliw (rd, rs, imm) ->
+ fprintf oc " mulw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pandiw (rd, rs, imm) ->
+ fprintf oc " andw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pnandiw (rd, rs, imm) ->
+ fprintf oc " nandw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Poriw (rd, rs, imm) ->
+ fprintf oc " orw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pnoriw (rd, rs, imm) ->
+ fprintf oc " norw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pxoriw (rd, rs, imm) ->
+ fprintf oc " xorw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pnxoriw (rd, rs, imm) ->
+ fprintf oc " nxorw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pandniw (rd, rs, imm) ->
+ fprintf oc " andnw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Porniw (rd, rs, imm) ->
+ fprintf oc " ornw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Psraiw (rd, rs, imm) ->
+ fprintf oc " sraw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Psrxiw (rd, rs, imm) ->
+ fprintf oc " srsw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Psrliw (rd, rs, imm) ->
+ fprintf oc " srlw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pslliw (rd, rs, imm) ->
+ fprintf oc " sllw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Proriw (rd, rs, imm) ->
+ fprintf oc " rorw %a = %a, %a\n" ireg rd ireg rs coqint imm
+ | Pmaddiw (rd, rs, imm) ->
+ fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs coqint imm
+
+ | Psllil (rd, rs, imm) ->
+ fprintf oc " slld %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Psrlil (rd, rs, imm) ->
+ fprintf oc " srld %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Psrail (rd, rs, imm) ->
+ fprintf oc " srad %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Psrxil (rd, rs, imm) ->
+ fprintf oc " srsd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+
+ (* Arith RRI64 instructions *)
+ | Pcompil (it, rd, rs, imm) ->
+ fprintf oc " compd.%a %a = %a, %a\n" icond it ireg rd ireg rs coqint64 imm
+ | Paddil (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Paddxil (s14, rd, rs, imm) ->
+ fprintf oc " addx%dd %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs coqint imm
+ | Prevsubil (rd, rs, imm) ->
+ fprintf oc " sbfd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Prevsubxil (s14, rd, rs, imm) ->
+ fprintf oc " sbfx%dd %a = %a, %a\n" (scale_of_shift1_4 s14)
+ ireg rd ireg rs coqint64 imm
+ | Pmulil (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " muld %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pandil (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " andd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pnandil (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " nandd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Poril (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " ord %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pnoril (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " nord %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pxoril (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " xord %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pnxoril (rd, rs, imm) -> assert Archi.ptr64;
+ fprintf oc " nxord %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pandnil (rd, rs, imm) ->
+ fprintf oc " andnd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pornil (rd, rs, imm) ->
+ fprintf oc " ornd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+ | Pmaddil (rd, rs, imm) ->
+ fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs coqint64 imm
+
+ | Pcmove (bt, rd, rcond, rs) | Pcmoveu (bt, rd, rcond, rs) ->
+ fprintf oc " cmoved.%a %a? %a = %a\n"
+ bcond bt ireg rcond ireg rd ireg rs
+ | Pcmoveiw (bt, rd, rcond, imm) | Pcmoveuiw (bt, rd, rcond, imm) ->
+ fprintf oc " cmoved.%a %a? %a = %a\n"
+ bcond bt ireg rcond ireg rd coqint imm
+ | Pcmoveil (bt, rd, rcond, imm) | Pcmoveuil (bt, rd, rcond, imm) ->
+ fprintf oc " cmoved.%a %a? %a = %a\n"
+ bcond bt ireg rcond ireg rd coqint64 imm
+
+ let get_section_names name =
+ let (text, lit) =
+ match C2C.atom_sections name with
+ | t :: l :: _ -> (t, l)
+ | _ -> (Section_text, Section_literal) in
+ text,lit,Section_jumptable
+
+ let print_align oc alignment =
+ fprintf oc " .balign %d\n" alignment
+
+ let print_jumptable oc jmptbl = ()
+ (* if !jumptables <> [] then
+ begin
+ section oc jmptbl;
+ List.iter (print_tbl oc) !jumptables;
+ jumptables := []
+ end *)
+
+ let print_fun_info = elf_print_fun_info
+
+ let print_optional_fun_info _ = ()
+
+ let print_var_info = elf_print_var_info
+
+ let print_comm_symb oc sz name align =
+ if C2C.atom_is_static name then
+ fprintf oc " .local %a\n" symbol name;
+ fprintf oc " .comm %a, %s, %d\n"
+ symbol name
+ (Z.to_string sz)
+ align
+
+ let print_instructions oc fn =
+ current_function_sig := fn.fn_sig;
+ List.iter (print_instruction oc) fn.fn_code
+
+(* Data *)
+
+ let address = if Archi.ptr64 then ".quad" else ".long"
+
+ let print_prologue oc =
+ (* fprintf oc " .option %s\n" (if Archi.pic_code() then "pic" else "nopic"); *)
+ if !Clflags.option_g then begin
+ section oc Section_text;
+ end
+
+ let print_epilogue oc =
+ print_profiling_epilogue elf_text_print_fun_info Dtors kvx_profiling_stub oc;
+ if !Clflags.option_g then begin
+ Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
+ section oc Section_text;
+ end
+
+ let default_falignment = 2
+
+ let cfi_startproc oc = ()
+ let cfi_endproc oc = ()
+
+ end
+
+let sel_target () =
+ (module Target:TARGET)
diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v
new file mode 100644
index 00000000..87554258
--- /dev/null
+++ b/kvx/ValueAOp.v
@@ -0,0 +1,599 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Compopts.
+Require Import AST Integers Floats Values Memory Globalenvs.
+Require Import Op ExtValues ExtFloats RTL ValueDomain.
+Definition minf := binop_float ExtFloat.min.
+Definition maxf := binop_float ExtFloat.max.
+Definition minfs := binop_single ExtFloat32.min.
+Definition maxfs := binop_single ExtFloat32.max.
+
+Definition ntop3 (x y z: aval) : aval := Ifptr (plub (provenance x) (plub (provenance y) (provenance z))).
+
+Definition triple_op_float (sem: float -> float -> float -> float) (x y z: aval) :=
+ match x, y, z with
+ | F a, F b, F c => F (sem a b c)
+ | _, _, _ => ntop3 x y z
+ end.
+
+Definition triple_op_single (sem: float32 -> float32 -> float32 -> float32) (x y z: aval) :=
+ match x, y, z with
+ | FS a, FS b, FS c => FS (sem a b c)
+ | _, _, _ => ntop3 x y z
+ end.
+
+Definition fmaddf := triple_op_float (fun x y z => Float.fma y z x).
+Definition fmsubf := triple_op_float (fun x y z => Float.fma (Float.neg y) z x).
+Definition fmaddfs := triple_op_single (fun x y z => Float32.fma y z x).
+Definition fmsubfs := triple_op_single (fun x y z => Float32.fma (Float32.neg y) z x).
+
+Definition invfs (y : aval) :=
+ match y with
+ | FS f => FS (ExtFloat32.inv f)
+ | _ => ntop1 y
+ end.
+
+(** Value analysis for RISC V operators *)
+
+Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
+ match cond, vl with
+ | Ccomp c, v1 :: v2 :: nil => cmp_bool c v1 v2
+ | Ccompu c, v1 :: v2 :: nil => cmpu_bool c v1 v2
+ | Ccompimm c n, v1 :: nil => cmp_bool c v1 (I n)
+ | Ccompuimm c n, v1 :: nil => cmpu_bool c v1 (I n)
+ | Ccompl c, v1 :: v2 :: nil => cmpl_bool c v1 v2
+ | Ccomplu c, v1 :: v2 :: nil => cmplu_bool c v1 v2
+ | Ccomplimm c n, v1 :: nil => cmpl_bool c v1 (L n)
+ | Ccompluimm c n, v1 :: nil => cmplu_bool c v1 (L n)
+ | Ccompf c, v1 :: v2 :: nil => cmpf_bool c v1 v2
+ | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
+ | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+ | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
+ | _, _ => Bnone
+ end.
+
+Definition eval_static_addressing (addr: addressing) (vl: list aval): aval :=
+ match addr, vl with
+ | Aindexed n, v1::nil => offset_ptr v1 n
+ | Aindexed2, v1::v2::nil => addl v1 v2
+ | Aindexed2XS scale, v1::v2::nil => addl v1 (shll v2 (I (Int.repr scale)))
+ | Aglobal s ofs, nil => Ptr (Gl s ofs)
+ | Ainstack ofs, nil => Ptr (Stk ofs)
+ | _, _ => Vbot
+ end.
+
+Definition eval_static_condition0 (cond : condition0) (v : aval) : abool :=
+ match cond with
+ | Ccomp0 c => cmp_bool c v (I Int.zero)
+ | Ccompu0 c => cmpu_bool c v (I Int.zero)
+ | Ccompl0 c => cmpl_bool c v (L Int64.zero)
+ | Ccomplu0 c => cmplu_bool c v (L Int64.zero)
+ end.
+
+
+Definition eval_static_extfs (stop : Z) (start : Z) (v : aval) :=
+ if is_bitfield stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | I w =>
+ I (Int.shr (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start))))
+ | _ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_extfz (stop : Z) (start : Z) (v : aval) :=
+ if is_bitfield stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | I w =>
+ I (Int.shru (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start))))
+ | _ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_extfsl (stop : Z) (start : Z) (v : aval) :=
+ if is_bitfieldl stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | L w =>
+ L (Int64.shr' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start))))
+ | _ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_extfzl (stop : Z) (start : Z) (v : aval) :=
+ if is_bitfieldl stop start
+ then
+ let stop' := Z.add stop Z.one in
+ match v with
+ | L w =>
+ L (Int64.shru' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start))))
+ | _ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_insf stop start prev fld :=
+ let mask := Int.repr (zbitfield_mask stop start) in
+ if is_bitfield stop start
+ then
+ match prev, fld with
+ | (I prevI), (I fldI) =>
+ if Int.ltu (Int.repr start) Int.iwordsize
+ then I (Int.or (Int.and prevI (Int.not mask))
+ (Int.and (Int.shl fldI (Int.repr start)) mask))
+ else Vtop
+ | _, _ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_insfl stop start prev fld :=
+ let mask := Int64.repr (zbitfield_mask stop start) in
+ if is_bitfieldl stop start
+ then
+ match prev, fld with
+ | (L prevL), (L fldL) =>
+ if Int.ltu (Int.repr start) Int64.iwordsize'
+ then L (Int64.or (Int64.and prevL (Int64.not mask))
+ (Int64.and (Int64.shl' fldL (Int.repr start)) mask))
+ else Vtop
+ | _,_ => Vtop
+ end
+ else Vtop.
+
+Definition eval_static_operation (op: operation) (vl: list aval): aval :=
+ match op, vl with
+ | Omove, v1::nil => v1
+ | Ointconst n, nil => I n
+ | Olongconst n, nil => L n
+ | Ofloatconst n, nil => if propagate_float_constants tt then F n else ntop
+ | Osingleconst n, nil => if propagate_float_constants tt then FS n else ntop
+ | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
+ | Oaddrstack ofs, nil => Ptr (Stk ofs)
+ | Ocast8signed, v1 :: nil => sign_ext 8 v1
+ | Ocast16signed, v1 :: nil => sign_ext 16 v1
+ | Oadd, v1::v2::nil => add v1 v2
+ | Oaddimm n, v1::nil => add v1 (I n)
+ | Oaddx shift, v1::v2::nil => add v2 (shl v1 (I (int_of_shift1_4 shift)))
+ | Oaddximm shift n, v1::nil => add (I n) (shl v1 (I (int_of_shift1_4 shift)))
+ | Oneg, v1::nil => neg v1
+ | Osub, v1::v2::nil => sub v1 v2
+ | Orevsubx shift, v1::v2::nil => sub v2 (shl v1 (I (int_of_shift1_4 shift)))
+ | Orevsubimm n, v1::nil => sub (I n) v1
+ | Orevsubximm shift n, v1::nil => sub (I n) (shl v1 (I (int_of_shift1_4 shift)))
+ | Omul, v1::v2::nil => mul v1 v2
+ | Omulimm n, v1::nil => mul v1 (I n)
+ | Omulhs, v1::v2::nil => mulhs v1 v2
+ | Omulhu, v1::v2::nil => mulhu v1 v2
+ | Odiv, v1::v2::nil => divs v1 v2
+ | Odivu, v1::v2::nil => divu v1 v2
+ | Omod, v1::v2::nil => mods v1 v2
+ | Omodu, v1::v2::nil => modu v1 v2
+ | Oand, v1::v2::nil => and v1 v2
+ | Oandimm n, v1::nil => and v1 (I n)
+ | Onand, v1::v2::nil => notint (and v1 v2)
+ | Onandimm n, v1::nil => notint (and v1 (I n))
+ | Oor, v1::v2::nil => or v1 v2
+ | Oorimm n, v1::nil => or v1 (I n)
+ | Onor, v1::v2::nil => notint (or v1 v2)
+ | Onorimm n, v1::nil => notint (or v1 (I n))
+ | Oxor, v1::v2::nil => xor v1 v2
+ | Oxorimm n, v1::nil => xor v1 (I n)
+ | Onxor, v1::v2::nil => notint (xor v1 v2)
+ | Onxorimm n, v1::nil => notint (xor v1 (I n))
+ | Onot, v1::nil => notint v1
+ | Oandn, v1::v2::nil => and (notint v1) v2
+ | Oandnimm n, v1::nil => and (notint v1) (I n)
+ | Oorn, v1::v2::nil => or (notint v1) v2
+ | Oornimm n, v1::nil => or (notint v1) (I n)
+ | Oshl, v1::v2::nil => shl v1 v2
+ | Oshlimm n, v1::nil => shl v1 (I n)
+ | Oshr, v1::v2::nil => shr v1 v2
+ | Oshrimm n, v1::nil => shr v1 (I n)
+ | Ororimm n, v1::nil => ror v1 (I n)
+ | Oshru, v1::v2::nil => shru v1 v2
+ | Oshruimm n, v1::nil => shru v1 (I n)
+ | Oshrximm n, v1::nil => shrx v1 (I n)
+ | Omadd, v1::v2::v3::nil => add v1 (mul v2 v3)
+ | Omaddimm n, v1::v2::nil => add v1 (mul v2 (I n))
+ | Omsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
+ | Omakelong, v1::v2::nil => longofwords v1 v2
+ | Olowlong, v1::nil => loword v1
+ | Ohighlong, v1::nil => hiword v1
+ | Ocast32signed, v1::nil => longofint v1
+ | Ocast32unsigned, v1::nil => longofintu v1
+ | Oaddl, v1::v2::nil => addl v1 v2
+ | Oaddlimm n, v1::nil => addl v1 (L n)
+ | Oaddxl shift, v1::v2::nil => addl v2 (shll v1 (I (int_of_shift1_4 shift)))
+ | Oaddxlimm shift n, v1::nil => addl (L n) (shll v1 (I (int_of_shift1_4 shift)))
+ | Onegl, v1::nil => negl v1
+ | Osubl, v1::v2::nil => subl v1 v2
+ | Orevsubxl shift, v1::v2::nil => subl v2 (shll v1 (I (int_of_shift1_4 shift)))
+ | Orevsublimm n, v1::nil => subl (L n) v1
+ | Orevsubxlimm shift n, v1::nil => subl (L n) (shll v1 (I (int_of_shift1_4 shift)))
+ | Omull, v1::v2::nil => mull v1 v2
+ | Omullimm n, v1::nil => mull v1 (L n)
+ | Omullhs, v1::v2::nil => mullhs v1 v2
+ | Omullhu, v1::v2::nil => mullhu v1 v2
+ | Odivl, v1::v2::nil => divls v1 v2
+ | Odivlu, v1::v2::nil => divlu v1 v2
+ | Omodl, v1::v2::nil => modls v1 v2
+ | Omodlu, v1::v2::nil => modlu v1 v2
+ | Oandl, v1::v2::nil => andl v1 v2
+ | Oandlimm n, v1::nil => andl v1 (L n)
+ | Onandl, v1::v2::nil => notl (andl v1 v2)
+ | Onandlimm n, v1::nil => notl (andl v1 (L n))
+ | Oorl, v1::v2::nil => orl v1 v2
+ | Oorlimm n, v1::nil => orl v1 (L n)
+ | Onorl, v1::v2::nil => notl (orl v1 v2)
+ | Onorlimm n, v1::nil => notl (orl v1 (L n))
+ | Oxorl, v1::v2::nil => xorl v1 v2
+ | Oxorlimm n, v1::nil => xorl v1 (L n)
+ | Onxorl, v1::v2::nil => notl (xorl v1 v2)
+ | Onxorlimm n, v1::nil => notl (xorl v1 (L n))
+ | Onotl, v1::nil => notl v1
+ | Oandnl, v1::v2::nil => andl (notl v1) v2
+ | Oandnlimm n, v1::nil => andl (notl v1) (L n)
+ | Oornl, v1::v2::nil => orl (notl v1) v2
+ | Oornlimm n, v1::nil => orl (notl v1) (L n)
+ | Oshll, v1::v2::nil => shll v1 v2
+ | Oshllimm n, v1::nil => shll v1 (I n)
+ | Oshrl, v1::v2::nil => shrl v1 v2
+ | Oshrlimm n, v1::nil => shrl v1 (I n)
+ | Oshrlu, v1::v2::nil => shrlu v1 v2
+ | Oshrluimm n, v1::nil => shrlu v1 (I n)
+ | Oshrxlimm n, v1::nil => shrxl v1 (I n)
+ | Omaddl, v1::v2::v3::nil => addl v1 (mull v2 v3)
+ | Omaddlimm n, v1::v2::nil => addl v1 (mull v2 (L n))
+ | Omsubl, v1::v2::v3::nil => subl v1 (mull v2 v3)
+ | Onegf, v1::nil => negf v1
+ | Oabsf, v1::nil => absf v1
+ | Oaddf, v1::v2::nil => addf v1 v2
+ | Osubf, v1::v2::nil => subf v1 v2
+ | Omulf, v1::v2::nil => mulf v1 v2
+ | Odivf, v1::v2::nil => divf v1 v2
+ | Ominf, v1::v2::nil => minf v1 v2
+ | Omaxf, v1::v2::nil => maxf v1 v2
+ | Ofmaddf, v1::v2::v3::nil => fmaddf v1 v2 v3
+ | Ofmsubf, v1::v2::v3::nil => fmsubf v1 v2 v3
+ | Onegfs, v1::nil => negfs v1
+ | Oabsfs, v1::nil => absfs v1
+ | Oaddfs, v1::v2::nil => addfs v1 v2
+ | Osubfs, v1::v2::nil => subfs v1 v2
+ | Omulfs, v1::v2::nil => mulfs v1 v2
+ | Odivfs, v1::v2::nil => divfs v1 v2
+ | Ominfs, v1::v2::nil => minfs v1 v2
+ | Omaxfs, v1::v2::nil => maxfs v1 v2
+ | Oinvfs, v1::nil => invfs v1
+ | Ofmaddfs, v1::v2::v3::nil => fmaddfs v1 v2 v3
+ | Ofmsubfs, v1::v2::v3::nil => fmsubfs v1 v2 v3
+ | Osingleoffloat, v1::nil => singleoffloat v1
+ | Ofloatofsingle, v1::nil => floatofsingle v1
+ | Ointoffloat, v1::nil => intoffloat_total v1
+ | Ointuoffloat, v1::nil => intuoffloat_total v1
+ | Ointofsingle, v1::nil => intofsingle_total v1
+ | Ointuofsingle, v1::nil => intuofsingle_total v1
+ | Osingleofint, v1::nil => singleofint v1
+ | Osingleofintu, v1::nil => singleofintu v1
+ | Olongoffloat, v1::nil => longoffloat_total v1
+ | Olonguoffloat, v1::nil => longuoffloat_total v1
+ | Ofloatoflong, v1::nil => floatoflong v1
+ | Ofloatoflongu, v1::nil => floatoflongu v1
+ | Olongofsingle, v1::nil => longofsingle_total v1
+ | Olonguofsingle, v1::nil => longuofsingle_total v1
+ | Osingleoflong, v1::nil => singleoflong v1
+ | Osingleoflongu, v1::nil => singleoflongu v1
+ | Ocmp c, _ => of_optbool (eval_static_condition c vl)
+ | (Oextfz stop start), v0::nil => eval_static_extfz stop start v0
+ | (Oextfs stop start), v0::nil => eval_static_extfs stop start v0
+ | (Oextfzl stop start), v0::nil => eval_static_extfzl stop start v0
+ | (Oextfsl stop start), v0::nil => eval_static_extfsl stop start v0
+ | (Oinsf stop start), v0::v1::nil => eval_static_insf stop start v0 v1
+ | (Oinsfl stop start), v0::v1::nil => eval_static_insfl stop start v0 v1
+ | Osel c ty, v1::v2::vc::nil => select (eval_static_condition0 c vc) v1 v2
+ | Oselimm c imm, v1::vc::nil => select (eval_static_condition0 c vc) v1 (I imm)
+ | Osellimm c imm, v1::vc::nil => select (eval_static_condition0 c vc) v1 (L imm)
+ | _, _ => Vbot
+ end.
+
+Section SOUNDNESS.
+
+Variable bc: block_classification.
+Variable ge: genv.
+Hypothesis GENV: genv_match bc ge.
+Variable sp: block.
+Hypothesis STACK: bc sp = BCstack.
+
+Lemma minf_sound:
+ forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minf v w) (minf x y).
+Proof.
+ apply (binop_float_sound bc ExtFloat.min); assumption.
+Qed.
+
+Lemma maxf_sound:
+ forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.maxf v w) (maxf x y).
+Proof.
+ apply (binop_float_sound bc ExtFloat.max); assumption.
+Qed.
+
+Lemma minfs_sound:
+ forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minfs v w) (minfs x y).
+Proof.
+ apply (binop_single_sound bc ExtFloat32.min); assumption.
+Qed.
+
+Lemma maxfs_sound:
+ forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.maxfs v w) (maxfs x y).
+Proof.
+ apply (binop_single_sound bc ExtFloat32.max); assumption.
+Qed.
+
+Lemma invfs_sound:
+ forall v x, vmatch bc v x -> vmatch bc (ExtValues.invfs v) (invfs x).
+Proof.
+ intros v x;
+ intro MATCH;
+ inversion MATCH;
+ cbn;
+ constructor.
+Qed.
+
+Lemma triple_op_float_sound:
+ forall f a x b y c z,
+ vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.triple_op_float f a b c)
+ (triple_op_float f x y z).
+Proof.
+ intros until z.
+ intros Hax Hby Hcz.
+ inv Hax; cbn; try constructor;
+ inv Hby; cbn; try constructor;
+ inv Hcz; cbn; try constructor.
+Qed.
+
+Lemma triple_op_single_sound:
+ forall f a x b y c z,
+ vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.triple_op_single f a b c)
+ (triple_op_single f x y z).
+Proof.
+ intros until z.
+ intros Hax Hby Hcz.
+ inv Hax; cbn; try constructor;
+ inv Hby; cbn; try constructor;
+ inv Hcz; cbn; try constructor.
+Qed.
+
+Lemma fmaddf_sound :
+ forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.fmaddf a b c) (fmaddf x y z).
+Proof.
+ intros. unfold ExtValues.fmaddf, fmaddf.
+ apply triple_op_float_sound; assumption.
+Qed.
+
+Lemma fmaddfs_sound :
+ forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.fmaddfs a b c) (fmaddfs x y z).
+Proof.
+ intros. unfold ExtValues.fmaddfs, fmaddfs.
+ apply triple_op_single_sound; assumption.
+Qed.
+
+Lemma fmsubf_sound :
+ forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.fmsubf a b c) (fmsubf x y z).
+Proof.
+ intros. unfold ExtValues.fmsubf, fmsubf.
+ apply triple_op_float_sound; assumption.
+Qed.
+
+Lemma fmsubfs_sound :
+ forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z ->
+ vmatch bc (ExtValues.fmsubfs a b c) (fmsubfs x y z).
+Proof.
+ intros. unfold ExtValues.fmsubfs, fmsubfs.
+ apply triple_op_single_sound; assumption.
+Qed.
+Hint Resolve minf_sound maxf_sound minfs_sound maxfs_sound invfs_sound fmaddf_sound fmaddfs_sound fmsubf_sound fmsubfs_sound : va.
+
+Theorem eval_static_condition_sound:
+ forall cond vargs m aargs,
+ list_forall2 (vmatch bc) vargs aargs ->
+ cmatch (eval_condition cond vargs m) (eval_static_condition cond aargs).
+Proof.
+ intros until aargs; intros VM. inv VM.
+ destruct cond; auto with va.
+ inv H0.
+ destruct cond; cbn; eauto with va.
+ inv H2.
+ destruct cond; cbn; eauto with va.
+ destruct cond; auto with va.
+Qed.
+
+Theorem eval_static_condition0_sound:
+ forall cond varg m aarg,
+ vmatch bc varg aarg ->
+ cmatch (eval_condition0 cond varg m) (eval_static_condition0 cond aarg).
+Proof.
+ intros until aarg; intro VM.
+ destruct cond; cbn; eauto with va.
+Qed.
+
+Lemma symbol_address_sound:
+ forall id ofs,
+ vmatch bc (Genv.symbol_address ge id ofs) (Ptr (Gl id ofs)).
+Proof.
+ intros; apply symbol_address_sound; apply GENV.
+Qed.
+
+Lemma symbol_address_sound_2:
+ forall id ofs,
+ vmatch bc (Genv.symbol_address ge id ofs) (Ifptr (Gl id ofs)).
+Proof.
+ intros. unfold Genv.symbol_address. destruct (Genv.find_symbol ge id) as [b|] eqn:F.
+ constructor. constructor. apply GENV; auto.
+ constructor.
+Qed.
+
+Hint Resolve symbol_address_sound symbol_address_sound_2: va.
+
+Ltac InvHyps :=
+ match goal with
+ | [H: None = Some _ |- _ ] => discriminate
+ | [H: Some _ = Some _ |- _] => inv H
+ | [H1: match ?vl with nil => _ | _ :: _ => _ end = Some _ ,
+ H2: list_forall2 _ ?vl _ |- _ ] => inv H2; InvHyps
+ | [H: (if Archi.ptr64 then _ else _) = Some _ |- _] => destruct Archi.ptr64 eqn:?; InvHyps
+ | _ => idtac
+ end.
+
+Theorem eval_static_addressing_sound:
+ forall addr vargs vres aargs,
+ eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = Some vres ->
+ list_forall2 (vmatch bc) vargs aargs ->
+ vmatch bc vres (eval_static_addressing addr aargs).
+Proof.
+ unfold eval_addressing, eval_static_addressing; intros;
+ destruct addr; InvHyps; eauto with va.
+ rewrite Ptrofs.add_zero_l; eauto with va.
+Qed.
+
+Theorem eval_static_addressing_sound_none:
+ forall addr vargs aargs,
+ eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = None ->
+ list_forall2 (vmatch bc) vargs aargs ->
+ (eval_static_addressing addr aargs) = Vbot.
+Proof.
+ unfold eval_addressing, eval_static_addressing.
+ intros until aargs. intros Heval_none Hlist.
+ inv Hlist.
+ destruct addr; trivial; discriminate.
+ inv H0.
+ destruct addr; trivial; discriminate.
+ inv H2.
+ destruct addr; trivial; discriminate.
+ inv H3;
+ destruct addr; trivial; discriminate.
+Qed.
+
+Lemma vmatch_vint_ntop1:
+ forall x y, vmatch bc (Vint x) (ntop1 y).
+Proof.
+ intro. unfold ntop1, provenance.
+ destruct y;
+ destruct (va_strict tt);
+ constructor.
+Qed.
+
+Lemma vmatch_vlong_ntop1:
+ forall x y, vmatch bc (Vlong x) (ntop1 y).
+Proof.
+ intro. unfold ntop1, provenance.
+ destruct y;
+ destruct (va_strict tt);
+ constructor.
+Qed.
+
+Hint Resolve vmatch_vint_ntop1 vmatch_vlong_ntop1: va.
+
+Theorem eval_static_operation_sound:
+ forall op vargs m vres aargs,
+ eval_operation ge (Vptr sp Ptrofs.zero) op vargs m = Some vres ->
+ list_forall2 (vmatch bc) vargs aargs ->
+ vmatch bc vres (eval_static_operation op aargs).
+Proof.
+ unfold eval_operation, eval_static_operation, addx, revsubx, addxl, revsubxl; intros.
+ destruct op; InvHyps; eauto with va.
+ - destruct (propagate_float_constants tt); constructor.
+ - destruct (propagate_float_constants tt); constructor.
+ - rewrite Ptrofs.add_zero_l; eauto with va.
+ - replace(match Val.shl a1 (Vint (int_of_shift1_4 shift)) with
+ | Vint n2 => Vint (Int.add n n2)
+ | Vptr b2 ofs2 =>
+ if Archi.ptr64
+ then Vundef
+ else Vptr b2 (Ptrofs.add ofs2 (Ptrofs.of_int n))
+ | _ => Vundef
+ end) with (Val.add (Vint n) (Val.shl a1 (Vint (int_of_shift1_4 shift)))).
+ + eauto with va.
+ + destruct a1; destruct shift; reflexivity.
+ - (*revsubimm*) inv H1; constructor.
+ - replace (match Val.shl a1 (Vint (int_of_shift1_4 shift)) with
+ | Vint n2 => Vint (Int.sub n n2)
+ | _ => Vundef
+ end) with (Val.sub (Vint n) (Val.shl a1 (Vint (int_of_shift1_4 shift)))).
+ + eauto with va.
+ + destruct n; destruct shift; reflexivity.
+ - (* shrx *)
+ inv H1; cbn; try constructor.
+ all: destruct Int.ltu; [cbn | constructor; fail].
+ all: auto with va.
+ - inv H1; constructor.
+ - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with
+ | Vlong n2 => Vlong (Int64.sub n n2)
+ | _ => Vundef
+ end) with (Val.subl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))).
+ + eauto with va.
+ + destruct a1; destruct shift; reflexivity.
+ - apply of_optbool_sound. eapply eval_static_condition_sound; eauto.
+
+ (* extfz *)
+ - unfold extfz, eval_static_extfz.
+ destruct (is_bitfield _ _).
+ + inv H1; constructor.
+ + constructor.
+
+ (* extfs *)
+ - unfold extfs, eval_static_extfs.
+ destruct (is_bitfield _ _).
+ + inv H1; constructor.
+ + constructor.
+
+ (* extfzl *)
+ - unfold extfzl, eval_static_extfzl.
+ destruct (is_bitfieldl _ _).
+ + inv H1; constructor.
+ + constructor.
+
+ (* extfsl *)
+ - unfold extfsl, eval_static_extfsl.
+ destruct (is_bitfieldl _ _).
+ + inv H1; constructor.
+ + constructor.
+
+ (* insf *)
+ - unfold insf, eval_static_insf.
+ destruct (is_bitfield _ _).
+ + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor.
+ + constructor.
+ (* insfl *)
+ - unfold insfl, eval_static_insfl.
+ destruct (is_bitfieldl _ _).
+ + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor.
+ + constructor.
+ (* select *)
+ - apply select_sound; auto. eapply eval_static_condition0_sound; eauto.
+ (* select imm *)
+ - apply select_sound; auto with va. eapply eval_static_condition0_sound; eauto.
+ (* select long imm *)
+ - apply select_sound; auto with va. eapply eval_static_condition0_sound; eauto.
+Qed.
+
+End SOUNDNESS.
+
diff --git a/kvx/bitmasks.py b/kvx/bitmasks.py
new file mode 100755
index 00000000..9f6987d6
--- /dev/null
+++ b/kvx/bitmasks.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+def bitmask(to, fr):
+ bit_to = 1<<to
+ return (bit_to | (bit_to - 1)) & ~((1<<fr)-1)
+
+def bitmask2(to, fr):
+ bit_to = 1<<to
+ return bit_to + (bit_to - (1 << fr))
+
+for stop in range(32):
+ for start in range(stop+1):
+ assert(bitmask(stop, start) == bitmask2(stop, start))
diff --git a/kvx/extractionMachdep.v b/kvx/extractionMachdep.v
new file mode 100644
index 00000000..2e409931
--- /dev/null
+++ b/kvx/extractionMachdep.v
@@ -0,0 +1,32 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* Additional extraction directives specific to the RISC-V port *)
+
+Require Archi Asm.
+
+(* Archi *)
+
+Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ".
+Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *)
+
+Extract Constant Peephole.print_found_store =>
+"fun offset x -> Printf.printf ""found offset = %ld\n"" (Camlcoq.camlint_of_coqint offset); x".
+
+(* Asm *)
+(*
+Extract Constant Asm.low_half => "fun _ _ _ -> assert false".
+Extract Constant Asm.high_half => "fun _ _ _ -> assert false".
+*)
diff --git a/kvx/unittest/Makefile b/kvx/unittest/Makefile
new file mode 100644
index 00000000..fcbede2d
--- /dev/null
+++ b/kvx/unittest/Makefile
@@ -0,0 +1,13 @@
+# Needs to be called from CompCert root directory
+# $ make -f kvx/unittest/Makefile postpass_test
+
+include Makefile.extr
+
+TEST_CMX=kvx/unittest/postpass_test.cmx
+
+UNITTEST_OBJS:=$(shell $(MODORDER) $(TEST_CMX))
+
+postpass_test: $(UNITTEST_OBJS)
+ @echo "Linking $@ $(UNITTEST_OBJS)"
+ @$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+
+
diff --git a/kvx/unittest/postpass_test.ml b/kvx/unittest/postpass_test.ml
new file mode 100644
index 00000000..434bfaf7
--- /dev/null
+++ b/kvx/unittest/postpass_test.ml
@@ -0,0 +1,12 @@
+open Printf
+open Asmblock
+open Integers
+open PostpassSchedulingOracle
+open BinNums
+
+let test_schedule_sd =
+ let sd_inst = PStore (PStoreRRO (Psd, GPR12, GPR16, (Ofsimm (Ptrofs.of_int @@ Int.intval Z0))))
+ in let bb = { header = []; body = [sd_inst]; exit = None }
+ in List.iter print_bb (smart_schedule bb)
+
+let _ = test_schedule_sd; printf "Done\n"