diff options
Diffstat (limited to 'kvx')
55 files changed, 26869 insertions, 0 deletions
diff --git a/kvx/Archi.v b/kvx/Archi.v new file mode 100644 index 00000000..6d59a3d1 --- /dev/null +++ b/kvx/Archi.v @@ -0,0 +1,80 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Architecture-dependent parameters for MPPA KVX. Mostly copied from the Risc-V backend *) + +Require Import ZArith List. +Require Import Binary Bits. + +Definition ptr64 := true. + +Definition big_endian := false. + +Definition align_int64 := 8%Z. +Definition align_float64 := 8%Z. + +Definition splitlong := false. + +Lemma splitlong_ptr32: splitlong = true -> ptr64 = false. +Proof. + unfold splitlong. congruence. +Qed. + +(** FIXME - Check the properties below *) + +(** Section 7.3: "Except when otherwise stated, if the result of a + floating-point operation is NaN, it is the canonical NaN. The + canonical NaN has a positive sign and all significand bits clear + except the MSB, a.k.a. the quiet bit." + We need to extend the [choose_binop_pl] functions to account for + this case. *) + +Definition default_nan_64 := (false, iter_nat 51 _ xO xH). +Definition default_nan_32 := (false, iter_nat 22 _ xO xH). + +(* Always choose the first NaN argument, if any *) + +Definition choose_nan_64 (l: list (bool * positive)) : bool * positive := + match l with nil => default_nan_64 | n :: _ => n end. + +Definition choose_nan_32 (l: list (bool * positive)) : bool * positive := + match l with nil => default_nan_32 | n :: _ => n end. + +Definition fpu_returns_default_qNaN := false. + +Lemma choose_nan_64_idem: forall n, + choose_nan_64 (n :: n :: nil) = choose_nan_64 (n :: nil). +Proof. auto. Qed. + +Lemma choose_nan_32_idem: forall n, + choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil). +Proof. auto. Qed. + +Definition fma_order {A: Type} (x y z: A) := (x, z, y). + +Definition fma_invalid_mul_is_nan := false. +Definition float_of_single_preserves_sNaN := false. + +Global Opaque ptr64 big_endian splitlong + default_nan_64 choose_nan_64 + default_nan_32 choose_nan_32 + fma_order fma_invalid_mul_is_nan + float_of_single_preserves_sNaN. + +(** Whether to generate position-independent code or not *) + +Parameter pic_code: unit -> bool. + +Definition has_notrap_loads := true. diff --git a/kvx/Asm.v b/kvx/Asm.v new file mode 100644 index 00000000..fd20316c --- /dev/null +++ b/kvx/Asm.v @@ -0,0 +1,758 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Abstract syntax for KVX textual assembly language. + + Each emittable instruction is defined here. ';;' is also defined as an instruction. + The goal of this representation is to stay compatible with the rest of the generic backend of CompCert + We define [unfold : list bblock -> list instruction] + An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code] + [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import ExtValues. +Require Import Memory. +Require Import Events. +Require Import Globalenvs. +Require Import Smallstep. +Require Import Locations. +Require Stacklayout. +Require Import Conventions. +Require Export Asmvliw. +Require Import Linking. +Require Import Errors. + +(** Definitions for OCaml code *) +Definition label := positive. + +(* Necessary definition for Asmexpandaux.mli *) +Definition preg := preg. + +Inductive addressing : Type := + | AOff (ofs: offset) + | AReg (ro: ireg) + | ARegXS (ro: ireg) +. + +(** * Syntax *) +Inductive instruction : Type := + (** pseudo instructions *) + | Pallocframe (sz: Z) (pos: ptrofs) (**r allocate new stack frame *) + | Pfreeframe (sz: Z) (pos: ptrofs) (**r deallocate stack frame and restore previous frame *) + | Plabel (lbl: label) (**r define a code label *) + | Ploadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the address of a symbol *) + | Pbuiltin: external_function -> list (builtin_arg preg) + -> builtin_res preg -> instruction (**r built-in function (pseudo) *) + | Psemi (**r semi colon separating bundles *) + | Pnop (**r instruction that does nothing *) + + (** Control flow instructions *) + | Pget (rd: ireg) (rs: preg) (**r get system register *) + | Pset (rd: preg) (rs: ireg) (**r set system register *) + | Pret (**r return *) + | Pcall (l: label) (**r function call *) + | Picall (rs: ireg) (**r function call on register *) + (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *) + | Pgoto (l: label) (**r goto *) + | Pigoto (rs: ireg) (**r goto from register *) + | Pj_l (l: label) (**r jump to label *) + | Pcb (bt: btest) (r: ireg) (l: label) (**r branch based on btest *) + | Pcbu (bt: btest) (r: ireg) (l: label) (**r branch based on btest with unsigned semantics *) + | Pjumptable (r: ireg) (labels: list label) + + (* For builtins *) + | Ploopdo (count: ireg) (loopend: label) + | Pgetn (n: int) (dst: ireg) + | Psetn (n: int) (src: ireg) + | Pwfxl (n: int) (src: ireg) + | Pwfxm (n: int) (src: ireg) + | Pldu (dst: ireg) (addr: ireg) + | Plbzu (dst: ireg) (addr: ireg) + | Plhzu (dst: ireg) (addr: ireg) + | Plwzu (dst: ireg) (addr: ireg) + | Pawait + | Psleep + | Pstop + | Pbarrier + | Pfence + | Pdinval + | Pdinvall (addr: ireg) + | Pdtouchl (addr: ireg) + | Piinval + | Piinvals (addr: ireg) + | Pitouchl (addr: ireg) + | Pdzerol (addr: ireg) +(*| Pafaddd (addr: ireg) (incr_res: ireg) + | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *) + | Palclrd (dst: ireg) (addr: ireg) + | Palclrw (dst: ireg) (addr: ireg) + | Pclzll (rd rs: ireg) + | Pclzw (rd rs: ireg) + | Pctzll (rd rs: ireg) + | Pctzw (rd rs: ireg) + | Pstsud (rd rs1 rs2: ireg) + + (** Loads *) + | Plb (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load byte *) + | Plbu (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load byte unsigned *) + | Plh (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load half word *) + | Plhu (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load half word unsigned *) + | Plw (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load int32 *) + | Plw_a (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load any32 *) + | Pld (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load int64 *) + | Pld_a (trap: trapping_mode) (rd: ireg) (ra: ireg) (ofs: addressing) (**r load any64 *) + | Pfls (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing) (**r load float *) + | Pfld (trap: trapping_mode) (rd: freg) (ra: ireg) (ofs: addressing) (**r load 64-bit float *) + | Plq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r load 2*64-bit *) + | Plo (rs: gpreg_o) (ra: ireg) (ofs: addressing) (**r load 4*64-bit *) + + (** Stores *) + | Psb (rs: ireg) (ra: ireg) (ofs: addressing) (**r store byte *) + | Psh (rs: ireg) (ra: ireg) (ofs: addressing) (**r store half byte *) + | Psw (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int32 *) + | Psw_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any32 *) + | Psd (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int64 *) + | Psd_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any64 *) + | Pfss (rs: freg) (ra: ireg) (ofs: addressing) (**r store float *) + | Pfsd (rs: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *) + + | Psq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r store 2*64-bit *) + | Pso (rs: gpreg_o) (ra: ireg) (ofs: addressing) (**r store 2*64-bit *) + + (** Arith RR *) + | Pmv (rd rs: ireg) (**r register move *) + | Pnegw (rd rs: ireg) (**r negate word *) + | Pnegl (rd rs: ireg) (**r negate long *) + | Pcvtl2w (rd rs: ireg) (**r Convert Long to Word *) + | Psxwd (rd rs: ireg) (**r Sign Extend Word to Double Word *) + | Pzxwd (rd rs: ireg) (**r Zero Extend Word to Double Word *) + + | Pextfz (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *) + | Pextfs (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *) + + | Pextfzl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields unsigned *) + | Pextfsl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r extract bitfields signed *) + + | Pinsf (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *) + | Pinsfl (rd : ireg) (rs : ireg) (stop : Z) (start : Z) (**r insert bitfield *) + + | Pfabsd (rd rs: ireg) (**r float absolute double *) + | Pfabsw (rd rs: ireg) (**r float absolute word *) + | Pfnegd (rd rs: ireg) (**r float negate double *) + | Pfnegw (rd rs: ireg) (**r float negate word *) + | Pfnarrowdw (rd rs: ireg) (**r float narrow 64 -> 32 bits *) + | Pfwidenlwd (rd rs: ireg) (**r float widen 32 -> 64 bits *) + | Pfloatwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (32 -> 32) *) + | Pfloatuwrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (u32 -> 32) *) + | Pfloatudrnsz (rd rs: ireg) (**r Floating Point Conversion from unsigned integer (64 bits) *) + | Pfloatdrnsz (rd rs: ireg) (**r Floating Point Conversion from integer (64 bits) *) + | Pfixedwrzz (rd rs: ireg) (**r Integer conversion from floating point *) + | Pfixeduwrzz (rd rs: ireg) (**r Integer conversion from floating point (f32 -> 32 bits unsigned *) + | Pfixeddrzz (rd rs: ireg) (**r Integer conversion from floating point (i64 -> 64 bits) *) + | Pfixeddrzz_i32 (rd rs: ireg) (**r Integer conversion from floating point (i32 -> f64) *) + | Pfixedudrzz (rd rs: ireg) (**r unsigned Integer conversion from floating point (u64 -> 64 bits) *) + | Pfixedudrzz_i32 (rd rs: ireg) (**r unsigned Integer conversion from floating point (u32 -> 64 bits) *) + + (** Arith RI32 *) + | Pmake (rd: ireg) (imm: int) (**r load immediate *) + + (** Arith RI64 *) + | Pmakel (rd: ireg) (imm: int64) (**r load immediate long *) + + (** Arith RF32 *) + | Pmakefs (rd: ireg) (imm: float32) + + (** Arith RF64 *) + | Pmakef (rd: ireg) (imm: float) + + (** Arith RRR *) + | Pcompw (it: itest) (rd rs1 rs2: ireg) (**r comparison word *) + | Pcompl (it: itest) (rd rs1 rs2: ireg) (**r comparison long *) + | Pfcompw (ft: ftest) (rd rs1 rs2: ireg) (**r comparison float *) + | Pfcompl (ft: ftest) (rd rs1 rs2: ireg) (**r comparison float64 *) + + | Paddw (rd rs1 rs2: ireg) (**r add word *) + | Paddxw (shift : shift1_4) (rd rs1 rs2: ireg) (**r add word *) + | Psubw (rd rs1 rs2: ireg) (**r sub word *) + | Prevsubxw (shift : shift1_4) (rd rs1 rs2: ireg) (**r add word *) + | Pmulw (rd rs1 rs2: ireg) (**r mul word *) + | Pandw (rd rs1 rs2: ireg) (**r and word *) + | Pnandw (rd rs1 rs2: ireg) (**r nand word *) + | Porw (rd rs1 rs2: ireg) (**r or word *) + | Pnorw (rd rs1 rs2: ireg) (**r nor word *) + | Pxorw (rd rs1 rs2: ireg) (**r xor word *) + | Pnxorw (rd rs1 rs2: ireg) (**r xor word *) + | Pandnw (rd rs1 rs2: ireg) (**r andn word *) + | Pornw (rd rs1 rs2: ireg) (**r orn word *) + | Psraw (rd rs1 rs2: ireg) (**r shift right arithmetic word *) + | Psrxw (rd rs1 rs2: ireg) (**r shift right arithmetic word round to 0*) + | Psrlw (rd rs1 rs2: ireg) (**r shift right logical word *) + | Psllw (rd rs1 rs2: ireg) (**r shift left logical word *) + | Pmaddw (rd rs1 rs2: ireg) (**r multiply-add words *) + | Pmsubw (rd rs1 rs2: ireg) (**r multiply-add words *) + | Pfmaddfw (rd rs1 rs2: ireg) (**r float fused multiply-add words *) + | Pfmsubfw (rd rs1 rs2: ireg) (**r float fused multiply-subtract words *) + | Pfmaddfl (rd rs1 rs2: ireg) (**r float fused multiply-add longs *) + | Pfmsubfl (rd rs1 rs2: ireg) (**r float fused multiply-subtract longs *) + + | Paddl (rd rs1 rs2: ireg) (**r add long *) + | Paddxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r add long shift *) + | Psubl (rd rs1 rs2: ireg) (**r sub long *) + | Prevsubxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r sub long shift *) + | Pandl (rd rs1 rs2: ireg) (**r and long *) + | Pnandl (rd rs1 rs2: ireg) (**r nand long *) + | Porl (rd rs1 rs2: ireg) (**r or long *) + | Pnorl (rd rs1 rs2: ireg) (**r nor long *) + | Pxorl (rd rs1 rs2: ireg) (**r xor long *) + | Pnxorl (rd rs1 rs2: ireg) (**r nxor long *) + | Pandnl (rd rs1 rs2: ireg) (**r andn long *) + | Pornl (rd rs1 rs2: ireg) (**r orn long *) + | Pmull (rd rs1 rs2: ireg) (**r mul long (low part) *) + | Pslll (rd rs1 rs2: ireg) (**r shift left logical long *) + | Psrll (rd rs1 rs2: ireg) (**r shift right logical long *) + | Psral (rd rs1 rs2: ireg) (**r shift right arithmetic long *) + | Psrxl (rd rs1 rs2: ireg) (**r shift right arithmetic long round to 0*) + | Pmaddl (rd rs1 rs2: ireg) (**r multiply-add long *) + | Pmsubl (rd rs1 rs2: ireg) (**r multiply-add long *) + + | Pfaddd (rd rs1 rs2: ireg) (**r Float addition double *) + | Pfaddw (rd rs1 rs2: ireg) (**r Float addition word *) + | Pfsbfd (rd rs1 rs2: ireg) (**r Float sub double *) + | Pfsbfw (rd rs1 rs2: ireg) (**r Float sub word *) + | Pfmuld (rd rs1 rs2: ireg) (**r Float mul double *) + | Pfmulw (rd rs1 rs2: ireg) (**r Float mul word *) + | Pfmind (rd rs1 rs2: ireg) (**r Float min double *) + | Pfminw (rd rs1 rs2: ireg) (**r Float min word *) + | Pfmaxd (rd rs1 rs2: ireg) (**r Float max double *) + | Pfmaxw (rd rs1 rs2: ireg) (**r Float max word *) + | Pfinvw (rd rs1: ireg) (**r Float invert word *) + + (** Arith RRI32 *) + | Pcompiw (it: itest) (rd rs: ireg) (imm: int) (**r comparison imm word *) + + | Paddiw (rd rs: ireg) (imm: int) (**r add imm word *) + | Paddxiw (shift : shift1_4) (rd rs: ireg) (imm: int) (**r add imm word *) + | Prevsubiw (rd rs: ireg) (imm: int) (**r subtract imm word *) + | Prevsubxiw (shift : shift1_4) (rd rs: ireg) (imm: int) (**r subtract imm word *) + | Pmuliw (rd rs: ireg) (imm: int) (**r mul imm word *) + | Pandiw (rd rs: ireg) (imm: int) (**r and imm word *) + | Pnandiw (rd rs: ireg) (imm: int) (**r nand imm word *) + | Poriw (rd rs: ireg) (imm: int) (**r or imm word *) + | Pnoriw (rd rs: ireg) (imm: int) (**r nor imm word *) + | Pxoriw (rd rs: ireg) (imm: int) (**r xor imm word *) + | Pnxoriw (rd rs: ireg) (imm: int) (**r nxor imm word *) + | Pandniw (rd rs: ireg) (imm: int) (**r andn imm word *) + | Porniw (rd rs: ireg) (imm: int) (**r orn imm word *) + | Psraiw (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word *) + | Psrxiw (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word round to 0*) + | Psrliw (rd rs: ireg) (imm: int) (**r shift right logical imm word *) + | Pslliw (rd rs: ireg) (imm: int) (**r shift left logical imm word *) + | Proriw (rd rs: ireg) (imm: int) (**r rotate right imm word *) + | Pmaddiw (rd rs: ireg) (imm: int) (**r multiply add imm word *) + | Psllil (rd rs: ireg) (imm: int) (**r shift left logical immediate long *) + | Psrxil (rd rs: ireg) (imm: int) (**r shift right arithmetic imm word round to 0*) + | Psrlil (rd rs: ireg) (imm: int) (**r shift right logical immediate long *) + | Psrail (rd rs: ireg) (imm: int) (**r shift right arithmetic immediate long *) + + (** Arith RRI64 *) + | Pcompil (it: itest) (rd rs: ireg) (imm: int64) (**r comparison imm long *) + | Paddil (rd rs: ireg) (imm: int64) (**r add immediate long *) + | Paddxil (shift : shift1_4) (rd rs: ireg) (imm: int64) (**r add immediate long *) + | Prevsubil (rd rs: ireg) (imm: int64) (**r subtract imm long *) + | Prevsubxil (shift : shift1_4) (rd rs: ireg) (imm: int64) (**r subtract imm long *) + | Pmulil (rd rs: ireg) (imm: int64) (**r add immediate long *) + | Pandil (rd rs: ireg) (imm: int64) (**r and immediate long *) + | Pnandil (rd rs: ireg) (imm: int64) (**r and immediate long *) + | Poril (rd rs: ireg) (imm: int64) (**r or immediate long *) + | Pnoril (rd rs: ireg) (imm: int64) (**r and immediate long *) + | Pxoril (rd rs: ireg) (imm: int64) (**r xor immediate long *) + | Pnxoril (rd rs: ireg) (imm: int64) (**r xor immediate long *) + | Pandnil (rd rs: ireg) (imm: int64) (**r andn long *) + | Pornil (rd rs: ireg) (imm: int64) (**r orn long *) + | Pmaddil (rd rs: ireg) (imm: int64) (**r multiply add imm long *) + | Pcmove (bt: btest) (rcond rd rs : ireg) (** conditional move *) + | Pcmoveu (bt: btest) (rcond rd rs : ireg) (** conditional move, unsigned semantics *) + | Pcmoveiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move *) + | Pcmoveuiw (bt: btest) (rcond rd : ireg) (imm: int) (** conditional move, unsigned semantics *) + | Pcmoveil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move *) + | Pcmoveuil (bt: btest) (rcond rd : ireg) (imm: int64) (** conditional move, unsigned semantics *) +. + +(** Correspondance between Asmblock and Asm *) + +Definition control_to_instruction (c: control) := + match c with + | PExpand (Asmvliw.Pbuiltin ef args res) => Pbuiltin ef args res + | PCtlFlow Asmvliw.Pret => Pret + | PCtlFlow (Asmvliw.Pcall l) => Pcall l + | PCtlFlow (Asmvliw.Picall r) => Picall r + | PCtlFlow (Asmvliw.Pgoto l) => Pgoto l + | PCtlFlow (Asmvliw.Pigoto l) => Pigoto l + | PCtlFlow (Asmvliw.Pj_l l) => Pj_l l + | PCtlFlow (Asmvliw.Pcb bt r l) => Pcb bt r l + | PCtlFlow (Asmvliw.Pcbu bt r l) => Pcbu bt r l + | PCtlFlow (Asmvliw.Pjumptable r label) => Pjumptable r label + end. + +Definition basic_to_instruction (b: basic) := + match b with + (** Special basics *) + | Asmvliw.Pget rd rs => Pget rd rs + | Asmvliw.Pset rd rs => Pset rd rs + | Asmvliw.Pnop => Pnop + | Asmvliw.Pallocframe sz pos => Pallocframe sz pos + | Asmvliw.Pfreeframe sz pos => Pfreeframe sz pos + + (** PArith basics *) + (* R *) + | PArithR (Asmvliw.Ploadsymbol id ofs) r => Ploadsymbol r id ofs + + (* RR *) + | PArithRR Asmvliw.Pmv rd rs => Pmv rd rs + | PArithRR Asmvliw.Pnegw rd rs => Pnegw rd rs + | PArithRR Asmvliw.Pnegl rd rs => Pnegl rd rs + | PArithRR Asmvliw.Pcvtl2w rd rs => Pcvtl2w rd rs + | PArithRR Asmvliw.Psxwd rd rs => Psxwd rd rs + | PArithRR Asmvliw.Pzxwd rd rs => Pzxwd rd rs + | PArithRR (Asmvliw.Pextfz stop start) rd rs => Pextfz rd rs stop start + | PArithRR (Asmvliw.Pextfs stop start) rd rs => Pextfs rd rs stop start + | PArithRR (Asmvliw.Pextfzl stop start) rd rs => Pextfzl rd rs stop start + | PArithRR (Asmvliw.Pextfsl stop start) rd rs => Pextfsl rd rs stop start + | PArithRR Asmvliw.Pfabsd rd rs => Pfabsd rd rs + | PArithRR Asmvliw.Pfabsw rd rs => Pfabsw rd rs + | PArithRR Asmvliw.Pfnegd rd rs => Pfnegd rd rs + | PArithRR Asmvliw.Pfnegw rd rs => Pfnegw rd rs + | PArithRR Asmvliw.Pfinvw rd rs => Pfinvw rd rs + | PArithRR Asmvliw.Pfnarrowdw rd rs => Pfnarrowdw rd rs + | PArithRR Asmvliw.Pfwidenlwd rd rs => Pfwidenlwd rd rs + | PArithRR Asmvliw.Pfloatuwrnsz rd rs => Pfloatuwrnsz rd rs + | PArithRR Asmvliw.Pfloatwrnsz rd rs => Pfloatwrnsz rd rs + | PArithRR Asmvliw.Pfloatudrnsz rd rs => Pfloatudrnsz rd rs + | PArithRR Asmvliw.Pfloatdrnsz rd rs => Pfloatdrnsz rd rs + | PArithRR Asmvliw.Pfixedwrzz rd rs => Pfixedwrzz rd rs + | PArithRR Asmvliw.Pfixeduwrzz rd rs => Pfixeduwrzz rd rs + | PArithRR Asmvliw.Pfixeddrzz rd rs => Pfixeddrzz rd rs + | PArithRR Asmvliw.Pfixedudrzz rd rs => Pfixedudrzz rd rs + | PArithRR Asmvliw.Pfixeddrzz_i32 rd rs => Pfixeddrzz_i32 rd rs + | PArithRR Asmvliw.Pfixedudrzz_i32 rd rs => Pfixedudrzz_i32 rd rs + + (* RI32 *) + | PArithRI32 Asmvliw.Pmake rd imm => Pmake rd imm + + (* RI64 *) + | PArithRI64 Asmvliw.Pmakel rd imm => Pmakel rd imm + + (* RF32 *) + | PArithRF32 Asmvliw.Pmakefs rd imm => Pmakefs rd imm + + (* RF64 *) + | PArithRF64 Asmvliw.Pmakef rd imm => Pmakef rd imm + + (* RRR *) + | PArithRRR (Asmvliw.Pcompw it) rd rs1 rs2 => Pcompw it rd rs1 rs2 + | PArithRRR (Asmvliw.Pcompl it) rd rs1 rs2 => Pcompl it rd rs1 rs2 + | PArithRRR (Asmvliw.Pfcompw ft) rd rs1 rs2 => Pfcompw ft rd rs1 rs2 + | PArithRRR (Asmvliw.Pfcompl ft) rd rs1 rs2 => Pfcompl ft rd rs1 rs2 + | PArithRRR Asmvliw.Paddw rd rs1 rs2 => Paddw rd rs1 rs2 + | PArithRRR (Asmvliw.Paddxw shift) rd rs1 rs2 => Paddxw shift rd rs1 rs2 + | PArithRRR Asmvliw.Psubw rd rs1 rs2 => Psubw rd rs1 rs2 + | PArithRRR (Asmvliw.Prevsubxw shift) rd rs1 rs2 => Prevsubxw shift rd rs1 rs2 + | PArithRRR Asmvliw.Pmulw rd rs1 rs2 => Pmulw rd rs1 rs2 + | PArithRRR Asmvliw.Pandw rd rs1 rs2 => Pandw rd rs1 rs2 + | PArithRRR Asmvliw.Pnandw rd rs1 rs2 => Pnandw rd rs1 rs2 + | PArithRRR Asmvliw.Porw rd rs1 rs2 => Porw rd rs1 rs2 + | PArithRRR Asmvliw.Pnorw rd rs1 rs2 => Pnorw rd rs1 rs2 + | PArithRRR Asmvliw.Pxorw rd rs1 rs2 => Pxorw rd rs1 rs2 + | PArithRRR Asmvliw.Pnxorw rd rs1 rs2 => Pnxorw rd rs1 rs2 + | PArithRRR Asmvliw.Pandnw rd rs1 rs2 => Pandnw rd rs1 rs2 + | PArithRRR Asmvliw.Pornw rd rs1 rs2 => Pornw rd rs1 rs2 + | PArithRRR Asmvliw.Psraw rd rs1 rs2 => Psraw rd rs1 rs2 + | PArithRRR Asmvliw.Psrxw rd rs1 rs2 => Psrxw rd rs1 rs2 + | PArithRRR Asmvliw.Psrlw rd rs1 rs2 => Psrlw rd rs1 rs2 + | PArithRRR Asmvliw.Psllw rd rs1 rs2 => Psllw rd rs1 rs2 + + | PArithRRR Asmvliw.Paddl rd rs1 rs2 => Paddl rd rs1 rs2 + | PArithRRR (Asmvliw.Paddxl shift) rd rs1 rs2 => Paddxl shift rd rs1 rs2 + | PArithRRR Asmvliw.Psubl rd rs1 rs2 => Psubl rd rs1 rs2 + | PArithRRR (Asmvliw.Prevsubxl shift) rd rs1 rs2 => Prevsubxl shift rd rs1 rs2 + | PArithRRR Asmvliw.Pandl rd rs1 rs2 => Pandl rd rs1 rs2 + | PArithRRR Asmvliw.Pnandl rd rs1 rs2 => Pnandl rd rs1 rs2 + | PArithRRR Asmvliw.Porl rd rs1 rs2 => Porl rd rs1 rs2 + | PArithRRR Asmvliw.Pnorl rd rs1 rs2 => Pnorl rd rs1 rs2 + | PArithRRR Asmvliw.Pxorl rd rs1 rs2 => Pxorl rd rs1 rs2 + | PArithRRR Asmvliw.Pnxorl rd rs1 rs2 => Pnxorl rd rs1 rs2 + | PArithRRR Asmvliw.Pandnl rd rs1 rs2 => Pandnl rd rs1 rs2 + | PArithRRR Asmvliw.Pornl rd rs1 rs2 => Pornl rd rs1 rs2 + | PArithRRR Asmvliw.Pmull rd rs1 rs2 => Pmull rd rs1 rs2 + | PArithRRR Asmvliw.Pslll rd rs1 rs2 => Pslll rd rs1 rs2 + | PArithRRR Asmvliw.Psrll rd rs1 rs2 => Psrll rd rs1 rs2 + | PArithRRR Asmvliw.Psral rd rs1 rs2 => Psral rd rs1 rs2 + | PArithRRR Asmvliw.Psrxl rd rs1 rs2 => Psrxl rd rs1 rs2 + + | PArithRRR Asmvliw.Pfaddd rd rs1 rs2 => Pfaddd rd rs1 rs2 + | PArithRRR Asmvliw.Pfaddw rd rs1 rs2 => Pfaddw rd rs1 rs2 + | PArithRRR Asmvliw.Pfsbfd rd rs1 rs2 => Pfsbfd rd rs1 rs2 + | PArithRRR Asmvliw.Pfsbfw rd rs1 rs2 => Pfsbfw rd rs1 rs2 + | PArithRRR Asmvliw.Pfmuld rd rs1 rs2 => Pfmuld rd rs1 rs2 + | PArithRRR Asmvliw.Pfmulw rd rs1 rs2 => Pfmulw rd rs1 rs2 + | PArithRRR Asmvliw.Pfmind rd rs1 rs2 => Pfmind rd rs1 rs2 + | PArithRRR Asmvliw.Pfminw rd rs1 rs2 => Pfminw rd rs1 rs2 + | PArithRRR Asmvliw.Pfmaxd rd rs1 rs2 => Pfmaxd rd rs1 rs2 + | PArithRRR Asmvliw.Pfmaxw rd rs1 rs2 => Pfmaxw rd rs1 rs2 + + (* RRI32 *) + | PArithRRI32 (Asmvliw.Pcompiw it) rd rs imm => Pcompiw it rd rs imm + | PArithRRI32 Asmvliw.Paddiw rd rs imm => Paddiw rd rs imm + | PArithRRI32 (Asmvliw.Paddxiw shift) rd rs imm => Paddxiw shift rd rs imm + | PArithRRI32 Asmvliw.Prevsubiw rd rs imm => Prevsubiw rd rs imm + | PArithRRI32 (Asmvliw.Prevsubxiw shift) rd rs imm => Prevsubxiw shift rd rs imm + | PArithRRI32 Asmvliw.Pmuliw rd rs imm => Pmuliw rd rs imm + | PArithRRI32 Asmvliw.Pandiw rd rs imm => Pandiw rd rs imm + | PArithRRI32 Asmvliw.Pnandiw rd rs imm => Pnandiw rd rs imm + | PArithRRI32 Asmvliw.Poriw rd rs imm => Poriw rd rs imm + | PArithRRI32 Asmvliw.Pnoriw rd rs imm => Pnoriw rd rs imm + | PArithRRI32 Asmvliw.Pxoriw rd rs imm => Pxoriw rd rs imm + | PArithRRI32 Asmvliw.Pnxoriw rd rs imm => Pnxoriw rd rs imm + | PArithRRI32 Asmvliw.Pandniw rd rs imm => Pandniw rd rs imm + | PArithRRI32 Asmvliw.Porniw rd rs imm => Porniw rd rs imm + | PArithRRI32 Asmvliw.Psraiw rd rs imm => Psraiw rd rs imm + | PArithRRI32 Asmvliw.Psrxiw rd rs imm => Psrxiw rd rs imm + | PArithRRI32 Asmvliw.Psrliw rd rs imm => Psrliw rd rs imm + | PArithRRI32 Asmvliw.Pslliw rd rs imm => Pslliw rd rs imm + | PArithRRI32 Asmvliw.Proriw rd rs imm => Proriw rd rs imm + | PArithRRI32 Asmvliw.Psllil rd rs imm => Psllil rd rs imm + | PArithRRI32 Asmvliw.Psrlil rd rs imm => Psrlil rd rs imm + | PArithRRI32 Asmvliw.Psrxil rd rs imm => Psrxil rd rs imm + | PArithRRI32 Asmvliw.Psrail rd rs imm => Psrail rd rs imm + + (* RRI64 *) + | PArithRRI64 (Asmvliw.Pcompil it) rd rs imm => Pcompil it rd rs imm + | PArithRRI64 Asmvliw.Paddil rd rs imm => Paddil rd rs imm + | PArithRRI64 (Asmvliw.Paddxil shift) rd rs imm => Paddxil shift rd rs imm + | PArithRRI64 Asmvliw.Prevsubil rd rs imm => Prevsubil rd rs imm + | PArithRRI64 (Asmvliw.Prevsubxil shift) rd rs imm => Prevsubxil shift rd rs imm + | PArithRRI64 Asmvliw.Pmulil rd rs imm => Pmulil rd rs imm + | PArithRRI64 Asmvliw.Pandil rd rs imm => Pandil rd rs imm + | PArithRRI64 Asmvliw.Pnandil rd rs imm => Pnandil rd rs imm + | PArithRRI64 Asmvliw.Poril rd rs imm => Poril rd rs imm + | PArithRRI64 Asmvliw.Pnoril rd rs imm => Pnoril rd rs imm + | PArithRRI64 Asmvliw.Pxoril rd rs imm => Pxoril rd rs imm + | PArithRRI64 Asmvliw.Pnxoril rd rs imm => Pnxoril rd rs imm + | PArithRRI64 Asmvliw.Pandnil rd rs imm => Pandnil rd rs imm + | PArithRRI64 Asmvliw.Pornil rd rs imm => Pornil rd rs imm + + (** ARRR *) + | PArithARRR Asmvliw.Pmaddw rd rs1 rs2 => Pmaddw rd rs1 rs2 + | PArithARRR Asmvliw.Pmaddl rd rs1 rs2 => Pmaddl rd rs1 rs2 + | PArithARRR Asmvliw.Pmsubw rd rs1 rs2 => Pmsubw rd rs1 rs2 + | PArithARRR Asmvliw.Pmsubl rd rs1 rs2 => Pmsubl rd rs1 rs2 + | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2 => Pfmaddfw rd rs1 rs2 + | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2 => Pfmaddfl rd rs1 rs2 + | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2 => Pfmsubfw rd rs1 rs2 + | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2 => Pfmsubfl rd rs1 rs2 + | PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2 + | PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2 + + (** ARR *) + | PArithARR (Asmvliw.Pinsf stop start) rd rs => Pinsf rd rs stop start + | PArithARR (Asmvliw.Pinsfl stop start) rd rs => Pinsfl rd rs stop start + + (** ARRI32 *) + | PArithARRI32 Asmvliw.Pmaddiw rd rs1 imm => Pmaddiw rd rs1 imm + | PArithARRI32 (Asmvliw.Pcmoveiw cond) rd rs1 imm => Pcmoveiw cond rd rs1 imm + | PArithARRI32 (Asmvliw.Pcmoveuiw cond) rd rs1 imm => Pcmoveuiw cond rd rs1 imm + + (** ARRI64 *) + | PArithARRI64 Asmvliw.Pmaddil rd rs1 imm => Pmaddil rd rs1 imm + | PArithARRI64 (Asmvliw.Pcmoveil cond) rd rs1 imm => Pcmoveil cond rd rs1 imm + | PArithARRI64 (Asmvliw.Pcmoveuil cond) rd rs1 imm => Pcmoveuil cond rd rs1 imm + (** Load *) + | PLoadRRO trap Asmvliw.Plb rd ra ofs => Plb trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Plbu rd ra ofs => Plbu trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Plh rd ra ofs => Plh trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Plhu rd ra ofs => Plhu trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Plw rd ra ofs => Plw trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Plw_a rd ra ofs => Plw_a trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Pld rd ra ofs => Pld trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Pld_a rd ra ofs => Pld_a trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Pfls rd ra ofs => Pfls trap rd ra (AOff ofs) + | PLoadRRO trap Asmvliw.Pfld rd ra ofs => Pfld trap rd ra (AOff ofs) + + | PLoadQRRO qrs ra ofs => Plq qrs ra (AOff ofs) + | PLoadORRO qrs ra ofs => Plo qrs ra (AOff ofs) + + | PLoadRRR trap Asmvliw.Plb rd ra ro => Plb trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Plbu rd ra ro => Plbu trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Plh rd ra ro => Plh trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Plhu rd ra ro => Plhu trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Plw rd ra ro => Plw trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Pld rd ra ro => Pld trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Pfls rd ra ro => Pfls trap rd ra (AReg ro) + | PLoadRRR trap Asmvliw.Pfld rd ra ro => Pfld trap rd ra (AReg ro) + + | PLoadRRRXS trap Asmvliw.Plb rd ra ro => Plb trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Plbu rd ra ro => Plbu trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Plh rd ra ro => Plh trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Plhu rd ra ro => Plhu trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Plw rd ra ro => Plw trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Plw_a rd ra ro => Plw_a trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Pld rd ra ro => Pld trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Pld_a rd ra ro => Pld_a trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Pfls rd ra ro => Pfls trap rd ra (ARegXS ro) + | PLoadRRRXS trap Asmvliw.Pfld rd ra ro => Pfld trap rd ra (ARegXS ro) + + (** Store *) + | PStoreRRO Asmvliw.Psb rd ra ofs => Psb rd ra (AOff ofs) + | PStoreRRO Asmvliw.Psh rd ra ofs => Psh rd ra (AOff ofs) + | PStoreRRO Asmvliw.Psw rd ra ofs => Psw rd ra (AOff ofs) + | PStoreRRO Asmvliw.Psw_a rd ra ofs => Psw_a rd ra (AOff ofs) + | PStoreRRO Asmvliw.Psd rd ra ofs => Psd rd ra (AOff ofs) + | PStoreRRO Asmvliw.Psd_a rd ra ofs => Psd_a rd ra (AOff ofs) + | PStoreRRO Asmvliw.Pfss rd ra ofs => Pfss rd ra (AOff ofs) + | PStoreRRO Asmvliw.Pfsd rd ra ofs => Pfsd rd ra (AOff ofs) + + | PStoreRRR Asmvliw.Psb rd ra ro => Psb rd ra (AReg ro) + | PStoreRRR Asmvliw.Psh rd ra ro => Psh rd ra (AReg ro) + | PStoreRRR Asmvliw.Psw rd ra ro => Psw rd ra (AReg ro) + | PStoreRRR Asmvliw.Psw_a rd ra ro => Psw_a rd ra (AReg ro) + | PStoreRRR Asmvliw.Psd rd ra ro => Psd rd ra (AReg ro) + | PStoreRRR Asmvliw.Psd_a rd ra ro => Psd_a rd ra (AReg ro) + | PStoreRRR Asmvliw.Pfss rd ra ro => Pfss rd ra (AReg ro) + | PStoreRRR Asmvliw.Pfsd rd ra ro => Pfsd rd ra (AReg ro) + + | PStoreRRRXS Asmvliw.Psb rd ra ro => Psb rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Psh rd ra ro => Psh rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Psw rd ra ro => Psw rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Psw_a rd ra ro => Psw_a rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Psd rd ra ro => Psd rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Psd_a rd ra ro => Psd_a rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro) + | PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro) + + | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs) + | PStoreORRO qrs ra ofs => Pso qrs ra (AOff ofs) + end. + +(** * Semantics (given through the existence of well-formed VLIW program) *) + +Section RELSEM. + +Definition code := list instruction. + +Fixpoint unfold_label (ll: list label) := + match ll with + | nil => nil + | l :: ll => Plabel l :: unfold_label ll + end. + +Fixpoint unfold_body (lb: list basic) := + match lb with + | nil => nil + | b :: lb => basic_to_instruction b :: unfold_body lb + end. + +Definition unfold_exit (oc: option control) := + match oc with + | None => nil + | Some c => control_to_instruction c :: nil + end. + +Definition unfold_bblock (b: bblock) := unfold_label (header b) ++ + (match (body b), (exit b) with + | (((Asmvliw.Pfreeframe _ _ | Asmvliw.Pallocframe _ _)::nil) as bo), None => + unfold_body bo + | bo, ex => unfold_body bo ++ unfold_exit ex ++ Psemi :: nil + end). + +Fixpoint unfold (lb: bblocks) := + match lb with + | nil => nil + | b :: lb => (unfold_bblock b) ++ unfold lb + end. + +Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks; fn_code: code; + correct: unfold fn_blocks = fn_code }. + +Definition fundef := AST.fundef function. +Definition program := AST.program fundef unit. +Definition genv := Genv.t fundef unit. + +Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f). + +Definition fundef_proj (fu: fundef) : Asmvliw.fundef := + match fu with + | Internal f => Internal (function_proj f) + | External ef => External ef + end. + +Definition globdef_proj (gd: globdef fundef unit) : globdef Asmvliw.fundef unit := + match gd with + | Gfun f => Gfun (fundef_proj f) + | Gvar gu => Gvar gu + end. + +Program Definition genv_trans (ge: genv) : Asmvliw.genv := + {| Genv.genv_public := Genv.genv_public ge; + Genv.genv_symb := Genv.genv_symb ge; + Genv.genv_defs := PTree.map1 globdef_proj (Genv.genv_defs ge); + Genv.genv_next := Genv.genv_next ge |}. +Next Obligation. + destruct ge. cbn in *. eauto. +Qed. Next Obligation. + destruct ge; cbn in *. + rewrite PTree.gmap1 in H. + destruct (genv_defs ! b) eqn:GEN. + - eauto. + - discriminate. +Qed. Next Obligation. + destruct ge; cbn in *. + eauto. +Qed. + +Fixpoint prog_defs_proj (l: list (ident * globdef fundef unit)) + : list (ident * globdef Asmvliw.fundef unit) := + match l with + | nil => nil + | (i, gd) :: l => (i, globdef_proj gd) :: prog_defs_proj l + end. + +Definition program_proj (p: program) : Asmvliw.program := + {| prog_defs := prog_defs_proj (prog_defs p); + prog_public := prog_public p; + prog_main := prog_main p + |}. + +End RELSEM. + +Definition semantics (p: program) := Asmvliw.semantics (program_proj p). + +(** Determinacy of the [Asm] semantics. *) + +Lemma semantics_determinate: forall p, determinate (semantics p). +Proof. + intros. apply semantics_determinate. +Qed. + +(** transf_program *) + +Program Definition transf_function (f: Asmvliw.function) : function := + {| fn_sig := Asmvliw.fn_sig f; fn_blocks := Asmvliw.fn_blocks f; + fn_code := unfold (Asmvliw.fn_blocks f) |}. + +Lemma transf_function_proj: forall f, function_proj (transf_function f) = f. +Proof. + intros f. destruct f as [sig blks]. unfold function_proj. cbn. auto. +Qed. + +Definition transf_fundef : Asmvliw.fundef -> fundef := AST.transf_fundef transf_function. + +Lemma transf_fundef_proj: forall f, fundef_proj (transf_fundef f) = f. +Proof. + intros f. destruct f as [f|e]; cbn; auto. + rewrite transf_function_proj. auto. +Qed. + +Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef. + +Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B), + prog_defs p1 = prog_defs p2 -> + prog_public p1 = prog_public p2 -> + prog_main p1 = prog_main p2 -> + p1 = p2. +Proof. + intros. destruct p1. destruct p2. cbn in *. subst. auto. +Qed. + +Lemma transf_program_proj: forall p, program_proj (transf_program p) = p. +Proof. + intros p. destruct p as [defs pub main]. unfold program_proj. cbn. + apply program_equals; cbn; auto. + induction defs. + - cbn; auto. + - cbn. rewrite IHdefs. + destruct a as [id gd]; cbn. + destruct gd as [f|v]; cbn; auto. + rewrite transf_fundef_proj. auto. +Qed. + +Definition match_prog (p: Asmvliw.program) (tp: program) := + match_program (fun _ f tf => tf = transf_fundef f) eq p tp. + +Lemma transf_program_match: + forall p tp, transf_program p = tp -> match_prog p tp. +Proof. + intros. rewrite <- H. eapply match_transform_program; eauto. +Qed. + +Lemma cons_extract {A: Type} : forall (l: list A) a b, a = b -> a::l = b::l. +Proof. + intros. congruence. +Qed. + +Lemma match_program_transf: + forall p tp, match_prog p tp -> transf_program p = tp. +Proof. + intros p tp H. inversion_clear H. inv H1. + destruct p as [defs pub main]. destruct tp as [tdefs tpub tmain]. cbn in *. + subst. unfold transf_program. unfold transform_program. cbn. + apply program_equals; cbn; auto. + induction H0; cbn; auto. + rewrite IHlist_forall2. apply cons_extract. + destruct a1 as [ida gda]. destruct b1 as [idb gdb]. + cbn in *. + inv H. inv H2. + - cbn in *. subst. auto. + - cbn in *. subst. inv H. auto. +Qed. + +Section PRESERVATION. + +Variable prog: Asmvliw.program. +Variable tprog: program. +Hypothesis TRANSF: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Definition match_states (s1 s2: state) := s1 = s2. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof (Genv.find_symbol_match TRANSF). + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof (Genv.senv_match TRANSF). + + +Theorem transf_program_correct: + forward_simulation (Asmvliw.semantics prog) (semantics tprog). +Proof. + pose proof (match_program_transf prog tprog TRANSF) as TR. + subst. unfold semantics. rewrite transf_program_proj. + + eapply forward_simulation_step with (match_states := match_states); cbn; auto. + - intros. exists s1. split; auto. congruence. + - intros. inv H. auto. + - intros. exists s1'. inv H0. split; auto. congruence. +Qed. + +End PRESERVATION. diff --git a/kvx/AsmToJSON.ml b/kvx/AsmToJSON.ml new file mode 100644 index 00000000..8a6a97a7 --- /dev/null +++ b/kvx/AsmToJSON.ml @@ -0,0 +1,23 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *) +(* *) +(* AbsInt Angewandte Informatik GmbH. All rights reserved. This file *) +(* is distributed under the terms of the INRIA Non-Commercial *) +(* License Agreement. *) +(* *) +(* *********************************************************************) + +(* Simple functions to serialize RISC-V Asm to JSON *) + +(* Dummy function *) +let destination: string option ref = ref None + +let sdump_folder = ref "" + +let print_if prog sourcename = + () + +let pp_mnemonics pp = () diff --git a/kvx/Asmaux.v b/kvx/Asmaux.v new file mode 100644 index 00000000..2abd445e --- /dev/null +++ b/kvx/Asmaux.v @@ -0,0 +1,19 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Asm. +Require Import AST. + +(** Constant only needed by Asmexpandaux.ml *) +Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}. diff --git a/kvx/Asmblock.v b/kvx/Asmblock.v new file mode 100644 index 00000000..17ebac32 --- /dev/null +++ b/kvx/Asmblock.v @@ -0,0 +1,394 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Sequential block semantics for KVX assembly. The syntax is given in AsmVLIW *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Memory. +Require Import Events. +Require Import Globalenvs. +Require Import Smallstep. +Require Import Locations. +Require Stacklayout. +Require Import Conventions. +Require Import Errors. +Require Export Asmvliw. +Require Import Lia. + +(* Notations necessary to hook Asmvliw definitions *) +Notation undef_caller_save_regs := Asmvliw.undef_caller_save_regs. +Notation regset := Asmvliw.regset. +Notation extcall_arg := Asmvliw.extcall_arg. +Notation extcall_arg_pair := Asmvliw.extcall_arg_pair. +Notation extcall_arguments := Asmvliw.extcall_arguments. +Notation set_res := Asmvliw.set_res. +Notation function := Asmvliw.function. +Notation bblocks := Asmvliw.bblocks. +Notation header := Asmvliw.header. +Notation body := Asmvliw.body. +Notation exit := Asmvliw.exit. +Notation correct := Asmvliw.correct. + +(** * Auxiliary utilies on basic blocks *) + +(** ** A unified view of Kalray instructions *) + +Inductive instruction : Type := + | PBasic (i: basic) + | PControl (i: control) +. + +Coercion PBasic: basic >-> instruction. +Coercion PControl: control >-> instruction. + +Definition code := list instruction. +Definition bcode := list basic. + +Fixpoint basics_to_code (l: list basic) := + match l with + | nil => nil + | bi::l => (PBasic bi)::(basics_to_code l) + end. + +Fixpoint code_to_basics (c: code) := + match c with + | (PBasic i)::c => + match code_to_basics c with + | None => None + | Some l => Some (i::l) + end + | _::c => None + | nil => Some nil + end. + +Lemma code_to_basics_id: forall c, code_to_basics (basics_to_code c) = Some c. +Proof. + intros. induction c as [|i c]; cbn; auto. + rewrite IHc. auto. +Qed. + +Lemma code_to_basics_dist: + forall c c' l l', + code_to_basics c = Some l -> + code_to_basics c' = Some l' -> + code_to_basics (c ++ c') = Some (l ++ l'). +Proof. + induction c as [|i c]; cbn; auto. + - intros. inv H. cbn. auto. + - intros. destruct i; try discriminate. destruct (code_to_basics c) eqn:CTB; try discriminate. + inv H. erewrite IHc; eauto. auto. +Qed. + +(** + Asmblockgen will have to translate a Mach control into a list of instructions of the form + i1 :: i2 :: i3 :: ctl :: nil ; where i1..i3 are basic instructions, ctl is a control instruction + These functions provide way to extract the basic / control instructions +*) + +Fixpoint extract_basic (c: code) := + match c with + | nil => nil + | PBasic i :: c => i :: (extract_basic c) + | PControl i :: c => nil + end. + +Fixpoint extract_ctl (c: code) := + match c with + | nil => None + | PBasic i :: c => extract_ctl c + | PControl i :: nil => Some i + | PControl i :: _ => None (* if the first found control instruction isn't the last *) + end. + +(** ** Wellformness of basic blocks *) + +Ltac exploreInst := + repeat match goal with + | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var + | [ H : OK _ = OK _ |- _ ] => monadInv H + | [ |- context[if ?b then _ else _] ] => destruct b + | [ |- context[match ?m with | _ => _ end] ] => destruct m + | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m + | [ H : bind _ _ = OK _ |- _ ] => monadInv H + | [ H : Error _ = OK _ |- _ ] => inversion H + end. + +Definition non_empty_bblock (body: list basic) (exit: option control): Prop + := body <> nil \/ exit <> None. + +Lemma non_empty_bblock_refl: + forall body exit, + non_empty_bblock body exit <-> + Is_true (non_empty_bblockb body exit). +Proof. + intros. split. + - destruct body; destruct exit. + all: cbn; auto. intros. inversion H; contradiction. + - destruct body; destruct exit. + all: cbn; auto. + all: intros; try (right; discriminate); try (left; discriminate). + contradiction. +Qed. + +Definition builtin_alone (body: list basic) (exit: option control) := forall ef args res, + exit = Some (PExpand (Pbuiltin ef args res)) -> body = nil. + + +Lemma builtin_alone_refl: + forall body exit, + builtin_alone body exit <-> Is_true (builtin_aloneb body exit). +Proof. + intros. split. + - destruct body; destruct exit. + all: cbn; auto. + all: exploreInst; cbn; auto. + unfold builtin_alone. intros. assert (Some (Pbuiltin e l b0) = Some (Pbuiltin e l b0)); auto. + assert (b :: body = nil). eapply H; eauto. discriminate. + - destruct body; destruct exit. + all: cbn; auto; try constructor. + + exploreInst; try discriminate. + cbn. contradiction. + + intros. discriminate. +Qed. + +Definition wf_bblock (body: list basic) (exit: option control) := + non_empty_bblock body exit /\ builtin_alone body exit. + +Lemma wf_bblock_refl: + forall body exit, + wf_bblock body exit <-> Is_true (wf_bblockb body exit). +Proof. + intros. split. + - intros. inv H. apply non_empty_bblock_refl in H0. apply builtin_alone_refl in H1. + apply andb_prop_intro. auto. + - intros. apply andb_prop_elim in H. inv H. + apply non_empty_bblock_refl in H0. apply builtin_alone_refl in H1. + unfold wf_bblock. split; auto. +Qed. + +Ltac bblock_auto_correct := (apply non_empty_bblock_refl; try discriminate; try (left; discriminate); try (right; discriminate)). + +Lemma Istrue_proof_irrelevant (b: bool): forall (p1 p2:Is_true b), p1=p2. +Proof. + destruct b; cbn; auto. + - destruct p1, p2; auto. + - destruct p1. +Qed. + +Lemma bblock_equality bb1 bb2: header bb1=header bb2 -> body bb1 = body bb2 -> exit bb1 = exit bb2 -> bb1 = bb2. +Proof. + destruct bb1 as [h1 b1 e1 c1], bb2 as [h2 b2 e2 c2]; cbn. + intros; subst. + rewrite (Istrue_proof_irrelevant _ c1 c2). + auto. +Qed. + +Program Definition bblock_single_inst (i: instruction) := + match i with + | PBasic b => {| header:=nil; body:=(b::nil); exit:=None |} + | PControl ctl => {| header:=nil; body:=nil; exit:=(Some ctl) |} + end. +Next Obligation. + apply wf_bblock_refl. constructor. + right. discriminate. + constructor. +Qed. + +Lemma length_nonil {A: Type} : forall l:(list A), l <> nil -> (length l > 0)%nat. +Proof. + intros. destruct l; try (contradict H; auto; fail). + cbn. lia. +Qed. + +Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0. +Proof. + intros. destruct z; auto. + - contradict H. cbn. apply gt_irrefl. + - apply Zgt_pos_0. + - contradict H. cbn. apply gt_irrefl. +Qed. + +Lemma size_positive (b:bblock): size b > 0. +Proof. + unfold size. destruct b as [hd bdy ex cor]. cbn. + destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; lia). + inversion cor; contradict H; cbn; auto. +Qed. + + +Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}. +Next Obligation. + destruct bb; cbn. assumption. +Defined. + +Lemma no_header_size: + forall bb, size (no_header bb) = size bb. +Proof. + intros. destruct bb as [hd bdy ex COR]. unfold no_header. cbn. reflexivity. +Qed. + +Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}. +Next Obligation. + destruct bb; cbn. assumption. +Defined. + +Lemma stick_header_size: + forall h bb, size (stick_header h bb) = size bb. +Proof. + intros. destruct bb. unfold stick_header. cbn. reflexivity. +Qed. + +Lemma stick_header_no_header: + forall bb, stick_header (header bb) (no_header bb) = bb. +Proof. + intros. destruct bb as [hd bdy ex COR]. cbn. unfold no_header; unfold stick_header; cbn. reflexivity. +Qed. + +(** * Sequential Semantics of basic blocks *) +Section RELSEM. + +(** Execution of arith instructions *) + +Variable ge: genv. + +Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset := parexec_arith_instr ge ai rs rs. + +(** Auxiliaries for memory accesses *) + +Definition exec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a: ireg) (ofs: offset) := parexec_load_offset trap chunk rs rs m m d a ofs. + +Definition exec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_reg trap chunk rs rs m m d a ro. + +Definition exec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_regxs trap chunk rs rs m m d a ro. + +Definition exec_load_q_offset (rs: regset) (m: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := parexec_load_q_offset rs rs m m d a ofs. + +Definition exec_load_o_offset (rs: regset) (m: mem) (d : gpreg_o) (a: ireg) (ofs: offset) := parexec_load_o_offset rs rs m m d a ofs. + +Definition exec_store_offset (chunk: memory_chunk) (rs: regset) (m: mem) (s a: ireg) (ofs: offset) := parexec_store_offset chunk rs rs m m s a ofs. + +Definition exec_store_q_offset (rs: regset) (m: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := parexec_store_q_offset rs rs m m s a ofs. + +Definition exec_store_o_offset (rs: regset) (m: mem) (s : gpreg_o) (a: ireg) (ofs: offset) := parexec_store_o_offset rs rs m m s a ofs. + +Definition exec_store_reg (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_reg chunk rs rs m m s a ro. + +Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_regxs chunk rs rs m m s a ro. + +(** * basic instructions *) + +Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := bstep ge bi rs rs m m. + +Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome := + match body with + | nil => Next rs m + | bi::body' => + match exec_basic_instr bi rs m with + | Next rs' m' => exec_body body' rs' m' + | Stuck => Stuck + end + end. + + +Theorem builtin_body_nil: + forall bb ef args res, exit bb = Some (PExpand (Pbuiltin ef args res)) -> body bb = nil. +Proof. + intros. destruct bb as [hd bdy ex WF]. cbn in *. + apply wf_bblock_refl in WF. inv WF. unfold builtin_alone in H1. + eapply H1; eauto. +Qed. + +Theorem exec_body_app: + forall l l' rs m rs'' m'', + exec_body (l ++ l') rs m = Next rs'' m'' -> + exists rs' m', + exec_body l rs m = Next rs' m' + /\ exec_body l' rs' m' = Next rs'' m''. +Proof. + induction l. + - intros. cbn in H. repeat eexists. auto. + - intros. rewrite <- app_comm_cons in H. cbn in H. + destruct (exec_basic_instr a rs m) eqn:EXEBI. + + apply IHl in H. destruct H as (rs1 & m1 & EXEB1 & EXEB2). + repeat eexists. cbn. rewrite EXEBI. eauto. auto. + + discriminate. +Qed. + +(** Position corresponding to a label *) + +Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome := par_goto_label f lbl rs rs m. + +Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome := par_eval_branch f l rs rs m res. + +Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome := parexec_control ge f oc rs rs m. + +Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome := + match exec_body (body b) rs0 m with + | Next rs' m' => + let rs1 := nextblock b rs' in exec_control f (exit b) rs1 m' + | Stuck => Stuck + end. + + +(** Execution of the instruction at [rs PC]. *) + +Inductive step: state -> trace -> state -> Prop := + | exec_step_internal: + forall b ofs f bi rs m rs' m', + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi -> + exec_bblock f bi rs m = Next rs' m' -> + step (State rs m) E0 (State rs' m') + | exec_step_builtin: + forall b ofs f ef args res rs m vargs t vres rs' m' bi, + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) f.(fn_blocks) = Some bi -> + exit bi = Some (PExpand (Pbuiltin ef args res)) -> + eval_builtin_args ge rs (rs SP) m args vargs -> + external_call ef ge vargs m t vres m' -> + rs' = nextblock bi + (set_res res vres + (undef_regs (map preg_of (destroyed_by_builtin ef)) + (rs#RTMP <- Vundef))) -> + step (State rs m) t (State rs' m') + | exec_step_external: + forall b ef args res rs m t rs' m', + rs PC = Vptr b Ptrofs.zero -> + Genv.find_funct_ptr ge b = Some (External ef) -> + external_call ef ge args m t res m' -> + extcall_arguments rs m (ef_sig ef) args -> + rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) -> + step (State rs m) t (State rs' m') + . + +End RELSEM. + +Definition semantics (p: program) := + Semantics step (initial_state p) final_state (Genv.globalenv p). + +Definition data_preg (r: preg) : bool := + match r with + | RA => false + | IR GPRA => false + | IR RTMP => false + | IR _ => true + | PC => false + end. diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v new file mode 100644 index 00000000..a9786e0a --- /dev/null +++ b/kvx/Asmblockdeps.v @@ -0,0 +1,1845 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Translation from [Asmvliw] to [AbstractBB] *) + +(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmvliw] into [L]. + [AbstractBB] will then define two semantics for [L]: a sequential and a parallel one. + We prove a bisimulation between the parallel semantics of [L] and [AsmVLIW]. + We also prove a bisimulation between the sequential semantics of [L] and [Asmblock]. + Then, the checkers on [Asmblock] and [Asmvliw] are deduced from those of [L]. + *) + +Require Import AST. +Require Import Asmblock. +Require Import Asmblockgenproof0 Asmblockprops. +Require Import Values. +Require Import Globalenvs. +Require Import Memory. +Require Import Errors. +Require Import Integers. +Require Import Floats. +Require Import ZArith. +Require Import Coqlib. +Require Import ImpSimuTest. +Require Import Axioms. +Require Import Parallelizability. +Require Import Asmvliw Permutation. +Require Import Chunks. + +Require Import Lia. + + +Import ListNotations. +Local Open Scope list_scope. + +Open Scope impure. + +(** Definition of [L] *) + +Module P<: ImpParam. +Module R := Pos. + +Section IMPPARAM. + +Definition env := Genv.t fundef unit. + +Inductive genv_wrap := Genv (ge: env) (fn: function). +Definition genv := genv_wrap. + +Variable Ge: genv. + +Inductive value_wrap := + | Val (v: val) + | Memstate (m: mem) +. + +Definition value := value_wrap. + +Inductive control_op := + | Oj_l (l: label) + | Ocb (bt: btest) (l: label) + | Ocbu (bt: btest) (l: label) + | Odiv + | Odivu + | OError + | OIncremPC (sz: Z) + | Ojumptable (l: list label) +. + +Inductive arith_op := + | OArithR (n: arith_name_r) + | OArithRR (n: arith_name_rr) + | OArithRI32 (n: arith_name_ri32) (imm: int) + | OArithRI64 (n: arith_name_ri64) (imm: int64) + | OArithRF32 (n: arith_name_rf32) (imm: float32) + | OArithRF64 (n: arith_name_rf64) (imm: float) + | OArithRRR (n: arith_name_rrr) + | OArithRRI32 (n: arith_name_rri32) (imm: int) + | OArithRRI64 (n: arith_name_rri64) (imm: int64) + | OArithARRR (n: arith_name_arrr) + | OArithARR (n: arith_name_arr) + | OArithARRI32 (n: arith_name_arri32) (imm: int) + | OArithARRI64 (n: arith_name_arri64) (imm: int64) +. + +Coercion OArithR: arith_name_r >-> arith_op. +Coercion OArithRR: arith_name_rr >-> arith_op. +Coercion OArithRI32: arith_name_ri32 >-> Funclass. +Coercion OArithRI64: arith_name_ri64 >-> Funclass. +Coercion OArithRF32: arith_name_rf32 >-> Funclass. +Coercion OArithRF64: arith_name_rf64 >-> Funclass. +Coercion OArithRRR: arith_name_rrr >-> arith_op. +Coercion OArithRRI32: arith_name_rri32 >-> Funclass. +Coercion OArithRRI64: arith_name_rri64 >-> Funclass. + +Inductive load_op := + | OLoadRRO (n: load_name) (trap: trapping_mode) (ofs: offset) + | OLoadRRR (n: load_name) (trap: trapping_mode) + | OLoadRRRXS (n: load_name) (trap: trapping_mode) +. + +Coercion OLoadRRO: load_name >-> Funclass. + +Inductive store_op := + | OStoreRRO (n: store_name) (ofs: offset) + | OStoreRRR (n: store_name) + | OStoreRRRXS (n: store_name) +. + +Coercion OStoreRRO: store_name >-> Funclass. + +Inductive op_wrap := + | Arith (ao: arith_op) + | Load (lo: load_op) + | Store (so: store_op) + | Control (co: control_op) + | Allocframe (sz: Z) (pos: ptrofs) + | Allocframe2 (sz: Z) (pos: ptrofs) + | Freeframe (sz: Z) (pos: ptrofs) + | Freeframe2 (sz: Z) (pos: ptrofs) + | Constant (v: val) + | Fail +. + +Coercion Arith: arith_op >-> op_wrap. +Coercion Load: load_op >-> op_wrap. +Coercion Store: store_op >-> op_wrap. +Coercion Control: control_op >-> op_wrap. + +Definition op := op_wrap. + +Definition arith_eval (ao: arith_op) (l: list value) := + let (ge, fn) := Ge in + match ao, l with + | OArithR n, [] => Some (Val (arith_eval_r ge n)) + + | OArithRR n, [Val v] => Some (Val (arith_eval_rr n v)) + + | OArithRI32 n i, [] => Some (Val (arith_eval_ri32 n i)) + | OArithRI64 n i, [] => Some (Val (arith_eval_ri64 n i)) + | OArithRF32 n i, [] => Some (Val (arith_eval_rf32 n i)) + | OArithRF64 n i, [] => Some (Val (arith_eval_rf64 n i)) + + | OArithRRR n, [Val v1; Val v2] => Some (Val (arith_eval_rrr n v1 v2)) + | OArithRRI32 n i, [Val v] => Some (Val (arith_eval_rri32 n v i)) + | OArithRRI64 n i, [Val v] => Some (Val (arith_eval_rri64 n v i)) + + | OArithARR n, [Val v1; Val v2] => Some (Val (arith_eval_arr n v1 v2)) + | OArithARRR n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrr n v1 v2 v3)) + | OArithARRI32 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri32 n v1 v2 i)) + | OArithARRI64 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri64 n v1 v2 i)) + + | _, _ => None + end. + +Definition exec_incorrect_load trap := + match trap with + | TRAP => None + | NOTRAP => Some (Val Vundef) + end. + +Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) := + let (ge, fn) := Ge in + match (eval_offset ofs) with + | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with + | None => exec_incorrect_load trap + | Some vl => Some (Val vl) + end + | _ => None + end. + +Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := + match Mem.loadv chunk m (Val.addl v vo) with + | None => exec_incorrect_load trap + | Some vl => Some (Val vl) + end. + +Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := + match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with + | None => exec_incorrect_load trap + | Some vl => Some (Val vl) + end. + +Definition load_eval (lo: load_op) (l: list value) := + match lo, l with + | OLoadRRO n trap ofs, [Val v; Memstate m] => exec_load_deps_offset trap (load_chunk n) m v ofs + | OLoadRRR n trap, [Val v; Val vo; Memstate m] => exec_load_deps_reg trap (load_chunk n) m v vo + | OLoadRRRXS n trap, [Val v; Val vo; Memstate m] => exec_load_deps_regxs trap (load_chunk n) m v vo + | _, _ => None + end. + +Definition exec_store_deps_offset (chunk: memory_chunk) (m: mem) (vs va: val) (ofs: offset) := + let (ge, fn) := Ge in + match (eval_offset ofs) with + | OK ptr => match Mem.storev chunk m (Val.offset_ptr va ptr) vs with + | None => None + | Some m' => Some (Memstate m') + end + | _ => None + end. + +Definition exec_store_deps_reg (chunk: memory_chunk) (m: mem) (vs va vo: val) := + match Mem.storev chunk m (Val.addl va vo) vs with + | None => None + | Some m' => Some (Memstate m') + end. + +Definition exec_store_deps_regxs (chunk: memory_chunk) (m: mem) (vs va vo: val) := + match Mem.storev chunk m (Val.addl va (Val.shll vo (scale_of_chunk chunk))) vs with + | None => None + | Some m' => Some (Memstate m') + end. + +Definition store_eval (so: store_op) (l: list value) := + match so, l with + | OStoreRRO n ofs, [Val vs; Val va; Memstate m] => exec_store_deps_offset (store_chunk n) m vs va ofs + | OStoreRRR n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_reg (store_chunk n) m vs va vo + | OStoreRRRXS n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_regxs (store_chunk n) m vs va vo + | _, _ => None + end. + +Local Open Scope Z. + +Remark size_chunk_positive: forall chunk, + (size_chunk chunk) > 0. +Proof. + destruct chunk; simpl; lia. +Qed. + +Remark size_chunk_small: forall chunk, + (size_chunk chunk) <= 8. +Proof. + destruct chunk; simpl; lia. +Qed. + +Definition disjoint_chunks + (ofs1 : offset) (chunk1 : memory_chunk) + (ofs2 : offset) (chunk2 : memory_chunk) := + Intv.disjoint ((Ptrofs.unsigned ofs1), + ((Ptrofs.unsigned ofs1) + (size_chunk chunk1))) + ((Ptrofs.unsigned ofs2), + ((Ptrofs.unsigned ofs2) + (size_chunk chunk2))). + +Definition small_offset_threshold := 18446744073709551608. + +Lemma store_store_disjoint_offsets : + forall n1 n2 ofs1 ofs2 vs1 vs2 va m0 m1 m2 m1' m2', + (disjoint_chunks ofs1 (store_chunk n1) ofs2 (store_chunk n2)) -> + (Ptrofs.unsigned ofs1) < small_offset_threshold -> + (Ptrofs.unsigned ofs2) < small_offset_threshold -> + store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m0] = Some (Memstate m1) -> + store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m1] = Some (Memstate m2) -> + store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m0] = Some (Memstate m1') -> + store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m1'] = Some (Memstate m2') -> + m2 = m2'. +Proof. + intros until m2'. + intros DISJOINT SMALL1 SMALL2 STORE0 STORE1 STORE0' STORE1'. + unfold disjoint_chunks in DISJOINT. + destruct vs1 as [v1 | ]; simpl in STORE0, STORE1'; try congruence. + destruct vs2 as [v2 | ]; simpl in STORE1, STORE0'; try congruence. + destruct va as [base | ]; try congruence. + unfold exec_store_deps_offset in *. + destruct Ge. + unfold eval_offset in *; simpl in *. + unfold Mem.storev in *. + unfold Val.offset_ptr in *. + destruct base as [ | | | | | wblock wpofs] in * ; try congruence. + destruct (Mem.store _ _ _ _ _) eqn:E0; try congruence. + inv STORE0. + destruct (Mem.store (store_chunk n2) _ _ _ _) eqn:E1; try congruence. + inv STORE1. + destruct (Mem.store (store_chunk n2) m0 _ _ _) eqn:E0'; try congruence. + inv STORE0'. + destruct (Mem.store _ m1' _ _ _) eqn:E1'; try congruence. + inv STORE1'. + assert (Some m2 = Some m2'). + 2: congruence. + rewrite <- E1. + rewrite <- E1'. + eapply Mem.store_store_other. + 2, 3: eassumption. + + right. + pose proof (size_chunk_positive (store_chunk n1)). + pose proof (size_chunk_positive (store_chunk n2)). + pose proof (size_chunk_small (store_chunk n1)). + pose proof (size_chunk_small (store_chunk n2)). + destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]]; + unfold Intv.empty in DIS; simpl in DIS. + 1, 2: lia. + pose proof (Ptrofs.unsigned_range ofs1). + pose proof (Ptrofs.unsigned_range ofs2). + unfold small_offset_threshold in *. + destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1; + destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2; + change Ptrofs.modulus with 18446744073709551616 in *; + lia. +Qed. + +Lemma load_store_disjoint_offsets : + forall n1 n2 tm ofs1 ofs2 vs va m0 m1, + (disjoint_chunks ofs1 (store_chunk n1) ofs2 (load_chunk n2)) -> + (Ptrofs.unsigned ofs1) < small_offset_threshold -> + (Ptrofs.unsigned ofs2) < small_offset_threshold -> + store_eval (OStoreRRO n1 ofs1) [vs; va; Memstate m0] = Some (Memstate m1) -> + load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m1] = + load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m0]. +Proof. + intros until m1. + intros DISJOINT SMALL1 SMALL2 STORE0. + destruct vs as [v | ]; simpl in STORE0; try congruence. + destruct va as [base | ]; try congruence. + unfold exec_store_deps_offset in *. + unfold eval_offset in *; simpl in *. + unfold exec_load_deps_offset. + unfold Mem.storev, Mem.loadv in *. + destruct Ge in *. + unfold eval_offset in *. + unfold Val.offset_ptr in *. + destruct base as [ | | | | | wblock wpofs] in * ; try congruence. + destruct (Mem.store _ _ _ _) eqn:E0; try congruence. + inv STORE0. + assert ( + (Mem.load (load_chunk n2) m1 wblock + (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) = + (Mem.load (load_chunk n2) m0 wblock + (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) ) as LOADS. + { + eapply Mem.load_store_other. + eassumption. + right. + pose proof (size_chunk_positive (store_chunk n1)). + pose proof (size_chunk_positive (load_chunk n2)). + pose proof (size_chunk_small (store_chunk n1)). + pose proof (size_chunk_small (load_chunk n2)). + destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]]; + unfold Intv.empty in DIS; simpl in DIS. + 1,2: lia. + + pose proof (Ptrofs.unsigned_range ofs1). + pose proof (Ptrofs.unsigned_range ofs2). + unfold small_offset_threshold in *. + destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1; + destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2; + change Ptrofs.modulus with 18446744073709551616 in *; + lia. + } + destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence. +Qed. + +Definition goto_label_deps (f: function) (lbl: label) (vpc: val) := + match label_pos lbl 0 (fn_blocks f) with + | None => None + | Some pos => + match vpc with + | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos))) + | _ => None + end + end. + +Definition eval_branch_deps (f: function) (l: label) (vpc: val) (res: option bool) := + match res with + | Some true => goto_label_deps f l vpc + | Some false => Some (Val vpc) + | None => None + end. + +Definition control_eval (o: control_op) (l: list value) := + let (ge, fn) := Ge in + match o, l with + | (Ojumptable tbl), [Val index; Val vpc] => + match index with + | Vint n => + match list_nth_z tbl (Int.unsigned n) with + | None => None + | Some lbl => goto_label_deps fn lbl vpc + end + | _ => None + end + | Oj_l l, [Val vpc] => goto_label_deps fn l vpc + | Ocb bt l, [Val v; Val vpc] => + match cmp_for_btest bt with + | (Some c, Int) => eval_branch_deps fn l vpc (Val.cmp_bool c v (Vint (Int.repr 0))) + | (Some c, Long) => eval_branch_deps fn l vpc (Val.cmpl_bool c v (Vlong (Int64.repr 0))) + | (None, _) => None + end + | Ocbu bt l, [Val v; Val vpc] => + match cmpu_for_btest bt with + | (Some c, Int) => eval_branch_deps fn l vpc (Val.mxcmpu_bool c v (Vint (Int.repr 0))) + | (Some c, Long) => eval_branch_deps fn l vpc (Val.mxcmplu_bool c v (Vlong (Int64.repr 0))) + | (None, _) => None + end + | Odiv, [Val v1; Val v2] => + match Val.divs v1 v2 with + | Some v => Some (Val v) + | None => None + end + | Odivu, [Val v1; Val v2] => + match Val.divu v1 v2 with + | Some v => Some (Val v) + | None => None + end + | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz))) + | OError, _ => None + | _, _ => None + end. + +Definition op_eval (o: op) (l: list value) := + match o, l with + | Arith o, l => arith_eval o l + | Load o, l => load_eval o l + | Store o, l => store_eval o l + | Control o, l => control_eval o l + | Allocframe sz pos, [Val spv; Memstate m] => + let (m1, stk) := Mem.alloc m 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with + | None => None + | Some m => Some (Memstate m) + end + | Allocframe2 sz pos, [Val spv; Memstate m] => + let (m1, stk) := Mem.alloc m 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with + | None => None + | Some m => Some (Val sp) + end + | Freeframe sz pos, [Val spv; Memstate m] => + match Mem.loadv Mptr m (Val.offset_ptr spv pos) with + | None => None + | Some v => + match spv with + | Vptr stk ofs => + match Mem.free m stk 0 sz with + | None => None + | Some m' => Some (Memstate m') + end + | _ => None + end + end + | Freeframe2 sz pos, [Val spv; Memstate m] => + match Mem.loadv Mptr m (Val.offset_ptr spv pos) with + | None => None + | Some v => + match spv with + | Vptr stk ofs => + match Mem.free m stk 0 sz with + | None => None + | Some m' => Some (Val v) + end + | _ => None + end + end + | Constant v, [] => Some (Val v) + | Fail, _ => None + | _, _ => None + end. + + +Definition arith_op_eq (o1 o2: arith_op): ?? bool := + match o1 with + | OArithR n1 => + match o2 with OArithR n2 => struct_eq n1 n2 | _ => RET false end + | OArithRR n1 => + match o2 with OArithRR n2 => phys_eq n1 n2 | _ => RET false end + | OArithRI32 n1 i1 => + match o2 with OArithRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithRI64 n1 i1 => + match o2 with OArithRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithRF32 n1 i1 => + match o2 with OArithRF32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithRF64 n1 i1 => + match o2 with OArithRF64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithRRR n1 => + match o2 with OArithRRR n2 => phys_eq n1 n2 | _ => RET false end + | OArithRRI32 n1 i1 => + match o2 with OArithRRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithRRI64 n1 i1 => + match o2 with OArithRRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithARRR n1 => + match o2 with OArithARRR n2 => phys_eq n1 n2 | _ => RET false end + | OArithARR n1 => + match o2 with OArithARR n2 => phys_eq n1 n2 | _ => RET false end + | OArithARRI32 n1 i1 => + match o2 with OArithARRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + | OArithARRI64 n1 i1 => + match o2 with OArithARRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end + end. + +Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp). + +Lemma arith_op_eq_correct o1 o2: + WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. +Proof. + destruct o1, o2; my_wlp_simplify; try congruence. +Qed. +Hint Resolve arith_op_eq_correct: wlp. +Opaque arith_op_eq_correct. + +Definition offset_eq (ofs1 ofs2 : offset): ?? bool := + RET (Ptrofs.eq ofs1 ofs2). + +Lemma offset_eq_correct ofs1 ofs2: + WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2. +Proof. + wlp_simplify. + pose (Ptrofs.eq_spec ofs1 ofs2). + rewrite H in *. + trivial. +Qed. +Hint Resolve offset_eq_correct: wlp. + +Definition trapping_mode_eq trap1 trap2 := + RET (match trap1, trap2 with + | TRAP, TRAP | NOTRAP, NOTRAP => true + | TRAP, NOTRAP | NOTRAP, TRAP => false + end). +Lemma trapping_mode_eq_correct t1 t2: + WHEN trapping_mode_eq t1 t2 ~> b THEN b = true -> t1 = t2. +Proof. + wlp_simplify. + destruct t1; destruct t2; trivial; discriminate. +Qed. +Hint Resolve trapping_mode_eq_correct: wlp. + +Definition load_op_eq (o1 o2: load_op): ?? bool := + match o1 with + | OLoadRRO n1 trap ofs1 => + match o2 with + | OLoadRRO n2 trap2 ofs2 => iandb (phys_eq n1 n2) (iandb (offset_eq ofs1 ofs2) (trapping_mode_eq trap trap2)) + | _ => RET false + end + | OLoadRRR n1 trap => + match o2 with + | OLoadRRR n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2) + | _ => RET false + end + | OLoadRRRXS n1 trap => + match o2 with + | OLoadRRRXS n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2) + | _ => RET false + end + end. + +Lemma load_op_eq_correct o1 o2: + WHEN load_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. +Proof. + destruct o1, o2; wlp_simplify; try discriminate. + { f_equal. + destruct trap, trap0; simpl in *; trivial; discriminate. + pose (Ptrofs.eq_spec ofs ofs0). + rewrite H in *. trivial. } + all: destruct trap, trap0; simpl in *; trivial; discriminate. +Qed. +Hint Resolve load_op_eq_correct: wlp. +Opaque load_op_eq_correct. + +Definition store_op_eq (o1 o2: store_op): ?? bool := + match o1 with + | OStoreRRO n1 ofs1 => + match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end + | OStoreRRR n1 => + match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end + | OStoreRRRXS n1 => + match o2 with OStoreRRRXS n2 => phys_eq n1 n2 | _ => RET false end + end. + +Lemma store_op_eq_correct o1 o2: + WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. +Proof. + destruct o1, o2; wlp_simplify; try discriminate. + - f_equal. pose (Ptrofs.eq_spec ofs ofs0). + rewrite H in *. trivial. + - congruence. + - congruence. +Qed. +Hint Resolve store_op_eq_correct: wlp. +Opaque store_op_eq_correct. + +Definition control_op_eq (c1 c2: control_op): ?? bool := + match c1 with + | Oj_l l1 => + match c2 with Oj_l l2 => phys_eq l1 l2 | _ => RET false end + | Ocb bt1 l1 => + match c2 with Ocb bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end + | Ocbu bt1 l1 => + match c2 with Ocbu bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end + | Ojumptable tbl1 => + match c2 with Ojumptable tbl2 => phys_eq tbl1 tbl2 | _ => RET false end + | Odiv => + match c2 with Odiv => RET true | _ => RET false end + | Odivu => + match c2 with Odivu => RET true | _ => RET false end + | OIncremPC sz1 => + match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end + | OError => + match c2 with OError => RET true | _ => RET false end + end. + +Lemma control_op_eq_correct c1 c2: + WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2. +Proof. + destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence. +Qed. +Hint Resolve control_op_eq_correct: wlp. +Opaque control_op_eq_correct. + +Definition op_eq (o1 o2: op): ?? bool := + match o1 with + | Arith i1 => + match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end + | Load i1 => + match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end + | Store i1 => + match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end + | Control i1 => + match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end + | Allocframe sz1 pos1 => + match o2 with Allocframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end + | Allocframe2 sz1 pos1 => + match o2 with Allocframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end + | Freeframe sz1 pos1 => + match o2 with Freeframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end + | Freeframe2 sz1 pos1 => + match o2 with Freeframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end + | Constant c1 => + match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end + | Fail => + match o2 with Fail => RET true | _ => RET false end + end. + +Theorem op_eq_correct o1 o2: + WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2. +Proof. + destruct o1, o2; wlp_simplify; try rewrite Z.eqb_eq in * |- ; try congruence. +Qed. +Hint Resolve op_eq_correct: wlp. +Global Opaque op_eq_correct. + +End IMPPARAM. + +End P. + +Module L <: ISeqLanguage with Module LP:=P. + +Module LP:=P. + +Include MkSeqLanguage P. + +End L. + +Module IST := ImpSimu L ImpPosDict. + +Import L. +Import P. + +(** Compilation from [Asmvliw] to [L] *) + +Local Open Scope positive_scope. + +Definition pmem : R.t := 1. + +Definition ireg_to_pos (ir: ireg) : R.t := + match ir with + | GPR0 => 1 | GPR1 => 2 | GPR2 => 3 | GPR3 => 4 | GPR4 => 5 | GPR5 => 6 | GPR6 => 7 | GPR7 => 8 | GPR8 => 9 | GPR9 => 10 + | GPR10 => 11 | GPR11 => 12 | GPR12 => 13 | GPR13 => 14 | GPR14 => 15 | GPR15 => 16 | GPR16 => 17 | GPR17 => 18 | GPR18 => 19 | GPR19 => 20 + | GPR20 => 21 | GPR21 => 22 | GPR22 => 23 | GPR23 => 24 | GPR24 => 25 | GPR25 => 26 | GPR26 => 27 | GPR27 => 28 | GPR28 => 29 | GPR29 => 30 + | GPR30 => 31 | GPR31 => 32 | GPR32 => 33 | GPR33 => 34 | GPR34 => 35 | GPR35 => 36 | GPR36 => 37 | GPR37 => 38 | GPR38 => 39 | GPR39 => 40 + | GPR40 => 41 | GPR41 => 42 | GPR42 => 43 | GPR43 => 44 | GPR44 => 45 | GPR45 => 46 | GPR46 => 47 | GPR47 => 48 | GPR48 => 49 | GPR49 => 50 + | GPR50 => 51 | GPR51 => 52 | GPR52 => 53 | GPR53 => 54 | GPR54 => 55 | GPR55 => 56 | GPR56 => 57 | GPR57 => 58 | GPR58 => 59 | GPR59 => 60 + | GPR60 => 61 | GPR61 => 62 | GPR62 => 63 | GPR63 => 64 + end +. + +Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'. +Proof. + destruct r; destruct r'; try contradiction; discriminate. +Qed. + +Definition ppos (r: preg) : R.t := + match r with + | RA => 2 + | PC => 3 + | IR ir => 3 + ireg_to_pos ir + end +. + +Notation "# r" := (ppos r) (at level 100, right associativity). + +Lemma not_eq_add: + forall k n n', n <> n' -> k + n <> k + n'. +Proof. + intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto. +Qed. + +Lemma ppos_discr: forall r r', r <> r' -> ppos r <> ppos r'. +Proof. + destruct r; destruct r'. + all: try discriminate; try contradiction. + - intros. apply not_eq_add. apply ireg_to_pos_discr. congruence. + - intros. unfold ppos. replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2). + apply Pos.add_no_neutral. + rewrite Pos.add_comm, Pos.add_assoc. reflexivity. + - intros. unfold ppos. rewrite Pos.add_comm. apply Pos.add_no_neutral. + - intros. unfold ppos. apply not_eq_sym. + replace (3 + ireg_to_pos g) with ((1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral. + rewrite Pos.add_comm, Pos.add_assoc. reflexivity. + - intros. unfold ppos. apply not_eq_sym. rewrite Pos.add_comm. apply Pos.add_no_neutral. +Qed. + +Lemma ppos_pmem_discr: forall r, pmem <> ppos r. +Proof. + intros. destruct r. + - unfold ppos. unfold pmem. apply not_eq_sym. rewrite Pos.add_comm. replace 3 with (2 + 1). rewrite Pos.add_assoc. apply Pos.add_no_neutral. + reflexivity. + - unfold ppos. unfold pmem. discriminate. + - unfold ppos. unfold pmem. discriminate. +Qed. + +(** Inversion functions, used for debug traces *) + +Definition pos_to_ireg (p: R.t) : option gpreg := + match p with + | 1 => Some GPR0 | 2 => Some GPR1 | 3 => Some GPR2 | 4 => Some GPR3 | 5 => Some GPR4 | 6 => Some GPR5 | 7 => Some GPR6 | 8 => Some GPR7 | 9 => Some GPR8 | 10 => Some GPR9 + | 11 => Some GPR10 | 12 => Some GPR11 | 13 => Some GPR12 | 14 => Some GPR13 | 15 => Some GPR14 | 16 => Some GPR15 | 17 => Some GPR16 | 18 => Some GPR17 | 19 => Some GPR18 | 20 => Some GPR19 + | 21 => Some GPR20 | 22 => Some GPR21 | 23 => Some GPR22 | 24 => Some GPR23 | 25 => Some GPR24 | 26 => Some GPR25 | 27 => Some GPR26 | 28 => Some GPR27 | 29 => Some GPR28 | 30 => Some GPR29 + | 31 => Some GPR30 | 32 => Some GPR31 | 33 => Some GPR32 | 34 => Some GPR33 | 35 => Some GPR34 | 36 => Some GPR35 | 37 => Some GPR36 | 38 => Some GPR37 | 39 => Some GPR38 | 40 => Some GPR39 + | 41 => Some GPR40 | 42 => Some GPR41 | 43 => Some GPR42 | 44 => Some GPR43 | 45 => Some GPR44 | 46 => Some GPR45 | 47 => Some GPR46 | 48 => Some GPR47 | 49 => Some GPR48 | 50 => Some GPR49 + | 51 => Some GPR50 | 52 => Some GPR51 | 53 => Some GPR52 | 54 => Some GPR53 | 55 => Some GPR54 | 56 => Some GPR55 | 57 => Some GPR56 | 58 => Some GPR57 | 59 => Some GPR58 | 60 => Some GPR59 + | 61 => Some GPR60 | 62 => Some GPR61 | 63 => Some GPR62 | 64 => Some GPR63 + | _ => None + end. + +Definition inv_ppos (p: R.t) : option preg := + match p with + | 1 => None + | 2 => Some RA | 3 => Some PC + | n => match pos_to_ireg (n-3) with + | None => None + | Some gpr => Some (IR gpr) + end + end. + +Notation "a @ b" := (Econs a b) (at level 102, right associativity). + +(** Translations of instructions *) + +Definition trans_control (ctl: control) : inst := + match ctl with + | Pret => [(#PC, PReg(#RA))] + | Pcall s => [(#RA, PReg(#PC)); (#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)] + | Picall r => [(#RA, PReg(#PC)); (#PC, PReg(#r))] + | Pgoto s => [(#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)] + | Pigoto r => [(#PC, PReg(#r))] + | Pj_l l => [(#PC, Op (Control (Oj_l l)) (PReg(#PC) @ Enil))] + | Pcb bt r l => [(#PC, Op (Control (Ocb bt l)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Pcbu bt r l => [(#PC, Op (Control (Ocbu bt l)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Pjumptable r labels => [(#PC, Op (Control (Ojumptable labels)) (PReg(#r) @ PReg(#PC) @ Enil)); + (#GPR62, Op (Constant Vundef) Enil); + (#GPR63, Op (Constant Vundef) Enil) ] + | Pbuiltin ef args res => [(#PC, Op (Control (OError)) Enil)] + end. + +Definition trans_exit (ex: option control) : L.inst := + match ex with + | None => [] + | Some ctl => trans_control ctl + end +. + +Definition trans_arith (ai: ar_instruction) : inst := + match ai with + | PArithR n d => [(#d, Op (Arith (OArithR n)) Enil)] + | PArithRR n d s => [(#d, Op (Arith (OArithRR n)) (PReg(#s) @ Enil))] + | PArithRI32 n d i => [(#d, Op (Arith (OArithRI32 n i)) Enil)] + | PArithRI64 n d i => [(#d, Op (Arith (OArithRI64 n i)) Enil)] + | PArithRF32 n d i => [(#d, Op (Arith (OArithRF32 n i)) Enil)] + | PArithRF64 n d i => [(#d, Op (Arith (OArithRF64 n i)) Enil)] + | PArithRRR n d s1 s2 => [(#d, Op (Arith (OArithRRR n)) (PReg(#s1) @ PReg(#s2) @ Enil))] + | PArithRRI32 n d s i => [(#d, Op (Arith (OArithRRI32 n i)) (PReg(#s) @ Enil))] + | PArithRRI64 n d s i => [(#d, Op (Arith (OArithRRI64 n i)) (PReg(#s) @ Enil))] + | PArithARRR n d s1 s2 => [(#d, Op (Arith (OArithARRR n)) (PReg(#d) @ PReg(#s1) @ PReg(#s2) @ Enil))] + | PArithARR n d s => [(#d, Op (Arith (OArithARR n)) (PReg(#d) @ PReg(#s) @ Enil))] + | PArithARRI32 n d s i => [(#d, Op (Arith (OArithARRI32 n i)) (PReg(#d) @ PReg(#s) @ Enil))] + | PArithARRI64 n d s i => [(#d, Op (Arith (OArithARRI64 n i)) (PReg(#d) @ PReg(#s) @ Enil))] + end. + + +Definition trans_basic (b: basic) : inst := + match b with + | PArith ai => trans_arith ai + | PLoadRRO trap n d a ofs => [(#d, Op (Load (OLoadRRO n trap ofs)) (PReg (#a) @ PReg pmem @ Enil))] + | PLoadRRR trap n d a ro => [(#d, Op (Load (OLoadRRR n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] + | PLoadRRRXS trap n d a ro => [(#d, Op (Load (OLoadRRRXS n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] + | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))] + | PLoadQRRO qd a ofs => + let (d0, d1) := gpreg_q_expand qd in + [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil)); + (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil))] + | PLoadORRO od a ofs => + match gpreg_o_expand od with + | (d0, d1, d2, d3) => + [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil)); + (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil)); + (#d2, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 16)))) (Old(PReg (#a)) @ PReg pmem @ Enil)); + (#d3, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 24)))) (Old(PReg (#a)) @ PReg pmem @ Enil))] + end + | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] + | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] + | PStoreQRRO qs a ofs => + let (s0, s1) := gpreg_q_expand qs in + [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil)); + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] + | PStoreORRO os a ofs => + match gpreg_o_expand os with + | (s0, s1, s2, s3) => + [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil)); + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil)); + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 16)))) (PReg (#s2) @ PReg (#a) @ PReg pmem @ Enil)); + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 24)))) (PReg (#s3) @ PReg (#a) @ PReg pmem @ Enil))] + end + | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil); + (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))] + | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil)); + (#SP, Op (Freeframe2 sz pos) (PReg (#SP) @ Old (PReg pmem) @ Enil)); + (#RTMP, Op (Constant Vundef) Enil)] + | Pget rd ra => match ra with + | RA => [(#rd, PReg(#ra))] + | _ => [(#rd, Op Fail Enil)] + end + | Pset ra rd => match ra with + | RA => [(#ra, PReg(#rd))] + | _ => [(#rd, Op Fail Enil)] + end + | Pnop => [] + end. + +Fixpoint trans_body (b: list basic) : list L.inst := + match b with + | nil => nil + | b :: lb => (trans_basic b) :: (trans_body lb) + end. + +Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k. + +Definition trans_block (b: Asmvliw.bblock) : L.bblock := + trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil). + +Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb. +Proof. + intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity. +Qed. + +Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb. +Proof. + intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity. +Qed. + +(** Lemmas on the translation *) + +Definition state := L.mem. +Definition exec := L.run. + +Definition match_states (s: Asmvliw.state) (s': state) := + let (rs, m) := s in + s' pmem = Memstate m + /\ forall r, s' (#r) = Val (rs r). + +Definition match_outcome (o:outcome) (s: option state) := + match o with + | Next rs m => exists s', s=Some s' /\ match_states (State rs m) s' + | Stuck => s=None + end. + +Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity). + +Definition trans_state (s: Asmvliw.state) : state := + let (rs, m) := s in + fun x => if (Pos.eq_dec x pmem) then Memstate m + else match (inv_ppos x) with + | Some r => Val (rs r) + | None => Val Vundef + end. + +Lemma not_eq_IR: + forall r r', r <> r' -> IR r <> IR r'. +Proof. + intros. congruence. +Qed. + +(** Parallelizability test of a bblock (bundle), and bisimulation of the Asmblock and L parallel semantics *) + +Module PChk := ParallelChecks L PosPseudoRegSet. + +Definition bblock_para_check (p: Asmvliw.bblock) : bool := + PChk.is_parallelizable (trans_block p). + +Section SECT_PAR. + +Import PChk. + +Ltac Simplif := + ((rewrite nextblock_inv by eauto with asmgen) + || (rewrite nextblock_inv1 by eauto with asmgen) + || (rewrite Pregmap.gss) + || (rewrite nextblock_pc) + || (rewrite Pregmap.gso by eauto with asmgen) + || (rewrite assign_diff by (auto; try discriminate; try (apply ppos_discr; try discriminate; congruence); try (apply ppos_pmem_discr); + try (apply not_eq_sym; apply ppos_discr; try discriminate; congruence); try (apply not_eq_sym; apply ppos_pmem_discr); auto)) + || (rewrite assign_eq) + ); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +Arguments Pos.add: simpl never. +Arguments ppos: simpl never. + +Variable Ge: genv. + +Lemma trans_arith_par_correct ge fn rsr mr sr rsw mw sw rsw' i: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_states (State rsw mw) sw -> + parexec_arith_instr ge i rsr rsw = rsw' -> + exists sw', + inst_prun Ge (trans_arith i) sw sr sr = Some sw' + /\ match_states (State rsw' mw) sw'. +Proof. + intros GENV MSR MSW PARARITH. subst. inv MSR. inv MSW. + unfold parexec_arith_instr. destruct i. +(* Ploadsymbol *) + - destruct i. eexists; split; [| split]. + * simpl. reflexivity. + * Simpl. + * simpl. intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRR *) + - eexists; split; [| split]. + * simpl. rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRI32 *) + - eexists; split; [|split]. + * simpl. reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRI64 *) + - eexists; split; [|split]. + * simpl. reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRF32 *) + - eexists; split; [|split]. + * simpl. reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRF64 *) + - eexists; split; [|split]. + * simpl. reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRRR *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rs1). rewrite (H0 rs2). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRRI32 *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithRRI64 *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithARRR *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rd). rewrite (H0 rs1). rewrite (H0 rs2). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithARR *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithARRI32 *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* PArithARRI64 *) + - eexists; split; [|split]. + * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +Qed. + + + +Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_states (State rsw mw) sw -> + match_outcome (bstep ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr). +Proof. + +(* a little tactic to automate reasoning on preg_eq *) +Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core. +Local Ltac preg_eq_discr r rd := + destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto); + rewrite (assign_diff _ (#rd) (#r) _); auto; + rewrite Pregmap.gso; auto. + + intros GENV MSR MSW; inversion MSR as (H & H0); inversion MSW as (H1 & H2). + destruct bi; simpl. +(* Arith *) + - exploit trans_arith_par_correct. 5: eauto. all: eauto. +(* Load *) + - destruct i. + (* Load Offset *) + + destruct i; simpl load_chunk. all: + unfold parexec_load_offset; simpl; unfold exec_load_deps_offset; erewrite GENV, H, H0; + unfold eval_offset; + simpl; auto; + destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl. + + (* Load Reg *) + + destruct i; simpl load_chunk. all: + unfold parexec_load_reg; simpl; unfold exec_load_deps_reg; rewrite H, H0; rewrite (H0 rofs); + destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl. + + (* Load Reg XS *) + + destruct i; simpl load_chunk. all: + unfold parexec_load_regxs; simpl; unfold exec_load_deps_regxs; rewrite H, H0; rewrite (H0 rofs); + destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl. + + (* Load Quad word *) + + unfold parexec_load_q_offset. + destruct (gpreg_q_expand rd) as [rd0 rd1]; destruct Ge; simpl. + rewrite H0, H. + destruct (Mem.loadv Many64 mr _) as [load0 | ]; simpl; auto. + rewrite !(assign_diff _ _ pmem), H; auto. + destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto. + eexists; intuition eauto. + { rewrite !(assign_diff _ _ pmem); auto. } + { preg_eq_discr r rd1. + preg_eq_discr r rd0. } + + (* Load Octuple word *) + + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core. + unfold parexec_load_o_offset. + destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl. + rewrite H0, H. + destruct (Mem.loadv Many64 mr (Val.offset_ptr (rsr ra) ofs)) as [load0 | ]; simpl; auto. + rewrite !(assign_diff _ _ pmem), !H; auto. + destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto. + rewrite !(assign_diff _ _ pmem), !H; auto. + destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 16)))) as [load2| ]; simpl; auto. + rewrite !(assign_diff _ _ pmem), !H; auto. + destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 24)))) as [load3| ]; simpl; auto. + eexists; intuition eauto. + { rewrite !(assign_diff _ _ pmem); auto. } + { preg_eq_discr r rd3. + preg_eq_discr r rd2. + preg_eq_discr r rd1. + preg_eq_discr r rd0. } + +(* Store *) + - destruct i. + (* Store Offset *) + + destruct i; simpl store_chunk. all: + unfold parexec_store_offset; simpl; unfold exec_store_deps_offset; erewrite GENV, H, H0; rewrite (H0 ra); + unfold eval_offset; simpl; auto; + destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl. + + (* Store Reg *) + + destruct i; simpl store_chunk. all: + unfold parexec_store_reg; simpl; unfold exec_store_deps_reg; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs); + destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl. + + (* Store Reg XS *) + + destruct i; simpl store_chunk. all: + unfold parexec_store_regxs; simpl; unfold exec_store_deps_regxs; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs); + destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto; + eexists; split; try split; Simpl; + intros rr; destruct rr; Simpl. + + (* Store Quad Word *) + + unfold parexec_store_q_offset. + destruct (gpreg_q_expand rs) as [s0 s1]; destruct Ge; simpl. + rewrite !H0, !H. + destruct (Mem.storev _ _ _ (rsr s0)) as [mem0 | ]; simpl; auto. + rewrite !assign_diff, !H0; auto. + destruct (Mem.storev _ _ _ (rsr s1)) as [mem1 | ]; simpl; auto. + eexists; intuition eauto. + rewrite !assign_diff; auto. + + (* Store Ocuple Word *) + + unfold parexec_store_o_offset. + destruct (gpreg_o_expand rs) as [[[s0 s1] s2] s3]; destruct Ge; simpl. + rewrite !H0, !H. + destruct (Mem.storev _ _ _ (rsr s0)) as [store0 | ]; simpl; auto. + rewrite !assign_diff, !H0; auto. + destruct (Mem.storev _ _ _ (rsr s1)) as [store1 | ]; simpl; auto. + rewrite !assign_diff, !H0; auto. + destruct (Mem.storev _ _ _ (rsr s2)) as [store2 | ]; simpl; auto. + rewrite !assign_diff, !H0; auto. + destruct (Mem.storev _ _ _ (rsr s3)) as [store3 | ]; simpl; auto. + eexists; intuition eauto. + rewrite !assign_diff; auto. + + (* Allocframe *) + - destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS. + * eexists; repeat split. + { Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl. + rewrite H, MEMAL. rewrite MEMS. reflexivity. } + { Simpl. } + { intros rr; destruct rr; Simpl. + destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl. } + * simpl; Simpl; erewrite !H0, H, MEMAL, MEMS; auto. + (* Freeframe *) + - erewrite !H0, H. + destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto. + destruct (rsr GPR12) eqn:SPeq; simpl; auto. + destruct (Mem.free _ _ _ _) eqn:MFREE; simpl; auto. + eexists; repeat split. + * simpl. Simpl. erewrite H0, SPeq, MLOAD, MFREE. reflexivity. + * Simpl. + * intros rr; destruct rr; Simpl. destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl. +(* Pget *) + - destruct rs eqn:rseq; simpl; auto. + eexists. repeat split. Simpl. intros rr; destruct rr; Simpl. + destruct (ireg_eq g rd); subst; Simpl. +(* Pset *) + - destruct rd eqn:rdeq; simpl; auto. + eexists. repeat split. Simpl. intros rr; destruct rr; Simpl. +(* Pnop *) + - eexists. repeat split; assumption. +Qed. + + +Theorem bisimu_par_body: + forall bdy ge fn rsr mr sr rsw mw sw, + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_states (State rsw mw) sw -> + match_outcome (parexec_wio_body ge bdy rsr rsw mr mw) (prun_iw Ge (trans_body bdy) sw sr). +Proof. + induction bdy as [|i bdy]; simpl; eauto. + intros. + exploit (bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw i); eauto. + destruct (bstep _ _ _ _ _ _); simpl. + - intros (s' & X1 & X2). rewrite X1; simpl; eauto. + - intros X; rewrite X; simpl; auto. +Qed. + +Theorem bisimu_par_control ex sz aux ge fn rsr rsw mr mw sr sw: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_states (State rsw mw) sw -> + match_outcome (parexec_control ge fn ex (incrPC (Ptrofs.repr sz) rsr) (rsw#PC <- aux) mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr). +Proof. + intros GENV MSR MSW; unfold estep. + simpl in *. inv MSR. inv MSW. + destruct ex. + - destruct c; destruct i; try discriminate; simpl. + all: try (rewrite (H0 PC); eexists; split; try split; Simpl; intros rr; destruct rr; unfold incrPC; Simpl). + + (* Pjumptable *) + + rewrite (H0 PC). Simpl. rewrite (H0 r). unfold incrPC. Simpl. + destruct (rsr r); simpl; auto. destruct (list_nth_z _ _); simpl; auto. + unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl. + destruct (Val.offset_ptr _ _); simpl; auto. + eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl. + destruct (preg_eq g GPR62). rewrite e. Simpl. + destruct (preg_eq g GPR63). rewrite e. Simpl. Simpl. + + (* Pj_l *) + + rewrite (H0 PC). Simpl. unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. + unfold incrPC. Simpl. destruct (Val.offset_ptr _ _); simpl; auto. + eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl. + + (* Pcb *) + + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmp_for_btest _); simpl; auto. destruct o; simpl; auto. + unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i. + ++ destruct (Val.cmp_bool _ _ _); simpl; auto. destruct b. + +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl. + destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl. + intros rr; destruct rr; Simpl. + +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl. + ++ destruct (Val.cmpl_bool _ _ _); simpl; auto. destruct b. + +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl. + destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl. + intros rr; destruct rr; Simpl. + +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl. + + (* Pcbu *) + + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmpu_for_btest _); simpl; auto. destruct o; simpl; auto. + unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i. + ++ destruct (Val.mxcmpu_bool _ _ _); simpl; auto. destruct b. + +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl. + destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl. + intros rr; destruct rr; Simpl. + +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl. + ++ destruct (Val.mxcmplu_bool _ _ _); simpl; auto. destruct b. + +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl. + destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl. + intros rr; destruct rr; Simpl. + +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl. + + - simpl in *. rewrite (H0 PC). eexists; split; try split; Simpl. + intros rr; destruct rr; unfold incrPC; Simpl. +Qed. + +Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_states (State rsw mw) sw -> + match_outcome (estep ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr). +Proof. + intros; unfold estep. + exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto. + replace (rsw # PC <- (rsw PC)) with rsw; auto. + apply extensionality. intros; destruct x; simpl; auto. +Qed. + +Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil). + +Theorem bisimu_par_wio ge fn rsr mr sr bdy ex sz: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_outcome (parexec_wio ge fn bdy ex (Ptrofs.repr sz) rsr mr) (prun_iw Ge (trans_block_aux bdy sz ex) sr sr). +Proof. + intros GENV MSR. unfold parexec_wio, trans_block_aux. + exploit (bisimu_par_body bdy ge fn rsr mr sr rsr mr sr); eauto. + destruct (parexec_wio_body _ _ _ _ _ _); simpl. + - intros (s' & X1 & X2). + erewrite prun_iw_app_Some; eauto. + exploit (bisimu_par_exit ex sz ge fn rsr rs mr m sr s'); eauto. + subst Ge; simpl. destruct MSR as (Y1 & Y2). erewrite Y2; simpl. + destruct (inst_prun _ _ _ _ _); simpl; auto. + - intros X; erewrite prun_iw_app_None; eauto. +Qed. + +Theorem bisimu_par_wio_bblock ge fn rsr mr sr bdy1 bdy2 ex sz: + Ge = Genv ge fn -> + match_states (State rsr mr) sr -> + match_outcome + match parexec_wio ge fn bdy1 ex (Ptrofs.repr sz) rsr mr with + | Next rs' m' => parexec_wio_body ge bdy2 rsr rs' mr m' + | Stuck => Stuck + end + (prun_iw Ge ((trans_block_aux bdy1 sz ex)++(trans_body bdy2)) sr sr). +Proof. + intros. + exploit (bisimu_par_wio ge fn rsr mr sr bdy1 ex sz); eauto. + destruct (parexec_wio _ _ _ _ _ _); simpl. + - intros (s' & X1 & X2). + erewrite prun_iw_app_Some; eauto. + eapply bisimu_par_body; eauto. + - intros; erewrite prun_iw_app_None; eauto. +Qed. + +Lemma trans_body_perserves_permutation bdy1 bdy2: + Permutation bdy1 bdy2 -> + Permutation (trans_body bdy1) (trans_body bdy2). +Proof. + induction 1; simpl; econstructor; eauto. +Qed. + +Lemma trans_body_app bdy1: forall bdy2, + trans_body (bdy1++bdy2) = (trans_body bdy1) ++ (trans_body bdy2). +Proof. + induction bdy1; simpl; congruence. +Qed. + +Theorem trans_block_perserves_permutation bdy1 bdy2 b: + Permutation (bdy1 ++ bdy2) (body b) -> + Permutation (trans_block b) ((trans_block_aux bdy1 (size b) (exit b))++(trans_body bdy2)). +Proof. + intro H; unfold trans_block, trans_block_aux. + eapply perm_trans. + - eapply Permutation_app_tail. + apply trans_body_perserves_permutation. + apply Permutation_sym; eapply H. + - rewrite trans_body_app. rewrite <-! app_assoc. + apply Permutation_app_head. + apply Permutation_app_comm. +Qed. + +Theorem bisimu_par rs1 m1 s1' b ge fn o2: + Ge = Genv ge fn -> + match_states (State rs1 m1) s1' -> + parexec_bblock ge fn b rs1 m1 o2 -> + exists o2', + prun Ge (trans_block b) s1' o2' + /\ match_outcome o2 o2'. +Proof. + intros GENV MS PAREXEC. + inversion PAREXEC as (bdy1 & bdy2 & PERM & WIO). + exploit trans_block_perserves_permutation; eauto. + intros Perm. + exploit (bisimu_par_wio_bblock ge fn rs1 m1 s1' bdy1 bdy2 (exit b) (size b)); eauto. + rewrite <- WIO. clear WIO. + intros H; eexists; split. 2: eapply H. + unfold prun; eexists; split; eauto. + destruct (prun_iw _ _ _ _); simpl; eauto. +Qed. + +(** sequential execution *) +Theorem bisimu_basic ge fn bi rs m s: + Ge = Genv ge fn -> + match_states (State rs m) s -> + match_outcome (exec_basic_instr ge bi rs m) (inst_run Ge (trans_basic bi) s s). +Proof. + intros; unfold exec_basic_instr. rewrite inst_run_prun. + eapply bisimu_par_wio_basic; eauto. +Qed. + +Lemma bisimu_body: + forall bdy ge fn rs m s, + Ge = Genv ge fn -> + match_states (State rs m) s -> + match_outcome (exec_body ge bdy rs m) (exec Ge (trans_body bdy) s). +Proof. + induction bdy as [|i bdy]; simpl; eauto. + intros. + exploit (bisimu_basic ge fn i rs m s); eauto. + destruct (exec_basic_instr _ _ _ _); simpl. + - intros (s' & X1 & X2). rewrite X1; simpl; eauto. + - intros X; rewrite X; simpl; auto. +Qed. + +Theorem bisimu_exit ge fn b rs m s: + Ge = Genv ge fn -> + match_states (State rs m) s -> + match_outcome (exec_control ge fn (exit b) (nextblock b rs) m) (inst_run Ge (trans_pcincr (size b) (trans_exit (exit b))) s s). +Proof. + intros; unfold exec_control, nextblock. rewrite inst_run_prun. + apply (bisimu_par_control (exit b) (size b) (Val.offset_ptr (rs PC) (Ptrofs.repr (size b))) ge fn rs rs m m s s); auto. +Qed. + +Theorem bisimu rs m b ge fn s: + Ge = Genv ge fn -> + match_states (State rs m) s -> + match_outcome (exec_bblock ge fn b rs m) (exec Ge (trans_block b) s). +Proof. + intros GENV MS. unfold exec_bblock. + exploit (bisimu_body (body b) ge fn rs m s); eauto. + unfold exec, trans_block; simpl. + destruct (exec_body _ _ _ _); simpl. + - intros (s' & X1 & X2). + erewrite run_app_Some; eauto. + exploit (bisimu_exit ge fn b rs0 m0 s'); eauto. + subst Ge; simpl. destruct X2 as (Y1 & Y2). erewrite Y2; simpl. + destruct (inst_run _ _ _); simpl; auto. + - intros X; erewrite run_app_None; eauto. +Qed. + + +Theorem trans_state_match: forall S, match_states S (trans_state S). +Proof. + intros. destruct S as (rs & m). simpl. + split. reflexivity. + intro. destruct r; try reflexivity. + destruct g; reflexivity. +Qed. + + +Lemma state_eq_decomp: + forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2. +Proof. + intros. congruence. +Qed. + +Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2. +Proof. + unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1. + apply state_eq_decomp. + - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence. + - congruence. +Qed. + +Lemma bblock_para_check_correct ge fn bb rs m rs' m': + Ge = Genv ge fn -> + exec_bblock ge fn bb rs m = Next rs' m' -> + bblock_para_check bb = true -> + det_parexec ge fn bb rs m rs' m'. +Proof. + intros H H0 H1 o H2. unfold bblock_para_check in H1. + exploit (bisimu rs m bb ge fn); eauto. eapply trans_state_match. + rewrite H0; simpl. + intros (s2' & EXEC & MS). + exploit bisimu_par. 2: apply (trans_state_match (State rs m)). all: eauto. + intros (o2' & PRUN & MO). + exploit parallelizable_correct. apply is_para_correct_aux. eassumption. + intro. eapply H3 in PRUN. clear H3. destruct o2'. + - inv PRUN. inv H3. unfold exec in EXEC; unfold trans_state in H. + assert (x = s2') by congruence. subst. clear H. + assert (m0 = s2') by (apply functional_extensionality; auto). subst. clear H4. + destruct o; try discriminate. inv MO. inv H. assert (s2' = x) by congruence. subst. + exploit (state_equiv (State rs' m') (State rs0 m0)). + 2: eapply H4. eapply MS. intro H. inv H. reflexivity. + - unfold match_outcome in MO. destruct o. + + inv MO. inv H3. discriminate. + + clear MO. unfold exec in EXEC. + unfold trans_state in PRUN; rewrite EXEC in PRUN. discriminate. +Qed. + +End SECT_PAR. + +Section SECT_BBLOCK_EQUIV. + +Variable Ge: genv. + +Local Hint Resolve trans_state_match: core. + +Lemma bblock_simu_reduce: + forall p1 p2 ge fn, + Ge = Genv ge fn -> + L.bblock_simu Ge (trans_block p1) (trans_block p2) -> + Asmblockprops.bblock_simu ge fn p1 p2. +Proof. + unfold bblock_simu, res_eq; intros p1 p2 ge fn H1 H2 rs m DONTSTUCK. + generalize (H2 (trans_state (State rs m))); clear H2. + intro H2. + exploit (bisimu Ge rs m p1 ge fn (trans_state (State rs m))); eauto. + exploit (bisimu Ge rs m p2 ge fn (trans_state (State rs m))); eauto. + destruct (exec_bblock ge fn p1 rs m); try congruence. + intros H3 (s2' & exp2 & MS'). unfold exec in exp2, H3. rewrite exp2 in H2. + destruct H2 as (m2' & H2 & H4). discriminate. rewrite H2 in H3. + destruct (exec_bblock ge fn p2 rs m); simpl in H3. + * destruct H3 as (s' & H3 & H5 & H6). inv H3. inv MS'. + replace rs0 with rs1. + - replace m0 with m1; auto. congruence. + - apply functional_extensionality. intros r. + generalize (H0 r). intros Hr. congruence. + * discriminate. +Qed. + +(** Used for debug traces *) + +Definition gpreg_name (gpr: gpreg) := + match gpr with + | GPR0 => Str ("GPR0") | GPR1 => Str ("GPR1") | GPR2 => Str ("GPR2") | GPR3 => Str ("GPR3") | GPR4 => Str ("GPR4") + | GPR5 => Str ("GPR5") | GPR6 => Str ("GPR6") | GPR7 => Str ("GPR7") | GPR8 => Str ("GPR8") | GPR9 => Str ("GPR9") + | GPR10 => Str ("GPR10") | GPR11 => Str ("GPR11") | GPR12 => Str ("GPR12") | GPR13 => Str ("GPR13") | GPR14 => Str ("GPR14") + | GPR15 => Str ("GPR15") | GPR16 => Str ("GPR16") | GPR17 => Str ("GPR17") | GPR18 => Str ("GPR18") | GPR19 => Str ("GPR19") + | GPR20 => Str ("GPR20") | GPR21 => Str ("GPR21") | GPR22 => Str ("GPR22") | GPR23 => Str ("GPR23") | GPR24 => Str ("GPR24") + | GPR25 => Str ("GPR25") | GPR26 => Str ("GPR26") | GPR27 => Str ("GPR27") | GPR28 => Str ("GPR28") | GPR29 => Str ("GPR29") + | GPR30 => Str ("GPR30") | GPR31 => Str ("GPR31") | GPR32 => Str ("GPR32") | GPR33 => Str ("GPR33") | GPR34 => Str ("GPR34") + | GPR35 => Str ("GPR35") | GPR36 => Str ("GPR36") | GPR37 => Str ("GPR37") | GPR38 => Str ("GPR38") | GPR39 => Str ("GPR39") + | GPR40 => Str ("GPR40") | GPR41 => Str ("GPR41") | GPR42 => Str ("GPR42") | GPR43 => Str ("GPR43") | GPR44 => Str ("GPR44") + | GPR45 => Str ("GPR45") | GPR46 => Str ("GPR46") | GPR47 => Str ("GPR47") | GPR48 => Str ("GPR48") | GPR49 => Str ("GPR49") + | GPR50 => Str ("GPR50") | GPR51 => Str ("GPR51") | GPR52 => Str ("GPR52") | GPR53 => Str ("GPR53") | GPR54 => Str ("GPR54") + | GPR55 => Str ("GPR55") | GPR56 => Str ("GPR56") | GPR57 => Str ("GPR57") | GPR58 => Str ("GPR58") | GPR59 => Str ("GPR59") + | GPR60 => Str ("GPR60") | GPR61 => Str ("GPR61") | GPR62 => Str ("GPR62") | GPR63 => Str ("GPR63") + end. + +Definition string_of_name (x: P.R.t): ?? pstring := + if (Pos.eqb x pmem) then + RET (Str "MEM") + else + match inv_ppos x with + | Some RA => RET (Str ("RA")) + | Some PC => RET (Str ("PC")) + | Some (IR gpr) => RET (gpreg_name gpr) + | _ => RET (Str ("UNDEFINED")) + end. + +Definition string_of_name_r (n: arith_name_r): pstring := + match n with + | Ploadsymbol _ _ => "Ploadsymbol" + end. + +Definition string_of_name_rr (n: arith_name_rr): pstring := + match n with + Pmv => "Pmv" + | Pnegw => "Pnegw" + | Pnegl => "Pnegl" + | Pcvtl2w => "Pcvtl2w" + | Psxwd => "Psxwd" + | Pzxwd => "Pzxwd" + | Pextfz _ _ => "Pextfz" + | Pextfs _ _ => "Pextfs" + | Pextfzl _ _ => "Pextfzl" + | Pextfsl _ _ => "Pextfsl" + | Pfabsd => "Pfabsd" + | Pfabsw => "Pfabsw" + | Pfnegd => "Pfnegd" + | Pfnegw => "Pfnegw" + | Pfinvw => "Pfinvw" + | Pfnarrowdw => "Pfnarrowdw" + | Pfwidenlwd => "Pfwidenlwd" + | Pfloatwrnsz => "Pfloatwrnsz" + | Pfloatuwrnsz => "Pfloatuwrnsz" + | Pfloatudrnsz => "Pfloatudrnsz" + | Pfloatdrnsz => "Pfloatdrnsz" + | Pfixedwrzz => "Pfixedwrzz" + | Pfixeduwrzz => "Pfixeduwrzz" + | Pfixeddrzz => "Pfixeddrzz" + | Pfixedudrzz => "Pfixedudrzz" + | Pfixeddrzz_i32 => "Pfixeddrzz_i32" + | Pfixedudrzz_i32 => "Pfixedudrzz_i32" + end. + +Definition string_of_name_ri32 (n: arith_name_ri32): pstring := + match n with + | Pmake => "Pmake" + end. + +Definition string_of_name_ri64 (n: arith_name_ri64): pstring := + match n with + | Pmakel => "Pmakel" + end. + +Definition string_of_name_rf32 (n: arith_name_rf32): pstring := + match n with + | Pmakefs => "Pmakefs" + end. + +Definition string_of_name_rf64 (n: arith_name_rf64): pstring := + match n with + | Pmakef => "Pmakef" + end. + +Definition string_of_name_rrr (n: arith_name_rrr): pstring := + match n with + | Pcompw _ => "Pcompw" + | Pcompl _ => "Pcompl" + | Pfcompw _ => "Pfcompw" + | Pfcompl _ => "Pfcompl" + | Paddw => "Paddw" + | Paddxw _ => "Paddxw" + | Psubw => "Psubw" + | Prevsubxw _ => "Prevsubxw" + | Pmulw => "Pmulw" + | Pandw => "Pandw" + | Pnandw => "Pnandw" + | Porw => "Porw" + | Pnorw => "Pnorw" + | Pxorw => "Pxorw" + | Pnxorw => "Pnxorw" + | Pandnw => "Pandnw" + | Pornw => "Pornw" + | Psraw => "Psraw" + | Psrlw => "Psrlw" + | Psrxw => "Psrxw" + | Psllw => "Psllw" + | Paddl => "Paddl" + | Paddxl _ => "Paddxl" + | Psubl => "Psubl" + | Prevsubxl _ => "Prevsubxl" + | Pandl => "Pandl" + | Pnandl => "Pnandl" + | Porl => "Porl" + | Pnorl => "Pnorl" + | Pxorl => "Pxorl" + | Pnxorl => "Pnxorl" + | Pandnl => "Pandnl" + | Pornl => "Pornl" + | Pmull => "Pmull" + | Pslll => "Pslll" + | Psrll => "Psrll" + | Psrxl => "Psrxl" + | Psral => "Psral" + | Pfaddd => "Pfaddd" + | Pfaddw => "Pfaddw" + | Pfsbfd => "Pfsbfd" + | Pfsbfw => "Pfsbfw" + | Pfmuld => "Pfmuld" + | Pfmulw => "Pfmulw" + | Pfmind => "Pfmind" + | Pfminw => "Pfminw" + | Pfmaxd => "Pfmaxd" + | Pfmaxw => "Pfmaxw" + end. + +Definition string_of_name_rri32 (n: arith_name_rri32): pstring := + match n with + Pcompiw _ => "Pcompiw" + | Paddiw => "Paddiw" + | Paddxiw _ => "Paddxiw" + | Prevsubiw => "Prevsubiw" + | Prevsubxiw _ => "Prevsubxiw" + | Pmuliw => "Pmuliw" + | Pandiw => "Pandiw" + | Pnandiw => "Pnandiw" + | Poriw => "Poriw" + | Pnoriw => "Pnoriw" + | Pxoriw => "Pxoriw" + | Pnxoriw => "Pnxoriw" + | Pandniw => "Pandniw" + | Porniw => "Porniw" + | Psraiw => "Psraiw" + | Psrliw => "Psrliw" + | Psrxiw => "Psrxiw" + | Pslliw => "Pslliw" + | Proriw => "Proriw" + | Psllil => "Psllil" + | Psrlil => "Psrlil" + | Psrail => "Psrail" + | Psrxil => "Psrxil" + end. + +Definition string_of_name_rri64 (n: arith_name_rri64): pstring := + match n with + Pcompil _ => "Pcompil" + | Paddil => "Paddil" + | Prevsubil => "Prevsubil" + | Paddxil _ => "Paddxil" + | Prevsubxil _ => "Prevsubxil" + | Pmulil => "Pmulil" + | Pandil => "Pandil" + | Pnandil => "Pnandil" + | Poril => "Poril" + | Pnoril => "Pnoril" + | Pxoril => "Pxoril" + | Pnxoril => "Pnxoril" + | Pandnil => "Pandnil" + | Pornil => "Pornil" + end. + +Definition string_of_name_arrr (n: arith_name_arrr): pstring := + match n with + | Pmaddw => "Pmaddw" + | Pmaddl => "Pmaddl" + | Pmsubw => "Pmsubw" + | Pmsubl => "Pmsubl" + | Pcmove _ => "Pcmove" + | Pcmoveu _ => "Pcmoveu" + | Pfmaddfw => "Pfmaddfw" + | Pfmaddfl => "Pfmaddfl" + | Pfmsubfw => "Pfmsubfw" + | Pfmsubfl => "Pfmsubfl" + end. + +Definition string_of_name_arr (n: arith_name_arr): pstring := + match n with + | Pinsf _ _ => "Pinsf" + | Pinsfl _ _ => "Pinsfl" + end. + +Definition string_of_name_arri32 (n: arith_name_arri32): pstring := + match n with + | Pmaddiw => "Pmaddw" + | Pcmoveiw _ => "Pcmoveiw" + | Pcmoveuiw _ => "Pcmoveuiw" + end. + +Definition string_of_name_arri64 (n: arith_name_arri64): pstring := + match n with + | Pmaddil => "Pmaddl" + | Pcmoveil _ => "Pcmoveil" + | Pcmoveuil _ => "Pcmoveuil" + end. + +Definition string_of_arith (op: arith_op): pstring := + match op with + | OArithR n => string_of_name_r n + | OArithRR n => string_of_name_rr n + | OArithRI32 n _ => string_of_name_ri32 n + | OArithRI64 n _ => string_of_name_ri64 n + | OArithRF32 n _ => string_of_name_rf32 n + | OArithRF64 n _ => string_of_name_rf64 n + | OArithRRR n => string_of_name_rrr n + | OArithRRI32 n _ => string_of_name_rri32 n + | OArithRRI64 n _ => string_of_name_rri64 n + | OArithARRR n => string_of_name_arrr n + | OArithARR n => string_of_name_arr n + | OArithARRI32 n _ => string_of_name_arri32 n + | OArithARRI64 n _ => string_of_name_arri64 n + end. + +Definition string_of_load_name (n: load_name) : pstring := + match n with + Plb => "Plb" + | Plbu => "Plbu" + | Plh => "Plh" + | Plhu => "Plhu" + | Plw => "Plw" + | Plw_a => "Plw_a" + | Pld => "Pld" + | Pld_a => "Pld_a" + | Pfls => "Pfls" + | Pfld => "Pfld" + end. + +Definition string_of_load (op: load_op): pstring := + match op with + | OLoadRRO n _ _ => string_of_load_name n + | OLoadRRR n _ => string_of_load_name n + | OLoadRRRXS n _ => string_of_load_name n + end. + +Definition string_of_store_name (n: store_name) : pstring := + match n with + Psb => "Psb" + | Psh => "Psh" + | Psw => "Psw" + | Psw_a => "Psw_a" + | Psd => "Psd" + | Psd_a => "Psd_a" + | Pfss => "Pfss" + | Pfsd => "Pfsd" + end. + +Definition string_of_store (op: store_op) : pstring := + match op with + | OStoreRRO n _ => string_of_store_name n + | OStoreRRR n => string_of_store_name n + | OStoreRRRXS n => string_of_store_name n + end. + +Definition string_of_control (op: control_op) : pstring := + match op with + | Oj_l _ => "Oj_l" + | Ocb _ _ => "Ocb" + | Ocbu _ _ => "Ocbu" + | Odiv => "Odiv" + | Odivu => "Odivu" + | Ojumptable _ => "Ojumptable" + | OError => "OError" + | OIncremPC _ => "OIncremPC" + end. + +Definition string_of_op (op: P.op): ?? pstring := + match op with + | Arith op => RET (string_of_arith op) + | Load op => RET (string_of_load op) + | Store op => RET (string_of_store op) + | Control op => RET (string_of_control op) + | Allocframe _ _ => RET (Str "Allocframe") + | Allocframe2 _ _ => RET (Str "Allocframe2") + | Freeframe _ _ => RET (Str "Freeframe") + | Freeframe2 _ _ => RET (Str "Freeframe2") + | Constant _ => RET (Str "Constant") + | Fail => RET (Str "Fail") + end. + +End SECT_BBLOCK_EQUIV. + +(** REWRITE RULES *) + +Definition is_constant (o: op): bool := + match o with + | Constant _ | OArithR _ | OArithRI32 _ _ | OArithRI64 _ _ | OArithRF32 _ _ | OArithRF64 _ _ => true + | _ => false + end. + +Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None. +Proof. + destruct o; simpl in * |- *; try congruence. + destruct ao; simpl in * |- *; try congruence; + destruct n; simpl in * |- *; try congruence; + unfold arith_eval; destruct ge; simpl in * |- *; try congruence. +Qed. + +Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t). + +Local Hint Resolve is_constant_correct: wlp. + +Lemma main_reduce_correct t: + WHEN main_reduce t ~> pt THEN Terms.match_pt t pt. +Proof. + wlp_simplify. +Qed. + +Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}. + +Definition bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock) : ?? bool := + if verb then + IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2) + else + IST.bblock_simu_test reduce (trans_block p1) (trans_block p2). + +Local Hint Resolve IST.bblock_simu_test_correct bblock_simu_reduce IST.verb_bblock_simu_test_correct: wlp. + +(** Main simulation (Impure) theorem *) +Theorem bblock_simu_test_correct verb p1 p2 : + WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn, Asmblockprops.bblock_simu ge fn p1 p2. +Proof. + wlp_simplify. +Qed. +Hint Resolve bblock_simu_test_correct: wlp. + +(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *) + +Import UnsafeImpure. + +Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock): bool := + match unsafe_coerce (bblock_simu_test verb p1 p2) with + | Some b => b + | None => false + end. + +Theorem pure_bblock_simu_test_correct verb p1 p2 ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2. +Proof. + unfold pure_bblock_simu_test. + destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate. + intros; subst. eapply bblock_simu_test_correct; eauto. + apply unsafe_coerce_not_really_correct; eauto. +Qed. + +Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock_simu_test true. + +Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2. +Proof. + eapply (pure_bblock_simu_test_correct true). +Qed. diff --git a/kvx/Asmblockgen.v b/kvx/Asmblockgen.v new file mode 100644 index 00000000..ab827b1c --- /dev/null +++ b/kvx/Asmblockgen.v @@ -0,0 +1,1211 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Translation from Machblock to KVX assembly language (Asmblock) + Inspired from the Mach->Asm pass of other backends, but adapted to the block structure *) + +Require Archi. +Require Import Coqlib Errors. +Require Import AST Integers Floats Memdata. +Require Import Op Locations Machblock Asmvliw Asmblock. +Require ExtValues. +Require Import Chunks. + +Local Open Scope string_scope. +Local Open Scope error_monad_scope. + +Import PArithCoercions. + +(** The code generation functions take advantage of several + characteristics of the [Mach] code generated by earlier passes of the + compiler, mostly that argument and result registers are of the correct + types. These properties are true by construction, but it's easier to + recheck them during code generation and fail if they do not hold. *) + +(** Extracting integer or float registers. *) + +Inductive immed32 : Type := + | Imm32_single (imm: int). + +Definition make_immed32 (val: int) := Imm32_single val. + +Inductive immed64 : Type := + | Imm64_single (imm: int64) +. + +Definition make_immed64 (val: int64) := Imm64_single val. + +Notation "a ::g b" := (cons (A:=instruction) a b) (at level 49, right associativity). +Notation "a ::i b" := (cons (A:=basic) a b) (at level 49, right associativity). +Notation "a ::b lb" := ((bblock_single_inst a) :: lb) (at level 49, right associativity). +Notation "a ++g b" := (app (A:=instruction) a b) (at level 49, right associativity). +Notation "a @@ b" := (app a b) (at level 49, right associativity). + +Definition loadimm32 (r: ireg) (n: int) := + match make_immed32 n with + | Imm32_single imm => Pmake r imm + end. + +Definition opimm32 (op: arith_name_rrr) + (opimm: arith_name_rri32) + (rd rs: ireg) (n: int) := + match make_immed32 n with + | Imm32_single imm => opimm rd rs imm + end. + +Definition addimm32 := opimm32 Paddw Paddiw. +Definition mulimm32 := opimm32 Pmulw Pmuliw. +Definition andimm32 := opimm32 Pandw Pandiw. +Definition nandimm32 := opimm32 Pnandw Pnandiw. +Definition orimm32 := opimm32 Porw Poriw. +Definition norimm32 := opimm32 Pnorw Pnoriw. +Definition xorimm32 := opimm32 Pxorw Pxoriw. +Definition nxorimm32 := opimm32 Pnxorw Pnxoriw. + +Definition loadimm64 (r: ireg) (n: int64) := + match make_immed64 n with + | Imm64_single imm => Pmakel r imm + end. + +Definition opimm64 (op: arith_name_rrr) + (opimm: arith_name_rri64) + (rd rs: ireg) (n: int64) := + match make_immed64 n with + | Imm64_single imm => opimm rd rs imm +end. + +Definition addimm64 := opimm64 Paddl Paddil. +Definition mulimm64 := opimm64 Pmull Pmulil. +Definition orimm64 := opimm64 Porl Poril. +Definition andimm64 := opimm64 Pandl Pandil. +Definition xorimm64 := opimm64 Pxorl Pxoril. +Definition norimm64 := opimm64 Pnorl Pnoril. +Definition nandimm64 := opimm64 Pnandl Pnandil. +Definition nxorimm64 := opimm64 Pnxorl Pnxoril. + +Definition addptrofs (rd rs: ireg) (n: ptrofs) := + if Ptrofs.eq_dec n Ptrofs.zero then + Pmv rd rs + else + addimm64 rd rs (Ptrofs.to_int64 n). + +(** Translation of conditional branches. *) + +Definition transl_comp + (c: comparison) (s: signedness) (r1 r2: ireg) (lbl: label) (k: code) : list instruction := + Pcompw (itest_for_cmp c s) RTMP r1 r2 ::g Pcb BTwnez RTMP lbl ::g k. + +Definition transl_compi + (c: comparison) (s: signedness) (r: ireg) (imm: int) (lbl: label) (k: code) : list instruction := + Pcompiw (itest_for_cmp c s) RTMP r imm ::g Pcb BTwnez RTMP lbl ::g k. + +Definition transl_compl + (c: comparison) (s: signedness) (r1 r2: ireg) (lbl: label) (k: code) : list instruction := + Pcompl (itest_for_cmp c s) RTMP r1 r2 ::g Pcb BTwnez RTMP lbl ::g k. + +Definition transl_compil + (c: comparison) (s: signedness) (r: ireg) (imm: int64) (lbl: label) (k: code) : list instruction := + Pcompil (itest_for_cmp c s) RTMP r imm ::g Pcb BTwnez RTMP lbl ::g k. + +Definition select_comp (n: int) (c: comparison) : option comparison := + if Int.eq n Int.zero then + match c with + | Ceq => Some Ceq + | Cne => Some Cne + | _ => None + end + else + None + . + +Definition transl_opt_compuimm + (n: int) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction := + if Int.eq n Int.zero then + match c with + | Ceq => Pcbu BTweqz r1 lbl ::g k + | Cne => Pcbu BTwnez r1 lbl ::g k + | _ => transl_compi c Unsigned r1 n lbl k + end + else + transl_compi c Unsigned r1 n lbl k + . + +Definition select_compl (n: int64) (c: comparison) : option comparison := + if Int64.eq n Int64.zero then + match c with + | Ceq => Some Ceq + | Cne => Some Cne + | _ => None + end + else + None + . + +Definition transl_opt_compluimm + (n: int64) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction := + if Int64.eq n Int64.zero then + match c with + | Ceq => Pcbu BTdeqz r1 lbl ::g k + | Cne => Pcbu BTdnez r1 lbl ::g k + | _ => transl_compil c Unsigned r1 n lbl k + end + else + transl_compil c Unsigned r1 n lbl k + . + +Definition transl_comp_float32 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) := + match ftest_for_cmp cmp with + | Normal ft => Pfcompw ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k + | Reversed ft => Pfcompw ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k + end. + +Definition transl_comp_notfloat32 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) := + match notftest_for_cmp cmp with + | Normal ft => Pfcompw ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k + | Reversed ft => Pfcompw ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k + end. + +Definition transl_comp_float64 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) := + match ftest_for_cmp cmp with + | Normal ft => Pfcompl ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k + | Reversed ft => Pfcompl ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k + end. + +Definition transl_comp_notfloat64 (cmp: comparison) (r1 r2: ireg) (lbl: label) (k: code) := + match notftest_for_cmp cmp with + | Normal ft => Pfcompl ft GPR32 r1 r2 ::g Pcb BTwnez GPR32 lbl ::g k + | Reversed ft => Pfcompl ft GPR32 r2 r1 ::g Pcb BTwnez GPR32 lbl ::g k + end. + +Definition transl_cbranch + (cond: condition) (args: list mreg) (lbl: label) (k: code) : res (list instruction ) := + match cond, args with + | Ccompuimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_opt_compuimm n c r1 lbl k) + | Ccomp c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp c Signed r1 r2 lbl k) + | Ccompu c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp c Unsigned r1 r2 lbl k) + | Ccompimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (if Int.eq n Int.zero then + Pcb (btest_for_cmpswz c) r1 lbl ::g k + else + transl_compi c Signed r1 n lbl k + ) + | Ccompluimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_opt_compluimm n c r1 lbl k) + | Ccompl c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_compl c Signed r1 r2 lbl k) + | Ccomplu c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_compl c Unsigned r1 r2 lbl k) + | Ccomplimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (if Int64.eq n Int64.zero then + Pcb (btest_for_cmpsdz c) r1 lbl ::g k + else + transl_compil c Signed r1 n lbl k + ) + | Ccompf c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp_float64 c r1 r2 lbl k) + | Cnotcompf c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp_notfloat64 c r1 r2 lbl k) + | Ccompfs c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp_float32 c r1 r2 lbl k) + | Cnotcompfs c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_comp_notfloat32 c r1 r2 lbl k) + | _, _ => + Error(msg "Asmgenblock.transl_cbranch") + end. + +(** Translation of a condition operator. The generated code sets the + [rd] target register to 0 or 1 depending on the truth value of the + condition. *) + +Definition transl_cond_int32s (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + Pcompw (itest_for_cmp cmp Signed) rd r1 r2 ::i k. + +Definition transl_cond_int32u (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + Pcompw (itest_for_cmp cmp Unsigned) rd r1 r2 ::i k. + +Definition transl_cond_int64s (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + Pcompl (itest_for_cmp cmp Signed) rd r1 r2 ::i k. + +Definition transl_cond_int64u (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + Pcompl (itest_for_cmp cmp Unsigned) rd r1 r2 ::i k. + +Definition transl_condimm_int32s (cmp: comparison) (rd r1: ireg) (n: int) (k: bcode) := + Pcompiw (itest_for_cmp cmp Signed) rd r1 n ::i k. + +Definition transl_condimm_int32u (cmp: comparison) (rd r1: ireg) (n: int) (k: bcode) := + Pcompiw (itest_for_cmp cmp Unsigned) rd r1 n ::i k. + +Definition transl_condimm_int64s (cmp: comparison) (rd r1: ireg) (n: int64) (k: bcode) := + Pcompil (itest_for_cmp cmp Signed) rd r1 n ::i k. + +Definition transl_condimm_int64u (cmp: comparison) (rd r1: ireg) (n: int64) (k: bcode) := + Pcompil (itest_for_cmp cmp Unsigned) rd r1 n ::i k. + + +Definition transl_cond_float32 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + match ftest_for_cmp cmp with + | Normal ft => Pfcompw ft rd r1 r2 ::i k + | Reversed ft => Pfcompw ft rd r2 r1 ::i k + end. + +Definition transl_cond_notfloat32 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + match notftest_for_cmp cmp with + | Normal ft => Pfcompw ft rd r1 r2 ::i k + | Reversed ft => Pfcompw ft rd r2 r1 ::i k + end. + +Definition transl_cond_float64 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + match ftest_for_cmp cmp with + | Normal ft => Pfcompl ft rd r1 r2 ::i k + | Reversed ft => Pfcompl ft rd r2 r1 ::i k + end. + +Definition transl_cond_notfloat64 (cmp: comparison) (rd r1 r2: ireg) (k: bcode) := + match notftest_for_cmp cmp with + | Normal ft => Pfcompl ft rd r1 r2 ::i k + | Reversed ft => Pfcompl ft rd r2 r1 ::i k + end. + + +(* CoMPare Unsigned Words to Zero *) +Definition btest_for_cmpuwz (c: comparison) := + match c with + | Cne => OK BTwnez + | Ceq => OK BTweqz + | Clt => Error (msg "btest_for_compuwz: Clt") (* TODO reachable *) + | Cge => Error (msg "btest_for_compuwz: Cge") + | Cle => OK BTweqz + | Cgt => OK BTwnez + end. + +(* CoMPare Unsigned Words to Zero *) +Definition btest_for_cmpudz (c: comparison) := + match c with + | Cne => OK BTdnez + | Ceq => OK BTdeqz + | Clt => Error (msg "btest_for_compudz: Clt") + | Cge => Error (msg "btest_for_compudz: Cge") + | Cle => OK BTdeqz + | Cgt => OK BTdnez + end. + +Definition conditional_move (cond0 : condition0) (rc rd rs : ireg) : + res basic := + if ireg_eq rd rs + then OK Pnop + else + (match cond0 with + | Ccomp0 cmp => + OK (PArith (Pcmove (btest_for_cmpswz cmp) rd rc rs)) + | Ccompu0 cmp => + do bt <- btest_for_cmpuwz cmp; + OK (PArith (Pcmoveu bt rd rc rs)) + | Ccompl0 cmp => + OK (PArith (Pcmove (btest_for_cmpsdz cmp) rd rc rs)) + | Ccomplu0 cmp => + do bt <- btest_for_cmpudz cmp; + OK (PArith (Pcmoveu bt rd rc rs)) + end). + +Definition conditional_move_imm32 (cond0 : condition0) (rc rd : ireg) (imm : int) : res basic := + match cond0 with + | Ccomp0 cmp => + OK (PArith (Pcmoveiw (btest_for_cmpswz cmp) rd rc imm)) + | Ccompu0 cmp => + do bt <- btest_for_cmpuwz cmp; + OK (PArith (Pcmoveuiw bt rd rc imm)) + | Ccompl0 cmp => + OK (PArith (Pcmoveiw (btest_for_cmpsdz cmp) rd rc imm)) + | Ccomplu0 cmp => + do bt <- btest_for_cmpudz cmp; + OK (PArith (Pcmoveuiw bt rd rc imm)) + end. + +Definition conditional_move_imm64 (cond0 : condition0) (rc rd : ireg) (imm : int64) : res basic := + match cond0 with + | Ccomp0 cmp => + OK (PArith (Pcmoveil (btest_for_cmpswz cmp) rd rc imm)) + | Ccompu0 cmp => + do bt <- btest_for_cmpuwz cmp; + OK (PArith (Pcmoveuil bt rd rc imm)) + | Ccompl0 cmp => + OK (PArith (Pcmoveil (btest_for_cmpsdz cmp) rd rc imm)) + | Ccomplu0 cmp => + do bt <- btest_for_cmpudz cmp; + OK (PArith (Pcmoveuil bt rd rc imm)) + end. + +Definition transl_cond_op + (cond: condition) (rd: ireg) (args: list mreg) (k: bcode) := + match cond, args with + | Ccomp c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_int32s c rd r1 r2 k) + | Ccompu c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_int32u c rd r1 r2 k) + | Ccompimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_condimm_int32s c rd r1 n k) + | Ccompuimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_condimm_int32u c rd r1 n k) + | Ccompl c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_int64s c rd r1 r2 k) + | Ccomplu c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_int64u c rd r1 r2 k) + | Ccomplimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_condimm_int64s c rd r1 n k) + | Ccompluimm c n, a1 :: nil => + do r1 <- ireg_of a1; + OK (transl_condimm_int64u c rd r1 n k) + | Ccompfs c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_float32 c rd r1 r2 k) + | Cnotcompfs c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_notfloat32 c rd r1 r2 k) + | Ccompf c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_float64 c rd r1 r2 k) + | Cnotcompf c, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (transl_cond_notfloat64 c rd r1 r2 k) + | _, _ => + Error(msg "Asmblockgen.transl_cond_op") +end. + +(** Translation of the arithmetic operation [r <- op(args)]. + The corresponding instructions are prepended to [k]. *) + +Definition transl_op + (op: operation) (args: list mreg) (res: mreg) (k: bcode) := + match op, args with + | Omove, a1 :: nil => + match preg_of res, preg_of a1 with + | IR r, IR a => OK (Pmv r a ::i k) + | _ , _ => Error(msg "Asmgenblock.transl_op: Omove") + end + | Ointconst n, nil => + do rd <- ireg_of res; + OK (loadimm32 rd n ::i k) + | Olongconst n, nil => + do rd <- ireg_of res; + OK (loadimm64 rd n ::i k) + | Ofloatconst f, nil => + do rd <- freg_of res; + OK (Pmakef rd f ::i k) + | Osingleconst f, nil => + do rd <- freg_of res; + OK (Pmakefs rd f ::i k) + | Oaddrsymbol s ofs, nil => + do rd <- ireg_of res; + OK (if Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero) + then Ploadsymbol s Ptrofs.zero rd ::i addptrofs rd rd ofs ::i k + else Ploadsymbol s ofs rd ::i k) + | Oaddrstack n, nil => + do rd <- ireg_of res; + OK (addptrofs rd SP n ::i k) + + | Ocast8signed, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pslliw rd rs (Int.repr 24) ::i Psraiw rd rd (Int.repr 24) ::i k) + | Ocast16signed, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pslliw rd rs (Int.repr 16) ::i Psraiw rd rd (Int.repr 16) ::i k) + | Oadd, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Paddw rd rs1 rs2 ::i k) + | Oaddimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (addimm32 rd rs n ::i k) + | Oaddx shift, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Paddxw shift rd rs1 rs2 ::i k) + | Oaddximm shift n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Paddxiw shift rd rs n ::i k) + | Oaddxl shift, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Paddxl shift rd rs1 rs2 ::i k) + | Oaddxlimm shift n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Paddxil shift rd rs n ::i k) + | Oneg, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pnegw rd rs ::i k) + | Osub, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psubw rd rs1 rs2 ::i k) + | Orevsubimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Prevsubiw rd rs n ::i k) + | Orevsubx shift, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Prevsubxw shift rd rs1 rs2 ::i k) + | Orevsubximm shift n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Prevsubxiw shift rd rs n ::i k) + | Omul, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pmulw rd rs1 rs2 ::i k) + | Omulimm n, a1 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; + OK (mulimm32 rd rs1 n ::i k) + | Omulhs, _ => Error(msg "Asmblockgen.transl_op: Omulhs") (* Normalement pas émis *) + | Omulhu, _ => Error(msg "Asmblockgen.transl_op: Omulhu") (* Normalement pas émis *) + | Oand, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pandw rd rs1 rs2 ::i k) + | Oandimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (andimm32 rd rs n ::i k) + | Onand, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnandw rd rs1 rs2 ::i k) + | Onandimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (nandimm32 rd rs n ::i k) + | Oor, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Porw rd rs1 rs2 ::i k) + | Onor, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnorw rd rs1 rs2 ::i k) + | Oorimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (orimm32 rd rs n ::i k) + | Onorimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (norimm32 rd rs n ::i k) + | Oxor, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pxorw rd rs1 rs2 ::i k) + | Oxorimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (xorimm32 rd rs n ::i k) + | Onxor, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnxorw rd rs1 rs2 ::i k) + | Onxorimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (nxorimm32 rd rs n ::i k) + | Onot, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (xorimm32 rd rs Int.mone ::i k) + | Oandn, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pandnw rd rs1 rs2 ::i k) + | Oandnimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pandniw rd rs n ::i k) + | Oorn, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pornw rd rs1 rs2 ::i k) + | Oornimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Porniw rd rs n ::i k) + | Oshl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psllw rd rs1 rs2 ::i k) + | Oshlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pslliw rd rs n ::i k) + | Oshr, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psraw rd rs1 rs2 ::i k) + | Oshrimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psraiw rd rs n ::i k) + | Oshru, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psrlw rd rs1 rs2 ::i k) + | Oshruimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psrliw rd rs n ::i k) + | Oshrximm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psrxiw rd rs n ::i k) + | Ororimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Proriw rd rs n ::i k) + | Omadd, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + do r3 <- ireg_of a3; + OK (Pmaddw r1 r2 r3 ::i k) + | Omaddimm n, a1 :: a2 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + OK (Pmaddiw r1 r2 n ::i k) + | Omsub, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + do r3 <- ireg_of a3; + OK (Pmsubw r1 r2 r3 ::i k) + (* [Omakelong], [Ohighlong] should not occur *) + | Olowlong, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pcvtl2w rd rs ::i k) + | Ocast32signed, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psxwd rd rs ::i k) + | Ocast32unsigned, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pzxwd rd rs ::i k) +(* assertion (ireg_eq rd rs); + OK (Pcvtw2l rd ::i Psllil rd rd (Int.repr 32) ::i Psrlil rd rd (Int.repr 32) ::i k) *) + | Oaddl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Paddl rd rs1 rs2 ::i k) + | Oaddlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (addimm64 rd rs n ::i k) + | Onegl, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pnegl rd rs ::i k) + | Osubl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psubl rd rs1 rs2 ::i k) + | Orevsubxl shift, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Prevsubxl shift rd rs1 rs2 ::i k) + | Orevsublimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Prevsubil rd rs n ::i k) + | Orevsubxlimm shift n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Prevsubxil shift rd rs n ::i k) + | Omull, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pmull rd rs1 rs2 ::i k) + | Omullimm n, a1 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; + OK (mulimm64 rd rs1 n ::i k) + | Omullhs, _ => Error (msg "Asmblockgen.transl_op: Omullhs") (* Normalement pas émis *) + | Omullhu, _ => Error (msg "Asmblockgen.transl_op: Omullhu") (* Normalement pas émis *) + | Odivl, _ => Error (msg "Asmblockgen.transl_op: Odivl") (* Géré par fonction externe *) + | Odivlu, _ => Error (msg "Asmblockgen.transl_op: Odivlu") (* Géré par fonction externe *) + | Omodl, _ => Error (msg "Asmblockgen.transl_op: Omodl") (* Géré par fonction externe *) + | Omodlu, _ => Error (msg "Asmblockgen.transl_op: Omodlu") (* Géré par fonction externe *) + | Onotl, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (xorimm64 rd rs Int64.mone ::i k) + | Oandl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pandl rd rs1 rs2 ::i k) + | Oandlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (andimm64 rd rs n ::i k) + | Onandl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnandl rd rs1 rs2 ::i k) + | Onandlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (nandimm64 rd rs n ::i k) + | Oorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Porl rd rs1 rs2 ::i k) + | Oorlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (orimm64 rd rs n ::i k) + | Onorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnorl rd rs1 rs2 ::i k) + | Onorlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (norimm64 rd rs n ::i k) + | Oxorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pxorl rd rs1 rs2 ::i k) + | Oxorlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (xorimm64 rd rs n ::i k) + | Onxorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pnxorl rd rs1 rs2 ::i k) + | Onxorlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (nxorimm64 rd rs n ::i k) + | Oandnl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pandnl rd rs1 rs2 ::i k) + | Oandnlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pandnil rd rs n ::i k) + | Oornl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pornl rd rs1 rs2 ::i k) + | Oornlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pornil rd rs n ::i k) + | Oshll, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pslll rd rs1 rs2 ::i k) + | Oshllimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psllil rd rs n ::i k) + | Oshrl, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psral rd rs1 rs2 ::i k) + | Oshrlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psrail rd rs n ::i k) + | Oshrlu, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Psrll rd rs1 rs2 ::i k) + | Oshrluimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psrlil rd rs n ::i k) + | Oshrxlimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Psrxil rd rs n ::i k) + | Omaddl, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + do r3 <- ireg_of a3; + OK (Pmaddl r1 r2 r3 ::i k) + | Omaddlimm n, a1 :: a2 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + OK (Pmaddil r1 r2 n ::i k) + | Omsubl, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do r1 <- ireg_of a1; + do r2 <- ireg_of a2; + do r3 <- ireg_of a3; + OK (Pmsubl r1 r2 r3 ::i k) + | Oabsf, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfabsd rd rs ::i k) + | Oabsfs, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfabsw rd rs ::i k) + | Oaddf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfaddd rd rs1 rs2 ::i k) + | Oaddfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfaddw rd rs1 rs2 ::i k) + | Osubf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfsbfd rd rs1 rs2 ::i k) + | Osubfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfsbfw rd rs1 rs2 ::i k) + | Omulf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmuld rd rs1 rs2 ::i k) + | Omulfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmulw rd rs1 rs2 ::i k) + | Ominf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmind rd rs1 rs2 ::i k) + | Ominfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfminw rd rs1 rs2 ::i k) + | Omaxf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmaxd rd rs1 rs2 ::i k) + | Omaxfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmaxw rd rs1 rs2 ::i k) + | Onegf, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfnegd rd rs ::i k) + | Onegfs, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfnegw rd rs ::i k) + | Oinvfs, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfinvw rd rs ::i k) + + | Ofmaddf, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmaddfl rs1 rs2 rs3 ::i k) + | Ofmaddfs, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmaddfw rs1 rs2 rs3 ::i k) + | Ofmsubf, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmsubfl rs1 rs2 rs3 ::i k) + | Ofmsubfs, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmsubfw rs1 rs2 rs3 ::i k) + + | Osingleofint, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pfloatwrnsz rd rs ::i k) + | Osingleofintu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pfloatuwrnsz rd rs ::i k) + | Ofloatoflong, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pfloatdrnsz rd rs ::i k) + | Ofloatoflongu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pfloatudrnsz rd rs ::i k) + | Ointofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedwrzz rd rs ::i k) + | Ointuofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeduwrzz rd rs ::i k) + | Olongoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeddrzz rd rs ::i k) + | Ointoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixeddrzz_i32 rd rs ::i k) + | Ointuoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedudrzz_i32 rd rs ::i k) + | Olonguoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfixedudrzz rd rs ::i k) + + | Ofloatofsingle, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfwidenlwd rd rs ::i k) + | Osingleoffloat, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfnarrowdw rd rs ::i k) + + + | Odivf , _ => Error (msg "Asmblockgen.transl_op: Odivf") + | Odivfs, _ => Error (msg "Asmblockgen.transl_op: Odivfs") + + (* We use the Splitlong instead for these four conversions *) + | Osingleoflong , _ => Error (msg "Asmblockgen.transl_op: Osingleoflong") + | Osingleoflongu , _ => Error (msg "Asmblockgen.transl_op: Osingleoflongu") + | Olongofsingle , _ => Error (msg "Asmblockgen.transl_op: Olongofsingle") + | Olonguofsingle , _ => Error (msg "Asmblockgen.transl_op: Olonguofsingle") + + + | Ocmp cmp, _ => + do rd <- ireg_of res; + transl_cond_op cmp rd args k + + + | Oextfz stop start, a1 :: nil => + assertion (ExtValues.is_bitfield stop start); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pextfz stop start rd rs ::i k) + + | Oextfs stop start, a1 :: nil => + assertion (ExtValues.is_bitfield stop start); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pextfs stop start rd rs ::i k) + + | Oextfzl stop start, a1 :: nil => + assertion (ExtValues.is_bitfieldl stop start); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pextfzl stop start rd rs ::i k) + + | Oextfsl stop start, a1 :: nil => + assertion (ExtValues.is_bitfieldl stop start); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pextfsl stop start rd rs ::i k) + + | Oinsf stop start, a0 :: a1 :: nil => + assertion (ExtValues.is_bitfield stop start); + assertion (mreg_eq a0 res); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pinsf stop start rd rs ::i k) + + | Oinsfl stop start, a0 :: a1 :: nil => + assertion (ExtValues.is_bitfieldl stop start); + assertion (mreg_eq a0 res); + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (Pinsfl stop start rd rs ::i k) + + | Osel cond0 ty, aT :: aF :: aC :: nil => + assertion (mreg_eq aT res); + do rT <- ireg_of aT; + do rF <- ireg_of aF; + do rC <- ireg_of aC; + do op <- conditional_move (negate_condition0 cond0) rC rT rF; + OK (op ::i k) + + | Oselimm cond0 imm, aT :: aC :: nil => + assertion (mreg_eq aT res); + do rT <- ireg_of aT; + do rC <- ireg_of aC; + do op <- conditional_move_imm32 (negate_condition0 cond0) rC rT imm; + OK (op ::i k) + + + | Osellimm cond0 imm, aT :: aC :: nil => + assertion (mreg_eq aT res); + do rT <- ireg_of aT; + do rC <- ireg_of aC; + do op <- conditional_move_imm64 (negate_condition0 cond0) rC rT imm; + OK (op ::i k) + + | _, _ => + Error(msg "Asmgenblock.transl_op") + end. + +(** Accessing data in the stack frame. *) + +Definition indexed_memory_access + (mk_instr: ireg -> offset -> basic) + (base: ireg) (ofs: ptrofs) := + match make_immed64 (Ptrofs.to_int64 ofs) with + | Imm64_single imm => + mk_instr base (Ptrofs.of_int64 imm) +end. + +Definition loadind (base: ireg) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) := + match ty, preg_of dst with + | Tint, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw rd) base ofs ::i k) + | Tlong, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld rd) base ofs ::i k) + | Tsingle, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfls rd) base ofs ::i k) + | Tfloat, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pfld rd) base ofs ::i k) + | Tany32, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Plw_a rd) base ofs ::i k) + | Tany64, IR rd => OK (indexed_memory_access (PLoadRRO TRAP Pld_a rd) base ofs ::i k) + | _, _ => Error (msg "Asmblockgen.loadind") + end. + +Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: bcode) := + match ty, preg_of src with + | Tint, IR rd => OK (indexed_memory_access (PStoreRRO Psw rd) base ofs ::i k) + | Tlong, IR rd => OK (indexed_memory_access (PStoreRRO Psd rd) base ofs ::i k) + | Tsingle, IR rd => OK (indexed_memory_access (PStoreRRO Pfss rd) base ofs ::i k) + | Tfloat, IR rd => OK (indexed_memory_access (PStoreRRO Pfsd rd) base ofs ::i k) + | Tany32, IR rd => OK (indexed_memory_access (PStoreRRO Psw_a rd) base ofs ::i k) + | Tany64, IR rd => OK (indexed_memory_access (PStoreRRO Psd_a rd) base ofs ::i k) + | _, _ => Error (msg "Asmblockgen.storeind") + end. + +Definition loadind_ptr (base: ireg) (ofs: ptrofs) (dst: ireg) := + indexed_memory_access (PLoadRRO TRAP Pld dst) base ofs. + +Definition storeind_ptr (src: ireg) (base: ireg) (ofs: ptrofs) := + indexed_memory_access (PStoreRRO Psd src) base ofs. + +(** Translation of memory accesses: loads, and stores. *) + +Definition transl_memory_access2 + (mk_instr: ireg -> ireg -> basic) + (addr: addressing) (args: list mreg) (k: bcode) : res bcode := + match addr, args with + | Aindexed2, a1 :: a2 :: nil => + do rs1 <- ireg_of a1; + do rs2 <- ireg_of a2; + OK (mk_instr rs1 rs2 ::i k) + | _, _ => Error (msg "Asmblockgen.transl_memory_access2") + end. + +Definition transl_memory_access2XS + (chunk: memory_chunk) + (mk_instr: ireg -> ireg -> basic) + scale (args: list mreg) (k: bcode) : res bcode := + match args with + | (a1 :: a2 :: nil) => + assertion (Z.eqb (zscale_of_chunk chunk) scale); + do rs1 <- ireg_of a1; + do rs2 <- ireg_of a2; + OK (mk_instr rs1 rs2 ::i k) + | _ => Error (msg "Asmblockgen.transl_memory_access2XS") + end. + +Definition transl_memory_access + (mk_instr: ireg -> offset -> basic) + (addr: addressing) (args: list mreg) (k: bcode) : res bcode := + match addr, args with + | Aindexed ofs, a1 :: nil => + do rs <- ireg_of a1; + OK (indexed_memory_access mk_instr rs ofs ::i k) + | Aglobal id ofs, nil => + OK (Ploadsymbol id ofs RTMP ::i (mk_instr RTMP Ptrofs.zero ::i k)) + | Ainstack ofs, nil => + OK (indexed_memory_access mk_instr SP ofs ::i k) + | _, _ => + Error(msg "Asmblockgen.transl_memory_access") + end. + +Definition chunk2load (chunk: memory_chunk) := + match chunk with + | Mint8signed => Plb + | Mint8unsigned => Plbu + | Mint16signed => Plh + | Mint16unsigned => Plhu + | Mint32 => Plw + | Mint64 => Pld + | Mfloat32 => Pfls + | Mfloat64 => Pfld + | Many32 => Plw_a + | Many64 => Pld_a + end. + +Definition transl_load_rro (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (dst: mreg) (k: bcode) : res bcode := + do r <- ireg_of dst; + transl_memory_access (PLoadRRO trap (chunk2load chunk) r) addr args k. + +Definition transl_load_rrr (trap: trapping_mode) (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (dst: mreg) (k: bcode) : res bcode := + do r <- ireg_of dst; + transl_memory_access2 (PLoadRRR trap (chunk2load chunk) r) addr args k. + +Definition transl_load_rrrXS (trap: trapping_mode) (chunk: memory_chunk) (scale : Z) + (args: list mreg) (dst: mreg) (k: bcode) : res bcode := + do r <- ireg_of dst; + transl_memory_access2XS chunk (PLoadRRRXS trap (chunk2load chunk) r) scale args k. + +Definition transl_load (trap : trapping_mode) + (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (dst: mreg) (k: bcode) : res bcode := + match addr with + | Aindexed2XS scale => transl_load_rrrXS trap chunk scale args dst k + | Aindexed2 => transl_load_rrr trap chunk addr args dst k + | _ => transl_load_rro trap chunk addr args dst k + end. + +Definition chunk2store (chunk: memory_chunk) := + match chunk with + | Mint8signed | Mint8unsigned => Psb + | Mint16signed | Mint16unsigned => Psh + | Mint32 => Psw + | Mint64 => Psd + | Mfloat32 => Pfss + | Mfloat64 => Pfsd + | Many32 => Psw_a + | Many64 => Psd_a + end. + +Definition transl_store_rro (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (src: mreg) (k: bcode) : res bcode := + do r <- ireg_of src; + transl_memory_access (PStoreRRO (chunk2store chunk) r) addr args k. + +Definition transl_store_rrr (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (src: mreg) (k: bcode) : res bcode := + do r <- ireg_of src; + transl_memory_access2 (PStoreRRR (chunk2store chunk) r) addr args k. + +Definition transl_store_rrrxs (chunk: memory_chunk) (scale: Z) + (args: list mreg) (src: mreg) (k: bcode) : res bcode := + do r <- ireg_of src; + transl_memory_access2XS chunk (PStoreRRRXS (chunk2store chunk) r) scale args k. + +Definition transl_store (chunk: memory_chunk) (addr: addressing) + (args: list mreg) (src: mreg) (k: bcode) : res bcode := + match addr with + | Aindexed2 => transl_store_rrr chunk addr args src k + | Aindexed2XS scale => transl_store_rrrxs chunk scale args src k + | _ => transl_store_rro chunk addr args src k + end. + +(** Function epilogue *) + +Definition make_epilogue (f: Machblock.function) (k: code) := + (loadind_ptr SP f.(fn_retaddr_ofs) GPRA) + ::g Pset RA GPRA ::g Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) ::g k. + +(** Translation of a Machblock instruction. *) + +Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst) + (ep: bool) (k: bcode) := + match i with + | MBgetstack ofs ty dst => + loadind SP ofs ty dst k + | MBsetstack src ofs ty => + storeind src SP ofs ty k + | MBgetparam ofs ty dst => + (* load via the frame pointer if it is valid *) + do c <- loadind FP ofs ty dst k; + OK (if ep then c + else (loadind_ptr SP f.(fn_link_ofs) FP) ::i c) + | MBop op args res => + transl_op op args res k + | MBload trap chunk addr args dst => + transl_load trap chunk addr args dst k + | MBstore chunk addr args src => + transl_store chunk addr args src k + end. + +Definition transl_instr_control (f: Machblock.function) (oi: option Machblock.control_flow_inst) + : res code := + match oi with + | None => OK nil + | Some i => + match i with + | MBcall sig (inl r) => + do r1 <- ireg_of r; OK ((Picall r1) ::g nil) + | MBcall sig (inr symb) => + OK ((Pcall symb) ::g nil) + | MBtailcall sig (inr symb) => + OK (make_epilogue f ((Pgoto symb) ::g nil)) + | MBtailcall sig (inl r) => + do r1 <- ireg_of r; OK (make_epilogue f ((Pigoto r1) ::g nil)) + | MBbuiltin ef args res => + OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) ::g nil) + | MBgoto lbl => + OK (Pj_l lbl ::g nil) + | MBcond cond args lbl => + transl_cbranch cond args lbl nil + | MBreturn => + OK (make_epilogue f (Pret ::g nil)) + | MBjumptable arg tbl => + do r <- ireg_of arg; + OK (Pjumptable r tbl ::g nil) + end + end. + +(** Translation of a code sequence *) + +Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool := + match i with + | MBgetstack ofs ty dst => before && negb (mreg_eq dst MFP) + | MBsetstack src ofs ty => before + | MBgetparam ofs ty dst => negb (mreg_eq dst MFP) + | MBop op args res => before && negb (mreg_eq res MFP) + | MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst MFP) + | MBstore chunk addr args res => before + end. + +(** This is the naive definition, which is not tail-recursive unlike the other backends *) + +Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) := + match il with + | nil => OK nil + | i1 :: il' => + do k <- transl_basic_code f il' (fp_is_parent it1p i1); + transl_instr_basic f i1 it1p k + end. + +(* (** This is an equivalent definition in continuation-passing style + that runs in constant stack space. *) + +Fixpoint transl_basic_rec (f: Machblock.function) (il: list Machblock.basic_inst) + (it1p: bool) (k: bcode -> res bcode) := + match il with + | nil => k nil + | i1 :: il' => + transl_basic_rec f il' (fp_is_parent it1p i1) + (fun c1 => do c2 <- transl_instr_basic f i1 it1p c1; k c2) + end. + +Definition transl_basic_code' (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) := + transl_basic_rec f il it1p (fun c => OK c). *) + +(** Translation of a whole function. Note that we must check + that the generated code contains less than [2^64] instructions, + otherwise the offset part of the [PC] code pointer could wrap + around, leading to incorrect executions. *) + +(* gen_bblocks can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *) +Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instruction) := + match (extract_ctl ctl) with + | None => + match c with + | nil => {| header := hd; body := Pnop::nil; exit := None |} :: nil + | i::c => {| header := hd; body := ((i::c) ++ extract_basic ctl); exit := None |} :: nil + end + | Some (PExpand (Pbuiltin ef args res)) => + match c with + | nil => {| header := hd; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil + | _ => {| header := hd; body := c; exit := None |} + :: {| header := nil; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil + end + | Some ex => {| header := hd; body := (c ++ extract_basic ctl); exit := Some ex |} :: nil + end +. +Next Obligation. + apply wf_bblock_refl. constructor. + left. auto. + discriminate. +Qed. Next Obligation. + apply wf_bblock_refl. constructor. + right. discriminate. + unfold builtin_alone. intros. pose (H ef args res). rewrite H0 in n. contradiction. +Qed. + +Definition transl_block (f: Machblock.function) (fb: Machblock.bblock) (ep: bool) : res (list bblock) := + do c <- transl_basic_code f fb.(Machblock.body) ep; + do ctl <- transl_instr_control f fb.(Machblock.exit); + OK (gen_bblocks fb.(Machblock.header) c ctl) +. + +Fixpoint transl_blocks (f: Machblock.function) (lmb: list Machblock.bblock) (ep: bool) := + match lmb with + | nil => OK nil + | mb :: lmb => + do lb <- transl_block f mb (if Machblock.header mb then ep else false); + do lb' <- transl_blocks f lmb false; + OK (lb @@ lb') + end +. + +Program Definition make_prologue (f: Machblock.function) lb := + ({| header := nil; body := Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::i + Pget GPRA RA ::i + storeind_ptr GPRA SP f.(fn_retaddr_ofs) ::i nil; + exit := None |} :: lb). + +Definition transl_function (f: Machblock.function) := + do lb <- transl_blocks f f.(Machblock.fn_code) true; + OK (mkfunction f.(Machblock.fn_sig) + (make_prologue f lb)). + +Definition transf_function (f: Machblock.function) : res Asmvliw.function := + do tf <- transl_function f; + if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks)) + then Error (msg "code size exceeded") + else OK tf. + +Definition transf_fundef (f: Machblock.fundef) : res Asmvliw.fundef := + transf_partial_fundef transf_function f. + +Definition transf_program (p: Machblock.program) : res Asmvliw.program := + transform_partial_program transf_fundef p. diff --git a/kvx/Asmblockgenproof.v b/kvx/Asmblockgenproof.v new file mode 100644 index 00000000..6e3029d8 --- /dev/null +++ b/kvx/Asmblockgenproof.v @@ -0,0 +1,1808 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Correctness proof for kvx/Asmblock generation: main proof. *) + +Require Import Coqlib Errors. +Require Import Integers Floats AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations Machblock Conventions Asmblock. +Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops. +Require Import Axioms. +Require Import Lia. + +Module MB := Machblock. +Module AB := Asmvliw. + +Definition match_prog (p: Machblock.program) (tp: Asmvliw.program) := + match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall p tp, transf_program p = OK tp -> match_prog p tp. +Proof. + intros. eapply match_transform_partial_program; eauto. +Qed. + +Section PRESERVATION. + +Variable prog: Machblock.program. +Variable tprog: Asmvliw.program. +Hypothesis TRANSF: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof (Genv.find_symbol_match TRANSF). + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof (Genv.senv_match TRANSF). + +Lemma functions_translated: + forall b f, + Genv.find_funct_ptr ge b = Some f -> + exists tf, + Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_ptr_transf_partial TRANSF). + +Lemma functions_transl: + forall fb f tf, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transf_function f = OK tf -> + Genv.find_funct_ptr tge fb = Some (Internal tf). +Proof. + intros. exploit functions_translated; eauto. intros [tf' [A B]]. + monadInv B. rewrite H0 in EQ; inv EQ; auto. +Qed. + +Lemma transf_function_no_overflow: + forall f tf, + transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned. +Proof. + intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. + lia. +Qed. + +Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *) + +Lemma gen_bblocks_label: + forall hd bdy ex tbb tc, + gen_bblocks hd bdy ex = tbb::tc -> + header tbb = hd. +Proof. + intros until tc. intros GENB. unfold gen_bblocks in GENB. + destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy. + all: inv GENB; simpl; auto. +Qed. + +Lemma gen_bblocks_label2: + forall hd bdy ex tbb1 tbb2, + gen_bblocks hd bdy ex = tbb1::tbb2::nil -> + header tbb2 = nil. +Proof. + intros until tbb2. intros GENB. unfold gen_bblocks in GENB. + destruct (extract_ctl ex); try destruct c; try destruct i; try destruct bdy. + all: inv GENB; simpl; auto. +Qed. + +Remark in_dec_transl: + forall lbl hd, + (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false). +Proof. + intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto. +Qed. + +Lemma transl_is_label: + forall lbl bb tbb f ep tc, + transl_block f bb ep = OK (tbb::tc) -> + is_label lbl tbb = MB.is_label lbl bb. +Proof. + intros until tc. intros TLB. + destruct tbb as [thd tbdy tex]; simpl in *. + monadInv TLB. + unfold is_label. simpl. + apply gen_bblocks_label in H0. simpl in H0. subst. + rewrite in_dec_transl. auto. +Qed. + +Lemma transl_is_label_false2: + forall lbl bb f ep tbb1 tbb2, + transl_block f bb ep = OK (tbb1::tbb2::nil) -> + is_label lbl tbb2 = false. +Proof. + intros until tbb2. intros TLB. + destruct tbb2 as [thd tbdy tex]; simpl in *. + monadInv TLB. apply gen_bblocks_label2 in H0. simpl in H0. subst. + apply is_label_correct_false. simpl. auto. +Qed. + +Lemma transl_is_label2: + forall f bb ep tbb1 tbb2 lbl, + transl_block f bb ep = OK (tbb1::tbb2::nil) -> + is_label lbl tbb1 = MB.is_label lbl bb + /\ is_label lbl tbb2 = false. +Proof. + intros. split. eapply transl_is_label; eauto. eapply transl_is_label_false2; eauto. +Qed. + +Lemma transl_block_nonil: + forall f c ep tc, + transl_block f c ep = OK tc -> + tc <> nil. +Proof. + intros. monadInv H. unfold gen_bblocks. + destruct (extract_ctl x0); try destruct c0; try destruct x; try destruct i. + all: discriminate. +Qed. + +Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc, + ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc). +Proof. + intros. intro. monadInv H. + unfold gen_bblocks in H0. + destruct (extract_ctl x0); try destruct x; try destruct c; try destruct i. + all: discriminate. +Qed. + +Lemma find_label_transl_false: + forall x f lbl bb ep x', + transl_block f bb ep = OK x -> + MB.is_label lbl bb = false -> + find_label lbl (x++x') = find_label lbl x'. +Proof. + intros until x'. intros TLB MBis; simpl; auto. + destruct x as [|x0 x1]; simpl; auto. + destruct x1 as [|x1 x2]; simpl; auto. + - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto. + - destruct x2 as [|x2 x3]; simpl; auto. + + erewrite <- transl_is_label in MBis; eauto. rewrite MBis. + erewrite transl_is_label_false2; eauto. + + apply transl_block_limit in TLB. destruct TLB. +Qed. + +Lemma transl_blocks_label: + forall lbl f c tc ep, + transl_blocks f c ep = OK tc -> + match MB.find_label lbl c with + | None => find_label lbl tc = None + | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc' + end. +Proof. + induction c; simpl; intros. + inv H. auto. + monadInv H. + destruct (MB.is_label lbl a) eqn:MBis. + - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. } + simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis. + rewrite ABis. + eexists. eexists. split; eauto. simpl transl_blocks. + assert (MB.header a <> nil). + { apply MB.is_label_correct_true in MBis. + destruct (MB.header a). contradiction. discriminate. } + destruct (MB.header a); try contradiction. + rewrite EQ. simpl. rewrite EQ1. simpl. auto. + - apply IHc in EQ1. destruct (MB.find_label lbl c). + + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto. + erewrite find_label_transl_false; eauto. + + erewrite find_label_transl_false; eauto. +Qed. + +Lemma find_label_nil: + forall bb lbl c, + header bb = nil -> + find_label lbl (bb::c) = find_label lbl c. +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. subst. + assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false). + { erewrite <- is_label_correct_false. simpl. auto. } + rewrite H. auto. +Qed. + +Theorem transl_find_label: + forall lbl f tf, + transf_function f = OK tf -> + match MB.find_label lbl f.(MB.fn_code) with + | None => find_label lbl tf.(fn_blocks) = None + | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc + end. +Proof. + intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g. + monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto. + eapply transl_blocks_label; eauto. +Qed. + +End TRANSL_LABEL. + +(** A valid branch in a piece of Machblock code translates to a valid ``go to'' + transition in the generated Asmblock code. *) + +Lemma find_label_goto_label: + forall f tf lbl rs m c' b ofs, + Genv.find_funct_ptr ge b = Some (Internal f) -> + transf_function f = OK tf -> + rs PC = Vptr b ofs -> + MB.find_label lbl f.(MB.fn_code) = Some c' -> + exists tc', exists rs', + goto_label tf lbl rs m = Next rs' m + /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2. + intros (tc & A & B). + exploit label_pos_code_tail; eauto. instantiate (1 := 0). + intros [pos' [P [Q R]]]. + exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))). + split. unfold goto_label. unfold par_goto_label. rewrite P. rewrite H1. auto. + split. rewrite Pregmap.gss. constructor; auto. + rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q. + auto. lia. + generalize (transf_function_no_overflow _ _ H0). lia. + intros. apply Pregmap.gso; auto. +Qed. + +(** Existence of return addresses *) + +Lemma return_address_exists: + forall b f c, is_tail (b :: c) f.(MB.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros. eapply Asmblockgenproof0.return_address_exists; eauto. + +- intros. monadInv H0. + destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl. + exists x; exists true; split; auto. + repeat constructor. +- exact transf_function_no_overflow. +Qed. + +(** * Proof of semantic preservation *) + +(** Semantic preservation is proved using a complex simulation diagram + of the following form. +<< + MB.step + ----------------------------------------> + header body exit + st1 -----> st2 -----> st3 ------------------> st4 + | | | | + | (A) | (B) | (C) | + match_codestate | | | | + | header | body1 | body2 | match_states + cs1 -----> cs2 -----> cs3 ------> cs4 | + | / \ exit | + match_asmstate | --------------- --->--- | + | / match_asmstate \ | + st'1 ---------------------------------------> st'2 + AB.step * +>> + The invariant between each MB.step/AB.step is the [match_states] predicate below. + However, we also need to introduce an intermediary state [Codestate] which allows + us to reason on a finer grain, executing header, body and exit separately. + + This [Codestate] consists in a state like [Asmblock.State], except that the + code is directly stored in the state, much like [Machblock.State]. It also features + additional useful elements to keep track of while executing a bblock. +*) + +Remark preg_of_not_FP: forall r, negb (mreg_eq r MFP) = true -> IR FP <> preg_of r. +Proof. + intros. change (IR FP) with (preg_of MFP). red; intros. + exploit preg_of_injective; eauto. intros; subst r; discriminate. +Qed. + +Inductive match_states: Machblock.state -> Asmvliw.state -> Prop := + | match_states_intro: + forall s fb sp c ep ms m m' rs f tf tc + (STACKS: match_stack ge s) + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (MEXT: Mem.extends m m') + (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) + (AG: agree ms sp rs) + (DXP: ep = true -> rs#FP = parent_sp s), + match_states (Machblock.State s fb sp c ms m) + (Asmvliw.State rs m') + | match_states_call: + forall s fb ms m m' rs + (STACKS: match_stack ge s) + (MEXT: Mem.extends m m') + (AG: agree ms (parent_sp s) rs) + (ATPC: rs PC = Vptr fb Ptrofs.zero) + (ATLR: rs RA = parent_ra s), + match_states (Machblock.Callstate s fb ms m) + (Asmvliw.State rs m') + | match_states_return: + forall s ms m m' rs + (STACKS: match_stack ge s) + (MEXT: Mem.extends m m') + (AG: agree ms (parent_sp s) rs) + (ATPC: rs PC = parent_ra s), + match_states (Machblock.Returnstate s ms m) + (Asmvliw.State rs m'). + +Record codestate := + Codestate { pstate: state; (**r projection to Asmblock.state *) + pheader: list label; + pbody1: list basic; (**r list of basic instructions coming from the translation of the Machblock body *) + pbody2: list basic; (**r list of basic instructions coming from the translation of the Machblock exit *) + pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *) + ep: bool; (**r reflects the [ep] variable used in the translation *) + rem: list AB.bblock; (**r remaining bblocks to execute *) + cur: bblock (**r current bblock to execute - to keep track of its size when incrementing PC *) + }. + +(* The part that deals with Machblock <-> Codestate agreement + * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *) +Inductive match_codestate fb: Machblock.state -> codestate -> Prop := + | match_codestate_intro: + forall s sp ms m rs0 m0 f tc ep c bb tbb tbc tbi + (STACKS: match_stack ge s) + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (MEXT: Mem.extends m m0) + (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc) + (TIC: transl_instr_control f (MB.exit bb) = OK tbi) + (TBLS: transl_blocks f c false = OK tc) + (AG: agree ms sp rs0) + (DXP: (if MB.header bb then ep else false) = true -> rs0#FP = parent_sp s) + , + match_codestate fb (Machblock.State s fb sp (bb::c) ms m) + {| pstate := (Asmvliw.State rs0 m0); + pheader := (MB.header bb); + pbody1 := tbc; + pbody2 := extract_basic tbi; + pctl := extract_ctl tbi; + ep := ep; + rem := tc; + cur := tbb + |} +. + +(* The part ensuring that the code in Codestate actually resides at [rs PC] *) +Inductive match_asmstate fb: codestate -> Asmvliw.state -> Prop := + | match_asmstate_some: + forall rs f tf tc m tbb ofs ep tbdy tex lhd + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (TRANSF: transf_function f = OK tf) + (PCeq: rs PC = Vptr fb ofs) + (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc)) + , + match_asmstate fb + {| pstate := (Asmvliw.State rs m); + pheader := lhd; + pbody1 := tbdy; + pbody2 := extract_basic tex; + pctl := extract_ctl tex; + ep := ep; + rem := tc; + cur := tbb |} + (Asmvliw.State rs m) +. + +(* Useful for dealing with the many cases in some proofs *) +Ltac exploreInst := + repeat match goal with + | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var + | [ H : OK _ = OK _ |- _ ] => monadInv H + | [ |- context[if ?b then _ else _] ] => destruct b + | [ |- context[match ?m with | _ => _ end] ] => destruct m + | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m + | [ H : bind _ _ = OK _ |- _ ] => monadInv H + | [ H : Error _ = OK _ |- _ ] => inversion H + end. + +(** Some translation properties *) + +Lemma transl_blocks_nonil: + forall f bb c tc ep, + transl_blocks f (bb::c) ep = OK tc -> + exists tbb tc', tc = tbb :: tc'. +Proof. + intros until ep0. intros TLBS. monadInv TLBS. monadInv EQ. unfold gen_bblocks. + destruct (extract_ctl x2). + - destruct c0; destruct i; simpl; eauto. destruct x1; simpl; eauto. + - destruct x1; simpl; eauto. +Qed. + +Lemma no_builtin_preserved: + forall f ex x2, + (forall ef args res, ex <> Some (MBbuiltin ef args res)) -> + transl_instr_control f ex = OK x2 -> + (exists i, extract_ctl x2 = Some (PCtlFlow i)) + \/ extract_ctl x2 = None. +Proof. + intros until x2. intros Hbuiltin TIC. + destruct ex. + - destruct c. + (* MBcall *) + + simpl in TIC. exploreInst; simpl; eauto. + (* MBtailcall *) + + simpl in TIC. exploreInst; simpl; eauto. + (* MBbuiltin *) + + assert (H: Some (MBbuiltin e l b) <> Some (MBbuiltin e l b)). + apply Hbuiltin. contradict H; auto. + (* MBgoto *) + + simpl in TIC. exploreInst; simpl; eauto. + (* MBcond *) + + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; simpl; eauto. + * unfold transl_opt_compuimm. exploreInst; simpl; eauto. + * unfold transl_opt_compluimm. exploreInst; simpl; eauto. + * unfold transl_comp_float64. exploreInst; simpl; eauto. + * unfold transl_comp_notfloat64. exploreInst; simpl; eauto. + * unfold transl_comp_float32. exploreInst; simpl; eauto. + * unfold transl_comp_notfloat32. exploreInst; simpl; eauto. + (* MBjumptable *) + + simpl in TIC. exploreInst; simpl; eauto. + (* MBreturn *) + + simpl in TIC. monadInv TIC. simpl. eauto. + - monadInv TIC. simpl; auto. +Qed. + +Lemma transl_blocks_distrib: + forall c f bb tbb tc ep, + transl_blocks f (bb::c) ep = OK (tbb::tc) + -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) + -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil) + /\ transl_blocks f c false = OK tc. +Proof. + intros until ep0. intros TLBS Hbuiltin. + destruct bb as [hd bdy ex]. + monadInv TLBS. monadInv EQ. + exploit no_builtin_preserved; eauto. intros Hectl. destruct Hectl. + - destruct H as [i Hectl]. + unfold gen_bblocks in H0. rewrite Hectl in H0. inv H0. + simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl. + unfold gen_bblocks. rewrite Hectl. auto. + - unfold gen_bblocks in H0. rewrite H in H0. + destruct x1 as [|bi x1]. + + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl. + unfold gen_bblocks. rewrite H. auto. + + simpl in H0. inv H0. simpl in *. unfold transl_block; simpl. rewrite EQ0. rewrite EQ. simpl. + unfold gen_bblocks. rewrite H. auto. +Qed. + +Lemma gen_bblocks_nobuiltin: + forall thd tbdy tex tbb, + (tbdy <> nil \/ extract_ctl tex <> None) -> + (forall ef args res, extract_ctl tex <> Some (PExpand (Pbuiltin ef args res))) -> + gen_bblocks thd tbdy tex = tbb :: nil -> + header tbb = thd + /\ body tbb = tbdy ++ extract_basic tex + /\ exit tbb = extract_ctl tex. +Proof. + intros until tbb. intros Hnonil Hnobuiltin GENB. unfold gen_bblocks in GENB. + destruct (extract_ctl tex) eqn:ECTL. + - destruct c. + + destruct i; try (inv GENB; simpl; auto; fail). + assert False. eapply Hnobuiltin. eauto. destruct H. + + inv GENB. simpl. auto. + - inversion Hnonil. + + destruct tbdy as [|bi tbdy]; try (contradict H; simpl; auto; fail). inv GENB. auto. + + contradict H; simpl; auto. +Qed. + +Lemma transl_instr_basic_nonil: + forall k f bi ep x, + transl_instr_basic f bi ep k = OK x -> + x <> nil. +Proof. + intros until x. intros TIB. + destruct bi. + - simpl in TIB. unfold loadind in TIB. exploreInst; try discriminate. + - simpl in TIB. unfold storeind in TIB. exploreInst; try discriminate. + - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate. + - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate. + unfold transl_cond_op in EQ0. exploreInst; try discriminate. + unfold transl_cond_float64. exploreInst; try discriminate. + unfold transl_cond_notfloat64. exploreInst; try discriminate. + unfold transl_cond_float32. exploreInst; try discriminate. + unfold transl_cond_notfloat32. exploreInst; try discriminate. + - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate. + all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate. + - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate. + all: monadInv TIB; unfold transl_memory_access in EQ0; unfold transl_memory_access2 in EQ0; unfold transl_memory_access2XS in EQ0; exploreInst; try discriminate. +Qed. + +Lemma transl_basic_code_nonil: + forall bdy f x ep, + bdy <> nil -> + transl_basic_code f bdy ep = OK x -> + x <> nil. +Proof. + induction bdy as [|bi bdy]. + intros. contradict H0; auto. + destruct bdy as [|bi2 bdy]. + - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto. + - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'. + monadInv TBC. + assert (x0 <> nil). + eapply IHbdy; eauto. subst bdy'. discriminate. + eapply transl_instr_basic_nonil; eauto. +Qed. + +Lemma transl_instr_control_nonil: + forall ex f x, + ex <> None -> + transl_instr_control f ex = OK x -> + extract_ctl x <> None. +Proof. + intros ex f x Hnonil TIC. + destruct ex as [ex|]. + - clear Hnonil. destruct ex. + all: try (simpl in TIC; exploreInst; discriminate). + + simpl in TIC. unfold transl_cbranch in TIC. exploreInst; try discriminate. + * unfold transl_opt_compuimm. exploreInst; try discriminate. + * unfold transl_opt_compluimm. exploreInst; try discriminate. + * unfold transl_comp_float64. exploreInst; try discriminate. + * unfold transl_comp_notfloat64. exploreInst; try discriminate. + * unfold transl_comp_float32. exploreInst; try discriminate. + * unfold transl_comp_notfloat32. exploreInst; try discriminate. + - contradict Hnonil; auto. +Qed. + +Lemma transl_instr_control_nobuiltin: + forall f ex x, + (forall ef args res, ex <> Some (MBbuiltin ef args res)) -> + transl_instr_control f ex = OK x -> + (forall ef args res, extract_ctl x <> Some (PExpand (Pbuiltin ef args res))). +Proof. + intros until x. intros Hnobuiltin TIC. intros until res. + unfold transl_instr_control in TIC. exploreInst. + all: try discriminate. + - assert False. eapply Hnobuiltin; eauto. destruct H. + - unfold transl_cbranch in TIC. exploreInst. + all: try discriminate. + * unfold transl_opt_compuimm. exploreInst. all: try discriminate. + * unfold transl_opt_compluimm. exploreInst. all: try discriminate. + * unfold transl_comp_float64. exploreInst; try discriminate. + * unfold transl_comp_notfloat64. exploreInst; try discriminate. + * unfold transl_comp_float32. exploreInst; try discriminate. + * unfold transl_comp_notfloat32. exploreInst; try discriminate. +Qed. + +(* Proving that one can decompose a [match_state] relation into a [match_codestate] + and a [match_asmstate], along with some helpful properties tying both relations together *) + +Theorem match_state_codestate: + forall mbs abs s fb sp bb c ms m, + (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> + (MB.body bb <> nil \/ MB.exit bb <> None) -> + mbs = (Machblock.State s fb sp (bb::c) ms m) -> + match_states mbs abs -> + exists cs fb f tbb tc ep, + match_codestate fb mbs cs /\ match_asmstate fb cs abs + /\ Genv.find_funct_ptr ge fb = Some (Internal f) + /\ transl_blocks f (bb::c) ep = OK (tbb::tc) + /\ body tbb = pbody1 cs ++ pbody2 cs + /\ exit tbb = pctl cs + /\ cur cs = tbb /\ rem cs = tc + /\ pstate cs = abs. +Proof. + intros until m. intros Hnobuiltin Hnotempty Hmbs MS. subst. inv MS. + inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. + exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2. + monadInv TLB. exploit gen_bblocks_nobuiltin; eauto. + { inversion Hnotempty. + - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail). + left. eapply transl_basic_code_nonil; eauto. + - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail). + right. eapply transl_instr_control_nonil; eauto. } + eapply transl_instr_control_nobuiltin; eauto. + intros (Hth & Htbdy & Htexit). + exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x; pbody2 := extract_basic x0; + pctl := extract_ctl x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0. + repeat split. 1-2: econstructor; eauto. + { destruct (MB.header bb). eauto. discriminate. } eauto. + unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl. + rewrite TLBS. simpl. rewrite H2. + all: simpl; auto. +Qed. + +Definition mb_remove_body (bb: MB.bblock) := + {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}. + +Lemma exec_straight_pnil: + forall c rs1 m1 rs2 m2, + exec_straight tge c rs1 m1 (Pnop ::g nil) rs2 m2 -> + exec_straight tge c rs1 m1 nil rs2 m2. +Proof. + intros. eapply exec_straight_trans. eapply H. econstructor; eauto. +Qed. + +Lemma transl_block_nobuiltin: + forall f bb ep tbb, + (MB.body bb <> nil \/ MB.exit bb <> None) -> + (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> + transl_block f bb ep = OK (tbb :: nil) -> + exists c c', + transl_basic_code f (MB.body bb) ep = OK c + /\ transl_instr_control f (MB.exit bb) = OK c' + /\ body tbb = c ++ extract_basic c' + /\ exit tbb = extract_ctl c'. +Proof. + intros until tbb. intros Hnonil Hnobuiltin TLB. monadInv TLB. destruct Hnonil. + - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto. + left. eapply transl_basic_code_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto. + - eexists. eexists. split; eauto. split; eauto. eapply gen_bblocks_nobuiltin; eauto. + right. eapply transl_instr_control_nonil; eauto. eapply transl_instr_control_nobuiltin; eauto. +Qed. + +Lemma nextblock_preserves: + forall rs rs' bb r, + rs' = nextblock bb rs -> + data_preg r = true -> + rs r = rs' r. +Proof. + intros. destruct r; try discriminate. + subst. Simpl. +Qed. + +Remark cons3_app {A: Type}: + forall a b c (l: list A), + a :: b :: c :: l = (a :: b :: c :: nil) ++ l. +Proof. + intros. simpl. auto. +Qed. + +Lemma exec_straight_opt_body2: + forall c rs1 m1 c' rs2 m2, + exec_straight_opt tge c rs1 m1 c' rs2 m2 -> + exists body, + exec_body tge body rs1 m1 = Next rs2 m2 + /\ (basics_to_code body) ++g c' = c. +Proof. + intros until m2. intros EXES. + inv EXES. + - exists nil. split; auto. + - eapply exec_straight_body2. auto. +Qed. + +Lemma extract_basics_to_code: + forall lb c, + extract_basic (basics_to_code lb ++ c) = lb ++ extract_basic c. +Proof. + induction lb; intros; simpl; congruence. +Qed. + +Lemma extract_ctl_basics_to_code: + forall lb c, + extract_ctl (basics_to_code lb ++ c) = extract_ctl c. +Proof. + induction lb; intros; simpl; congruence. +Qed. + +(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are + unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by + yourself the steps *) +Theorem step_simu_control: + forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2, + MB.body bb' = nil -> + (forall ef args res, MB.exit bb' <> Some (MBbuiltin ef args res)) -> + Genv.find_funct_ptr tge fb = Some (Internal fn) -> + pstate cs2 = (Asmvliw.State rs2 m2) -> + pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex -> + cur cs2 = tbb -> + match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 -> + match_asmstate fb cs2 (Asmvliw.State rs1 m1) -> + exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' -> + (exists rs3 m3 rs4 m4, + exec_body tge tbdy2 rs2 m2 = Next rs3 m3 + /\ exec_control_rel tge fn tex tbb rs3 m3 rs4 m4 + /\ match_states S'' (State rs4 m4)). +Proof. + intros until cs2. intros Hbody Hbuiltin FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP. + inv ESTEP. + - inv MCS. inv MAS. simpl in *. + inv Hpstate. + destruct ctl. + + (* MBcall *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + destruct s1 as [rf|fid]; simpl in H7. + * (* Indirect call *) + monadInv H1. + assert (ms' rf = Vptr f' Ptrofs.zero). + { unfold find_function_ptr in H14. destruct (ms' rf); try discriminate. + revert H14; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } + assert (rs2 x = Vptr f' Ptrofs.zero). + { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. } + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + remember (Ptrofs.add _ _) as ofs'. + assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc). + { econstructor; eauto. } + assert (f1 = f) by congruence. subst f1. + exploit return_address_offset_correct; eauto. intros; subst ra. + + repeat eexists. + rewrite H6. econstructor; eauto. + rewrite H7. econstructor; eauto. + econstructor; eauto. + econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl. + simpl. Simpl. rewrite PCeq. rewrite Heqofs'. simpl. auto. + + * (* Direct call *) + monadInv H1. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + remember (Ptrofs.add _ _) as ofs'. + assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc). + econstructor; eauto. + assert (f1 = f) by congruence. subst f1. + exploit return_address_offset_correct; eauto. intros; subst ra. + repeat eexists. + rewrite H6. econstructor; eauto. + rewrite H7. econstructor; eauto. + econstructor; eauto. + econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. Simpl. + Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H14. rewrite H14. auto. + Simpl. simpl. subst. Simpl. simpl. unfold Val.offset_ptr. rewrite PCeq. auto. + + (* MBtailcall *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + exploit Mem.loadv_extends. eauto. eexact H15. auto. simpl. intros [parent' [A B]]. + destruct s1 as [rf|fid]; simpl in H13. + * monadInv H1. + assert (ms' rf = Vptr f' Ptrofs.zero). + { destruct (ms' rf); try discriminate. revert H13. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } + assert (rs2 x = Vptr f' Ptrofs.zero). + { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. } + + assert (f = f1) by congruence. subst f1. clear FIND1. clear H14. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + { simpl. eauto. } + intros EXEB. + repeat eexists. + rewrite H6. simpl extract_basic. eauto. + rewrite H7. simpl extract_ctl. simpl. reflexivity. + econstructor; eauto. + { apply agree_set_other. + - econstructor; auto with asmgen. + + apply V. + + intro r. destruct r; apply V; auto. + - eauto with asmgen. } + assert (IR x <> IR GPR12 /\ IR x <> IR GPR32 /\ IR x <> IR GPR16). + { clear - EQ. destruct x; repeat split; try discriminate. + all: unfold ireg_of in EQ; destruct rf; try discriminate. } + Simpl. inv H1. inv H3. rewrite Z; auto; try discriminate. + * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H14. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + simpl. eauto. + intros EXEB. + repeat eexists. + rewrite H6. simpl extract_basic. eauto. + rewrite H7. simpl extract_ctl. simpl. reflexivity. + econstructor; eauto. + { apply agree_set_other. + - econstructor; auto with asmgen. + + apply V. + + intro r. destruct r; apply V; auto. + - eauto with asmgen. } + { Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H13. auto. } + + (* MBbuiltin (contradiction) *) + assert (MB.exit bb' <> Some (MBbuiltin e l b)) by (apply Hbuiltin). + rewrite <- H in H1. contradict H1; auto. + + (* MBgoto *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H11. + remember (nextblock tbb rs2) as rs2'. + exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + exploit find_label_goto_label. + eauto. eauto. + instantiate (2 := rs2'). + { subst. unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. eauto. } + eauto. + intros (tc' & rs' & GOTO & AT2 & INV). + + eexists. eexists. repeat eexists. repeat split. + rewrite H6. simpl extract_basic. simpl. eauto. + rewrite H7. simpl extract_ctl. simpl. rewrite <- Heqrs2'. eauto. + econstructor; eauto. + rewrite Heqrs2' in INV. unfold nextblock, incrPC in INV. + eapply agree_exten; eauto with asmgen. + assert (forall r : preg, r <> PC -> rs' r = rs2 r). + { intros. destruct r. + - destruct g. all: rewrite INV; Simpl; auto. + - rewrite INV; Simpl; auto. + - contradiction. } + eauto with asmgen. + congruence. + + (* MBcond *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + * (* MBcond true *) + assert (f0 = f) by congruence. subst f0. + exploit eval_condition_lessdef. + eapply preg_vals; eauto. + all: eauto. + intros EC. + exploit transl_cbranch_correct_true; eauto. intros (rs' & jmp & A & B & C). + exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC). + assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. } + rewrite PCeq' in PCeq. + assert (f1 = f) by congruence. subst f1. + exploit find_label_goto_label. + 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs')). rewrite nextblock_pc. + unfold Val.offset_ptr. rewrite PCeq. eauto. + intros (tc' & rs3 & GOTOL & TLPC & Hrs3). + exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + + repeat eexists. + rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto. + rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto. + + econstructor; eauto. + eapply agree_exten with rs2; eauto with asmgen. + { intros. destruct r; try destruct g; try discriminate. + all: rewrite Hrs3; try discriminate; unfold nextblock, incrPC; Simpl. } + intros. discriminate. + + * (* MBcond false *) + assert (f0 = f) by congruence. subst f0. + exploit eval_condition_lessdef. + eapply preg_vals; eauto. + all: eauto. + intros EC. + + exploit transl_cbranch_correct_false; eauto. intros (rs' & jmp & A & B & C). + exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC). + assert (PCeq': rs2 PC = rs' PC). { inv A; auto. erewrite <- exec_straight_pc. 2: eapply H. eauto. } + rewrite PCeq' in PCeq. + exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + + assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + + repeat eexists. + rewrite H6. rewrite <- BTC. rewrite extract_basics_to_code. simpl. rewrite app_nil_r. eauto. + rewrite H7. rewrite <- BTC. rewrite extract_ctl_basics_to_code. simpl extract_ctl. rewrite B. eauto. + + econstructor; eauto. + unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto. + eapply agree_exten with rs2; eauto with asmgen. + { intros. destruct r; try destruct g; try discriminate. + all: rewrite <- C; try discriminate; unfold nextblock, incrPC; Simpl. } + intros. discriminate. + + (* MBjumptable *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + monadInv H1. + generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV. + assert (f1 = f) by congruence. subst f1. + exploit find_label_goto_label. 4: eapply H16. 1-2: eauto. instantiate (2 := (nextblock tbb rs2) # GPR62 <- Vundef # GPR63 <- Vundef). + unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity. + exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn. + + intros [tc' [rs' [A [B C]]]]. + exploit ireg_val; eauto. rewrite H13. intros LD; inv LD. + + repeat eexists. + rewrite H6. simpl extract_basic. simpl. eauto. + rewrite H7. simpl extract_ctl. simpl. Simpl. rewrite <- H1. unfold Mach.label in H14. unfold label. rewrite H14. eapply A. + econstructor; eauto. + eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen. + { assert (destroyed_by_jumptable = R62 :: R63 :: nil) by auto. rewrite H2 in H0. simpl in H0. inv H0. + destruct (preg_eq r' GPR63). subst. contradiction. + destruct (preg_eq r' GPR62). subst. contradiction. + destruct r'; Simpl. } + discriminate. + + (* MBreturn *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + simpl. eauto. + intros EXEB. + assert (f1 = f) by congruence. subst f1. + + repeat eexists. + rewrite H6. simpl extract_basic. eauto. + rewrite H7. simpl extract_ctl. simpl. reflexivity. + econstructor; eauto. + unfold nextblock, incrPC. repeat apply agree_set_other; auto with asmgen. + + - inv MCS. inv MAS. simpl in *. subst. inv Hpstate. + destruct bb' as [hd' bdy' ex']; simpl in *. subst. + monadInv TBC. monadInv TIC. simpl in *. rewrite H5. rewrite H6. + simpl. repeat eexists. + econstructor. 4: instantiate (3 := false). all:eauto. + unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + assert (f = f0) by congruence. subst f0. econstructor; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto. + eapply agree_exten; eauto. intros. Simpl. + discriminate. +Qed. + +Definition mb_remove_first (bb: MB.bblock) := + {| MB.header := MB.header bb; MB.body := tail (MB.body bb); MB.exit := MB.exit bb |}. + +Lemma exec_straight_body: + forall c c' lc rs1 m1 rs2 m2, + exec_straight tge c rs1 m1 c' rs2 m2 -> + code_to_basics c = Some lc -> + exists l ll, + c = l ++ c' + /\ code_to_basics l = Some ll + /\ exec_body tge ll rs1 m1 = Next rs2 m2. +Proof. + induction c; try (intros; inv H; fail). + intros until m2. intros EXES CTB. inv EXES. + - exists (i1 ::g nil),(i1::nil). repeat (split; simpl; auto). rewrite H6. auto. + - inv CTB. destruct (code_to_basics c); try discriminate. inv H0. + eapply IHc in H7; eauto. destruct H7 as (l' & ll & Hc & CTB & EXECB). subst. + exists (i ::g l'),(i::ll). repeat (split; simpl; auto). + rewrite CTB. auto. + rewrite H1. auto. +Qed. + +Lemma basics_to_code_app: + forall c l x ll, + basics_to_code c = l ++ basics_to_code x -> + code_to_basics l = Some ll -> + c = ll ++ x. +Proof. + intros. apply (f_equal code_to_basics) in H. + erewrite code_to_basics_dist in H; eauto. 2: eapply code_to_basics_id. + rewrite code_to_basics_id in H. inv H. auto. +Qed. + +Lemma basics_to_code_app2: + forall i c l x ll, + (PBasic i) :: basics_to_code c = l ++ basics_to_code x -> + code_to_basics l = Some ll -> + i :: c = ll ++ x. +Proof. + intros until ll. intros. + exploit basics_to_code_app. instantiate (3 := (i::c)). simpl. + all: eauto. +Qed. + +(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *) +Theorem step_simu_basic: + forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy, + MB.header bb = nil -> MB.body bb = bi::(bdy) -> (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> + bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} -> + basic_step ge s fb sp ms m bi ms' m' -> + pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists rs2 m2 l cs2 tbdy', + cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := fp_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |} + /\ tbdy = l ++ tbdy' + /\ exec_body tge l rs1 m1 = Next rs2 m2 + /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2). +Proof. + intros until bdy. intros Hheader Hbody Hnobuiltin (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS. + simpl in *. inv Hpstate. + rewrite Hbody in TBC. monadInv TBC. + inv BSTEP. + + - (* MBgetstack *) + simpl in EQ0. + unfold Mach.load_stack in H. + exploit Mem.loadv_extends; eauto. intros [v' [A B]]. + rewrite (sp_val _ _ _ AG) in A. + exploit loadind_correct; eauto with asmgen. + intros (rs2 & EXECS & Hrs'1 & Hrs'2). + eapply exec_straight_body in EXECS. + 2: eapply code_to_basics_id; eauto. + destruct EXECS as (l & Hlbi & BTC & CTB & EXECB). + exists rs2, m1, Hlbi. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. } + subst. simpl in Hheadereq. + + eapply match_codestate_intro; eauto. + { simpl. simpl in EQ. rewrite <- Hheadereq in EQ. assumption. } + eapply agree_set_mreg; eauto with asmgen. + intro Hep. simpl in Hep. + destruct (andb_prop _ _ Hep). clear Hep. + rewrite <- Hheadereq in DXP. subst. rewrite <- DXP. rewrite Hrs'2. reflexivity. + discriminate. apply preg_of_not_FP; assumption. reflexivity. + + - (* MBsetstack *) + simpl in EQ0. + unfold Mach.store_stack in H. + assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. } + exploit Mem.storev_extends; eauto. intros [m2' [A B]]. + exploit storeind_correct; eauto with asmgen. + rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs', m2', ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto. + + eapply agree_undef_regs; eauto with asmgen. + simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto. + - (* MBgetparam *) + simpl in EQ0. + + assert (f0 = f) by congruence; subst f0. + unfold Mach.load_stack in *. + exploit Mem.loadv_extends. eauto. eexact H0. auto. + intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'. + exploit Mem.loadv_extends. eauto. eexact H1. auto. + intros [v' [C D]]. + + monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP. + destruct ep0 eqn:EPeq. + + (* RTMP contains parent *) + + exploit loadind_correct. eexact EQ1. + instantiate (2 := rs1). rewrite DXP; eauto. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & BTC & CTB & EXECB). + exists rs2, m1, ll. eexists. + eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + { eapply basics_to_code_app; eauto. } + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. } + subst. + eapply match_codestate_intro; eauto. + + eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. + simpl; intros. rewrite R; auto with asmgen. + apply preg_of_not_FP; auto. + + (* RTMP does not contain parent *) + + rewrite chunk_of_Tptr in A. + exploit loadind_ptr_correct. eexact A. intros [rs2 [P [Q R]]]. + exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto. + intros [rs3 [S [T U]]]. + + exploit exec_straight_trans. + eapply P. + eapply S. + intros EXES. + + eapply exec_straight_body in EXES. + 2: simpl. 2: erewrite code_to_basics_id; eauto. + destruct EXES as (l & ll & BTC & CTB & EXECB). + exists rs3, m1, ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app2; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. + eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. + instantiate (1 := rs2#FP <- (rs3#FP)). intros. + rewrite Pregmap.gso; auto with asmgen. + congruence. + intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen. + simpl; intros. rewrite U; auto with asmgen. + apply preg_of_not_FP; auto. + - (* MBop *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (eval_operation tge sp op (map ms args) m' = Some v). + rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. + exploit eval_operation_lessdef. + eapply preg_vals; eauto. + 2: eexact H0. + all: eauto. + intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs2, m1, ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto. + apply agree_set_undef_mreg with rs1; auto. + apply Val.lessdef_trans with v'; auto. + simpl; intros. destruct (andb_prop _ _ H1); clear H1. + rewrite R; auto. apply preg_of_not_FP; auto. +Local Transparent destroyed_by_op. + destruct op; simpl; auto; congruence. + - (* MBload *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (eval_addressing tge sp addr (map ms args) = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. + intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit Mem.loadv_extends; eauto. intros [v' [C D]]. + exploit transl_load_correct; eauto. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs2, m1, ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. + rewrite <- Hheadereq in EQ. assumption. + eapply agree_set_mreg; eauto with asmgen. + intro Hep. simpl in Hep. + destruct (andb_prop _ _ Hep). clear Hep. + subst. rewrite <- DXP. rewrite R; try discriminate. reflexivity. + apply preg_of_not_FP; assumption. reflexivity. + + - (* notrap1 cannot happen *) + simpl in EQ0. unfold transl_load in EQ0. + destruct addr; simpl in H. + all: unfold transl_load_rrrXS, transl_load_rrr, transl_load_rro in EQ0; + monadInv EQ0; unfold transl_memory_access2XS, transl_memory_access2, transl_memory_access in EQ2; + destruct args as [|h0 t0]; try discriminate; + destruct t0 as [|h1 t1]; try discriminate; + destruct t1 as [|h2 t2]; try discriminate. + + - (* MBload notrap2 TODO *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (eval_addressing tge sp addr (map ms args) = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. + intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. + + destruct (Mem.loadv chunk m1 a') as [v' | ] eqn:Hload. + { + exploit transl_load_correct; eauto. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs2, m1, ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *. + simpl in EQ. assumption. + + eapply agree_set_undef_mreg; eauto. intros; auto with asmgen. + + simpl. intro. + rewrite R; try congruence. + apply DXP. + destruct ep0; simpl in *; congruence. + apply preg_of_not_FP. + destruct ep0; simpl in *; congruence. + } + { + exploit transl_load_correct_notrap2; eauto. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs2, m1, ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. +(* assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + rewrite <- Hheadereq. *) subst. + eapply match_codestate_intro; eauto. simpl. rewrite Hheader in *. simpl in EQ. assumption. + + eapply agree_set_undef_mreg; eauto. intros; auto with asmgen. + simpl. intro. + rewrite R; try congruence. + apply DXP. + destruct ep0; simpl in *; congruence. + apply preg_of_not_FP. + destruct ep0; simpl in *; congruence. + } + - (* MBstore *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (eval_addressing tge sp addr (map ms args) = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. + intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. + assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto. + exploit Mem.storev_extends; eauto. intros [m2' [C D]]. + exploit transl_store_correct; eauto. intros [rs2 [P Q]]. + + eapply exec_straight_body in P. + 2: eapply code_to_basics_id; eauto. + destruct P as (l & ll & TBC & CTB & EXECB). + exists rs2, m2', ll. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + eapply basics_to_code_app; eauto. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. + rewrite <- Hheadereq in EQ. assumption. + eapply agree_undef_regs; eauto with asmgen. + intro Hep. simpl in Hep. + subst. rewrite <- DXP. rewrite Q; try discriminate. reflexivity. reflexivity. +Qed. + +Lemma exec_body_trans: + forall l l' rs0 m0 rs1 m1 rs2 m2, + exec_body tge l rs0 m0 = Next rs1 m1 -> + exec_body tge l' rs1 m1 = Next rs2 m2 -> + exec_body tge (l++l') rs0 m0 = Next rs2 m2. +Proof. + induction l. + - simpl. congruence. + - intros until m2. intros EXEB1 EXEB2. + inv EXEB1. destruct (exec_basic_instr _) eqn:EBI; try discriminate. + simpl. rewrite EBI. eapply IHl; eauto. +Qed. + +Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}. + +Program Definition remove_header tbb := {| header := nil; body := body tbb; exit := exit tbb |}. +Next Obligation. + destruct tbb. simpl. auto. +Qed. + +Inductive exec_header: codestate -> codestate -> Prop := + | exec_header_cons: forall cs1, + exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1; + cur := cur cs1 |}. + +(* Theorem (A) in the diagram, the easiest of all *) +Theorem step_simu_header: + forall bb s fb sp c ms m rs1 m1 cs1, + pstate cs1 = (State rs1 m1) -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists cs1', + exec_header cs1 cs1' + /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1'). +Proof. + intros until cs1. intros Hpstate MCS. + eexists. split; eauto. + econstructor; eauto. + inv MCS. simpl in *. inv Hpstate. + econstructor; eauto. +Qed. + +Lemma step_matchasm_header: + forall fb cs1 cs1' s1, + match_asmstate fb cs1 s1 -> + exec_header cs1 cs1' -> + match_asmstate fb cs1' s1. +Proof. + intros until s1. intros MAS EXH. + inv MAS. inv EXH. + simpl. econstructor; eauto. +Qed. + +(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *) +Theorem step_simu_body: + forall bb s fb sp c ms m rs1 m1 ms' cs1 m', + MB.header bb = nil -> + (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> + body_step ge s fb sp (MB.body bb) ms m ms' m' -> + pstate cs1 = (State rs1 m1) -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists rs2 m2 cs2 ep, + cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |} + /\ exec_body tge (pbody1 cs1) rs1 m1 = Next rs2 m2 + /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2). +Proof. + intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy]. + - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS. + inv BSTEP. + exists rs1, m1, cs1, (ep cs1). + inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto). + econstructor; eauto. + - intros until m'. intros Hheader Hnobuiltin BSTEP Hpstate MCS. inv BSTEP. + rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'. + exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto. + intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS'). + simpl in *. + exploit IHbdy. auto. 2: eapply H6. 3: eapply MCS'. all: eauto. subst; eauto. simpl; auto. + intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS''). + exists rs3, m3, cs3, ep. + repeat (split; simpl; auto). subst. simpl in *. auto. + rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto. +Qed. + +Lemma exec_body_control: + forall b rs1 m1 rs2 m2 rs3 m3 fn, + exec_body tge (body b) rs1 m1 = Next rs2 m2 -> + exec_control_rel tge fn (exit b) b rs2 m2 rs3 m3 -> + exec_bblock_rel tge fn b rs1 m1 rs3 m3. +Proof. + intros until fn. intros EXEB EXECTL. + econstructor; eauto. inv EXECTL. + unfold exec_bblock. rewrite EXEB. auto. +Qed. + +Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat. + +Lemma mbsize_eqz: + forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None. +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H. + remember (length _) as a. remember (length_opt _) as b. + assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H. + inv H0. inv H1. destruct bdy; destruct ex; auto. + all: try discriminate. +Qed. + +Lemma mbsize_neqz: + forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None). +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. + destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate). + contradict H. unfold mbsize. simpl. auto. +Qed. + +(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *) +(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *) +Lemma step_simulation_bblock': + forall sf f sp bb bb' bb'' rs m rs' m' s'' c S1, + bb' = mb_remove_header bb -> + body_step ge sf f sp (Machblock.body bb') rs m rs' m' -> + bb'' = mb_remove_body bb' -> + (forall ef args res, MB.exit bb'' <> Some (MBbuiltin ef args res)) -> + exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') E0 s'' -> + match_states (Machblock.State sf f sp (bb :: c) rs m) S1 -> + exists S2 : state, plus step tge S1 E0 S2 /\ match_states s'' S2. +Proof. + intros until S1. intros Hbb' BSTEP Hbb'' Hbuiltin ESTEP MS. + destruct (mbsize bb) eqn:SIZE. + - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit). + destruct bb as [hd bdy ex]; simpl in *; subst. + inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc. + monadInv H2. simpl in *. inv ESTEP. inv BSTEP. + eexists. split. eapply plus_one. + exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'. + assert (x = tf) by congruence. subst x. + eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto. + unfold exec_bblock. simpl. eauto. + econstructor. eauto. eauto. eauto. + unfold nextblock, incrPC. Simpl. unfold Val.offset_ptr. rewrite <- H. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + econstructor; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto. + eapply agree_exten; eauto. intros. Simpl. + intros. discriminate. + - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. } + intros Hnotempty. + + (* initial setting *) + exploit match_state_codestate. + 2: eapply Hnotempty. + all: eauto. + intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate). + + (* step_simu_header part *) + assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. } + destruct H as (rs1 & m1 & Hpstate2). subst. + assert (f = fb). { inv MCS. auto. } subst fb. + exploit step_simu_header. + 2: eapply MCS. + all: eauto. + intros (cs1' & EXEH & MCS2). + + (* step_simu_body part *) + assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. } + exploit step_simu_body. + 3: eapply BSTEP. + 4: eapply MCS2. + all: eauto. rewrite Hpstate'. eauto. + intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS'). + + (* step_simu_control part *) + assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)). + { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. } + destruct H as (tf & FIND'). + assert (exists tex, pbody2 cs1 = extract_basic tex /\ pctl cs1 = extract_ctl tex). + { inv MAS. simpl in *. eauto. } + destruct H as (tex & Hpbody2 & Hpctl). + inv EXEH. simpl in *. + subst. exploit step_simu_control. + 9: eapply MCS'. all: simpl. + 10: eapply ESTEP. + all: simpl; eauto. + rewrite Hpbody2. rewrite Hpctl. + { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto. + erewrite exec_body_pc; eauto. } + intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS'). + + (* bringing the pieces together *) + exploit exec_body_trans. + eapply EXEB. + eauto. + intros EXEB2. + exploit exec_body_control; eauto. + rewrite <- Hpbody2 in EXEB2. rewrite <- Hbody in EXEB2. eauto. + rewrite Hexit. rewrite Hpctl. eauto. + intros EXECB. inv EXECB. + exists (State rs4 m4). + split; auto. eapply plus_one. rewrite Hpstate2. + assert (exists ofs, rs1 PC = Vptr f ofs). + { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. } + destruct H0 as (ofs & Hrs1pc). + eapply exec_step_internal; eauto. + + (* proving the initial find_bblock *) + rewrite Hpstate2 in MAS. inv MAS. simpl in *. + assert (f1 = f0) by congruence. subst f0. + rewrite PCeq in Hrs1pc. inv Hrs1pc. + exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''. + inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ. + eapply find_bblock_tail; eauto. +Qed. + +Theorem step_simulation_bblock: + forall sf f sp bb ms m ms' m' S2 c, + body_step ge sf f sp (Machblock.body bb) ms m ms' m' -> + (forall ef args res, MB.exit bb <> Some (MBbuiltin ef args res)) -> + exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') E0 S2 -> + forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' -> + exists S2' : state, plus step tge S1' E0 S2' /\ match_states S2 S2'. +Proof. + intros until c. intros BSTEP Hbuiltin ESTEP S1' MS. + eapply step_simulation_bblock'; eauto. + all: destruct bb as [hd bdy ex]; simpl in *; eauto. + inv ESTEP. + - econstructor. inv H; try (econstructor; eauto; fail). + - econstructor. +Qed. + +(** Dealing now with the builtin case *) + +Definition split (c: MB.code) := + match c with + | nil => nil + | bb::c => {| MB.header := MB.header bb; MB.body := MB.body bb; MB.exit := None |} + :: {| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |} :: c + end. + +Lemma cons_ok_eq3 {A: Type} : + forall (x:A) y z x' y' z', + x = x' -> y = y' -> z = z' -> + OK (x::y::z) = OK (x'::y'::z'). +Proof. + intros. subst. auto. +Qed. + +Lemma transl_blocks_split_builtin: + forall bb c ep f ef args res, + MB.exit bb = Some (MBbuiltin ef args res) -> MB.body bb <> nil -> + transl_blocks f (split (bb::c)) ep = transl_blocks f (bb::c) ep. +Proof. + intros until res. intros Hexit Hbody. simpl split. + unfold transl_blocks. fold transl_blocks. unfold transl_block. + simpl. remember (transl_basic_code _ _ _) as tbc. remember (transl_instr_control _ _) as tbi. + remember (transl_blocks _ _ _) as tlbs. + destruct tbc; destruct tbi; destruct tlbs. + all: try simpl; auto. + - simpl. rewrite Hexit in Heqtbi. simpl in Heqtbi. monadInv Heqtbi. simpl. + unfold gen_bblocks. simpl. destruct l. + + exploit transl_basic_code_nonil; eauto. intro. destruct H. + + simpl. rewrite app_nil_r. apply cons_ok_eq3. all: try eapply bblock_equality. all: simpl; auto. +Qed. + +Lemma transl_code_at_pc_split_builtin: + forall rs f f0 bb c ep tf tc ef args res, + MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) -> + transl_code_at_pc ge (rs PC) f f0 (bb :: c) ep tf tc -> + transl_code_at_pc ge (rs PC) f f0 (split (bb :: c)) ep tf tc. +Proof. + intros until res. intros Hbody Hexit AT. inv AT. + econstructor; eauto. erewrite transl_blocks_split_builtin; eauto. +Qed. + +Theorem match_states_split_builtin: + forall sf f sp bb c rs m ef args res S1, + MB.body bb <> nil -> MB.exit bb = Some (MBbuiltin ef args res) -> + match_states (Machblock.State sf f sp (bb :: c) rs m) S1 -> + match_states (Machblock.State sf f sp (split (bb::c)) rs m) S1. +Proof. + intros until S1. intros Hbody Hexit MS. + inv MS. + econstructor; eauto. + eapply transl_code_at_pc_split_builtin; eauto. +Qed. + +Theorem step_simulation_builtin: + forall ef args res bb sf f sp c ms m t S2, + MB.body bb = nil -> MB.exit bb = Some (MBbuiltin ef args res) -> + exit_step return_address_offset ge (MB.exit bb) (Machblock.State sf f sp (bb :: c) ms m) t S2 -> + forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' -> + exists S2' : state, plus step tge S1' t S2' /\ match_states S2 S2'. +Proof. + intros until S2. intros Hbody Hexit ESTEP S1' MS. + inv MS. inv AT. monadInv H2. monadInv EQ. + rewrite Hbody in EQ0. monadInv EQ0. + rewrite Hexit in EQ. monadInv EQ. + rewrite Hexit in ESTEP. inv ESTEP. inv H4. + + exploit functions_transl; eauto. intro FN. + generalize (transf_function_no_overflow _ _ H1); intro NOOV. + exploit builtin_args_match; eauto. intros [vargs' [P Q]]. + exploit external_call_mem_extends; eauto. + intros [vres' [m2' [A [B [C D]]]]]. + econstructor; split. apply plus_one. + simpl in H3. + eapply exec_step_builtin. eauto. eauto. + eapply find_bblock_tail; eauto. + simpl. eauto. + erewrite <- sp_val by eauto. + eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + eauto. + econstructor; eauto. + instantiate (2 := tf); instantiate (1 := x0). + unfold nextblock, incrPC. rewrite Pregmap.gss. + rewrite set_res_other. rewrite undef_regs_other_2. rewrite Pregmap.gso by congruence. + rewrite <- H. simpl. econstructor; eauto. + eapply code_tail_next_int; eauto. + rewrite preg_notin_charact. intros. auto with asmgen. + auto with asmgen. + apply agree_nextblock. eapply agree_set_res; auto. + eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto. + apply Pregmap.gso; auto with asmgen. + congruence. +Qed. + +Lemma next_sep: + forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'. +Proof. + congruence. +Qed. + +(* Measure to prove finite stuttering, see the other backends *) +Definition measure (s: MB.state) : nat := + match s with + | MB.State _ _ _ _ _ _ => 0%nat + | MB.Callstate _ _ _ _ => 0%nat + | MB.Returnstate _ _ _ => 1%nat + end. + +(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs + for the internal and external function cases *) +Theorem step_simulation: + forall S1 t S2, MB.step return_address_offset ge S1 t S2 -> + forall S1' (MS: match_states S1 S1'), + (exists S2', plus step tge S1' t S2' /\ match_states S2 S2') + \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. +Proof. + induction 1; intros. + +- (* bblock *) + left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0. + all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock; + try (rewrite MBE; try discriminate); eauto). + + (* MBbuiltin *) + destruct (MB.body bb) eqn:MBB. + * inv H. eapply step_simulation_builtin; eauto. rewrite MBE. eauto. + * eapply match_states_split_builtin in MS; eauto. + 2: rewrite MBB; discriminate. + simpl split in MS. + rewrite <- MBB in H. + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb1. + assert (MB.body bb = MB.body bb1). { subst. simpl. auto. } + rewrite H1 in H. subst. + exploit step_simulation_bblock. eapply H. + discriminate. + simpl. constructor. + eauto. + intros (S2' & PLUS1 & MS'). + rewrite MBE in MS'. + assert (exit_step return_address_offset ge (Some (MBbuiltin e l b)) + (MB.State sf f sp ({| MB.header := nil; MB.body := nil; MB.exit := Some (MBbuiltin e l b) |}::c) + rs' m') t s'). + { inv H0. inv H3. econstructor. econstructor; eauto. } + exploit step_simulation_builtin. + 4: eapply MS'. + all: simpl; eauto. + intros (S3' & PLUS'' & MS''). + exists S3'. split; eauto. + eapply plus_trans. eapply PLUS1. eapply PLUS''. eauto. + + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto. + +- (* internal function *) + inv MS. + exploit functions_translated; eauto. intros [tf [A B]]. monadInv B. + generalize EQ; intros EQ'. monadInv EQ'. + destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0. + unfold Mach.store_stack in *. + exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. + intros [m1' [C D]]. + exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. + intros [m2' [F G]]. + simpl chunk_of_type in F. + exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. + intros [m3' [P Q]]. + (* Execution of function prologue *) + monadInv EQ0. + set (tfbody := make_prologue f x0) in *. + set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *. + set (rs2 := rs0#FP <- (parent_sp s) #SP <- sp #RTMP <- Vundef). + exploit (Pget_correct tge GPRA RA nil rs2 m2'); auto. + intros (rs' & U' & V'). + exploit (storeind_ptr_correct tge SP (fn_retaddr_ofs f) GPRA nil rs' m2'). + { rewrite chunk_of_Tptr in P. + assert (rs' GPRA = rs0 RA). { apply V'. } + assert (rs' SP = rs2 SP). { apply V'; discriminate. } + rewrite H4. rewrite H3. + rewrite ATLR. + change (rs2 SP) with sp. eexact P. } + intros (rs3 & U & V). + assert (EXEC_PROLOGUE: exists rs3', + exec_straight_blocks tge tf + tf.(fn_blocks) rs0 m' + x0 rs3' m3' + /\ forall r, r <> PC -> rs3' r = rs3 r). + { eexists. split. + - change (fn_blocks tf) with tfbody; unfold tfbody. + econstructor; eauto. unfold exec_bblock. simpl exec_body. + rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. simpl in F. rewrite F. + Simpl. unfold parexec_store_offset. rewrite Ptrofs.of_int64_to_int64. unfold eval_offset. + rewrite chunk_of_Tptr in P. Simpl. rewrite ATLR. unfold Mptr in P. assert (Archi.ptr64 = true) by auto. 2: auto. rewrite H3 in P. rewrite P. + simpl. apply next_sep; eauto. reflexivity. + - intros. destruct V' as (V'' & V'). destruct r. + + Simpl. + destruct (gpreg_eq g0 GPR16). { subst. Simpl. rewrite V; try discriminate. rewrite V''. subst rs2. Simpl. } + destruct (gpreg_eq g0 GPR32). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. } + destruct (gpreg_eq g0 GPR12). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. } + destruct (gpreg_eq g0 GPR17). { subst. Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. } + Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. { destruct g0; try discriminate. contradiction. } + + Simpl. rewrite V; try discriminate. rewrite V'; try discriminate. subst rs2. Simpl. + + contradiction. + } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3'). + exploit exec_straight_steps_2; eauto using functions_transl. + simpl fn_blocks. simpl fn_blocks in g. lia. constructor. + intros (ofs' & X & Y). + left; exists (State rs3' m3'); split. + eapply exec_straight_steps_1; eauto. + simpl fn_blocks. simpl fn_blocks in g. lia. + constructor. + econstructor; eauto. + rewrite X; econstructor; eauto. + apply agree_exten with rs2; eauto with asmgen. + unfold rs2. + apply agree_set_other; auto with asmgen. + apply agree_change_sp with (parent_sp s). + apply agree_undef_regs with rs0. auto. +Local Transparent destroyed_at_function_entry. + simpl; intros; Simpl. + unfold sp; congruence. + + intros. + assert (r <> RTMP). { contradict H3; rewrite H3; unfold data_preg; auto. } + rewrite Heqrs3'. Simpl. rewrite V. inversion V'. rewrite H6. auto. + assert (r <> GPRA). { contradict H3; rewrite H3; unfold data_preg; auto. } + assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. } + contradict H3; rewrite H3; unfold data_preg; auto. + contradict H3; rewrite H3; unfold data_preg; auto. + contradict H3; rewrite H3; unfold data_preg; auto. + contradict H3; rewrite H3; unfold data_preg; auto. + intros. rewrite Heqrs3'. rewrite V by auto with asmgen. + assert (forall r : preg, r <> PC -> r <> GPRA -> rs' r = rs2 r). { apply V'. } + rewrite H4 by auto with asmgen. reflexivity. discriminate. + +- (* external function *) + inv MS. + exploit functions_translated; eauto. + intros [tf [A B]]. simpl in B. inv B. + exploit extcall_arguments_match; eauto. + intros [args' [C D]]. + exploit external_call_mem_extends; eauto. + intros [res' [m2' [P [Q [R S]]]]]. + left; econstructor; split. + apply plus_one. eapply exec_step_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + econstructor; eauto. + unfold loc_external_result. + apply agree_set_other; auto. + apply agree_set_pair; auto. + apply agree_undef_caller_save_regs; auto. + +- (* return *) + inv MS. + inv STACKS. simpl in *. + right. split. lia. split. auto. + rewrite <- ATPC in H5. + econstructor; eauto. congruence. +Qed. + +Lemma transf_initial_states: + forall st1, MB.initial_state prog st1 -> + exists st2, AB.initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inversion H. unfold ge0 in *. + econstructor; split. + econstructor. + eapply (Genv.init_mem_transf_partial TRANSF); eauto. + replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero) + with (Vptr fb Ptrofs.zero). + econstructor; eauto. + constructor. + apply Mem.extends_refl. + split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence. + intros. rewrite Mach.Regmap.gi. auto. + unfold Genv.symbol_address. + rewrite (match_program_main TRANSF). + rewrite symbols_preserved. + unfold ge; rewrite H1. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r. +Proof. + intros. inv H0. inv H. constructor. assumption. + compute in H1. inv H1. + generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto. +Qed. + +Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop := + Asmblockgenproof0.return_address_offset. + +Theorem transf_program_correct: + forward_simulation (MB.semantics return_address_offset prog) (Asmblock.semantics tprog). +Proof. + eapply forward_simulation_star with (measure := measure). + - apply senv_preserved. + - eexact transf_initial_states. + - eexact transf_final_states. + - exact step_simulation. +Qed. + +End PRESERVATION. diff --git a/kvx/Asmblockgenproof0.v b/kvx/Asmblockgenproof0.v new file mode 100644 index 00000000..83b574e7 --- /dev/null +++ b/kvx/Asmblockgenproof0.v @@ -0,0 +1,982 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * "block" version of Asmgenproof0 + + This module is largely adapted from Asmgenproof0.v of the other backends + It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends + It has similar definitions than Asmgenproof0, but adapted to this new structure *) + +Require Import Coqlib. +Require Intv. +Require Import AST. +Require Import Errors. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Memory. +Require Import Globalenvs. +Require Import Events. +Require Import Smallstep. +Require Import Locations. +Require Import Machblock. +Require Import Asmblock. +Require Import Asmblockgen. +Require Import Conventions1. +Require Import Axioms. +Require Import Asmblockprops. +Require Import Lia. + +Module MB:=Machblock. +Module AB:=Asmblock. + +Lemma ireg_of_eq: + forall r r', ireg_of r = OK r' -> preg_of r = IR r'. +Proof. + unfold ireg_of; intros. destruct (preg_of r); inv H; auto. +Qed. + +Lemma freg_of_eq: + forall r r', freg_of r = OK r' -> preg_of r = IR r'. +Proof. + unfold freg_of; intros. destruct (preg_of r); inv H; auto. +Qed. + +Lemma preg_of_injective: + forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2. +Proof. + destruct r1; destruct r2; simpl; intros; reflexivity || discriminate. +Qed. + +Lemma undef_regs_other: + forall r rl rs, + (forall r', In r' rl -> r <> r') -> + undef_regs rl rs r = rs r. +Proof. + induction rl; simpl; intros. auto. + rewrite IHrl by auto. rewrite Pregmap.gso; auto. +Qed. + +Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop := + match rl with + | nil => True + | r1 :: nil => r <> preg_of r1 + | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl + end. + +Remark preg_notin_charact: + forall r rl, + preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr). +Proof. + induction rl; simpl; intros. + tauto. + destruct rl. + simpl. split. intros. intuition congruence. auto. + rewrite IHrl. split. + intros [A B]. intros. destruct H. congruence. auto. + auto. +Qed. + +Lemma undef_regs_other_2: + forall r rl rs, + preg_notin r rl -> + undef_regs (map preg_of rl) rs r = rs r. +Proof. + intros. apply undef_regs_other. intros. + exploit list_in_map_inv; eauto. intros [mr [A B]]. subst. + rewrite preg_notin_charact in H. auto. +Qed. + +(** * Agreement between Mach registers and processor registers *) + +Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree { + agree_sp: rs#SP = sp; + agree_sp_def: sp <> Vundef; + agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r)) +}. + +Lemma preg_val: + forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r). +Proof. + intros. destruct H. auto. +Qed. + +Lemma preg_vals: + forall ms sp rs, agree ms sp rs -> + forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)). +Proof. + induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto. +Qed. + +Lemma sp_val: + forall ms sp rs, agree ms sp rs -> sp = rs#SP. +Proof. + intros. destruct H; auto. +Qed. + +Lemma ireg_val: + forall ms sp rs r r', + agree ms sp rs -> + ireg_of r = OK r' -> + Val.lessdef (ms r) rs#r'. +Proof. + intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto. +Qed. + +Lemma freg_val: + forall ms sp rs r r', + agree ms sp rs -> + freg_of r = OK r' -> + Val.lessdef (ms r) (rs#r'). +Proof. + intros. rewrite <- (freg_of_eq _ _ H0). eapply preg_val; eauto. +Qed. + +Lemma agree_exten: + forall ms sp rs rs', + agree ms sp rs -> + (forall r, data_preg r = true -> rs'#r = rs#r) -> + agree ms sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite H0; auto. auto. + intros. rewrite H0; auto. apply preg_of_data. +Qed. + +(** Preservation of register agreement under various assignments. *) + +Lemma agree_set_mreg: + forall ms sp rs r v rs', + agree ms sp rs -> + Val.lessdef v (rs'#(preg_of r)) -> + (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') -> + agree (Mach.Regmap.set r v ms) sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP. + intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence. + rewrite H1. auto. apply preg_of_data. + red; intros; elim n. eapply preg_of_injective; eauto. +Qed. + +Corollary agree_set_mreg_parallel: + forall ms sp rs r v v', + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs). +Proof. + intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto. +Qed. + +Lemma agree_set_other: + forall ms sp rs r v, + agree ms sp rs -> + data_preg r = false -> + agree ms sp (rs#r <- v). +Proof. + intros. apply agree_exten with rs. auto. + intros. apply Pregmap.gso. congruence. +Qed. + +Lemma agree_nextblock: + forall ms sp rs b, + agree ms sp rs -> agree ms sp (nextblock b rs). +Proof. + intros. unfold nextblock. apply agree_set_other. auto. auto. +Qed. + +Lemma agree_set_pair: + forall sp p v v' ms rs, + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs). +Proof. + intros. destruct p; simpl. +- apply agree_set_mreg_parallel; auto. +- apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto. + apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto. +Qed. + +Lemma agree_undef_nondata_regs: + forall ms sp rl rs, + agree ms sp rs -> + (forall r, In r rl -> data_preg r = false) -> + agree ms sp (undef_regs rl rs). +Proof. + induction rl; simpl; intros. auto. + apply IHrl. apply agree_exten with rs; auto. + intros. apply Pregmap.gso. red; intros; subst. + assert (data_preg a = false) by auto. congruence. + intros. apply H0; auto. +Qed. + +Lemma agree_undef_regs: + forall ms sp rl rs rs', + agree ms sp rs -> + (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.undef_regs rl ms) sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite <- agree_sp0. apply H0; auto. + rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP. + intros. destruct (In_dec mreg_eq r rl). + rewrite Mach.undef_regs_same; auto. + rewrite Mach.undef_regs_other; auto. rewrite H0; auto. + apply preg_of_data. + rewrite preg_notin_charact. intros; red; intros. elim n. + exploit preg_of_injective; eauto. congruence. +Qed. + +Lemma agree_set_undef_mreg: + forall ms sp rs r v rl rs', + agree ms sp rs -> + Val.lessdef v (rs'#(preg_of r)) -> + (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'. +Proof. + intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto. + apply agree_undef_regs with rs; auto. + intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)). + congruence. auto. + intros. rewrite Pregmap.gso; auto. +Qed. + +Lemma agree_undef_caller_save_regs: + forall ms sp rs, + agree ms sp rs -> + agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs). +Proof. + intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split. +- unfold proj_sumbool; rewrite dec_eq_true. auto. +- auto. +- intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP). + destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl. ++ apply list_in_map_inv in i. destruct i as (mr & A & B). + assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A. + apply List.filter_In in B. destruct B as [C D]. rewrite D. auto. ++ destruct (is_callee_save r) eqn:CS; auto. + elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete. +Qed. + +Lemma agree_change_sp: + forall ms sp rs sp', + agree ms sp rs -> sp' <> Vundef -> + agree ms sp' (rs#SP <- sp'). +Proof. + intros. inv H. split; auto. + intros. rewrite Pregmap.gso; auto with asmgen. +Qed. + +(** Connection between Mach and Asm calling conventions for external + functions. *) + +Lemma extcall_arg_match: + forall ms sp rs m m' l v, + agree ms sp rs -> + Mem.extends m m' -> + Mach.extcall_arg ms m sp l v -> + exists v', AB.extcall_arg rs m' l v' /\ Val.lessdef v v'. +Proof. + intros. inv H1. + exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto. + unfold Mach.load_stack in H2. + exploit Mem.loadv_extends; eauto. intros [v' [A B]]. + rewrite (sp_val _ _ _ H) in A. + exists v'; split; auto. + econstructor. eauto. assumption. +Qed. + +Lemma extcall_arg_pair_match: + forall ms sp rs m m' p v, + agree ms sp rs -> + Mem.extends m m' -> + Mach.extcall_arg_pair ms m sp p v -> + exists v', AB.extcall_arg_pair rs m' p v' /\ Val.lessdef v v'. +Proof. + intros. inv H1. +- exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto. +- exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1). + exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2). + exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto. +Qed. + + +Lemma extcall_args_match: + forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' -> + forall ll vl, + list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl -> + exists vl', list_forall2 (AB.extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'. +Proof. + induction 3; intros. + exists (@nil val); split. constructor. constructor. + exploit extcall_arg_pair_match; eauto. intros [v1' [A B]]. + destruct IHlist_forall2 as [vl' [C D]]. + exists (v1' :: vl'); split; constructor; auto. +Qed. + +Lemma extcall_arguments_match: + forall ms m m' sp rs sg args, + agree ms sp rs -> Mem.extends m m' -> + Mach.extcall_arguments ms m sp sg args -> + exists args', AB.extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'. +Proof. + unfold Mach.extcall_arguments, AB.extcall_arguments; intros. + eapply extcall_args_match; eauto. +Qed. + +Remark builtin_arg_match: + forall ge (rs: regset) sp m a v, + eval_builtin_arg ge (fun r => rs (preg_of r)) sp m a v -> + eval_builtin_arg ge rs sp m (map_builtin_arg preg_of a) v. +Proof. + induction 1; simpl; eauto with barg. +Qed. + +Lemma builtin_args_match: + forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' -> + forall al vl, eval_builtin_args ge ms sp m al vl -> + exists vl', eval_builtin_args ge rs sp m' (map (map_builtin_arg preg_of) al) vl' + /\ Val.lessdef_list vl vl'. +Proof. + induction 3; intros; simpl. + exists (@nil val); split; constructor. + exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto. + intros; eapply preg_val; eauto. + intros (v1' & A & B). + destruct IHlist_forall2 as [vl' [C D]]. + exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto. +Qed. + +Lemma agree_set_res: + forall res ms sp rs v v', + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.set_res res v ms) sp (AB.set_res (map_builtin_res preg_of res) v' rs). +Proof. + induction res; simpl; intros. +- eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto. + intros. apply Pregmap.gso; auto. +- auto. +- apply IHres2. apply IHres1. auto. + apply Val.hiword_lessdef; auto. + apply Val.loword_lessdef; auto. +Qed. + +Lemma set_res_other: + forall r res v rs, + data_preg r = false -> + set_res (map_builtin_res preg_of res) v rs r = rs r. +Proof. + induction res; simpl; intros. +- apply Pregmap.gso. red; intros; subst r. rewrite preg_of_data in H; discriminate. +- auto. +- rewrite IHres2, IHres1; auto. +Qed. + +(* inspired from Mach *) + +Lemma find_label_tail: + forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c. +Proof. + induction c; simpl; intros. discriminate. + destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib. +Qed. + +(* inspired from Asmgenproof0 *) + +(* ... skip ... *) + +(** The ``code tail'' of an instruction list [c] is the list of instructions + starting at PC [pos]. *) + +Inductive code_tail: Z -> bblocks -> bblocks -> Prop := + | code_tail_0: forall c, + code_tail 0 c c + | code_tail_S: forall pos bi c1 c2, + code_tail pos c1 c2 -> + code_tail (pos + (size bi)) (bi :: c1) c2. + +Lemma code_tail_pos: + forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0. +Proof. + induction 1. lia. generalize (size_positive bi); intros; lia. +Qed. + +Lemma find_bblock_tail: + forall c1 bi c2 pos, + code_tail pos c1 (bi :: c2) -> + find_bblock pos c1 = Some bi. +Proof. + induction c1; simpl; intros. + inversion H. + destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; lia. + destruct (zeq pos 0). subst pos. + inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; lia. + inv H. congruence. replace (pos0 + size a - size a) with pos0 by lia. + eauto. +Qed. + + +Local Hint Resolve code_tail_0 code_tail_S: core. + +Lemma code_tail_next: + forall fn ofs c0, + code_tail ofs fn c0 -> + forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1. +Proof. + induction 1; intros. + - subst; eauto. + - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto. + lia. +Qed. + +Lemma size_blocks_pos c: 0 <= size_blocks c. +Proof. + induction c as [| a l ]; simpl; try lia. + generalize (size_positive a); lia. +Qed. + +Remark code_tail_positive: + forall fn ofs c, + code_tail ofs fn c -> 0 <= ofs. +Proof. + induction 1; intros; simpl. + - lia. + - generalize (size_positive bi). lia. +Qed. + +Remark code_tail_size: + forall fn ofs c, + code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c. +Proof. + induction 1; intros; simpl; try lia. +Qed. + +Remark code_tail_bounds fn ofs c: + code_tail ofs fn c -> 0 <= ofs <= size_blocks fn. +Proof. + intro H; + exploit code_tail_size; eauto. + generalize (code_tail_positive _ _ _ H), (size_blocks_pos c). + lia. +Qed. + +Local Hint Resolve code_tail_next: core. + +Lemma code_tail_next_int: + forall fn ofs bi c, + size_blocks fn <= Ptrofs.max_unsigned -> + code_tail (Ptrofs.unsigned ofs) fn (bi :: c) -> + code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c. +Proof. + intros. + exploit code_tail_size; eauto. + simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c). + intros. + rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr. + - rewrite Ptrofs.unsigned_repr; eauto. + lia. + - rewrite Ptrofs.unsigned_repr; lia. +Qed. + +(** Predictor for return addresses in generated Asm code. + + The [return_address_offset] predicate defined here is used in the + semantics for Mach to determine the return addresses that are + stored in activation records. *) + +(** Consider a Mach function [f] and a sequence [c] of Mach instructions + representing the Mach code that remains to be executed after a + function call returns. The predicate [return_address_offset f c ofs] + holds if [ofs] is the integer offset of the PPC instruction + following the call in the Asm code obtained by translating the + code of [f]. Graphically: +<< + Mach function f |--------- Mcall ---------| + Mach code c | |--------| + | \ \ + | \ \ + | \ \ + Asm code | |--------| + Asm function |------------- Pcall ---------| + + <-------- ofs -------> +>> +*) + +Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop := + forall tf tc, + transf_function f = OK tf -> + transl_blocks f c false = OK tc -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc. + +Lemma transl_blocks_tail: + forall f c1 c2, is_tail c1 c2 -> + forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 -> + exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2. +Proof. + induction 1; simpl; intros. + exists tc2; exists ep2; split; auto with coqlib. + monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B). + exists tc1; exists ep1; split. auto. + eapply is_tail_trans with x0; eauto with coqlib. +Qed. + +Lemma is_tail_code_tail: + forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1. +Proof. + induction 1; eauto. + destruct IHis_tail; eauto. +Qed. + +Section RETADDR_EXISTS. + +Hypothesis transf_function_inv: + forall f tf, transf_function f = OK tf -> + exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf). + +Hypothesis transf_function_len: + forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned. + + +Lemma return_address_exists: + forall b f c, is_tail (b :: c) f.(MB.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros. destruct (transf_function f) as [tf|] eqn:TF. + + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1). + exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2). + monadInv TR2. + assert (TL3: is_tail x0 (fn_blocks tf)). + { apply is_tail_trans with tc1; auto. + apply is_tail_trans with (x++x0); auto. eapply is_tail_app. + } + exploit is_tail_code_tail. eexact TL3. intros [ofs CT]. + exists (Ptrofs.repr ofs). red; intros. + rewrite Ptrofs.unsigned_repr. congruence. + exploit code_tail_bounds; eauto. + intros; apply transf_function_len in TF. lia. + + exists Ptrofs.zero; red; intros. congruence. +Qed. + +End RETADDR_EXISTS. + +(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points + within the Asmblock code generated by translating Machblock function [f], + and [tc] is the tail of the generated code at the position corresponding + to the code pointer [pc]. *) + +Inductive transl_code_at_pc (ge: MB.genv): + val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop := + transl_code_at_pc_intro: + forall b ofs f c ep tf tc, + Genv.find_funct_ptr ge b = Some(Internal f) -> + transf_function f = Errors.OK tf -> + transl_blocks f c ep = OK tc -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc -> + transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc. + +Remark code_tail_no_bigger: + forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat. +Proof. + induction 1; simpl; lia. +Qed. + +Remark code_tail_unique: + forall fn c pos pos', + code_tail pos fn c -> code_tail pos' fn c -> pos = pos'. +Proof. + induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. + f_equal. eauto. +Qed. + +Lemma return_address_offset_correct: + forall ge b ofs fb f c tf tc ofs', + transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc -> + return_address_offset f c ofs' -> + ofs' = ofs. +Proof. + intros. inv H. red in H0. + exploit code_tail_unique. eexact H12. eapply H0; eauto. intro. + rewrite <- (Ptrofs.repr_unsigned ofs). + rewrite <- (Ptrofs.repr_unsigned ofs'). + congruence. +Qed. + +(** The [find_label] function returns the code tail starting at the + given label. A connection with [code_tail] is then established. *) + +Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks := + match c with + | nil => None + | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl + end. + +Lemma label_pos_code_tail: + forall lbl c pos c', + find_label lbl c = Some c' -> + exists pos', + label_pos lbl pos c = Some pos' + /\ code_tail (pos' - pos) c c' + /\ pos <= pos' <= pos + size_blocks c. +Proof. + induction c. + simpl; intros. discriminate. + simpl; intros until c'. + case (is_label lbl a). + - intros. inv H. exists pos. split; auto. split. + replace (pos - pos) with 0 by lia. constructor. constructor; try lia. + generalize (size_blocks_pos c). generalize (size_positive a). lia. + - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]]. + exists pos'. split. auto. split. + replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by lia. + constructor. auto. generalize (size_positive a). lia. +Qed. + +(** Helper lemmas to reason about +- the "code is tail of" property +- correct translation of labels. *) + +Definition tail_nolabel (k c: bblocks) : Prop := + is_tail k c /\ forall lbl, find_label lbl c = find_label lbl k. + +Lemma tail_nolabel_refl: + forall c, tail_nolabel c c. +Proof. + intros; split. apply is_tail_refl. auto. +Qed. + +Lemma tail_nolabel_trans: + forall c1 c2 c3, tail_nolabel c2 c3 -> tail_nolabel c1 c2 -> tail_nolabel c1 c3. +Proof. + intros. destruct H; destruct H0; split. + eapply is_tail_trans; eauto. + intros. rewrite H1; auto. +Qed. + +Definition nolabel (b: bblock) := + match (header b) with nil => True | _ => False end. + +Hint Extern 1 (nolabel _) => exact I : labels. + +Lemma tail_nolabel_cons: + forall b c k, + nolabel b -> tail_nolabel k c -> tail_nolabel k (b :: c). +Proof. + intros. destruct H0. split. + constructor; auto. + intros. simpl. rewrite <- H1. destruct b as [hd bdy ex]; simpl in *. + destruct hd as [|l hd]; simpl in *. + - assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false). + { apply is_label_correct_false. simpl header. apply in_nil. } + rewrite H2. auto. + - contradiction. +Qed. + +Hint Resolve tail_nolabel_refl: labels. + +Ltac TailNoLabel := + eauto with labels; + match goal with + | [ |- tail_nolabel _ (_ :: _) ] => apply tail_nolabel_cons; [auto; exact I | TailNoLabel] + | [ H: Error _ = OK _ |- _ ] => discriminate + | [ H: assertion_failed = OK _ |- _ ] => discriminate + | [ H: OK _ = OK _ |- _ ] => inv H; TailNoLabel + | [ H: bind _ _ = OK _ |- _ ] => monadInv H; TailNoLabel + | [ H: (if ?x then _ else _) = OK _ |- _ ] => destruct x; TailNoLabel + | [ H: match ?x with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct x; TailNoLabel + | _ => idtac + end. + +Remark tail_nolabel_find_label: + forall lbl k c, tail_nolabel k c -> find_label lbl c = find_label lbl k. +Proof. + intros. destruct H. auto. +Qed. + +Remark tail_nolabel_is_tail: + forall k c, tail_nolabel k c -> is_tail k c. +Proof. + intros. destruct H. auto. +Qed. + +Lemma exec_body_pc: + forall ge l rs1 m1 rs2 m2, + exec_body ge l rs1 m1 = Next rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + induction l. + - intros. inv H. auto. + - intros until m2. intro EXEB. + inv EXEB. destruct (exec_basic_instr _ _ _ _) eqn:EBI; try discriminate. + eapply IHl in H0. rewrite H0. + erewrite exec_basic_instr_pc; eauto. +Qed. + +Section STRAIGHTLINE. + +Variable ge: genv. +Variable fn: function. + +(** Straight-line code is composed of processor instructions that execute + in sequence (no branches, no function calls and returns). + The following inductive predicate relates the machine states + before and after executing a straight-line sequence of instructions. + Instructions are taken from the first list instead of being fetched + from memory. *) + +Inductive exec_straight: list instruction -> regset -> mem -> + list instruction -> regset -> mem -> Prop := + | exec_straight_one: + forall i1 c rs1 m1 rs2 m2, + exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 -> + exec_straight ((PBasic i1) ::g c) rs1 m1 c rs2 m2 + | exec_straight_step: + forall i c rs1 m1 rs2 m2 c' rs3 m3, + exec_basic_instr ge i rs1 m1 = Next rs2 m2 -> + exec_straight c rs2 m2 c' rs3 m3 -> + exec_straight ((PBasic i) :: c) rs1 m1 c' rs3 m3. + +Inductive exec_control_rel: option control -> bblock -> regset -> mem -> + regset -> mem -> Prop := + | exec_control_rel_intro: + forall rs1 m1 b rs1' ctl rs2 m2, + rs1' = nextblock b rs1 -> + exec_control ge fn ctl rs1' m1 = Next rs2 m2 -> + exec_control_rel ctl b rs1 m1 rs2 m2. + +Inductive exec_bblock_rel: bblock -> regset -> mem -> regset -> mem -> Prop := + | exec_bblock_rel_intro: + forall rs1 m1 b rs2 m2, + exec_bblock ge fn b rs1 m1 = Next rs2 m2 -> + exec_bblock_rel b rs1 m1 rs2 m2. + +Lemma exec_straight_body: + forall c l rs1 m1 rs2 m2, + exec_straight c rs1 m1 nil rs2 m2 -> + code_to_basics c = Some l -> + exec_body ge l rs1 m1 = Next rs2 m2. +Proof. + induction c as [|i c]. + - intros until m2. intros EXES CTB. inv EXES. + - intros until m2. intros EXES CTB. inv EXES. + + inv CTB. simpl. rewrite H6. auto. + + inv CTB. destruct (code_to_basics c); try discriminate. inv H0. eapply IHc in H7; eauto. + rewrite <- H7. simpl. rewrite H1. auto. +Qed. + +Lemma exec_straight_body2: + forall c rs1 m1 c' rs2 m2, + exec_straight c rs1 m1 c' rs2 m2 -> + exists body, + exec_body ge body rs1 m1 = Next rs2 m2 + /\ (basics_to_code body) ++g c' = c. +Proof. + intros until m2. induction 1. + - exists (i1::nil). split; auto. simpl. rewrite H. auto. + - destruct IHexec_straight as (bdy & EXEB & BTC). + exists (i:: bdy). split; simpl. + + rewrite H. auto. + + congruence. +Qed. + +Lemma exec_straight_trans: + forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_straight c1 rs1 m1 c2 rs2 m2 -> + exec_straight c2 rs2 m2 c3 rs3 m3 -> + exec_straight c1 rs1 m1 c3 rs3 m3. +Proof. + induction 1; intros. + apply exec_straight_step with rs2 m2; auto. + apply exec_straight_step with rs2 m2; auto. +Qed. + +Lemma exec_straight_two: + forall i1 i2 c rs1 m1 rs2 m2 rs3 m3, + exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 -> + exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 -> + exec_straight (i1 ::g i2 ::g c) rs1 m1 c rs3 m3. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + apply exec_straight_one; auto. +Qed. + +Lemma exec_straight_three: + forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4, + exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 -> + exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 -> + exec_basic_instr ge i3 rs3 m3 = Next rs4 m4 -> + exec_straight (i1 ::g i2 ::g i3 ::g c) rs1 m1 c rs4 m4. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + eapply exec_straight_two; eauto. +Qed. + +(** Like exec_straight predicate, but on blocks *) + +Inductive exec_straight_blocks: bblocks -> regset -> mem -> + bblocks -> regset -> mem -> Prop := + | exec_straight_blocks_one: + forall b1 c rs1 m1 rs2 m2, + exec_bblock ge fn b1 rs1 m1 = Next rs2 m2 -> + rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) -> + exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2 + | exec_straight_blocks_step: + forall b c rs1 m1 rs2 m2 c' rs3 m3, + exec_bblock ge fn b rs1 m1 = Next rs2 m2 -> + rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) -> + exec_straight_blocks c rs2 m2 c' rs3 m3 -> + exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3. + +Lemma exec_straight_blocks_trans: + forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_straight_blocks c1 rs1 m1 c2 rs2 m2 -> + exec_straight_blocks c2 rs2 m2 c3 rs3 m3 -> + exec_straight_blocks c1 rs1 m1 c3 rs3 m3. +Proof. + induction 1; intros. + apply exec_straight_blocks_step with rs2 m2; auto. + apply exec_straight_blocks_step with rs2 m2; auto. +Qed. + +(** Linking exec_straight with exec_straight_blocks *) + +Lemma exec_straight_pc: + forall c c' rs1 m1 rs2 m2, + exec_straight c rs1 m1 c' rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + induction c; intros; try (inv H; fail). + inv H. + - eapply exec_basic_instr_pc; eauto. + - rewrite (IHc c' rs3 m3 rs2 m2); auto. + erewrite exec_basic_instr_pc; eauto. +Qed. + +Lemma regset_same_assign (rs: regset) r: + rs # r <- (rs r) = rs. +Proof. + apply functional_extensionality. intros x. destruct (preg_eq x r); subst; Simpl. +Qed. + +Lemma exec_straight_through_singleinst: + forall a b rs1 m1 rs2 m2 rs2' m2' lb, + bblock_single_inst (PBasic a) = b -> + exec_straight (a ::g nil) rs1 m1 nil rs2 m2 -> + nextblock b rs2 = rs2' -> m2 = m2' -> + exec_straight_blocks (b::lb) rs1 m1 lb rs2' m2'. +Proof. + intros. subst. constructor 1. unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto. + simpl. rewrite regset_same_assign. auto. + simpl; auto. unfold nextblock, incrPC; simpl. Simpl. erewrite exec_straight_pc; eauto. +Qed. + +(** The following lemmas show that straight-line executions + (predicate [exec_straight_blocks]) correspond to correct Asm executions. *) + +Lemma exec_straight_steps_1: + forall c rs m c' rs' m', + exec_straight_blocks c rs m c' rs' m' -> + size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal fn) -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c -> + plus step ge (State rs m) E0 (State rs' m'). +Proof. + induction 1; intros. + apply plus_one. + econstructor; eauto. + eapply find_bblock_tail. eauto. + eapply plus_left'. + econstructor; eauto. + eapply find_bblock_tail. eauto. + apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))). + auto. rewrite H0. rewrite H3. reflexivity. + auto. + apply code_tail_next_int; auto. + traceEq. +Qed. + +Lemma exec_straight_steps_2: + forall c rs m c' rs' m', + exec_straight_blocks c rs m c' rs' m' -> + size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal fn) -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c -> + exists ofs', + rs'#PC = Vptr b ofs' + /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'. +Proof. + induction 1; intros. + exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split. + rewrite H0. rewrite H2. auto. + apply code_tail_next_int; auto. + apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))). + auto. rewrite H0. rewrite H3. reflexivity. auto. + apply code_tail_next_int; auto. +Qed. + +End STRAIGHTLINE. + +(** * Properties of the Machblock call stack *) + +Section MATCH_STACK. + +Variable ge: MB.genv. + +Inductive match_stack: list MB.stackframe -> Prop := + | match_stack_nil: + match_stack nil + | match_stack_cons: forall fb sp ra c s f tf tc, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transl_code_at_pc ge ra fb f c false tf tc -> + sp <> Vundef -> + match_stack s -> + match_stack (Stackframe fb sp ra c :: s). + +Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef. +Proof. + induction 1; simpl. + unfold Vnullptr; destruct Archi.ptr64; congruence. + auto. +Qed. + +Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef. +Proof. + induction 1; simpl. + unfold Vnullptr; destruct Archi.ptr64; congruence. + inv H0. congruence. +Qed. + +Lemma lessdef_parent_sp: + forall s v, + match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s. +Proof. + intros. inv H0. auto. exploit parent_sp_def; eauto. tauto. +Qed. + +Lemma lessdef_parent_ra: + forall s v, + match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s. +Proof. + intros. inv H0. auto. exploit parent_ra_def; eauto. tauto. +Qed. + +End MATCH_STACK. diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v new file mode 100644 index 00000000..a18afec8 --- /dev/null +++ b/kvx/Asmblockgenproof1.v @@ -0,0 +1,2500 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Proof of correctness for individual instructions *) + +Require Import Coqlib Errors Maps. +Require Import AST Integers Floats Values Memory Globalenvs. +Require Import Op Locations Machblock Conventions. +Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops. +Require Import Chunks. +Require Import Lia. + +Import PArithCoercions. + +(** Decomposition of integer constants. *) + +Lemma make_immed32_sound: + forall n, + match make_immed32 n with + | Imm32_single imm => n = imm + end. +Proof. + intros; unfold make_immed32. set (lo := Int.sign_ext 12 n). + predSpec Int.eq Int.eq_spec n lo; auto. +Qed. + +Lemma make_immed64_sound: + forall n, + match make_immed64 n with + | Imm64_single imm => n = imm + end. +Proof. + intros; unfold make_immed64. set (lo := Int64.sign_ext 12 n). + predSpec Int64.eq Int64.eq_spec n lo. +- auto. +- set (m := Int64.sub n lo). + set (p := Int64.zero_ext 20 (Int64.shru m (Int64.repr 12))). + predSpec Int64.eq Int64.eq_spec n (Int64.add (Int64.sign_ext 32 (Int64.shl p (Int64.repr 12))) lo). + auto. + auto. +Qed. + + +(** Properties of registers *) + +Lemma ireg_of_not_RTMP: + forall m r, ireg_of m = OK r -> IR r <> IR RTMP. +Proof. + intros. erewrite <- ireg_of_eq; eauto with asmgen. +Qed. + +Lemma ireg_of_not_RTMP': + forall m r, ireg_of m = OK r -> r <> RTMP. +Proof. + intros. apply ireg_of_not_RTMP in H. congruence. +Qed. + +Hint Resolve ireg_of_not_RTMP ireg_of_not_RTMP': asmgen. + + +(** Useful simplification tactic *) + +Ltac Simplif := + ((rewrite nextblock_inv by eauto with asmgen) + || (rewrite nextblock_inv1 by eauto with asmgen) + || (rewrite Pregmap.gss) + || (rewrite nextblock_pc) + || (rewrite Pregmap.gso by eauto with asmgen) + ); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +(** * Correctness of RISC-V constructor functions *) + +Section CONSTRUCTORS. + +Variable ge: genv. +Variable fn: function. + +Lemma loadimm32_correct: + forall rd n k rs m, + exists rs', + exec_straight ge (loadimm32 rd n ::g k) rs m k rs' m + /\ rs'#rd = Vint n + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm32; intros. generalize (make_immed32_sound n); intros E. + destruct (make_immed32 n). +- subst imm. econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +Qed. + +Lemma loadimm64_correct: + forall rd n k rs m, + exists rs', + exec_straight ge (loadimm64 rd n ::g k) rs m k rs' m + /\ rs'#rd = Vlong n + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + unfold loadimm64; intros. generalize (make_immed64_sound n); intros E. + destruct (make_immed64 n). +- subst imm. econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. +Qed. + +Lemma opimm64_correct: + forall (op: arith_name_rrr) + (opi: arith_name_rri64) + (sem: val -> val -> val) m, + (forall d s1 s2 rs, + exec_basic_instr ge (op d s1 s2) rs m = Next ((rs#d <- (sem rs#s1 rs#s2))) m) -> + (forall d s n rs, + exec_basic_instr ge (opi d s n) rs m = Next ((rs#d <- (sem rs#s (Vlong n)))) m) -> + forall rd r1 n k rs, + r1 <> RTMP -> + exists rs', + exec_straight ge (opimm64 op opi rd r1 n ::g k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vlong n) + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. unfold opimm64. generalize (make_immed64_sound n); intros E. + destruct (make_immed64 n). +- subst imm. econstructor; split. + apply exec_straight_one. rewrite H0. simpl; eauto. auto. + split. Simpl. intros; Simpl. +Qed. + +(** Add offset to pointer *) + +Lemma addptrofs_correct: + forall rd r1 n k rs m, + r1 <> RTMP -> + exists rs', + exec_straight ge (addptrofs rd r1 n ::g k) rs m k rs' m + /\ Val.lessdef (Val.offset_ptr rs#r1 n) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + unfold addptrofs; intros. + destruct (Ptrofs.eq_dec n Ptrofs.zero). +- subst n. econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. destruct (rs r1); simpl; auto. rewrite Ptrofs.add_zero; auto. + intros; Simpl. +- unfold addimm64. + exploit (opimm64_correct Paddl Paddil Val.addl); eauto. intros (rs' & A & B & C). + exists rs'; split. eexact A. split; auto. + rewrite B. destruct (rs r1); simpl; auto. + rewrite Ptrofs.of_int64_to_int64 by auto. auto. +Qed. + +Ltac ArgsInv := + repeat (match goal with + | [ H: Error _ = OK _ |- _ ] => discriminate + | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args + | [ H: bind _ _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + end); + subst; + repeat (match goal with + | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in * + | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in * + end). + +Inductive exec_straight_opt: list instruction -> regset -> mem -> list instruction -> regset -> mem -> Prop := + | exec_straight_opt_refl: forall c rs m, + exec_straight_opt c rs m c rs m + | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2, + exec_straight ge c1 rs1 m1 c2 rs2 m2 -> + exec_straight_opt c1 rs1 m1 c2 rs2 m2. + +Remark exec_straight_opt_right: + forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2, + exec_straight_opt c1 rs1 m1 c2 rs2 m2 -> + exec_straight ge c2 rs2 m2 c3 rs3 m3 -> + exec_straight ge c1 rs1 m1 c3 rs3 m3. +Proof. + destruct 1; intros. auto. eapply exec_straight_trans; eauto. +Qed. + +Lemma transl_comp_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp cmp Signed r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmp_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_comp. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Signed) rs # r1 rs # r2)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Signed) rs # r1 rs # r2)). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmp_bool cmp rs#r1 rs#r2) as cmpbool. + destruct cmp; simpl; + unfold Val.cmp; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compi_correct: + forall cmp r1 n lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compi cmp Signed r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmp_bool cmp rs#r1 (Vint n) = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compi. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Signed) rs # r1 (Vint n))) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Signed) rs # r1 (Vint n))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmp_bool cmp rs#r1 (Vint n)) as cmpbool. + destruct cmp; simpl; + unfold Val.cmp; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compu_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ (Val.mxcmpu_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_comp. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.mxcmpu_bool cmp rs#r1 rs#r2) as cmpubool. + destruct cmp; simpl; unfold Val.mxcmpu; + rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compui_correct: + forall cmp r1 n lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compi cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ (Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b -> + exec_control ge fn (Some (PCtlFlow ((Pcb BTwnez RTMP lbl)))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compi. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_int (itest_for_cmp cmp Unsigned) rs # r1 (Vint n))) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_int (itest_for_cmp cmp Unsigned) rs # r1 (Vint n))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.mxcmpu_bool cmp rs#r1 (Vint n)) as cmpubool. + destruct cmp; simpl; unfold Val.mxcmpu; + rewrite <- Heqcmpubool; destruct cmpubool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compl_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compl cmp Signed r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmpl_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compl. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Signed) rs # r1 rs # r2)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Signed) rs # r1 rs # r2)). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpl_bool cmp rs#r1 rs#r2) as cmpbool. + destruct cmp; simpl; + unfold compare_long, Val.cmpl; + rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compil_correct: + forall cmp r1 n lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compil cmp Signed r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmpl_bool cmp rs#r1 (Vlong n) = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compil. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Signed) rs # r1 (Vlong n))) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Signed) rs # r1 (Vlong n))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpl_bool cmp rs#r1 (Vlong n)) as cmpbool. + destruct cmp; simpl; + unfold compare_long, Val.cmpl; + rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma swap_comparison_cmpf_eq: + forall v1 v2 cmp, + (Val.cmpf cmp v1 v2) = (Val.cmpf (swap_comparison cmp) v2 v1). +Proof. + intros. unfold Val.cmpf. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. + rewrite Float.cmp_swap. auto. +Qed. + +Lemma swap_comparison_cmpf_bool: + forall cmp ft v1 v2, + ftest_for_cmp cmp = Reversed ft -> + Val.cmpf_bool cmp v1 v2 = Val.cmpf_bool (swap_comparison cmp) v2 v1. +Proof. + intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_swap. reflexivity. +Qed. + +Lemma transl_compf_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp_float64 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmpf_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. unfold transl_comp_float64. destruct (ftest_for_cmp cmp) eqn:FT. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r1) (rs r2))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool. + destruct cmp; simpl; + unfold compare_float; + unfold Val.cmpf; simpl in FT; inversion FT; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r2) (rs r1))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool. + erewrite swap_comparison_cmpf_bool in Heqcmpbool; eauto. + destruct cmp; simpl; + unfold compare_float; + unfold Val.cmpf; simpl in FT; inversion FT; simpl in Heqcmpbool; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma cmpf_bool_ne_eq: + forall v1 v2, + Val.cmpf_bool Cne v1 v2 = option_map negb (Val.cmpf_bool Ceq v1 v2). +Proof. + intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_ne_eq. simpl. reflexivity. +Qed. + +Lemma cmpf_bool_ne_eq_rev: + forall v1 v2, + Val.cmpf_bool Ceq v1 v2 = option_map negb (Val.cmpf_bool Cne v1 v2). +Proof. + intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_ne_eq. simpl. rewrite negb_involutive. reflexivity. +Qed. + +Lemma option_map_negb_negb: + forall v, + option_map negb (option_map negb v) = v. +Proof. + destruct v; simpl; auto. rewrite negb_involutive. reflexivity. +Qed. + +Lemma notbool_option_map_negb: + forall v, Val.notbool (Val.of_optbool v) = Val.of_optbool (option_map negb v). +Proof. + unfold Val.notbool. unfold Val.of_optbool. + destruct v; auto. destruct b; auto. +Qed. + +Lemma swap_comparison_cmpf_bool_notftest: + forall cmp ft v1 v2, + notftest_for_cmp cmp = Reversed ft -> + Val.cmpf_bool cmp v1 v2 = Val.cmpf_bool (swap_comparison cmp) v2 v1. +Proof. + intros. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. rewrite Float.cmp_swap. reflexivity. +Qed. + +Lemma transl_compnotf_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp_notfloat64 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ (option_map negb (Val.cmpf_bool cmp rs#r1 rs#r2) = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. unfold transl_comp_notfloat64. destruct (notftest_for_cmp cmp) eqn:FT. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r1) (rs r2))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (option_map negb (Val.cmpf_bool cmp rs#r1 rs#r2)) as cmpbool. + destruct cmp; simpl; + unfold compare_float; + unfold Val.cmpf; simpl in FT; inversion FT. + * rewrite cmpf_bool_ne_eq; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite cmpf_bool_ne_eq_rev. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_float _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_float ft (rs r2) (rs r1))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpf_bool cmp rs#r1 rs#r2) as cmpbool. + erewrite swap_comparison_cmpf_bool_notftest in Heqcmpbool; eauto. + destruct cmp; simpl; + unfold compare_float; + unfold Val.cmpf; simpl in FT; inversion FT; simpl in Heqcmpbool. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma swap_comparison_cmpfs_bool: + forall cmp ft v1 v2, + ftest_for_cmp cmp = Reversed ft -> + Val.cmpfs_bool cmp v1 v2 = Val.cmpfs_bool (swap_comparison cmp) v2 v1. +Proof. + intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_swap. reflexivity. +Qed. + +Lemma transl_compfs_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp_float32 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.cmpfs_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. unfold transl_comp_float32. destruct (ftest_for_cmp cmp) eqn:FT. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r1) (rs r2))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool. + destruct cmp; simpl; + unfold compare_single; + unfold Val.cmpfs; simpl in FT; inversion FT; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r2) (rs r1))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool. + erewrite swap_comparison_cmpfs_bool in Heqcmpbool; eauto. + destruct cmp; simpl; + unfold compare_single; + unfold Val.cmpfs; simpl in FT; inversion FT; simpl in Heqcmpbool; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma swap_comparison_cmpfs_bool_notftest: + forall cmp ft v1 v2, + notftest_for_cmp cmp = Reversed ft -> + Val.cmpfs_bool cmp v1 v2 = Val.cmpfs_bool (swap_comparison cmp) v2 v1. +Proof. + intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_swap. reflexivity. +Qed. + +Lemma cmpfs_bool_ne_eq: + forall v1 v2, + Val.cmpfs_bool Cne v1 v2 = option_map negb (Val.cmpfs_bool Ceq v1 v2). +Proof. + intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_ne_eq. simpl. reflexivity. +Qed. + +Lemma cmpfs_bool_ne_eq_rev: + forall v1 v2, + Val.cmpfs_bool Ceq v1 v2 = option_map negb (Val.cmpfs_bool Cne v1 v2). +Proof. + intros. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. rewrite Float32.cmp_ne_eq. simpl. rewrite negb_involutive. reflexivity. +Qed. + +Lemma transl_compnotfs_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_comp_notfloat32 cmp r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ (option_map negb (Val.cmpfs_bool cmp rs#r1 rs#r2) = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. unfold transl_comp_notfloat32. destruct (notftest_for_cmp cmp) eqn:FT. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r1) (rs r2))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (option_map negb (Val.cmpfs_bool cmp rs#r1 rs#r2)) as cmpbool. + destruct cmp; simpl; + unfold compare_single; + unfold Val.cmpfs; simpl in FT; inversion FT. + * rewrite cmpfs_bool_ne_eq; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite cmpfs_bool_ne_eq_rev. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. + * esplit. split. + - apply exec_straight_one; simpl; eauto. + - split. + + intros; Simpl. + + intros. remember (rs # RTMP <- (compare_single _ _ _)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_single ft (rs r2) (rs r1))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.cmpfs_bool cmp rs#r1 rs#r2) as cmpbool. + erewrite swap_comparison_cmpfs_bool_notftest in Heqcmpbool; eauto. + destruct cmp; simpl; + unfold compare_single; + unfold Val.cmpfs; simpl in FT; inversion FT; simpl in Heqcmpbool. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + * rewrite notbool_option_map_negb. rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_complu_correct: + forall cmp r1 r2 lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compl cmp Unsigned r1 r2 lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.mxcmplu_bool cmp rs#r1 rs#r2 = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compl. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 rs # r2)). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.mxcmplu_bool cmp rs#r1 rs#r2) as cmpbool. + destruct cmp; simpl; + unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_compilu_correct: + forall cmp r1 n lbl k rs m tbb b, + exists rs', + exec_straight ge (transl_compil cmp Unsigned r1 n lbl k) rs m (Pcb BTwnez RTMP lbl ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b -> + exec_control ge fn (Some (PCtlFlow (Pcb BTwnez RTMP lbl))) (nextblock tbb rs') m + = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. esplit. split. +- unfold transl_compil. apply exec_straight_one; simpl; eauto. +- split. + + intros; Simpl. + + intros. + remember (rs # RTMP <- (compare_long (itest_for_cmp cmp Unsigned) rs # r1 (Vlong n))) as rs'. + simpl. assert (Val.cmp_bool Cne (nextblock tbb rs') # RTMP (Vint (Int.repr 0)) = Some b). + { + assert ((nextblock tbb rs') # RTMP = (compare_long (itest_for_cmp cmp Unsigned) rs # r1 (Vlong n))). + { rewrite Heqrs'. auto. } + rewrite H0. rewrite <- H. + remember (Val.mxcmplu_bool cmp rs#r1 (Vlong n)) as cmpbool. + destruct cmp; simpl; + unfold compare_long, Val.mxcmplu; rewrite <- Heqcmpbool; destruct cmpbool; simpl; auto; + destruct b0; simpl; auto. + } + rewrite H0. simpl; auto. +Qed. + +Lemma transl_opt_compuimm_correct: + forall n cmp r1 lbl k rs m b tbb c, + select_comp n cmp = Some c -> + exists rs', exists insn, + exec_straight_opt (transl_opt_compuimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.mxcmpu_bool cmp rs#r1 (Vint n) = Some b -> + exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. +(* unfold transl_opt_compuimm. unfold select_comp in H. rewrite H; simpl. *) + remember c as c'. + destruct c'. + - (* c = Ceq *) + assert (Int.eq n Int.zero = true) as H'. + { remember (Int.eq n Int.zero) as termz. destruct termz; auto. + generalize H. unfold select_comp; rewrite <- Heqtermz; simpl. + discriminate. } + assert (n = (Int.repr 0)) as H0. { + destruct (Int.eq_dec n (Int.repr 0)) as [Ha|Ha]; auto. + generalize (Int.eq_false _ _ Ha). unfold Int.zero in H'. + rewrite H'. discriminate. + } + assert (Ceq = cmp). { + remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_comp; + rewrite H'; simpl; auto; + intros; contradict H; discriminate. + } + unfold transl_opt_compuimm. subst. rewrite H'. + + exists rs, (Pcbu BTweqz r1 lbl). + split. + * constructor. + * split; auto. simpl. intros. + assert (rs r1 = (nextblock tbb rs) r1). + unfold nextblock, incrPC. Simpl. rewrite H1 in H0. + (*assert (Val.cmp_bool Ceq (rs r1) (Vint (Int.repr 0)) = Some b) as EVAL'S. + { rewrite <- H2. rewrite <- H0. rewrite <- H1. auto. }*) + auto; + unfold eval_branch. rewrite H0; auto. + - (* c = Cne *) + assert (Int.eq n Int.zero = true) as H'. + { remember (Int.eq n Int.zero) as termz. destruct termz; auto. + generalize H. unfold select_comp; rewrite <- Heqtermz; simpl. + discriminate. } + assert (n = (Int.repr 0)) as H0. { + destruct (Int.eq_dec n (Int.repr 0)) as [Ha|Ha]; auto. + generalize (Int.eq_false _ _ Ha). unfold Int.zero in H'. + rewrite H'. discriminate. + } + assert (Cne = cmp). { + remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_comp; + rewrite H'; simpl; auto; + intros; contradict H; discriminate. + } + unfold transl_opt_compuimm. subst. rewrite H'. + + exists rs, (Pcbu BTwnez r1 lbl). + split. + * constructor. + * split; auto. simpl. intros. + assert (rs r1 = (nextblock tbb rs) r1). + unfold nextblock, incrPC. Simpl. rewrite H1 in H0. + auto; + unfold eval_branch. rewrite H0. auto. + - (* c = Clt *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero); + destruct cmp; discriminate. + - (* c = Cle *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero); + destruct cmp; discriminate. + - (* c = Cgt *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero); + destruct cmp; discriminate. + - (* c = Cge *) contradict H; unfold select_comp; destruct (Int.eq n Int.zero); + destruct cmp; discriminate. +Qed. + +Lemma transl_opt_compluimm_correct: + forall n cmp r1 lbl k rs m b tbb c, + select_compl n cmp = Some c -> + exists rs', exists insn, + exec_straight_opt (transl_opt_compluimm n cmp r1 lbl k) rs m ((PControl insn) ::g k) rs' m + /\ (forall r : preg, r <> PC -> r <> RTMP -> rs' r = rs r) + /\ ( Val.mxcmplu_bool cmp rs#r1 (Vlong n) = Some b -> + exec_control ge fn (Some insn) (nextblock tbb rs') m = eval_branch fn lbl (nextblock tbb rs') m (Some b)) + . +Proof. + intros. +(* unfold transl_opt_compluimm; rewrite H; simpl. *) + remember c as c'. + destruct c'. + - (* c = Ceq *) + assert (Int64.eq n Int64.zero = true) as H'. + { remember (Int64.eq n Int64.zero) as termz. destruct termz; auto. + generalize H. unfold select_compl; rewrite <- Heqtermz; simpl. + discriminate. } + assert (n = (Int64.repr 0)) as H0. { + destruct (Int64.eq_dec n (Int64.repr 0)) as [Ha|Ha]; auto. + generalize (Int64.eq_false _ _ Ha). unfold Int64.zero in H'. + rewrite H'. discriminate. + } + assert (Ceq = cmp). { + remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_compl; + rewrite H'; simpl; auto; + intros; contradict H; discriminate. + } + unfold transl_opt_compluimm; subst; rewrite H'. + + exists rs, (Pcbu BTdeqz r1 lbl). + split. + * constructor. + * split; auto. simpl. intros. + assert (rs r1 = (nextblock tbb rs) r1). + unfold nextblock, incrPC. Simpl. rewrite H1 in H0. + auto; + unfold eval_branch. rewrite H0; auto. + - (* c = Cne *) + assert (Int64.eq n Int64.zero = true) as H'. + { remember (Int64.eq n Int64.zero) as termz. destruct termz; auto. + generalize H. unfold select_compl; rewrite <- Heqtermz; simpl. + discriminate. } + assert (n = (Int64.repr 0)) as H0. { + destruct (Int64.eq_dec n (Int64.repr 0)) as [Ha|Ha]; auto. + generalize (Int64.eq_false _ _ Ha). unfold Int64.zero in H'. + rewrite H'. discriminate. + } + assert (Cne = cmp). { + remember cmp as c0'. destruct c0'; auto; generalize H; unfold select_compl; + rewrite H'; simpl; auto; + intros; contradict H; discriminate. + } + unfold transl_opt_compluimm; subst; rewrite H'. + + exists rs, (Pcbu BTdnez r1 lbl). + split. + * constructor. + * split; auto. simpl. intros. + assert (rs r1 = (nextblock tbb rs) r1). + unfold nextblock, incrPC. Simpl. rewrite H1 in H0. + auto; + unfold eval_branch. rewrite H0; auto. + - (* c = Clt *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero); + destruct cmp; discriminate. + - (* c = Cle *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero); + destruct cmp; discriminate. + - (* c = Cgt *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero); + destruct cmp; discriminate. + - (* c = Cge *) contradict H; unfold select_compl; destruct (Int64.eq n Int64.zero); + destruct cmp; discriminate. +Qed. + +Local Hint Resolve Val.mxcmpu_bool_correct Val.mxcmplu_bool_correct: core. + +Lemma transl_cbranch_correct_1: + forall cond args lbl k c m ms b sp rs m' tbb, + transl_cbranch cond args lbl k = OK c -> + eval_condition cond (List.map ms args) m = Some b -> + agree ms sp rs -> + Mem.extends m m' -> + exists rs', exists insn, + exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m' + /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = eval_branch fn lbl (nextblock tbb rs') m' (Some b) + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until tbb; intros TRANSL EVAL AG MEXT. + set (vl' := map rs (map preg_of args)). + assert (EVAL': eval_condition cond vl' m' = Some b). + { apply eval_condition_lessdef with (map ms args) m; auto. eapply preg_vals; eauto. } + clear EVAL MEXT AG. + destruct cond; simpl in TRANSL; ArgsInv. +(* Ccomp *) +- exploit (transl_comp_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. +(* Ccompu *) +- exploit (transl_compu_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; eauto. +(* Ccompimm *) +- remember (Int.eq n Int.zero) as eqz. + destruct eqz. + + assert (n = (Int.repr 0)). { + destruct (Int.eq_dec n (Int.repr 0)) as [H|H]; auto. + generalize (Int.eq_false _ _ H). unfold Int.zero in Heqeqz. + rewrite <- Heqeqz. discriminate. + } + exists rs, (Pcb (btest_for_cmpswz c0) x lbl). + split. + * constructor. + * split; auto. + assert (rs x = (nextblock tbb rs) x). + unfold nextblock, incrPC. Simpl. rewrite H0 in EVAL'. clear H0. + destruct c0; simpl; auto; + unfold eval_branch; rewrite <- H; rewrite EVAL'; auto. + + exploit (transl_compi_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C'). + exists rs'2, (Pcb BTwnez RTMP lbl). + split. + * constructor. eexact A'. + * split; auto. + { apply C'; auto. } +(* Ccompuimm *) +- remember (select_comp n c0) as selcomp. + destruct selcomp. + + exploit (transl_opt_compuimm_correct n c0 x lbl k). apply eq_sym. apply Heqselcomp. + intros (rs' & i & A & B & C). + exists rs', i. + split. + * apply A. + * split; auto. apply C. apply EVAL'. + + assert (transl_opt_compuimm n c0 x lbl k = transl_compi c0 Unsigned x n lbl k). + { unfold transl_opt_compuimm. + destruct (Int.eq n Int.zero) eqn:EQN. + all: unfold select_comp in Heqselcomp; rewrite EQN in Heqselcomp; destruct c0; simpl in *; auto. + all: discriminate. } + rewrite H. clear H. + exploit (transl_compui_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C'). + exists rs'2, (Pcb BTwnez RTMP lbl). + split. + * constructor. eexact A'. + * split; auto. + { apply C'; auto. } +(* Ccompl *) +- exploit (transl_compl_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. +(* Ccomplu *) +- exploit (transl_complu_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; eauto. +(* Ccomplimm *) +- remember (Int64.eq n Int64.zero) as eqz. + destruct eqz. + + assert (n = (Int64.repr 0)). { + destruct (Int64.eq_dec n (Int64.repr 0)) as [H|H]; auto. + generalize (Int64.eq_false _ _ H). unfold Int64.zero in Heqeqz. + rewrite <- Heqeqz. discriminate. + } + exists rs, (Pcb (btest_for_cmpsdz c0) x lbl). + split. + * constructor. + * split; auto. + assert (rs x = (nextblock tbb rs) x). + unfold nextblock, incrPC. Simpl. rewrite H0 in EVAL'. clear H0. + destruct c0; simpl; auto; + unfold eval_branch; rewrite <- H; rewrite EVAL'; auto. + + exploit (transl_compil_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C'). + exists rs'2, (Pcb BTwnez RTMP lbl). + split. + * constructor. eexact A'. + * split; auto. + { apply C'; auto. } + +(* Ccompluimm *) +- remember (select_compl n c0) as selcomp. + destruct selcomp. + + exploit (transl_opt_compluimm_correct n c0 x lbl k). apply eq_sym. apply Heqselcomp. + intros (rs' & i & A & B & C). + exists rs', i. + split. + * apply A. + * split; eauto. (* apply C. apply EVAL'. *) + + assert (transl_opt_compluimm n c0 x lbl k = transl_compil c0 Unsigned x n lbl k). + { unfold transl_opt_compluimm. + destruct (Int64.eq n Int64.zero) eqn:EQN. + all: unfold select_compl in Heqselcomp; rewrite EQN in Heqselcomp; destruct c0; simpl in *; auto. + all: discriminate. } + rewrite H. clear H. + exploit (transl_compilu_correct c0 x n lbl); eauto. intros (rs'2 & A' & B' & C'). + exists rs'2, (Pcb BTwnez RTMP lbl). + split. + * constructor. eexact A'. + * split; auto. + { apply C'; auto. eapply Val.mxcmplu_bool_correct; eauto. } + +(* Ccompf *) +- exploit (transl_compf_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. + +(* Cnotcompf *) +- exploit (transl_compnotf_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. + +(* Ccompfs *) +- exploit (transl_compfs_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. + +(* Cnotcompfs *) +- exploit (transl_compnotfs_correct c0 x x0 lbl); eauto. intros (rs' & A & B & C). + exists rs', (Pcb BTwnez RTMP lbl). + split. + + constructor. eexact A. + + split; auto. apply C; auto. +Qed. + +Lemma transl_cbranch_correct_true: + forall cond args lbl k c m ms sp rs m' tbb, + transl_cbranch cond args lbl k = OK c -> + eval_condition cond (List.map ms args) m = Some true -> + agree ms sp rs -> + Mem.extends m m' -> + exists rs', exists insn, + exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m' + /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = goto_label fn lbl (nextblock tbb rs') m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. eapply transl_cbranch_correct_1 with (b := true); eauto. +Qed. + +Lemma transl_cbranch_correct_false: + forall cond args lbl k c m ms sp rs tbb m', + transl_cbranch cond args lbl k = OK c -> + eval_condition cond (List.map ms args) m = Some false -> + agree ms sp rs -> + Mem.extends m m' -> + exists rs', exists insn, + exec_straight_opt c rs m' ((PControl insn) ::g k) rs' m' + /\ exec_control ge fn (Some insn) (nextblock tbb rs') m' = Next (nextblock tbb rs') m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. exploit transl_cbranch_correct_1. all: eauto. simpl eval_branch. instantiate (1 := tbb). + intros (rs' & insn & A & B & C). rewrite regset_same_assign in B. + eexists; eexists. split; try split. all: eassumption. +Qed. + +(** Translation of condition operators *) + +Lemma transl_cond_int32s_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_int32s cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.cmp cmp rs#r1 rs#r2) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + + +Lemma transl_cond_int32u_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_int32u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ rs'#rd = Val.mxcmpu cmp rs#r1 rs#r2 + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_cond_int64s_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_int64s cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 rs#r2)) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_cond_int64u_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_int64u cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ rs'#rd = Val.mxcmplu cmp rs#r1 rs#r2 + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_condimm_int32s_correct: + forall cmp rd r1 n k rs m, + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code (transl_condimm_int32s cmp rd r1 n k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.cmp cmp rs#r1 (Vint n)) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Local Hint Resolve Val.mxcmpu_correct Val.mxcmplu_correct: core. + +Lemma transl_condimm_int32u_correct: + forall cmp rd r1 n k rs m, + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code (transl_condimm_int32u cmp rd r1 n k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.cmpu (Mem.valid_pointer m) cmp rs#r1 (Vint n)) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_condimm_int64s_correct: + forall cmp rd r1 n k rs m, + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code (transl_condimm_int64s cmp rd r1 n k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.maketotal (Val.cmpl cmp rs#r1 (Vlong n))) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_condimm_int64u_correct: + forall cmp rd r1 n k rs m, + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code (transl_condimm_int64u cmp rd r1 n k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.maketotal (Val.cmplu (Mem.valid_pointer m) cmp rs#r1 (Vlong n))) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl; + (econstructor; split; + [ apply exec_straight_one; [simpl; eauto] | + split; intros; Simpl; unfold compare_long; eauto]). +Qed. + +Lemma swap_comparison_cmpfs: + forall v1 v2 cmp, + Val.lessdef (Val.cmpfs cmp v1 v2) (Val.cmpfs (swap_comparison cmp) v2 v1). +Proof. + intros. unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct v1; destruct v2; auto. + rewrite Float32.cmp_swap. auto. +Qed. + +Lemma transl_cond_float32_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_float32 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.cmpfs cmp rs#r1 rs#r2) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. apply swap_comparison_cmpfs. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. apply swap_comparison_cmpfs. +- econstructor; split. apply exec_straight_one; [simpl; + eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_cond_nofloat32_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_notfloat32 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.of_optbool (option_map negb (Val.cmpfs_bool cmp (rs r1) (rs r2)))) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. + rewrite Float32.cmp_ne_eq. auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. + rewrite Float32.cmp_ne_eq. simpl. destruct (Float32.cmp Ceq f f0); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + destruct (Float32.cmp Clt f f0); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + replace (Cge) with (swap_comparison Cle); auto. rewrite Float32.cmp_swap. + destruct (Float32.cmp _ _ _); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + replace (Clt) with (swap_comparison Cgt); auto. rewrite Float32.cmp_swap. + destruct (Float32.cmp _ _ _); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpfs. unfold Val.cmpfs_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + destruct (Float32.cmp _ _ _); auto. +Qed. + +Lemma swap_comparison_cmpf: + forall v1 v2 cmp, + Val.lessdef (Val.cmpf cmp v1 v2) (Val.cmpf (swap_comparison cmp) v2 v1). +Proof. + intros. unfold Val.cmpf. unfold Val.cmpf_bool. destruct v1; destruct v2; auto. + rewrite Float.cmp_swap. auto. +Qed. + +Lemma transl_cond_float64_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_float64 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.cmpf cmp rs#r1 rs#r2) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. apply swap_comparison_cmpf. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. apply swap_comparison_cmpf. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. +Qed. + +Lemma transl_cond_nofloat64_correct: + forall cmp rd r1 r2 k rs m, + exists rs', + exec_straight ge (basics_to_code (transl_cond_notfloat64 cmp rd r1 r2 k)) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.of_optbool (option_map negb (Val.cmpf_bool cmp (rs r1) (rs r2)))) rs'#rd + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros. destruct cmp; simpl. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. + rewrite Float.cmp_ne_eq. auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. + rewrite Float.cmp_ne_eq. simpl. destruct (Float.cmp Ceq f f0); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + destruct (Float.cmp Clt f f0); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + replace (Cge) with (swap_comparison Cle); auto. rewrite Float.cmp_swap. + destruct (Float.cmp _ _ _); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + replace (Clt) with (swap_comparison Cgt); auto. rewrite Float.cmp_swap. + destruct (Float.cmp _ _ _); auto. +- econstructor; split. apply exec_straight_one; [simpl; eauto]. + split; intros; Simpl. + unfold Val.cmpf. unfold Val.cmpf_bool. destruct (rs r1); auto. destruct (rs r2); auto. simpl. + destruct (Float.cmp _ _ _); auto. +Qed. + +Lemma transl_cond_op_correct: + forall cond rd args k c rs m, + transl_cond_op cond rd args k = OK c -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ Val.lessdef (Val.of_optbool (eval_condition cond (map rs (map preg_of args)) m)) rs'#rd + /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r. +Proof. + assert (MKTOT: forall ob, Val.of_optbool ob = Val.maketotal (option_map Val.of_bool ob)). + { destruct ob as [[]|]; reflexivity. } + intros until m; intros TR. + destruct cond; simpl in TR; ArgsInv. ++ (* cmp *) + exploit transl_cond_int32s_correct; eauto. simpl. intros (rs' & A & B & C). exists rs'; eauto. ++ (* cmpu *) + exploit transl_cond_int32u_correct; eauto. simpl. intros (rs' & A & B & C). + exists rs'; repeat split; eauto. rewrite B; eapply Val.mxcmpu_correct. ++ (* cmpimm *) + apply transl_condimm_int32s_correct; eauto with asmgen. ++ (* cmpuimm *) + apply transl_condimm_int32u_correct; eauto with asmgen. ++ (* cmpl *) + exploit transl_cond_int64s_correct; eauto. simpl. intros (rs' & A & B & C). + exists rs'; repeat split; eauto. rewrite MKTOT; eauto. ++ (* cmplu *) + exploit transl_cond_int64u_correct; eauto. simpl. intros (rs' & A & B & C). + exists rs'; repeat split; eauto. rewrite B, MKTOT; eauto. + eapply Val.mxcmplu_correct. ++ (* cmplimm *) + exploit transl_condimm_int64s_correct; eauto. instantiate (1 := x); eauto with asmgen. simpl. + intros (rs' & A & B & C). + exists rs'; repeat split; eauto. rewrite MKTOT; eauto. ++ (* cmpluimm *) + exploit transl_condimm_int64u_correct; eauto. instantiate (1 := x); eauto with asmgen. simpl. + intros (rs' & A & B & C). + exists rs'; repeat split; eauto. rewrite MKTOT; eauto. ++ (* cmpfloat *) + exploit transl_cond_float64_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto. ++ (* cmpnosingle *) + exploit transl_cond_nofloat64_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto. ++ (* cmpsingle *) + exploit transl_cond_float32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto. ++ (* cmpnosingle *) + exploit transl_cond_nofloat32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto. +Qed. + +(* Translation of arithmetic operations *) + +Ltac SimplEval H := + match type of H with + | Some _ = None _ => discriminate + | Some _ = Some _ => inv H + | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity) +end. + +Ltac TranslOpSimpl := + econstructor; split; + [ apply exec_straight_one; reflexivity + | split; [ apply Val.lessdef_same; simpl; Simpl; fail | intros; simpl; Simpl; fail ] ]. + +Lemma int_eq_comm: + forall (x y: int), + (Int.eq x y) = (Int.eq y x). +Proof. + intros. + unfold Int.eq. + unfold zeq. + destruct (Z.eq_dec _ _); destruct (Z.eq_dec _ _); congruence. +Qed. + +Lemma int64_eq_comm: + forall (x y: int64), + (Int64.eq x y) = (Int64.eq y x). +Proof. + intros. + unfold Int64.eq. + unfold zeq. + destruct (Z.eq_dec _ _); destruct (Z.eq_dec _ _); congruence. +Qed. + +Lemma select_same_lessdef: + forall ty c v, + Val.lessdef (Val.select c v v ty) v. +Proof. + intros. + unfold Val.select. + destruct c; try econstructor. + replace (if b then v else v) with v by (destruct b ; trivial). + destruct v; destruct ty; simpl; econstructor. +Qed. + +Lemma if_neg : forall X, + forall a, + forall b c : X, + (if (negb a) then b else c) = (if a then c else b). +Proof. + destruct a; reflexivity. +Qed. + +Lemma int_ltu_to_neq: + forall x, + Int.ltu Int.zero x = negb (Int.eq x Int.zero). +Proof. + intros. + unfold Int.ltu, Int.eq. + change (Int.unsigned Int.zero) with 0. + pose proof (Int.unsigned_range x) as RANGE. + unfold zlt, zeq. + destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; lia. +Qed. + +Lemma int64_ltu_to_neq: + forall x, + Int64.ltu Int64.zero x = negb (Int64.eq x Int64.zero). +Proof. + intros. + unfold Int64.ltu, Int64.eq. + change (Int64.unsigned Int64.zero) with 0. + pose proof (Int64.unsigned_range x) as RANGE. + unfold zlt, zeq. + destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; lia. +Qed. + +Ltac splitall := repeat match goal with |- _ /\ _ => split end. + +Lemma transl_op_correct: + forall op args res k (rs: regset) m v c, + transl_op op args res k = OK c -> + eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ Val.lessdef v rs'#(preg_of res) + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. +Proof. + assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. } +Opaque Int.eq. + intros until c; intros TR EV. + unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl. +- (* Omove *) + destruct (preg_of res), (preg_of m0); inv TR; TranslOpSimpl. +- (* Oaddrsymbol *) + destruct (Archi.pic_code tt && negb (Ptrofs.eq ofs Ptrofs.zero)). ++ set (rs1 := (rs#x <- (Genv.symbol_address ge id Ptrofs.zero))). + exploit (addptrofs_correct x x ofs (basics_to_code k) rs1 m); eauto with asmgen. + intros (rs2 & A & B & C). + exists rs2; split. + apply exec_straight_step with rs1 m; auto. + split. replace ofs with (Ptrofs.add Ptrofs.zero ofs) by (apply Ptrofs.add_zero_l). + rewrite Genv.shift_symbol_address. + replace (rs1 x) with (Genv.symbol_address ge id Ptrofs.zero) in B by (unfold rs1; Simpl). + exact B. + intros. rewrite C by eauto with asmgen. unfold rs1; Simpl. ++ TranslOpSimpl. +- (* Oaddrstack *) + exploit addptrofs_correct. instantiate (1 := SP); auto with asmgen. intros (rs' & A & B & C). + exists rs'; split; eauto. auto with asmgen. +- (* Ocast8signed *) + econstructor; split. + eapply exec_straight_two. simpl;eauto. simpl;eauto. + repeat split; intros; simpl; Simpl. + assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto. + destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A. + apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. compute; intuition congruence. +- (* Ocast16signed *) + econstructor; split. + eapply exec_straight_two. simpl;eauto. simpl;eauto. + repeat split; intros; Simpl. + assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto. + destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A. + apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. compute; intuition congruence. +- (* Oshrximm *) + econstructor; split. + + apply exec_straight_one. simpl. eauto. + + repeat split. + * rewrite Pregmap.gss. + destruct (rs x0); simpl; trivial. + unfold Val.maketotal. + destruct (Int.ltu _ _); simpl; trivial. + * intros. + rewrite Pregmap.gso; trivial. +- (* Oshrxlimm *) + econstructor; split. + + apply exec_straight_one. simpl. eauto. + + repeat split. + * rewrite Pregmap.gss. + destruct (rs x0); simpl; trivial. + unfold Val.maketotal. + destruct (Int.ltu _ _); simpl; trivial. + * intros. + rewrite Pregmap.gso; trivial. + +- (* Ocmp *) + exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C). + exists rs'; repeat split; eauto with asmgen. + +- (* Osel *) + unfold conditional_move in *. + destruct (ireg_eq _ _). + { + subst x. inv EQ2. + econstructor; split. + { + apply exec_straight_one. + simpl. reflexivity. + } + split. + { apply select_same_lessdef. } + intros; trivial. + } + + destruct c0; simpl in *. + + all: destruct c. + all: simpl in *. + all: inv EQ2. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x1); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. + +- (* Oselimm *) + unfold conditional_move_imm32 in *. + destruct c0; simpl in *. + + all: destruct c. + all: simpl in *. + all: inv EQ0. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x0); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. + +- (* Osellimm *) + unfold conditional_move_imm64 in *. + destruct c0; simpl in *. + + all: destruct c. + all: simpl in *. + all: inv EQ0. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x0); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. +Qed. + +(** Memory accesses *) + +Lemma indexed_memory_access_correct: + forall mk_instr base ofs k rs m, + exists base' ofs' rs' ptr', + exec_straight_opt (indexed_memory_access mk_instr base ofs ::g k) rs m + (mk_instr base' ofs' ::g k) rs' m + /\ eval_offset ofs' = OK ptr' + /\ Val.offset_ptr rs'#base' ptr' = Val.offset_ptr rs#base ofs + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + unfold indexed_memory_access; intros. + (* destruct Archi.ptr64 eqn:SF. *) + assert (Archi.ptr64 = true) as SF; auto. +- generalize (make_immed64_sound (Ptrofs.to_int64 ofs)); intros EQ. + destruct (make_immed64 (Ptrofs.to_int64 ofs)). ++ econstructor; econstructor; econstructor; econstructor; split. + apply exec_straight_opt_refl. + split; auto. simpl. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto. +Qed. + + +Lemma indexed_load_access_correct: + forall trap chunk (mk_instr: ireg -> offset -> basic) rd m, + (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) -> + forall (base: ireg) ofs k (rs: regset) v, + Mem.loadv chunk m (Val.offset_ptr rs#base ofs) = Some v -> + exists rs', + exec_straight ge (indexed_memory_access mk_instr base ofs ::g k) rs m k rs' m + /\ rs'#rd = v + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + intros until m; intros EXEC; intros until v; intros LOAD. + exploit indexed_memory_access_correct; eauto. + intros (base' & ofs' & rs' & ptr' & A & PtrEq & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite EXEC. + unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq. rewrite B, LOAD. eauto. Simpl. + split; intros; Simpl. auto. +Qed. + +Lemma indexed_store_access_correct: + forall chunk (mk_instr: ireg -> offset -> basic) r1 m, + (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk rs m r1 base ofs) -> + forall (base: ireg) ofs k (rs: regset) m', + Mem.storev chunk m (Val.offset_ptr rs#base ofs) (rs#r1) = Some m' -> + exists rs', + exec_straight ge (indexed_memory_access mk_instr base ofs ::g k) rs m k rs' m' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros until m; intros EXEC; intros until m'; intros STORE. + exploit indexed_memory_access_correct. (* instantiate (1 := base). eauto. *) + intros (base' & ofs' & rs' & ptr' & A & PtrEq & B & C). + econstructor; split. + eapply exec_straight_opt_right. eapply A. apply exec_straight_one. rewrite EXEC. + unfold exec_store_offset. unfold parexec_store_offset. rewrite PtrEq. rewrite B, C, STORE. + eauto. + discriminate. + auto. +Qed. + +Lemma loadind_correct: + forall (base: ireg) ofs ty dst k c (rs: regset) m v, + loadind base ofs ty dst k = OK c -> + Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, r <> PC -> r <> preg_of dst -> rs'#r = rs#r. +Proof. + intros until v; intros TR LOAD. + assert (A: exists mk_instr rd, + preg_of dst = IR rd + /\ c = indexed_memory_access mk_instr base ofs :: k + /\ forall base' ofs' rs', + exec_basic_instr ge (mk_instr base' ofs') rs' m = + exec_load_offset TRAP (chunk_of_type ty) rs' m rd base' ofs'). + { unfold loadind in TR. + destruct ty, (preg_of dst); inv TR; econstructor; esplit; eauto. } + destruct A as (mk_instr & rd & rdEq & B & C). subst c. rewrite rdEq. + eapply indexed_load_access_correct; eauto with asmgen. +Qed. + +Lemma storeind_correct: + forall (base: ireg) ofs ty src k c (rs: regset) m m', + storeind src base ofs ty k = OK c -> + Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros until m'; intros TR STORE. + assert (A: exists mk_instr rr, + preg_of src = IR rr + /\ c = indexed_memory_access mk_instr base ofs :: k + /\ forall base' ofs' rs', + exec_basic_instr ge (mk_instr base' ofs') rs' m = + exec_store_offset (chunk_of_type ty) rs' m rr base' ofs'). + { unfold storeind in TR. destruct ty, (preg_of src); inv TR; econstructor; esplit; eauto. } + destruct A as (mk_instr & rr & rsEq & B & C). subst c. + eapply indexed_store_access_correct; eauto with asmgen. + congruence. +Qed. + +Ltac bsimpl := unfold exec_bblock; simpl. + +Lemma Pget_correct: + forall (dst: gpreg) (src: preg) k (rs: regset) m, + src = RA -> + exists rs', + exec_straight ge (Pget dst src ::g k) rs m k rs' m + /\ rs'#dst = rs#src + /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r. +Proof. + intros. econstructor; econstructor; econstructor. +- rewrite H. bsimpl. auto. +- Simpl. +- intros. Simpl. +Qed. + +Lemma Pset_correct: + forall (dst: preg) (src: gpreg) k (rs: regset) m, + dst = RA -> + exists rs', + exec_straight ge (Pset dst src ::g k) rs m k rs' m + /\ rs'#dst = rs#src + /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r. +Proof. + intros. econstructor; econstructor; econstructor; simpl. + rewrite H. auto. + Simpl. + Simpl. + intros. rewrite H. Simpl. +Qed. + +Lemma loadind_ptr_correct: + forall (base: ireg) ofs (dst: ireg) k (rs: regset) m v, + Mem.loadv Mptr m (Val.offset_ptr rs#base ofs) = Some v -> + exists rs', + exec_straight ge (loadind_ptr base ofs dst ::g k) rs m k rs' m + /\ rs'#dst = v + /\ forall r, r <> PC -> r <> dst -> rs'#r = rs#r. +Proof. + intros. eapply indexed_load_access_correct; eauto with asmgen. + intros. unfold Mptr. assert (Archi.ptr64 = true). auto. rewrite H0. + instantiate (1 := TRAP). + auto. +Qed. + +Lemma storeind_ptr_correct: + forall (base: ireg) ofs (src: ireg) k (rs: regset) m m', + Mem.storev Mptr m (Val.offset_ptr rs#base ofs) rs#src = Some m' -> + exists rs', + exec_straight ge (storeind_ptr src base ofs ::g k) rs m k rs' m' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros. eapply indexed_store_access_correct with (r1 := src); eauto with asmgen. + intros. unfold Mptr. assert (Archi.ptr64 = true); auto. +Qed. + +Lemma transl_memory_access_correct: + forall mk_instr addr args k c (rs: regset) m v, + transl_memory_access mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + exists base ofs rs' ptr, + exec_straight_opt (basics_to_code c) rs m (mk_instr base ofs ::g (basics_to_code k)) rs' m + /\ eval_offset ofs = OK ptr + /\ Val.offset_ptr rs'#base ptr = v + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until v; intros TR EV. + unfold transl_memory_access in TR; destruct addr; ArgsInv. +- (* indexed *) + inv EV. exploit indexed_memory_access_correct; eauto. intros (base' & ofs' & rs' & ptr' & EOPT & EVALOFF & VALOFF & RSEQ). + eexists; eexists; eexists; eexists. split; try split; try split. + eapply EOPT. unfold eval_offset in EVALOFF. inv EVALOFF. eauto. + { intros. destruct r; rewrite RSEQ; auto. } +- (* global *) + simpl in EV. inv EV. inv TR. econstructor; econstructor; econstructor; econstructor; split. + constructor. apply exec_straight_one. simpl; eauto. auto. + split; split; intros; Simpl. + assert (Val.lessdef (Val.offset_ptr (Genv.symbol_address ge i i0) Ptrofs.zero) (Genv.symbol_address ge i i0)). + { apply Val.offset_ptr_zero. } + remember (Genv.symbol_address ge i i0) as symbol. + destruct symbol; auto. + + contradict Heqsymbol; unfold Genv.symbol_address. + destruct (Genv.find_symbol ge i); discriminate. + + contradict Heqsymbol; unfold Genv.symbol_address; + destruct (Genv.find_symbol ge i); discriminate. + + contradict Heqsymbol; unfold Genv.symbol_address; + destruct (Genv.find_symbol ge i); discriminate. + + contradict Heqsymbol; unfold Genv.symbol_address; + destruct (Genv.find_symbol ge i); discriminate. + + simpl. rewrite Ptrofs.add_zero; auto. +- (* stack *) + inv TR. inv EV. + exploit indexed_memory_access_correct; eauto. intros (base' & ofs' & rs' & ptr' & EOPT & EVALOFF & VALOFF & RSEQ). + eexists; eexists; eexists; eexists. split; try split; try split. + eapply EOPT. unfold eval_offset in EVALOFF. inv EVALOFF. eauto. + { intros. destruct r; rewrite RSEQ; auto. } +Qed. + +Lemma transl_memory_access2_correct: + forall mk_instr addr args k c (rs: regset) m v, + transl_memory_access2 mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + exists base ro mro mr1 rs', + args = mr1 :: mro :: nil + /\ ireg_of mro = OK ro + /\ exec_straight_opt (basics_to_code c) rs m (mk_instr base ro ::g (basics_to_code k)) rs' m + /\ Val.addl rs'#base rs'#ro = v + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until v; intros TR EV. + unfold transl_memory_access2 in TR; destruct addr; ArgsInv. + inv EV. repeat eexists. eassumption. econstructor; eauto. +Qed. + +Lemma transl_memory_access2XS_correct: + forall chunk mk_instr (scale : Z) args k c (rs: regset) m v, + transl_memory_access2XS chunk mk_instr scale args k = OK c -> + eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v -> + exists base ro mro mr1 rs', + args = mr1 :: mro :: nil + /\ ireg_of mro = OK ro + /\ exec_straight_opt (basics_to_code c) rs m (mk_instr base ro ::g (basics_to_code k)) rs' m + /\ Val.addl rs'#base (Val.shll rs'#ro (Vint (Int.repr scale))) = v + /\ (forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r) + /\ scale = (zscale_of_chunk chunk). +Proof. + intros until v; intros TR EV. + unfold transl_memory_access2XS in TR; ArgsInv. + inv EV. repeat eexists. eassumption. econstructor; eauto. + symmetry. + apply Z.eqb_eq. + assumption. +Qed. + +Lemma transl_load_access2_correct: + forall trap chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro v', + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro) -> + transl_memory_access2 mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = Some v' -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = v' + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until v'; intros ARGS IREGE INSTR TR EV LOAD. + exploit transl_memory_access2_correct; eauto. + intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_load_reg. unfold parexec_load_reg. rewrite B, LOAD. reflexivity. Simpl. + split; intros; Simpl. auto. +Qed. + +Lemma transl_load_access2_correct_notrap2: + forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c rd (rs: regset) m v mro mr1 ro, + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro) -> + transl_memory_access2 mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = None -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = Vundef + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until ro; intros ARGS IREGE INSTR TR EV LOAD. + exploit transl_memory_access2_correct; eauto. + intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_load_reg. unfold parexec_load_reg. rewrite B, LOAD. reflexivity. Simpl. + split; intros; Simpl. auto. +Qed. + +Lemma transl_load_access2XS_correct: + forall trap chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro v', + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro) -> + transl_memory_access2XS chunk mk_instr scale args k = OK c -> + eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = Some v' -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = v' + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until v'; intros ARGS IREGE INSTR TR EV LOAD. + exploit transl_memory_access2XS_correct; eauto. + intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_load_regxs. unfold parexec_load_regxs. + unfold scale_of_chunk. + subst scale. + rewrite B, LOAD. reflexivity. Simpl. + split. trivial. intros. Simpl. +Qed. + +Lemma transl_load_access2XS_correct_notrap2: + forall chunk (mk_instr: ireg -> ireg -> basic) (scale : Z) args k c rd (rs: regset) m v mro mr1 ro, + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro) -> + transl_memory_access2XS chunk mk_instr scale args k = OK c -> + eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = None -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = Vundef + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until ro; intros ARGS IREGE INSTR TR EV LOAD. + exploit transl_memory_access2XS_correct; eauto. + intros (base & ro2 & mro2 & mr2 & rs' & ARGSS & IREGEQ & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mr2 mro2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_load_regxs. unfold parexec_load_regxs. + unfold scale_of_chunk. + subst scale. + rewrite B, LOAD. reflexivity. Simpl. + split. trivial. intros. Simpl. +Qed. + +Lemma transl_load_access_correct: + forall trap chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v v', + (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs) -> + transl_memory_access mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = Some v' -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = v' + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until v'; intros INSTR TR EV LOAD. + exploit transl_memory_access_correct; eauto. + intros (base & ofs & rs' & ptr & A & PtrEq & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. + rewrite INSTR. unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq, B, LOAD. reflexivity. Simpl. + split; intros; Simpl. auto. +Qed. + +Lemma transl_load_access_correct_notrap2: + forall chunk (mk_instr: ireg -> offset -> basic) addr args k c rd (rs: regset) m v, + (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs) -> + transl_memory_access mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.loadv chunk m v = None -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#rd = Vundef + /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. +Proof. + intros until v; intros INSTR TR EV LOAD. + exploit transl_memory_access_correct; eauto. + intros (base & ofs & rs' & ptr & A & PtrEq & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. + rewrite INSTR. unfold exec_load_offset. unfold parexec_load_offset. rewrite PtrEq, B, LOAD. reflexivity. Simpl. + split. trivial. intros. Simpl. +Qed. + +Lemma transl_load_memory_access_ok: + forall addr trap chunk args dst k c rs a v m, + (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) -> + transl_load trap chunk addr args dst k = OK c -> + eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = Some v -> + exists mk_instr rd, + preg_of dst = IR rd + /\ transl_memory_access mk_instr addr args k = OK c + /\ forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset trap chunk rs m rd base ofs. +Proof. + intros until m. intros ADDR TR ? ?. + unfold transl_load in TR. destruct addr; try contradiction. + - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto). + - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split; + [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity + | eauto ]. + - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split; + [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity + | eauto ]. +Qed. + +Lemma transl_load_memory_access_ok_notrap2: + forall addr chunk args dst k c rs a m, + (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) -> + transl_load NOTRAP chunk addr args dst k = OK c -> + eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = None -> + exists mk_instr rd, + preg_of dst = IR rd + /\ transl_memory_access mk_instr addr args k = OK c + /\ forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_load_offset NOTRAP chunk rs m rd base ofs. +Proof. + intros until m. intros ADDR TR ? ?. + unfold transl_load in TR. destruct addr; try contradiction. + - monadInv TR. destruct chunk; ArgsInv; econstructor; (esplit; eauto). + - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split; + [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity + | eauto ]. + - monadInv TR. destruct chunk. all: ArgsInv; destruct args; try discriminate; monadInv EQ0; eexists; eexists; split; try split; + [ instantiate (1 := (PLoadRRO _ _ x)); simpl; reflexivity + | eauto ]. +Qed. + +Lemma transl_load_memory_access2_ok: + forall trap chunk args dst k c rs a v m, + transl_load trap chunk Aindexed2 args dst k = OK c -> + eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = Some v -> + exists mk_instr mr0 mro rd ro, + args = mr0 :: mro :: nil + /\ preg_of dst = IR rd + /\ preg_of mro = IR ro + /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c + /\ forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg trap chunk rs m rd base ro. +Proof. + intros until m. intros TR ? ?. + unfold transl_load in TR. subst. monadInv TR. destruct chunk. all: + unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity + | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity + | eauto]. +Qed. + + +Lemma transl_load_memory_access2_ok_notrap2: + forall chunk args dst k c rs a m, + transl_load NOTRAP chunk Aindexed2 args dst k = OK c -> + eval_addressing ge (rs (IR SP)) Aindexed2 (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = None -> + exists mk_instr mr0 mro rd ro, + args = mr0 :: mro :: nil + /\ preg_of dst = IR rd + /\ preg_of mro = IR ro + /\ transl_memory_access2 mk_instr Aindexed2 args k = OK c + /\ forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_reg NOTRAP chunk rs m rd base ro. +Proof. + intros until m. intros TR ? ?. + unfold transl_load in TR. subst. monadInv TR. destruct chunk. all: + unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity + | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRR _ _ x)); simpl; reflexivity + | eauto]. +Qed. + +Lemma transl_load_memory_access2XS_ok: + forall scale trap chunk args dst k c rs a v m, + transl_load trap chunk (Aindexed2XS scale) args dst k = OK c -> + eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = Some v -> + exists mk_instr mr0 mro rd ro, + args = mr0 :: mro :: nil + /\ preg_of dst = IR rd + /\ preg_of mro = IR ro + /\ transl_memory_access2XS chunk mk_instr scale args k = OK c + /\ forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs trap chunk rs m rd base ro. +Proof. + intros until m. intros TR ? ?. + unfold transl_load in TR. subst. monadInv TR. destruct chunk. all: + unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity + | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto + | eauto]. +Qed. + + +Lemma transl_load_memory_access2XS_ok_notrap2: + forall scale chunk args dst k c rs a m, + transl_load NOTRAP chunk (Aindexed2XS scale) args dst k = OK c -> + eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = None -> + exists mk_instr mr0 mro rd ro, + args = mr0 :: mro :: nil + /\ preg_of dst = IR rd + /\ preg_of mro = IR ro + /\ transl_memory_access2XS chunk mk_instr scale args k = OK c + /\ forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_load_regxs NOTRAP chunk rs m rd base ro. +Proof. + intros until m. intros TR ? ?. + unfold transl_load in TR. subst. monadInv TR. destruct chunk. all: + unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ unfold ireg_of in EQ0; destruct (preg_of m1); eauto; try discriminate; monadInv EQ0; reflexivity + | rewrite EQ1; rewrite EQ0; simpl; instantiate (1 := (PLoadRRRXS _ _ x)); simpl; rewrite Heqb; eauto + | eauto]. +Qed. + +Lemma transl_load_correct: + forall trap chunk addr args dst k c (rs: regset) m a v, + transl_load trap chunk addr args dst k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = Some v -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r. +Proof. + intros until v; intros TR EV LOAD. destruct addr. + - exploit transl_load_memory_access2XS_ok; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C). + rewrite rdEq. eapply transl_load_access2XS_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. + - exploit transl_load_memory_access2_ok; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C). + rewrite rdEq. eapply transl_load_access2_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. + - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct; eauto with asmgen. + - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct; eauto with asmgen. + - exploit transl_load_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct; eauto with asmgen. +Qed. + +Lemma transl_load_correct_notrap2: + forall chunk addr args dst k c (rs: regset) m a, + transl_load NOTRAP chunk addr args dst k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a -> + Mem.loadv chunk m a = None -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m + /\ rs'#(preg_of dst) = Vundef + /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r. +Proof. + intros until a; intros TR EV LOAD. destruct addr. + - exploit transl_load_memory_access2XS_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C). + rewrite rdEq. eapply transl_load_access2XS_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. + - exploit transl_load_memory_access2_ok_notrap2; eauto. intros (mk_instr & mr0 & mro & rd & ro & argsEq & rdEq & roEq & B & C). + rewrite rdEq. eapply transl_load_access2_correct_notrap2; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. + - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct_notrap2; eauto with asmgen. + - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct_notrap2; eauto with asmgen. + - exploit transl_load_memory_access_ok_notrap2; eauto; try discriminate; try (simpl; reflexivity). + intros A; destruct A as (mk_instr & rd & rdEq & B & C); rewrite rdEq; + eapply transl_load_access_correct_notrap2; eauto with asmgen. +Qed. + +Lemma transl_store_access2_correct: + forall chunk (mk_instr: ireg -> ireg -> basic) addr args k c r1 (rs: regset) m v mr1 mro ro m', + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_store_reg chunk rs m r1 base ro) -> + transl_memory_access2 mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.storev chunk m v rs#r1 = Some m' -> + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until m'; intros ARGS IREG INSTR TR EV STORE NOT31. + exploit transl_memory_access2_correct; eauto. + intros (base & ro2 & mr2 & mro2 & rs' & ARGSS & IREGG & A & B & C). rewrite ARGSS in ARGS. inversion ARGS. subst mro2 mr2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_store_reg. unfold parexec_store_reg. rewrite B. rewrite C; try discriminate. rewrite STORE. auto. + intro. inv H. contradiction. + auto. +Qed. + +Lemma transl_store_access2XS_correct: + forall chunk (mk_instr: ireg -> ireg -> basic) scale args k c r1 (rs: regset) m v mr1 mro ro m', + args = mr1 :: mro :: nil -> + ireg_of mro = OK ro -> + (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_store_regxs chunk rs m r1 base ro) -> + transl_memory_access2XS chunk mk_instr scale args k = OK c -> + eval_addressing ge rs#SP (Aindexed2XS scale) (map rs (map preg_of args)) = Some v -> + Mem.storev chunk m v rs#r1 = Some m' -> + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until m'; intros ARGS IREG INSTR TR EV STORE NOT31. + exploit transl_memory_access2XS_correct; eauto. + intros (base & ro2 & mr2 & mro2 & rs' & ARGSS & IREGG & A & B & C & D). rewrite ARGSS in ARGS. inversion ARGS. subst mro2 mr2. clear ARGS. + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. assert (ro = ro2) by congruence. subst ro2. + rewrite INSTR. unfold exec_store_regxs. unfold parexec_store_regxs. + unfold scale_of_chunk. + subst scale. + rewrite B. rewrite C; try discriminate. rewrite STORE. auto. + intro. inv H. contradiction. + auto. +Qed. + +Lemma transl_store_access_correct: + forall chunk (mk_instr: ireg -> offset -> basic) addr args k c r1 (rs: regset) m v m', + (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk rs m r1 base ofs) -> + transl_memory_access mk_instr addr args k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some v -> + Mem.storev chunk m v rs#r1 = Some m' -> + r1 <> RTMP -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until m'; intros INSTR TR EV STORE NOT31. + exploit transl_memory_access_correct; eauto. + intros (base & ofs & rs' & ptr & A & PtrEq & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. apply exec_straight_one. + rewrite INSTR. unfold exec_store_offset. unfold parexec_store_offset. rewrite PtrEq, B. rewrite C; try discriminate. rewrite STORE. auto. + intro. inv H. contradiction. + auto. +Qed. + + +Remark exec_store_offset_8_sign rs m x base ofs: + exec_store_offset Mint8unsigned rs m x base ofs = exec_store_offset Mint8signed rs m x base ofs. +Proof. + unfold exec_store_offset. unfold parexec_store_offset. unfold eval_offset; auto. unfold Mem.storev. + destruct (Val.offset_ptr _ _); auto. erewrite <- Mem.store_signed_unsigned_8. reflexivity. +Qed. + +Remark exec_store_offset_16_sign rs m x base ofs: + exec_store_offset Mint16unsigned rs m x base ofs = exec_store_offset Mint16signed rs m x base ofs. +Proof. + unfold exec_store_offset. unfold parexec_store_offset. unfold eval_offset; auto. unfold Mem.storev. + destruct (Val.offset_ptr _ _); auto. erewrite <- Mem.store_signed_unsigned_16. reflexivity. +Qed. + +Lemma transl_store_memory_access_ok: + forall addr chunk args src k c rs a m m', + (match addr with Aindexed2XS _ | Aindexed2 => False | _ => True end) -> + transl_store chunk addr args src k = OK c -> + eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a -> + Mem.storev chunk m a (rs (preg_of src)) = Some m' -> + exists mk_instr chunk' rr, + preg_of src = IR rr + /\ transl_memory_access mk_instr addr args k = OK c + /\ (forall base ofs rs, + exec_basic_instr ge (mk_instr base ofs) rs m = exec_store_offset chunk' rs m rr base ofs) + /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src). +Proof. + intros until m'. intros ? TR ? ?. + unfold transl_store in TR. destruct addr; try contradiction. + - monadInv TR. destruct chunk. all: + ArgsInv; eexists; eexists; eexists; split; try split; [ + repeat (destruct args; try discriminate); eassumption + | split; eauto; intros; simpl; try reflexivity]. + eapply exec_store_offset_8_sign. + eapply exec_store_offset_16_sign. + - monadInv TR. destruct chunk. all: + ArgsInv; eexists; eexists; eexists; split; try split; + [ repeat (destruct args; try discriminate); instantiate (1 := PStoreRRO _ x); simpl; eassumption + | split; eauto; intros; simpl; try reflexivity]. + eapply exec_store_offset_8_sign. + eapply exec_store_offset_16_sign. + - monadInv TR. destruct chunk. all: + ArgsInv; eexists; eexists; eexists; split; try split; + [ repeat (destruct args; try discriminate); instantiate (1 := PStoreRRO _ x); simpl; eassumption + | split; eauto; intros; simpl; try reflexivity]. + eapply exec_store_offset_8_sign. + eapply exec_store_offset_16_sign. +Qed. + +Remark exec_store_reg_8_sign rs m x base ofs: + exec_store_reg Mint8unsigned rs m x base ofs = exec_store_reg Mint8signed rs m x base ofs. +Proof. + unfold exec_store_reg. unfold parexec_store_reg. unfold Mem.storev. destruct (Val.addl _ _); auto. + erewrite <- Mem.store_signed_unsigned_8. reflexivity. +Qed. + +Remark exec_store_reg_16_sign rs m x base ofs: + exec_store_reg Mint16unsigned rs m x base ofs = exec_store_reg Mint16signed rs m x base ofs. +Proof. + unfold exec_store_reg. unfold parexec_store_reg. unfold Mem.storev. destruct (Val.addl _ _); auto. + erewrite <- Mem.store_signed_unsigned_16. reflexivity. +Qed. + +Remark exec_store_regxs_8_sign rs m x base ofs: + exec_store_regxs Mint8unsigned rs m x base ofs = exec_store_regxs Mint8signed rs m x base ofs. +Proof. + unfold exec_store_regxs. unfold parexec_store_regxs. unfold Mem.storev. destruct (Val.addl _ _); auto. + erewrite <- Mem.store_signed_unsigned_8. reflexivity. +Qed. + +Remark exec_store_regxs_16_sign rs m x base ofs: + exec_store_regxs Mint16unsigned rs m x base ofs = exec_store_regxs Mint16signed rs m x base ofs. +Proof. + unfold exec_store_regxs. unfold parexec_store_regxs. unfold Mem.storev. destruct (Val.addl _ _); auto. + erewrite <- Mem.store_signed_unsigned_16. reflexivity. +Qed. + +Lemma transl_store_memory_access2_ok: + forall addr chunk args src k c rs a m m', + addr = Aindexed2 -> + transl_store chunk addr args src k = OK c -> + eval_addressing ge (rs (IR SP)) addr (map rs (map preg_of args)) = Some a -> + Mem.storev chunk m a (rs (preg_of src)) = Some m' -> + exists mk_instr chunk' rr mr0 mro ro, + args = mr0 :: mro :: nil + /\ preg_of mro = IR ro + /\ preg_of src = IR rr + /\ transl_memory_access2 mk_instr addr args k = OK c + /\ (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_store_reg chunk' rs m rr base ro) + /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src). +Proof. + intros until m'. intros ? TR ? ?. + unfold transl_store in TR. subst addr. monadInv TR. destruct chunk. all: + unfold transl_memory_access2 in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ ArgsInv; reflexivity + | rewrite EQ1; rewrite EQ0; instantiate (1 := (PStoreRRR _ x)); simpl; reflexivity + | eauto ]. + - simpl. intros. eapply exec_store_reg_8_sign. + - simpl. intros. eapply exec_store_reg_16_sign. +Qed. + +Lemma transl_store_memory_access2XS_ok: + forall scale chunk args src k c rs a m m', + transl_store chunk (Aindexed2XS scale) args src k = OK c -> + eval_addressing ge (rs (IR SP)) (Aindexed2XS scale) (map rs (map preg_of args)) = Some a -> + Mem.storev chunk m a (rs (preg_of src)) = Some m' -> + exists mk_instr chunk' rr mr0 mro ro, + args = mr0 :: mro :: nil + /\ preg_of mro = IR ro + /\ preg_of src = IR rr + /\ transl_memory_access2XS chunk' mk_instr scale args k = OK c + /\ (forall base rs, + exec_basic_instr ge (mk_instr base ro) rs m = exec_store_regxs chunk' rs m rr base ro) + /\ Mem.storev chunk m a rs#(preg_of src) = Mem.storev chunk' m a rs#(preg_of src). +Proof. + intros until m'. intros TR ? ?. + unfold transl_store in TR. monadInv TR. destruct chunk. all: + unfold transl_memory_access2XS in EQ0; repeat (destruct args; try discriminate); monadInv EQ0; ArgsInv; repeat eexists; + [ ArgsInv; reflexivity + | rewrite EQ1; rewrite EQ0; instantiate (1 := (PStoreRRRXS _ x)); simpl; rewrite Heqb; eauto + | eauto ]. + - simpl. intros. eapply exec_store_regxs_8_sign. + - simpl. intros. eapply exec_store_regxs_16_sign. +Qed. + +Lemma transl_store_correct: + forall chunk addr args src k c (rs: regset) m a m', + transl_store chunk addr args src k = OK c -> + eval_addressing ge rs#SP addr (map rs (map preg_of args)) = Some a -> + Mem.storev chunk m a rs#(preg_of src) = Some m' -> + exists rs', + exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m' + /\ forall r, r <> PC -> r <> RTMP -> rs'#r = rs#r. +Proof. + intros until m'; intros TR EV STORE. destruct addr. + - exploit transl_store_memory_access2XS_ok; eauto. intros (mk_instr & chunk' & rr & mr0 & mro & ro & argsEq & roEq & srcEq & A & B & C). + eapply transl_store_access2XS_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. congruence. + destruct rr; try discriminate. destruct src; simpl in srcEq; try discriminate. + - exploit transl_store_memory_access2_ok; eauto. intros (mk_instr & chunk' & rr & mr0 & mro & ro & argsEq & roEq & srcEq & A & B & C). + eapply transl_store_access2_correct; eauto with asmgen. unfold ireg_of. rewrite roEq. reflexivity. congruence. + destruct rr; try discriminate. destruct src; simpl in srcEq; try discriminate. + - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intro A; + destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D); + rewrite D in STORE; clear D; + eapply transl_store_access_correct; eauto with asmgen; try congruence; + destruct rr; try discriminate; destruct src; try discriminate. + - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intro A; + destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D); + rewrite D in STORE; clear D; + eapply transl_store_access_correct; eauto with asmgen; try congruence; + destruct rr; try discriminate; destruct src; try discriminate. + - exploit transl_store_memory_access_ok; eauto; try discriminate; try (simpl; reflexivity). + intro A; + destruct A as (mk_instr & chunk' & rr & rrEq & B & C & D); + rewrite D in STORE; clear D; + eapply transl_store_access_correct; eauto with asmgen; try congruence; + destruct rr; try discriminate; destruct src; try discriminate. +Qed. + +Lemma make_epilogue_correct: + forall ge0 f m stk soff cs m' ms rs k tm, + Mach.load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) -> + Mach.load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) -> + Mem.free m stk 0 f.(fn_stacksize) = Some m' -> + agree ms (Vptr stk soff) rs -> + Mem.extends m tm -> + match_stack ge0 cs -> + exists rs', exists tm', + exec_straight ge (make_epilogue f k) rs tm k rs' tm' + /\ agree ms (parent_sp cs) rs' + /\ Mem.extends m' tm' + /\ rs'#RA = parent_ra cs + /\ rs'#SP = parent_sp cs + /\ (forall r, r <> PC -> r <> RA -> r <> SP -> r <> RTMP -> r <> GPRA -> rs'#r = rs#r). +Proof. + intros until tm; intros LP LRA FREE AG MEXT MCS. + exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). + exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA'). + exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. + exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'. + exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). + unfold make_epilogue. + rewrite chunk_of_Tptr in *. + + exploit ((loadind_ptr_correct SP (fn_retaddr_ofs f) GPRA (Pset RA GPRA ::g Pfreeframe (fn_stacksize f) (fn_link_ofs f) ::g k)) + rs tm). + - rewrite <- (sp_val _ _ rs AG). simpl. eexact LRA'. + - intros (rs1 & A1 & B1 & C1). + assert (agree ms (Vptr stk soff) rs1) as AG1. + + destruct AG. + apply mkagree; auto. + rewrite C1; discriminate || auto. + intro. rewrite C1; auto; destruct r; simpl; try discriminate. + + exploit (Pset_correct RA GPRA (Pfreeframe (fn_stacksize f) (fn_link_ofs f) ::g k) rs1 tm). auto. + intros (rs2 & A2 & B2 & C2). + econstructor; econstructor; split. + * eapply exec_straight_trans. + { eexact A1. } + { eapply exec_straight_trans. + { eapply A2. } + { apply exec_straight_one. simpl. + rewrite (C2 SP) by auto with asmgen. rewrite <- (sp_val _ _ rs1 AG1). simpl; rewrite LP'. + rewrite FREE'. eauto. } } + * split. apply agree_set_other; auto with asmgen. + apply agree_change_sp with (Vptr stk soff). + apply agree_exten with rs; auto. intros; rewrite C2; auto with asmgen. + eapply parent_sp_def; eauto. + split. auto. + split. Simpl. rewrite B2. auto. + split. Simpl. + intros. Simpl. + rewrite C2; auto. +Qed. + +End CONSTRUCTORS. + + diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v new file mode 100644 index 00000000..a732d29b --- /dev/null +++ b/kvx/Asmblockprops.v @@ -0,0 +1,357 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Common definition and proofs on Asmblock required by various modules *) + +Require Import Coqlib. +Require Import Integers. +Require Import Memory. +Require Import Globalenvs. +Require Import Values. +Require Import Asmblock. +Require Import Axioms. + +Definition bblock_simu (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) := + forall rs m, + exec_bblock ge f bb rs m <> Stuck -> + exec_bblock ge f bb rs m = exec_bblock ge f bb' rs m. + +Hint Extern 2 (_ <> _) => congruence: asmgen. + +Lemma preg_of_data: + forall r, data_preg (preg_of r) = true. +Proof. + intros. destruct r; reflexivity. +Qed. +Hint Resolve preg_of_data: asmgen. + +Lemma data_diff: + forall r r', + data_preg r = true -> data_preg r' = false -> r <> r'. +Proof. + congruence. +Qed. +Hint Resolve data_diff: asmgen. + +Lemma preg_of_not_PC: + forall r, preg_of r <> PC. +Proof. + intros. apply data_diff; auto with asmgen. +Qed. + +Lemma preg_of_not_SP: + forall r, preg_of r <> SP. +Proof. + intros. unfold preg_of; destruct r; cbn; congruence. +Qed. + +Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen. + + +Lemma nextblock_pc: + forall b rs, (nextblock b rs)#PC = Val.offset_ptr rs#PC (Ptrofs.repr (size b)). +Proof. + intros. apply Pregmap.gss. +Qed. + +Lemma nextblock_inv: + forall b r rs, r <> PC -> (nextblock b rs)#r = rs#r. +Proof. + intros. unfold nextblock. apply Pregmap.gso. red; intro; subst. auto. +Qed. + +Lemma nextblock_inv1: + forall b r rs, data_preg r = true -> (nextblock b rs)#r = rs#r. +Proof. + intros. apply nextblock_inv. red; intro; subst; discriminate. +Qed. + +Ltac Simplif := + ((rewrite nextblock_inv by eauto with asmgen) + || (rewrite nextblock_inv1 by eauto with asmgen) + || (rewrite Pregmap.gss) + || (rewrite nextblock_pc) + || (rewrite Pregmap.gso by eauto with asmgen) + ); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +(* For Asmblockgenproof0 *) + +Theorem exec_basic_instr_pc: + forall ge b rs1 m1 rs2 m2, + exec_basic_instr ge b rs1 m1 = Next rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + intros. destruct b; try destruct i; try destruct i. + all: try (inv H; Simpl). + 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. + + 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. + + { (* PLoadQRRO *) + unfold parexec_load_q_offset in H1. + destruct (gpreg_q_expand _) as [r0 r1] in H1. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + inv H1. Simpl. } + { (* PLoadORRO *) + unfold parexec_load_o_offset in H1. + destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + inv H1. Simpl. } + 1-8: unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]; fail. + 1-8: unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail. + 1-8: unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail. + + { (* PStoreQRRO *) + unfold parexec_store_q_offset in H1. + destruct (gpreg_q_expand _) as [r0 r1] in H1. + unfold eval_offset in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + inv H1. Simpl. reflexivity. } + { (* PStoreORRO *) + unfold parexec_store_o_offset in H1. + destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1. + unfold eval_offset in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + inv H1. Simpl. reflexivity. } + - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate. + - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate. + destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate. + - destruct rs; try discriminate. inv H1. Simpl. + - destruct rd; try discriminate. inv H1; Simpl. + - reflexivity. +Qed. + +(* For PostpassSchedulingproof *) + +Lemma regset_double_set: + forall r1 r2 (rs: regset) v1 v2, + r1 <> r2 -> + (rs # r1 <- v1 # r2 <- v2) = (rs # r2 <- v2 # r1 <- v1). +Proof. + intros. apply functional_extensionality. intros r. destruct (preg_eq r r1). + - subst. rewrite Pregmap.gso; auto. repeat (rewrite Pregmap.gss). auto. + - destruct (preg_eq r r2). + + subst. rewrite Pregmap.gss. rewrite Pregmap.gso; auto. rewrite Pregmap.gss. auto. + + repeat (rewrite Pregmap.gso; auto). +Qed. + +Lemma next_eq: + forall (rs rs': regset) m m', + rs = rs' -> m = m' -> Next rs m = Next rs' m'. +Proof. + intros; apply f_equal2; auto. +Qed. + +Lemma exec_load_offset_pc_var: + forall trap t rs m rd ra ofs rs' m' v, + exec_load_offset trap t rs m rd ra ofs = Next rs' m' -> + exec_load_offset trap t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_offset in *. unfold parexec_load_offset in *. rewrite Pregmap.gso; try discriminate. destruct (eval_offset ofs); try discriminate. + destruct (Mem.loadv _ _ _). + - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. + - unfold parexec_incorrect_load in *. + destruct trap; try discriminate. + inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. +Qed. + +Lemma exec_load_reg_pc_var: + forall trap t rs m rd ra ro rs' m' v, + exec_load_reg trap t rs m rd ra ro = Next rs' m' -> + exec_load_reg trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_reg in *. unfold parexec_load_reg in *. rewrite Pregmap.gso; try discriminate. + destruct (Mem.loadv _ _ _). + - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. + - unfold parexec_incorrect_load in *. + destruct trap; try discriminate. + inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. +Qed. + +Lemma exec_load_regxs_pc_var: + forall trap t rs m rd ra ro rs' m' v, + exec_load_regxs trap t rs m rd ra ro = Next rs' m' -> + exec_load_regxs trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_regxs in *. unfold parexec_load_regxs in *. rewrite Pregmap.gso; try discriminate. + destruct (Mem.loadv _ _ _). + - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. + - unfold parexec_incorrect_load in *. + destruct trap; try discriminate. + inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate. +Qed. + +Lemma exec_load_offset_q_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_load_q_offset rs m rd ra ofs = Next rs' m' -> + exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *. + destruct (gpreg_q_expand rd) as [rd0 rd1]. + (* destruct (ireg_eq rd0 ra); try discriminate. *) + rewrite Pregmap.gso; try discriminate. + destruct (Mem.loadv _ _ _); try discriminate. + inv H. + destruct (Mem.loadv _ _ _); try discriminate. + inv H1. f_equal. + rewrite (regset_double_set PC rd0) by discriminate. + rewrite (regset_double_set PC rd1) by discriminate. + reflexivity. +Qed. + +Lemma exec_load_offset_o_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_load_o_offset rs m rd ra ofs = Next rs' m' -> + exec_load_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_o_offset in *. unfold parexec_load_o_offset in *. + destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]. +(* + destruct (ireg_eq rd0 ra); try discriminate. + destruct (ireg_eq rd1 ra); try discriminate. + destruct (ireg_eq rd2 ra); try discriminate. +*) + rewrite Pregmap.gso; try discriminate. + cbn in *. + destruct (Mem.loadv _ _ _); try discriminate. + destruct (Mem.loadv _ _ _); try discriminate. + destruct (Mem.loadv _ _ _); try discriminate. + destruct (Mem.loadv _ _ _); try discriminate. + rewrite (regset_double_set PC rd0) by discriminate. + rewrite (regset_double_set PC rd1) by discriminate. + rewrite (regset_double_set PC rd2) by discriminate. + rewrite (regset_double_set PC rd3) by discriminate. + inv H. + trivial. +Qed. + +Lemma exec_store_offset_pc_var: + forall t rs m rd ra ofs rs' m' v, + exec_store_offset t rs m rd ra ofs = Next rs' m' -> + exec_store_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_store_offset in *. unfold parexec_store_offset in *. rewrite Pregmap.gso; try discriminate. + destruct (eval_offset ofs); try discriminate. + destruct (Mem.storev _ _ _). + - inv H. apply next_eq; auto. + - discriminate. +Qed. + +Lemma exec_store_q_offset_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_store_q_offset rs m rd ra ofs = Next rs' m' -> + exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate. + cbn in *. + destruct (gpreg_q_expand _) as [s0 s1]. + destruct (Mem.storev _ _ _); try discriminate. + destruct (Mem.storev _ _ _); try discriminate. + inv H. apply next_eq; auto. +Qed. + +Lemma exec_store_o_offset_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_store_o_offset rs m rd ra ofs = Next rs' m' -> + exec_store_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. + unfold exec_store_o_offset in *. unfold parexec_store_o_offset in *. + destruct (gpreg_o_expand _) as [[[s0 s1] s2] s3]. + destruct (Mem.storev _ _ _); try discriminate. + destruct (Mem.storev _ _ _); try discriminate. + destruct (Mem.storev _ _ _); try discriminate. + destruct (Mem.storev _ _ _); try discriminate. + inv H. + trivial. +Qed. + +Lemma exec_store_reg_pc_var: + forall t rs m rd ra ro rs' m' v, + exec_store_reg t rs m rd ra ro = Next rs' m' -> + exec_store_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_store_reg in *. unfold parexec_store_reg in *. rewrite Pregmap.gso; try discriminate. + destruct (Mem.storev _ _ _). + - inv H. apply next_eq; auto. + - discriminate. +Qed. + +Lemma exec_store_regxs_pc_var: + forall t rs m rd ra ro rs' m' v, + exec_store_regxs t rs m rd ra ro = Next rs' m' -> + exec_store_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_store_regxs in *. unfold parexec_store_regxs in *. rewrite Pregmap.gso; try discriminate. + destruct (Mem.storev _ _ _). + - inv H. apply next_eq; auto. + - discriminate. +Qed. + +Theorem exec_basic_instr_pc_var: + forall ge i rs m rs' m' v, + exec_basic_instr ge i rs m = Next rs' m' -> + exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'. +Proof. + intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i. + - unfold exec_arith_instr in *. destruct i; destruct i. + all: try (exploreInst; inv H; apply next_eq; auto; + apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). +(* + (* Some cases treated seperately because exploreInst destructs too much *) + all: try (inv H; apply next_eq; auto; apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). *) + - destruct i. + + exploreInst; apply exec_load_offset_pc_var; auto. + + exploreInst; apply exec_load_reg_pc_var; auto. + + exploreInst; apply exec_load_regxs_pc_var; auto. + + apply exec_load_offset_q_pc_var; auto. + + apply exec_load_offset_o_pc_var; auto. + - destruct i. + + exploreInst; apply exec_store_offset_pc_var; auto. + + exploreInst; apply exec_store_reg_pc_var; auto. + + exploreInst; apply exec_store_regxs_pc_var; auto. + + apply exec_store_q_offset_pc_var; auto. + + apply exec_store_o_offset_pc_var; auto. + - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate). + destruct (Mem.storev _ _ _ _); try discriminate. + inv H. apply next_eq; auto. apply functional_extensionality. intros. + rewrite (regset_double_set GPR32 PC); try discriminate. + rewrite (regset_double_set GPR12 PC); try discriminate. + rewrite (regset_double_set FP PC); try discriminate. reflexivity. + - repeat (rewrite Pregmap.gso; try discriminate). + destruct (Mem.loadv _ _ _); try discriminate. + destruct (rs GPR12); try discriminate. + destruct (Mem.free _ _ _ _); try discriminate. + inv H. apply next_eq; auto. + rewrite (regset_double_set GPR32 PC). + rewrite (regset_double_set GPR12 PC). reflexivity. + all: discriminate. + - destruct rs0; try discriminate. inv H. apply next_eq; auto. + repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate. + - destruct rd; try discriminate. inv H. apply next_eq; auto. + repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate. + - inv H. apply next_eq; auto. +Qed. + + diff --git a/kvx/Asmexpand.ml b/kvx/Asmexpand.ml new file mode 100644 index 00000000..f84cf22d --- /dev/null +++ b/kvx/Asmexpand.ml @@ -0,0 +1,642 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* Expanding built-ins and some pseudo-instructions by rewriting + of the RISC-V assembly code. *) + +open Asm +open Asmexpandaux +open AST +open Camlcoq + +exception Error of string + +(* Useful constants and helper functions *) + +let _0 = Integers.Int.zero +let _1 = Integers.Int.one +let _2 = coqint_of_camlint 2l +let _4 = coqint_of_camlint 4l +let _8 = coqint_of_camlint 8l +let _16 = coqint_of_camlint 16l +let _m1 = coqint_of_camlint (-1l) + +let wordsize = if Archi.ptr64 then 8 else 4 + +let align n a = (n + a - 1) land (-a) + +let stack_pointer = Asmvliw.GPR12 + +(* Emit instruction sequences that set or offset a register by a constant. *) +(* + let expand_loadimm32 dst n = + List.iter emit (Asmgen.loadimm32 dst n []) +*) +let expand_addptrofs dst src n = + List.iter emit (basic_to_instruction (Asmvliw.PArith (Asmblockgen.addptrofs dst src n)) :: []) +let expand_storeind_ptr src base ofs = + List.iter emit (basic_to_instruction (Asmblockgen.storeind_ptr src base ofs) :: []) +let expand_loadind_ptr dst base ofs = + List.iter emit (basic_to_instruction (Asmblockgen.loadind_ptr base ofs dst) :: []) + +(* Built-ins. They come in two flavors: + - annotation statements: take their arguments in registers or stack + locations; generate no code; + - inlined by the compiler: take their arguments in arbitrary + registers. +*) + +(* Fix-up code around calls to variadic functions. Floating-point arguments + residing in FP registers need to be moved to integer registers. *) + +let int_param_regs = let open Asmvliw in [| GPR0; GPR1; GPR2; GPR3; GPR4; GPR5; GPR6; GPR7; GPR8; GPR9; GPR10; GPR11 |] +(* let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |] *) +let float_param_regs = [| |] + +let fixup_variadic_call pos tyl = assert false +(*if pos < 8 then + match tyl with + | [] -> + () + | (Tint | Tany32) :: tyl -> + fixup_variadic_call (pos + 1) tyl + | Tsingle :: tyl -> + let rs =float_param_regs.(pos) + and rd = int_param_regs.(pos) in + emit (Pfmvxs(rd, rs)); + fixup_variadic_call (pos + 1) tyl + | Tlong :: tyl -> + let pos' = if Archi.ptr64 then pos + 1 else align pos 2 + 2 in + fixup_variadic_call pos' tyl + | (Tfloat | Tany64) :: tyl -> + if Archi.ptr64 then begin + let rs = float_param_regs.(pos) + and rd = int_param_regs.(pos) in + emit (Pfmvxd(rd, rs)); + fixup_variadic_call (pos + 1) tyl + end else begin + let pos = align pos 2 in + if pos < 8 then begin + let rs = float_param_regs.(pos) + and rd1 = int_param_regs.(pos) + and rd2 = int_param_regs.(pos + 1) in + emit (Paddiw(X2, X X2, Integers.Int.neg _16)); + emit (Pfsd(rs, X2, Ofsimm _0)); + emit (Plw(rd1, X2, Ofsimm _0)); + emit (Plw(rd2, X2, Ofsimm _4)); + emit (Paddiw(X2, X X2, _16)); + fixup_variadic_call (pos + 2) tyl + end + end +*) + +let fixup_call sg = + if sg.sig_cc.cc_vararg <> None then fixup_variadic_call 0 sg.sig_args + +(* Handling of annotations *) + +let expand_annot_val kind txt targ args res = + emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none)); + match args, res with + | [BA(Asmvliw.IR src)], BR(Asmvliw.IR dst) -> + if dst <> src then emit (Pmv (dst, src)) + | _, _ -> + raise (Error "ill-formed __builtin_annot_val") + +(* Handling of memcpy *) + +let emit_move dst r = + if dst <> r + then emit (Paddil(dst, r, Z.zero));; + +(* FIXME DMonniaux this is probably not complete *) +let get_builtin_arg dst arg = + match arg with + | BA (Asmvliw.IR reg) -> emit_move dst reg + | BA (ireg) -> failwith "get_builtin_arg: BA_int(not ireg)" + | BA_int _ -> failwith "get_builtin_arg: BA_int" + | BA_long _ -> failwith "get_builtin_arg: BA_long" + | BA_float _ -> failwith "get_builtin_arg: BA_float" + | BA_single _ -> failwith "get_builtin_arg: BA_single" + | BA_loadstack _ -> failwith "get_builtin_arg: BA_loadstack" + | BA_addrstack ofs -> emit (Paddil(dst, stack_pointer, ofs)) + | BA_loadglobal _ -> failwith "get_builtin_arg: BA_loadglobal" + | BA_addrglobal _ -> failwith "get_builtin_arg: BA_addrglobal" + | BA_splitlong _ -> failwith "get_builtin_arg: BA_splitlong" + | BA_addptr _ -> failwith "get_builtin_arg: BA_addptr";; + +let smart_memcpy = true + +(* FIXME DMonniaux this is really suboptimal (byte per byte) *) +let expand_builtin_memcpy_big sz al src dst = + assert (sz > Z.zero); + let dstptr = Asmvliw.GPR62 + and srcptr = Asmvliw.GPR63 + and tmpbuf = Asmvliw.GPR61 + and tmpbuf2 = Asmvliw.R60R61 + and caml_sz = camlint64_of_coqint sz in + get_builtin_arg dstptr dst; + get_builtin_arg srcptr src; + let caml_sz_div16 = Int64.shift_right caml_sz 4 + and sixteen = coqint_of_camlint64 16L in + if smart_memcpy + then + let remaining = ref caml_sz + and offset = ref 0L in + let cpy buf size load store = + (if !remaining >= size + then + let zofs = coqint_of_camlint64 !offset in + begin + emit Psemi; + emit (load buf srcptr (AOff zofs)); + emit Psemi; + emit (store buf dstptr (AOff zofs)); + remaining := Int64.sub !remaining size; + offset := Int64.add !offset size + end) in + begin + (if caml_sz_div16 >= 2L + then + begin + emit (Pmake (tmpbuf, (coqint_of_camlint64 caml_sz_div16))); + emit Psemi; + let lbl = new_label() in + emit (Ploopdo (tmpbuf, lbl)); + emit Psemi; + emit (Plq (tmpbuf2, srcptr, AOff Z.zero)); + emit (Paddil (srcptr, srcptr, sixteen)); + emit Psemi; + emit (Psq (tmpbuf2, dstptr, AOff Z.zero)); + emit (Paddil (dstptr, dstptr, sixteen)); + emit Psemi; + emit (Plabel lbl); + remaining := Int64.sub !remaining (Int64.shift_left caml_sz_div16 4) + end); + + cpy tmpbuf2 16L (fun x y z -> Plq(x, y, z)) (fun x y z -> Psq(x, y, z)); + cpy tmpbuf 8L (fun x y z -> Pld(TRAP, x, y, z)) (fun x y z -> Psd(x, y, z)); + cpy tmpbuf 4L (fun x y z -> Plw(TRAP, x, y, z)) (fun x y z -> Psw(x, y, z)); + cpy tmpbuf 2L (fun x y z -> Plh(TRAP, x, y, z)) (fun x y z -> Psh(x, y, z)); + cpy tmpbuf 1L (fun x y z -> Plb(TRAP, x, y, z)) (fun x y z -> Psb(x, y, z)); + assert (!remaining = 0L) + end + else + begin + emit (Pmake (tmpbuf, sz)); + emit Psemi; + let lbl = new_label() in + emit (Ploopdo (tmpbuf, lbl)); + emit Psemi; + emit (Plb (TRAP, tmpbuf, srcptr, AOff Z.zero)); + emit (Paddil (srcptr, srcptr, Z.one)); + emit Psemi; + emit (Psb (tmpbuf, dstptr, AOff Z.zero)); + emit (Paddil (dstptr, dstptr, Z.one)); + emit Psemi; + emit (Plabel lbl); + end;; + +let expand_builtin_memcpy sz al args = + match args with + | [dst; src] -> + expand_builtin_memcpy_big sz al src dst + | _ -> assert false;; + +(* Handling of volatile reads and writes *) +(* FIXME probably need to check for size of displacement *) +let expand_builtin_vload_common chunk base ofs res = + match chunk, res with + | Mint8unsigned, BR(Asmvliw.IR res) -> + emit (Plbu (TRAP, res, base, AOff ofs)) + | Mint8signed, BR(Asmvliw.IR res) -> + emit (Plb (TRAP, res, base, AOff ofs)) + | Mint16unsigned, BR(Asmvliw.IR res) -> + emit (Plhu (TRAP, res, base, AOff ofs)) + | Mint16signed, BR(Asmvliw.IR res) -> + emit (Plh (TRAP, res, base, AOff ofs)) + | Mint32, BR(Asmvliw.IR res) -> + emit (Plw (TRAP, res, base, AOff ofs)) + | Mint64, BR(Asmvliw.IR res) -> + emit (Pld (TRAP, res, base, AOff ofs)) + | Mint64, BR_splitlong(BR(Asmvliw.IR res1), BR(Asmvliw.IR res2)) -> + let ofs' = Integers.Ptrofs.add ofs _4 in + if base <> res2 then begin + emit (Plw (TRAP, res2, base, AOff ofs)); + emit (Plw (TRAP, res1, base, AOff ofs')) + end else begin + emit (Plw (TRAP, res1, base, AOff ofs')); + emit (Plw (TRAP, res2, base, AOff ofs)) + end + | Mfloat32, BR(Asmvliw.IR res) -> + emit (Pfls (TRAP, res, base, AOff ofs)) + | Mfloat64, BR(Asmvliw.IR res) -> + emit (Pfld (TRAP, res, base, AOff ofs)) + | _ -> + assert false + +let expand_builtin_vload chunk args res = + match args with + | [BA(Asmvliw.IR addr)] -> + expand_builtin_vload_common chunk addr _0 res + | [BA_addrstack ofs] -> + expand_builtin_vload_common chunk stack_pointer ofs res + | [BA_addptr(BA(Asmvliw.IR addr), (BA_int ofs | BA_long ofs))] -> + expand_builtin_vload_common chunk addr ofs res + | _ -> + assert false + + +let expand_builtin_vstore_common chunk base ofs src = + match chunk, src with + | (Mint8signed | Mint8unsigned), BA(Asmvliw.IR src) -> + emit (Psb (src, base, AOff ofs)) + | (Mint16signed | Mint16unsigned), BA(Asmvliw.IR src) -> + emit (Psh (src, base, AOff ofs)) + | Mint32, BA(Asmvliw.IR src) -> + emit (Psw (src, base, AOff ofs)) + | Mint64, BA(Asmvliw.IR src) -> + emit (Psd (src, base, AOff ofs)) + | Mint64, BA_splitlong(BA(Asmvliw.IR src1), BA(Asmvliw.IR src2)) -> + let ofs' = Integers.Ptrofs.add ofs _4 in + emit (Psw (src2, base, AOff ofs)); + emit (Psw (src1, base, AOff ofs')) + | Mfloat32, BA(Asmvliw.IR src) -> + emit (Pfss (src, base, AOff ofs)) + | Mfloat64, BA(Asmvliw.IR src) -> + emit (Pfsd (src, base, AOff ofs)) + | _ -> + assert false + +let expand_builtin_vstore chunk args = + match args with + | [BA(Asmvliw.IR addr); src] -> + expand_builtin_vstore_common chunk addr _0 src + | [BA_addrstack ofs; src] -> + expand_builtin_vstore_common chunk stack_pointer ofs src + | [BA_addptr(BA(Asmvliw.IR addr), (BA_int ofs | BA_long ofs)); src] -> + expand_builtin_vstore_common chunk addr ofs src + | _ -> + assert false + +(* Handling of varargs *) + +(* Size in words of the arguments to a function. This includes both + arguments passed in registers and arguments passed on stack. *) + +let rec args_size sz = function + | [] -> sz + | (Tint | Tsingle | Tany32) :: l -> + args_size (sz + 1) l + | (Tlong | Tfloat | Tany64) :: l -> + args_size (if Archi.ptr64 then sz + 1 else align sz 2 + 2) l + +let arguments_size sg = + args_size 0 sg.sig_args + +let _nbregargs_ = 12 +let _alignment_ = 8 + +let save_arguments first_reg base_ofs = let open Asmvliw in + for i = first_reg to (_nbregargs_ - 1) do begin + expand_storeind_ptr + int_param_regs.(i) + GPR12 + (Integers.Ptrofs.repr (Z.add base_ofs (Z.of_uint ((i - first_reg) * wordsize)))); + emit Psemi + end done + +let vararg_start_ofs : Z.t option ref = ref None + +let expand_builtin_va_start r = (* assert false *) +match !vararg_start_ofs with + | None -> + invalid_arg "Fatal error: va_start used in non-vararg function" + | Some ofs -> + expand_addptrofs Asmvliw.GPR32 stack_pointer (Integers.Ptrofs.repr ofs); + emit Psemi; + expand_storeind_ptr Asmvliw.GPR32 r Integers.Ptrofs.zero + +(* Auxiliary for 64-bit integer arithmetic built-ins. They expand to + two instructions, one computing the low 32 bits of the result, + followed by another computing the high 32 bits. In cases where + the first instruction would overwrite arguments to the second + instruction, we must go through X31 to hold the low 32 bits of the result. +*) + +let expand_int64_arith conflict rl fn = assert false +(*if conflict then (fn X31; emit (Pmv(rl, X31))) else fn rl *) + +(* Byte swaps. There are no specific instructions, so we use standard, + not-very-efficient formulas. *) + +let expand_bswap16 d s = let open Asmvliw in + (* d = (s & 0xFF) << 8 | (s >> 8) & 0xFF *) + emit (Pandiw(GPR32, s, coqint_of_camlint 0xFFl)); emit Psemi; + emit (Pslliw(GPR32, GPR32, _8)); emit Psemi; + emit (Psrliw(d, s, _8)); emit Psemi; + emit (Pandiw(d, d, coqint_of_camlint 0xFFl)); + emit (Porw(d, GPR32, d)); emit Psemi + +let expand_bswap32 d s = let open Asmvliw in + (* d = (s << 24) + | (((s >> 8) & 0xFF) << 16) + | (((s >> 16) & 0xFF) << 8) + | (s >> 24) *) + emit (Pslliw(GPR16, s, coqint_of_camlint 24l)); emit Psemi; + emit (Psrliw(GPR32, s, _8)); emit Psemi; + emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi; + emit (Pslliw(GPR32, GPR32, _16)); emit Psemi; + emit (Porw(GPR16, GPR16, GPR32)); emit Psemi; + emit (Psrliw(GPR32, s, _16)); emit Psemi; + emit (Pandiw(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi; + emit (Pslliw(GPR32, GPR32, _8)); emit Psemi; + emit (Porw(GPR16, GPR16, GPR32)); emit Psemi; + emit (Psrliw(GPR32, s, coqint_of_camlint 24l)); emit Psemi; + emit (Porw(d, GPR16, GPR32)); emit Psemi + +let expand_bswap64 d s = let open Asmvliw in + (* d = s << 56 + | (((s >> 8) & 0xFF) << 48) + | (((s >> 16) & 0xFF) << 40) + | (((s >> 24) & 0xFF) << 32) + | (((s >> 32) & 0xFF) << 24) + | (((s >> 40) & 0xFF) << 16) + | (((s >> 48) & 0xFF) << 8) + | s >> 56 *) + emit (Psllil(GPR16, s, coqint_of_camlint 56l)); emit Psemi; + List.iter + (fun (n1, n2) -> + emit (Psrlil(GPR32, s, coqint_of_camlint n1)); emit Psemi; + emit (Pandil(GPR32, GPR32, coqint_of_camlint 0xFFl)); emit Psemi; + emit (Psllil(GPR32, GPR32, coqint_of_camlint n2)); emit Psemi; + emit (Porl(GPR16, GPR16, GPR32)); emit Psemi;) + [(8l,48l); (16l,40l); (24l,32l); (32l,24l); (40l,16l); (48l,8l)]; + emit (Psrlil(GPR32, s, coqint_of_camlint 56l)); emit Psemi; + emit (Porl(d, GPR16, GPR32)); emit Psemi + +(* Handling of compiler-inlined builtins *) +let last_system_register = 511l +let not_system_register cn =cn<0l || cn>last_system_register + +let expand_builtin_inline name args res = let open Asmvliw in + match name, args, res with + (* Synchronization *) + | "__builtin_membar", [], _ -> + () + (* Vararg stuff *) + | "__builtin_va_start", [BA(IR a)], _ -> + expand_builtin_va_start a + | "__builtin_kvx_clzw", [BA(IR a)], BR(IR res) -> + emit (Pclzw(res, a)) + | "__builtin_clzll", [BA(IR a)], BR(IR res) -> + emit (Pclzll(res, a)) + | "__builtin_kvx_ctzw", [BA(IR a)], BR(IR res) -> + emit (Pctzw(res, a)) + | "__builtin_ctzll", [BA(IR a)], BR(IR res) -> + emit (Pctzll(res, a)) + | "__builtin_kvx_stsud", [BA(IR a1); BA(IR a2)], BR(IR res) -> + emit (Pstsud(res, a1, a2)) + | "__builtin_kvx_get", [BA_int(n)], BR(IR res) -> + let cn = camlint_of_coqint n in + (if not_system_register cn + then failwith (Printf.sprintf "__builtin_kvx_get(n): n must be between 0 and %ld, was %ld" last_system_register cn) + else emit (Pgetn(n, res))) + | "__builtin_kvx_set", [BA_int(n); BA(IR src)], _ -> + let cn = camlint_of_coqint n in + (if not_system_register cn + then failwith (Printf.sprintf "__builtin_kvx_set(n, val): n must be between 0 and %ld, was %ld" last_system_register cn) + else emit (Psetn(n, src))) + | "__builtin_kvx_wfxl", [BA_int(n); BA(IR src)], _ -> + let cn = camlint_of_coqint n in + (if not_system_register cn + then failwith (Printf.sprintf "__builtin_kvx_wfxl(n, val): n must be between 0 and %ld, was %ld" last_system_register cn) + else emit (Pwfxl(n, src))) + | "__builtin_kvx_wfxm", [BA_int(n); BA(IR src)], _ -> + let cn = camlint_of_coqint n in + (if not_system_register cn + then failwith (Printf.sprintf "__builtin_kvx_wfxm(n, val): n must be between 0 and %ld, was %ld" last_system_register cn) + else emit (Pwfxm(n, src))) + | "__builtin_kvx_ldu", [BA(IR addr)], BR(IR res) -> + emit (Pldu(res, addr)) + | "__builtin_kvx_lbzu", [BA(IR addr)], BR(IR res) -> + emit (Plbzu(res, addr)) + | "__builtin_kvx_lhzu", [BA(IR addr)], BR(IR res) -> + emit (Plhzu(res, addr)) + | "__builtin_kvx_lwzu", [BA(IR addr)], BR(IR res) -> + emit (Plwzu(res, addr)) + | "__builtin_kvx_alclrd", [BA(IR addr)], BR(IR res) -> + emit (Palclrd(res, addr)) + | "__builtin_kvx_alclrw", [BA(IR addr)], BR(IR res) -> + emit (Palclrw(res, addr)) + | "__builtin_kvx_await", [], _ -> + emit Pawait + | "__builtin_kvx_sleep", [], _ -> + emit Psleep + | "__builtin_kvx_stop", [], _ -> + emit Pstop + | "__builtin_kvx_barrier", [], _ -> + emit Pbarrier + | "__builtin_kvx_fence", [], _ -> + emit Pfence + | "__builtin_kvx_dinval", [], _ -> + emit Pdinval + | "__builtin_kvx_dinvall", [BA(IR addr)], _ -> + emit (Pdinvall addr) + | "__builtin_kvx_dtouchl", [BA(IR addr)], _ -> + emit (Pdtouchl addr) + | "__builtin_kvx_iinval", [], _ -> + emit Piinval + | "__builtin_kvx_iinvals", [BA(IR addr)], _ -> + emit (Piinvals addr) + | "__builtin_kvx_itouchl", [BA(IR addr)], _ -> + emit (Pitouchl addr) + | "__builtin_kvx_dzerol", [BA(IR addr)], _ -> + emit (Pdzerol addr) +(*| "__builtin_kvx_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) -> + (if res <> incr_res + then (emit (Asm.Pmv(res, incr_res)); emit Psemi)); + emit (Pafaddd(addr, res)) + | "__builtin_kvx_afaddw", [BA(IR addr); BA (IR incr_res)], BR(IR res) -> + (if res <> incr_res + then (emit (Asm.Pmv(res, incr_res)); emit Psemi)); + emit (Pafaddw(addr, res)) *) (* see #157 *) + | "__builtin_alclrd", [BA(IR addr)], BR(IR res) -> + emit (Palclrd(res, addr)) + | "__builtin_alclrw", [BA(IR addr)], BR(IR res) -> + emit (Palclrw(res, addr)) + | "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> + expand_bswap16 res a1 + | ("__builtin_bswap"| "__builtin_bswap32"), [BA(IR a1)], BR(IR res) -> + expand_bswap32 res a1 + | "__builtin_bswap64", [BA(IR src)], BR(IR res) -> + expand_bswap64 res src + + (* Byte swaps *) +(*| "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> + expand_bswap16 res a1 + | "__builtin_fabs", [BA(FR a1)], BR(FR res) -> + emit (Pfabsd(res, a1)) +*) + (* Catch-all *) + | _ -> + raise (Error ("unrecognized builtin " ^ name)) + +(* Expansion of instructions *) + +let expand_instruction instr = + match instr with + | Pallocframe (sz, ofs) -> + let sg = get_current_function_sig() in + emit (Pmv (Asmvliw.GPR17, stack_pointer)); + if sg.sig_cc.cc_vararg <> None then begin + let n = arguments_size sg in + let extra_sz = if n >= _nbregargs_ then 0 else (* align _alignment_ *) ((_nbregargs_ - n) * wordsize) in + let full_sz = Z.add sz (Z.of_uint extra_sz) in + expand_addptrofs stack_pointer stack_pointer (Integers.Ptrofs.repr (Z.neg full_sz)); + emit Psemi; + expand_storeind_ptr Asmvliw.GPR17 stack_pointer ofs; + emit Psemi; + let va_ofs = + let extra_ofs = if n <= _nbregargs_ then 0 else ((n - _nbregargs_) * wordsize) in + Z.add sz (Z.of_sint extra_ofs) in + vararg_start_ofs := Some va_ofs; + save_arguments n va_ofs + end else begin + let below = Integers.Ptrofs.repr (Z.neg sz) in + expand_addptrofs stack_pointer stack_pointer below; + expand_storeind_ptr stack_pointer stack_pointer (Integers.Ptrofs.add ofs below); + emit Psemi; (* Psemi required to fit in resource constraints *) + vararg_start_ofs := None + end + | Pfreeframe (sz, ofs) -> + let sg = get_current_function_sig() in + let extra_sz = + if sg.sig_cc.cc_vararg <> None then begin + let n = arguments_size sg in + if n >= _nbregargs_ then 0 else (* align _alignment_ *) ((_nbregargs_ - n) * wordsize) + end else 0 in + expand_addptrofs stack_pointer stack_pointer (Integers.Ptrofs.repr (Z.add sz (Z.of_uint extra_sz))) + +(*| Pseqw(rd, rs1, rs2) -> + (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *) + if rs2 = X0 then begin + emit (Psltiuw(rd, rs1, Int.one)) + end else begin + emit (Pxorw(rd, rs1, rs2)); emit (Psltiuw(rd, X rd, Int.one)) + end + | Psnew(rd, rs1, rs2) -> + (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *) + if rs2 = X0 then begin + emit (Psltuw(rd, X0, rs1)) + end else begin + emit (Pxorw(rd, rs1, rs2)); emit (Psltuw(rd, X0, X rd)) + end + | Pseql(rd, rs1, rs2) -> + (* emulate based on the fact that x == 0 iff x <u 1 (unsigned cmp) *) + if rs2 = X0 then begin + emit (Psltiul(rd, rs1, Int64.one)) + end else begin + emit (Pxorl(rd, rs1, rs2)); emit (Psltiul(rd, X rd, Int64.one)) + end + | Psnel(rd, rs1, rs2) -> + (* emulate based on the fact that x != 0 iff 0 <u x (unsigned cmp) *) + if rs2 = X0 then begin + emit (Psltul(rd, X0, rs1)) + end else begin + emit (Pxorl(rd, rs1, rs2)); emit (Psltul(rd, X0, X rd)) + end +*)| Pcvtl2w (rd, rs) -> + assert Archi.ptr64; + emit (Paddiw (rd, rs, Integers.Int.zero)) (* 32-bit sign extension *) + +(*| Pjal_r(r, sg) -> + fixup_call sg; emit instr + | Pjal_s(symb, sg) -> + fixup_call sg; emit instr + | Pj_r(r, sg) when r <> X1 -> + fixup_call sg; emit instr + | Pj_s(symb, sg) -> + fixup_call sg; emit instr + +*)| Pbuiltin (ef,args,res) -> + begin match ef with + | EF_builtin (name,sg) -> + expand_builtin_inline (camlstring_of_coqstring name) args res + | EF_vload chunk -> + expand_builtin_vload chunk args res + | EF_vstore chunk -> + expand_builtin_vstore chunk args +(* | EF_annot_val (kind,txt,targ) -> + expand_annot_val kind txt targ args res *) + | EF_memcpy(sz, al) -> + expand_builtin_memcpy sz al args + (* | EF_annot _ | EF_debug _ | EF_inline_asm _ -> + emit instr + *) + | EF_malloc -> failwith "asmexpand: malloc" + | EF_free -> failwith "asmexpand: free" + | EF_debug _ -> failwith "asmexpand: debug" + | EF_annot _ -> emit instr + | EF_annot_val (kind, txt, targ) -> expand_annot_val kind txt targ args res + | EF_external _ -> failwith "asmexpand: external" + | EF_inline_asm _ -> emit instr + | EF_runtime _ -> failwith "asmexpand: runtime" + | EF_profiling _ -> emit instr + end + | _ -> + emit instr + +(* NOTE: Dwarf register maps for RV32G are not yet specified + officially. This is just a placeholder. *) +let int_reg_to_dwarf = let open Asmvliw in function + | GPR0 -> 1 | GPR1 -> 2 | GPR2 -> 3 | GPR3 -> 4 | GPR4 -> 5 + | GPR5 -> 6 | GPR6 -> 7 | GPR7 -> 8 | GPR8 -> 9 | GPR9 -> 10 + | GPR10 -> 11 | GPR11 -> 12 | GPR12 -> 13 | GPR13 -> 14 | GPR14 -> 15 + | GPR15 -> 16 | GPR16 -> 17 | GPR17 -> 18 | GPR18 -> 19 | GPR19 -> 20 + | GPR20 -> 21 | GPR21 -> 22 | GPR22 -> 23 | GPR23 -> 24 | GPR24 -> 25 + | GPR25 -> 26 | GPR26 -> 27 | GPR27 -> 28 | GPR28 -> 29 | GPR29 -> 30 + | GPR30 -> 31 | GPR31 -> 32 | GPR32 -> 33 | GPR33 -> 34 | GPR34 -> 35 + | GPR35 -> 36 | GPR36 -> 37 | GPR37 -> 38 | GPR38 -> 39 | GPR39 -> 40 + | GPR40 -> 41 | GPR41 -> 42 | GPR42 -> 43 | GPR43 -> 44 | GPR44 -> 45 + | GPR45 -> 46 | GPR46 -> 47 | GPR47 -> 48 | GPR48 -> 49 | GPR49 -> 50 + | GPR50 -> 51 | GPR51 -> 52 | GPR52 -> 53 | GPR53 -> 54 | GPR54 -> 55 + | GPR55 -> 56 | GPR56 -> 57 | GPR57 -> 58 | GPR58 -> 59 | GPR59 -> 60 + | GPR60 -> 61 | GPR61 -> 62 | GPR62 -> 63 | GPR63 -> 64 + +let preg_to_dwarf = let open Asmvliw in function + | IR r -> int_reg_to_dwarf r + | RA -> 65 (* FIXME - No idea what is $ra DWARF number in k1-gdb *) + | _ -> assert false + +let expand_function id fn = + try + set_current_function fn; + expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code; + Errors.OK (get_current_function ()) + with Error s -> + Errors.Error (Errors.msg (coqstring_of_camlstring s)) + +let expand_fundef id = function + | Internal f -> + begin match expand_function id f with + | Errors.OK tf -> Errors.OK (Internal tf) + | Errors.Error msg -> Errors.Error msg + end + | External ef -> + Errors.OK (External ef) + +let expand_program (p: Asm.program) : Asm.program Errors.res = + AST.transform_partial_program2 expand_fundef (fun id v -> Errors.OK v) p diff --git a/kvx/Asmgen.v b/kvx/Asmgen.v new file mode 100644 index 00000000..61856acf --- /dev/null +++ b/kvx/Asmgen.v @@ -0,0 +1,41 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Integers. +Require Import Mach Asm Asmblock Asmblockgen Machblockgen. +Require Import PostpassScheduling. +Require Import Errors String. +Require Compopts. + +Local Open Scope error_monad_scope. + +Definition time {A B: Type} (name: string) (f: A -> B) : A -> B := Compopts.time name f. + +Definition transf_program (p: Mach.program) : res Asm.program := + let mbp := (time "Machblock generation" Machblockgen.transf_program) p in + do abp <- (time "Asmblock generation" Asmblockgen.transf_program) mbp; + do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp; + OK ((time "Asm generation" Asm.transf_program) abp'). + +Definition transf_function (f: Mach.function) : res Asm.function := + let mbf := Machblockgen.transf_function f in + do abf <- Asmblockgen.transf_function mbf; + OK (Asm.transf_function abf). + +Definition transl_code (f: Mach.function) (l: Mach.code) : res (list Asm.instruction) := + let mbf := Machblockgen.transf_function f in + let mbc := Machblockgen.trans_code l in + do abc <- transl_blocks mbf mbc true; + OK (unfold abc). diff --git a/kvx/Asmgenproof.v b/kvx/Asmgenproof.v new file mode 100644 index 00000000..636c105f --- /dev/null +++ b/kvx/Asmgenproof.v @@ -0,0 +1,96 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Composing all passes from Mach to KVX Asm *) + +Require Import Coqlib Errors. +Require Import Integers Floats AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations Mach Conventions Asm Asmgen Machblockgen Asmblockgen. +Require Import Machblockgenproof Asmblockgenproof PostpassSchedulingproof. + +Local Open Scope linking_scope. + +Definition block_passes := + mkpass Machblockgenproof.match_prog + ::: mkpass Asmblockgenproof.match_prog + ::: mkpass PostpassSchedulingproof.match_prog + ::: mkpass Asm.match_prog + ::: pass_nil _. + +Definition match_prog := pass_match (compose_passes block_passes). + +Lemma transf_program_match: + forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp. +Proof. + intros p tp H. + unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H. + inversion_clear H. apply bind_inversion in H1. destruct H1. + inversion_clear H. inversion H2. unfold time, Compopts.time in *. remember (Machblockgen.transf_program p) as mbp. + unfold match_prog; cbn. + exists mbp; split. apply Machblockgenproof.transf_program_match; auto. + exists x; split. apply Asmblockgenproof.transf_program_match; auto. + exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto. + exists tp; split. apply Asm.transf_program_match; auto. auto. +Qed. + +(** Return Address Offset for Mach *) + +Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop := + Mach_return_address_offset Asmblockgenproof.return_address_offset. + +Lemma return_address_exists: + forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros; unfold return_address_offset; eapply Mach_return_address_exists; eauto. + intros; eapply Asmblockgenproof.return_address_exists; eauto. +Qed. + +(** Main preservation theorem: from Mach to KVX Asm *) + +Section PRESERVATION. + +Variable prog: Mach.program. +Variable tprog: program. +Hypothesis TRANSF: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Theorem transf_program_correct: + forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog). +Proof. + unfold match_prog in TRANSF. cbn in TRANSF. + inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H. + eapply compose_forward_simulations. + exploit Machblockgenproof.transf_program_correct; eauto. + unfold Machblockgenproof.inv_trans_rao. + eapply compose_forward_simulations. apply Asmblockgenproof.transf_program_correct; eauto. + eapply compose_forward_simulations. apply PostpassSchedulingproof.transf_program_correct; eauto. + apply Asm.transf_program_correct. eauto. +Qed. + +End PRESERVATION. + +Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes). + +(*******************************************) +(** Stub actually needed by driver/Compiler *) + +Module Asmgenproof0. + +Definition return_address_offset := return_address_offset. + +End Asmgenproof0. diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v new file mode 100644 index 00000000..45b230e6 --- /dev/null +++ b/kvx/Asmvliw.v @@ -0,0 +1,1729 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Abstract syntax and semantics for VLIW semantics of KVX assembly language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import ExtValues. +Require Import Memory. +Require Import Events. +Require Import Globalenvs. +Require Import Smallstep. +Require Import Locations. +Require Stacklayout. +Require Import Conventions. +Require Import Errors. +Require Import Sorting.Permutation. +Require Import Chunks. +Require Import Lia. + +(** * Abstract syntax *) + +(** A KVX program is syntactically given as a list of functions. + Each function is associated to a list of bundles of type [bblock] below. + Hence, syntactically, we view each bundle as a basic block: + this view induces our sequential semantics of bundles defined in [Asmblock]. +*) + +(** ** General Purpose registers. *) + +Inductive gpreg: Type := + | GPR0: gpreg | GPR1: gpreg | GPR2: gpreg | GPR3: gpreg | GPR4: gpreg + | GPR5: gpreg | GPR6: gpreg | GPR7: gpreg | GPR8: gpreg | GPR9: gpreg + | GPR10: gpreg | GPR11: gpreg | GPR12: gpreg | GPR13: gpreg | GPR14: gpreg + | GPR15: gpreg | GPR16: gpreg | GPR17: gpreg | GPR18: gpreg | GPR19: gpreg + | GPR20: gpreg | GPR21: gpreg | GPR22: gpreg | GPR23: gpreg | GPR24: gpreg + | GPR25: gpreg | GPR26: gpreg | GPR27: gpreg | GPR28: gpreg | GPR29: gpreg + | GPR30: gpreg | GPR31: gpreg | GPR32: gpreg | GPR33: gpreg | GPR34: gpreg + | GPR35: gpreg | GPR36: gpreg | GPR37: gpreg | GPR38: gpreg | GPR39: gpreg + | GPR40: gpreg | GPR41: gpreg | GPR42: gpreg | GPR43: gpreg | GPR44: gpreg + | GPR45: gpreg | GPR46: gpreg | GPR47: gpreg | GPR48: gpreg | GPR49: gpreg + | GPR50: gpreg | GPR51: gpreg | GPR52: gpreg | GPR53: gpreg | GPR54: gpreg + | GPR55: gpreg | GPR56: gpreg | GPR57: gpreg | GPR58: gpreg | GPR59: gpreg + | GPR60: gpreg | GPR61: gpreg | GPR62: gpreg | GPR63: gpreg. + +Definition ireg := gpreg. +Definition freg := gpreg. + +Lemma gpreg_eq: forall (x y: gpreg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Inductive gpreg_q : Type := +| R0R1 | R2R3 | R4R5 | R6R7 | R8R9 +| R10R11 | R12R13 | R14R15 | R16R17 | R18R19 +| R20R21 | R22R23 | R24R25 | R26R27 | R28R29 +| R30R31 | R32R33 | R34R35 | R36R37 | R38R39 +| R40R41 | R42R43 | R44R45 | R46R47 | R48R49 +| R50R51 | R52R53 | R54R55 | R56R57 | R58R59 +| R60R61 | R62R63. + +Lemma gpreg_q_eq : forall (x y : gpreg_q), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Definition gpreg_q_expand (x : gpreg_q) : gpreg * gpreg := + match x with + | R0R1 => (GPR0, GPR1) + | R2R3 => (GPR2, GPR3) + | R4R5 => (GPR4, GPR5) + | R6R7 => (GPR6, GPR7) + | R8R9 => (GPR8, GPR9) + | R10R11 => (GPR10, GPR11) + | R12R13 => (GPR12, GPR13) + | R14R15 => (GPR14, GPR15) + | R16R17 => (GPR16, GPR17) + | R18R19 => (GPR18, GPR19) + | R20R21 => (GPR20, GPR21) + | R22R23 => (GPR22, GPR23) + | R24R25 => (GPR24, GPR25) + | R26R27 => (GPR26, GPR27) + | R28R29 => (GPR28, GPR29) + | R30R31 => (GPR30, GPR31) + | R32R33 => (GPR32, GPR33) + | R34R35 => (GPR34, GPR35) + | R36R37 => (GPR36, GPR37) + | R38R39 => (GPR38, GPR39) + | R40R41 => (GPR40, GPR41) + | R42R43 => (GPR42, GPR43) + | R44R45 => (GPR44, GPR45) + | R46R47 => (GPR46, GPR47) + | R48R49 => (GPR48, GPR49) + | R50R51 => (GPR50, GPR51) + | R52R53 => (GPR52, GPR53) + | R54R55 => (GPR54, GPR55) + | R56R57 => (GPR56, GPR57) + | R58R59 => (GPR58, GPR59) + | R60R61 => (GPR60, GPR61) + | R62R63 => (GPR62, GPR63) + end. + +Inductive gpreg_o : Type := +| R0R1R2R3 | R4R5R6R7 | R8R9R10R11 | R12R13R14R15 +| R16R17R18R19 | R20R21R22R23 | R24R25R26R27 | R28R29R30R31 +| R32R33R34R35 | R36R37R38R39 | R40R41R42R43 | R44R45R46R47 +| R48R49R50R51 | R52R53R54R55 | R56R57R58R59 | R60R61R62R63. + +Definition gpreg_o_expand (x : gpreg_o) : gpreg * gpreg * gpreg * gpreg := + match x with + | R0R1R2R3 => (GPR0, GPR1, GPR2, GPR3) + | R4R5R6R7 => (GPR4, GPR5, GPR6, GPR7) + | R8R9R10R11 => (GPR8, GPR9, GPR10, GPR11) + | R12R13R14R15 => (GPR12, GPR13, GPR14, GPR15) + | R16R17R18R19 => (GPR16, GPR17, GPR18, GPR19) + | R20R21R22R23 => (GPR20, GPR21, GPR22, GPR23) + | R24R25R26R27 => (GPR24, GPR25, GPR26, GPR27) + | R28R29R30R31 => (GPR28, GPR29, GPR30, GPR31) + | R32R33R34R35 => (GPR32, GPR33, GPR34, GPR35) + | R36R37R38R39 => (GPR36, GPR37, GPR38, GPR39) + | R40R41R42R43 => (GPR40, GPR41, GPR42, GPR43) + | R44R45R46R47 => (GPR44, GPR45, GPR46, GPR47) + | R48R49R50R51 => (GPR48, GPR49, GPR50, GPR51) + | R52R53R54R55 => (GPR52, GPR53, GPR54, GPR55) + | R56R57R58R59 => (GPR56, GPR57, GPR58, GPR59) + | R60R61R62R63 => (GPR60, GPR61, GPR62, GPR63) + end. + +Lemma gpreg_o_eq : forall (x y : gpreg_o), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Inductive preg: Type := + | IR: gpreg -> preg (**r integer general purpose registers *) + | RA: preg + | PC: preg + . + +Coercion IR: gpreg >-> preg. + +Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}. +Proof. decide equality. apply ireg_eq. Defined. + +Module PregEq. + Definition t := preg. + Definition eq := preg_eq. +End PregEq. + +(* FIXME - R16 and R32 are excluded *) +Definition preg_of (r: mreg) : preg := + match r with + | R0 => GPR0 | R1 => GPR1 | R2 => GPR2 | R3 => GPR3 | R4 => GPR4 + | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R8 => GPR8 | R9 => GPR9 + | R10 => GPR10 | R11 => GPR11 (* | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *) + | R15 => GPR15 (* | R16 => GPR16 *) | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 + | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 + | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29 + | R30 => GPR30 | R31 => GPR31 (* | R32 => GPR32 *) | R33 => GPR33 | R34 => GPR34 + | R35 => GPR35 | R36 => GPR36 | R37 => GPR37 | R38 => GPR38 | R39 => GPR39 + | R40 => GPR40 | R41 => GPR41 | R42 => GPR42 | R43 => GPR43 | R44 => GPR44 + | R45 => GPR45 | R46 => GPR46 | R47 => GPR47 | R48 => GPR48 | R49 => GPR49 + | R50 => GPR50 | R51 => GPR51 | R52 => GPR52 | R53 => GPR53 | R54 => GPR54 + | R55 => GPR55 | R56 => GPR56 | R57 => GPR57 | R58 => GPR58 | R59 => GPR59 + | R60 => GPR60 | R61 => GPR61 | R62 => GPR62 | R63 => GPR63 + end. + +Definition ireg_of (r: mreg) : res ireg := + match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.ireg_of") end. + +Definition freg_of (r: mreg) : res freg := + match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end. + +Module Pregmap := EMap(PregEq). + +(** ** Conventional names for stack pointer ([SP]), return address ([RA]), frame pointer ([FP]) and other temporaries used *) + +Notation "'SP'" := GPR12 (only parsing) : asm. +Notation "'FP'" := GPR17 (only parsing) : asm. +Notation "'MFP'" := R17 (only parsing) : asm. +Notation "'GPRA'" := GPR16 (only parsing) : asm. +Notation "'RTMP'" := GPR32 (only parsing) : asm. + +(** ** Names of tests in comparisons *) + +Inductive btest: Type := + | BTdnez (**r Double Not Equal to Zero *) + | BTdeqz (**r Double Equal to Zero *) + | BTdltz (**r Double Less Than Zero *) + | BTdgez (**r Double Greater Than or Equal to Zero *) + | BTdlez (**r Double Less Than or Equal to Zero *) + | BTdgtz (**r Double Greater Than Zero *) + | BTwnez (**r Word Not Equal to Zero *) + | BTweqz (**r Word Equal to Zero *) + | BTwltz (**r Word Less Than Zero *) + | BTwgez (**r Word Greater Than or Equal to Zero *) + | BTwlez (**r Word Less Than or Equal to Zero *) + | BTwgtz (**r Word Greater Than Zero *) + . + +Inductive itest: Type := + | ITne (**r Not Equal *) + | ITeq (**r Equal *) + | ITlt (**r Less Than *) + | ITge (**r Greater Than or Equal *) + | ITle (**r Less Than or Equal *) + | ITgt (**r Greater Than *) + | ITneu (**r Unsigned Not Equal *) + | ITequ (**r Unsigned Equal *) + | ITltu (**r Less Than Unsigned *) + | ITgeu (**r Greater Than or Equal Unsigned *) + | ITleu (**r Less Than or Equal Unsigned *) + | ITgtu (**r Greater Than Unsigned *) + . + +Inductive ftest: Type := + | FTone (**r Ordered and Not Equal *) + | FTueq (**r Unordered or Equal *) + | FToeq (**r Ordered and Equal *) + | FTune (**r Unordered or Not Equal *) + | FTolt (**r Ordered and Less Than *) + | FTuge (**r Unordered or Greater Than or Equal *) + | FToge (**r Ordered and Greater Than or Equal *) + | FTult (**r Unordered or Less Than *) + . + +(** *** Offsets for load and store instructions. *) + +Definition offset : Type := ptrofs. + +(** *** Labels for goto (in the current function) *) + +Definition label := positive. + +(** ** Instructions *) + +(** We model a subset of the KVX instruction set. + +- Although it is possible to use the 32-bits mode, for now we don't support it. When mapping to actual instructions, the OCaml code in TargetPrinter.ml + throws an error if we are not in 64-bits mode. + +- We follow a design close to the one used for the Risc-V port: one set of + pseudo-instructions for 32-bit integer arithmetic, with suffix W, another + set for 64-bit integer arithmetic, with suffix L. + +- With respect to other CompCert assemblies, we define a type hierarchy of instructions (instead of a flat type). + This helps us to factorize similar cases for the scheduling verifier. + +*) + +(** *** Instructions to be expanded in control-flow *) +Inductive ex_instruction : Type := + (* Pseudo-instructions *) + | Pbuiltin: external_function -> list (builtin_arg preg) + -> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *) +. + +(** Similarly to other CompCert assembly languages, the pseudo-instructions are the following: + +- [Ploadsymbol]: load the address of a symbol in an integer register. + +- [Pallocframe sz pos]: in the formal semantics, this + pseudo-instruction allocates a memory block with bounds [0] and + [sz], stores the value of the stack pointer at offset [pos] in this + block, and sets the stack pointer to the address of the bottom of + this block. + + This cannot be expressed in our memory model, which does not reflect + the fact that stack frames are adjacent and allocated/freed + following a stack discipline. + +- [Pfreeframe sz pos]: in the formal semantics, this pseudo-instruction + reads the word at [pos] of the block pointed by the stack pointer, + frees this block, and sets the stack pointer to the value read. + Again, our memory model cannot comprehend that this operation + frees (logically) the current stack frame. + +- [Pbtbl reg table]: this is a N-way branch, implemented via a jump table +*) + +(** *** Control Flow instructions *) +Inductive cf_instruction : Type := + | Pret (**r return *) + | Pcall (l: label) (**r function call *) + | Picall (r: ireg) (**r function call on register value *) + | Pjumptable (r: ireg) (labels: list label) (**r N-way branch through a jump table (pseudo) *) + + (* Pgoto is for tailcalls, Pj_l is for jumping to a particular label *) + | Pgoto (l: label) (**r goto *) + | Pigoto (r: ireg) (**r goto from register *) + | Pj_l (l: label) (**r jump to label *) + + (* Conditional branches *) + | Pcb (bt: btest) (r: ireg) (l: label) (**r branch based on btest *) + | Pcbu (bt: btest) (r: ireg) (l: label) (**r branch based on btest with unsigned semantics *) +. + +(** *** Loads *) + +(* What follows was the original spec, but is subtly incorrect. + Our definition of the assembly-level memory model is already an abstraction of the real world. + In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level. + This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value. + match chunk with + | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned + | Mint32 => Vint Int.zero + | Mint64 => Vlong Int64.zero + | Many32 | Many64 => Vundef + | Mfloat32 => Vsingle Float32.zero + | Mfloat64 => Vfloat Float.zero + end. *) + +Inductive load_name : Type := + | Plb (**r load byte *) + | Plbu (**r load byte unsigned *) + | Plh (**r load half word *) + | Plhu (**r load half word unsigned *) + | Plw (**r load int32 *) + | Plw_a (**r load any32 *) + | Pld (**r load int64 *) + | Pld_a (**r load any64 *) + | Pfls (**r load float *) + | Pfld (**r load 64-bit float *) +. + +Inductive ld_instruction : Type := + | PLoadRRO (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (ofs: offset) + | PLoadRRR (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg) + | PLoadRRRXS (trap: trapping_mode) (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg) + | PLoadQRRO (rd: gpreg_q) (ra: ireg) (ofs: offset) + | PLoadORRO (rd: gpreg_o) (ra: ireg) (ofs: offset) +. + +(** *** Stores *) +Inductive store_name : Type := + | Psb (**r store byte *) + | Psh (**r store half byte *) + | Psw (**r store int32 *) + | Psw_a (**r store any32 *) + | Psd (**r store int64 *) + | Psd_a (**r store any64 *) + | Pfss (**r store float *) + | Pfsd (**r store 64-bit float *) +. + +Inductive st_instruction : Type := + | PStoreRRO (i: store_name) (rs: ireg) (ra: ireg) (ofs: offset) + | PStoreRRR (i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg) + | PStoreRRRXS(i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg) + | PStoreQRRO (rs: gpreg_q) (ra: ireg) (ofs: offset) + | PStoreORRO (rs: gpreg_o) (ra: ireg) (ofs: offset) +. + +(** *** Arithmetic instructions *) +Inductive arith_name_r : Type := + | Ploadsymbol (id: ident) (ofs: ptrofs) (**r load the address of a symbol *) +. + +Inductive arith_name_rr : Type := + | Pmv (**r register move *) + | Pnegw (**r negate word *) + | Pnegl (**r negate long *) + | Pcvtl2w (**r Convert Long to Word *) + | Psxwd (**r Sign Extend Word to Double Word *) + | Pzxwd (**r Zero Extend Word to Double Word *) + | Pextfz (stop : Z) (start : Z) (**r extract bit field, unsigned *) + | Pextfs (stop : Z) (start : Z) (**r extract bit field, signed *) + | Pextfzl (stop : Z) (start : Z) (**r extract bit field, unsigned *) + | Pextfsl (stop : Z) (start : Z) (**r extract bit field, signed *) + + | Pfabsd (**r float absolute double *) + | Pfabsw (**r float absolute word *) + | Pfnegd (**r float negate double *) + | Pfnegw (**r float negate word *) + | Pfinvw (**r float invert word *) + | Pfnarrowdw (**r float narrow 64 -> 32 bits *) + | Pfwidenlwd (**r Floating Point widen from 32 bits to 64 bits *) + | Pfloatwrnsz (**r Floating Point conversion from integer (int -> SINGLE) *) + | Pfloatuwrnsz (**r Floating Point conversion from integer (unsigned int -> SINGLE) *) + | Pfloatudrnsz (**r Floating Point Conversion from integer (unsigned long -> float) *) + | Pfloatdrnsz (**r Floating Point Conversion from integer (long -> float) *) + | Pfixedwrzz (**r Integer conversion from floating point (single -> int) *) + | Pfixeduwrzz (**r Integer conversion from floating point (single -> unsigned int) *) + | Pfixeddrzz (**r Integer conversion from floating point (float -> long) *) + | Pfixedudrzz (**r Integer conversion from floating point (float -> unsigned long) *) + | Pfixeddrzz_i32 (**r Integer conversion from floating point (float -> int) *) + | Pfixedudrzz_i32 (**r Integer conversion from floating point (float -> unsigned int) *) +. + +Inductive arith_name_ri32 : Type := + | Pmake (**r load immediate *) +. + +Inductive arith_name_ri64 : Type := + | Pmakel (**r load immediate long *) +. + +Inductive arith_name_rf32 : Type := + | Pmakefs (**r load immediate single *) +. + +Inductive arith_name_rf64 : Type := + | Pmakef (**r load immediate float *) +. + +Inductive arith_name_rrr : Type := + | Pcompw (it: itest) (**r comparison word *) + | Pcompl (it: itest) (**r comparison long *) + | Pfcompw (ft: ftest) (**r comparison float32 *) + | Pfcompl (ft: ftest) (**r comparison float64 *) + + | Paddw (**r add word *) + | Paddxw (shift : shift1_4) (**r add shift *) + | Psubw (**r sub word word *) + | Prevsubxw (shift : shift1_4) (**r sub shift word *) + | Pmulw (**r mul word *) + | Pandw (**r and word *) + | Pnandw (**r nand word *) + | Porw (**r or word *) + | Pnorw (**r nor word *) + | Pxorw (**r xor word *) + | Pnxorw (**r nxor word *) + | Pandnw (**r andn word *) + | Pornw (**r orn word *) + | Psraw (**r shift right arithmetic word *) + | Psrxw (**r shift right arithmetic word round to 0*) + | Psrlw (**r shift right logical word *) + | Psllw (**r shift left logical word *) + + | Paddl (**r add long *) + | Paddxl (shift : shift1_4) (**r add shift long *) + | Psubl (**r sub long *) + | Prevsubxl (shift : shift1_4) (**r sub shift long *) + | Pandl (**r and long *) + | Pnandl (**r nand long *) + | Porl (**r or long *) + | Pnorl (**r nor long *) + | Pxorl (**r xor long *) + | Pnxorl (**r nxor long *) + | Pandnl (**r andn long *) + | Pornl (**r orn long *) + | Pmull (**r mul long (low part) *) + | Pslll (**r shift left logical long *) + | Psrll (**r shift right logical long *) + | Psrxl (**r shift right logical long round to 0*) + | Psral (**r shift right arithmetic long *) + + | Pfaddd (**r float add double *) + | Pfaddw (**r float add word *) + | Pfsbfd (**r float sub double *) + | Pfsbfw (**r float sub word *) + | Pfmuld (**r float multiply double *) + | Pfmulw (**r float multiply word *) + | Pfmind (**r float min double *) + | Pfminw (**r float min word *) + | Pfmaxd (**r float max double *) + | Pfmaxw (**r float max word *) +. + +Inductive arith_name_rri32 : Type := + | Pcompiw (it: itest) (**r comparison imm word *) + + | Paddiw (**r add imm word *) + | Paddxiw (shift : shift1_4) + | Prevsubiw (**r add imm word *) + | Prevsubxiw (shift : shift1_4) + | Pmuliw (**r add imm word *) + | Pandiw (**r and imm word *) + | Pnandiw (**r nand imm word *) + | Poriw (**r or imm word *) + | Pnoriw (**r nor imm word *) + | Pxoriw (**r xor imm word *) + | Pnxoriw (**r nxor imm word *) + | Pandniw (**r andn word *) + | Porniw (**r orn word *) + | Psraiw (**r shift right arithmetic imm word *) + | Psrxiw (**r shift right arithmetic imm word round to 0*) + | Psrliw (**r shift right logical imm word *) + | Pslliw (**r shift left logical imm word *) + | Proriw (**r rotate right imm word *) + | Psllil (**r shift left logical immediate long *) + | Psrlil (**r shift right logical immediate long *) + | Psrail (**r shift right arithmetic immediate long *) + | Psrxil (**r shift right arithmetic immediate long round to 0*) +. + +Inductive arith_name_rri64 : Type := + | Pcompil (it: itest) (**r comparison imm long *) + | Paddil (**r add immediate long *) + | Paddxil (shift : shift1_4) + | Prevsubil + | Prevsubxil (shift : shift1_4) + | Pmulil (**r mul immediate long *) + | Pandil (**r and immediate long *) + | Pnandil (**r nand immediate long *) + | Poril (**r or immediate long *) + | Pnoril (**r nor immediate long *) + | Pxoril (**r xor immediate long *) + | Pnxoril (**r nxor immediate long *) + | Pandnil (**r andn immediate long *) + | Pornil (**r orn immediate long *) +. + +Inductive arith_name_arrr : Type := + | Pmaddw (**r multiply add word *) + | Pmaddl (**r multiply add long *) + | Pmsubw (**r multiply subtract word *) + | Pmsubl (**r multiply subtract long *) + | Pcmove (bt: btest) (**r conditional move *) + | Pcmoveu (bt: btest) (**r conditional move, test on unsigned semantics *) + | Pfmaddfw (**r float fused multiply add word *) + | Pfmaddfl (**r float fused multiply add long *) + | Pfmsubfw (**r float fused multiply subtract word *) + | Pfmsubfl (**r float fused multiply subtract long *) +. + +Inductive arith_name_arri32 : Type := + | Pmaddiw (**r multiply add word *) + | Pcmoveiw (bt: btest) + | Pcmoveuiw (bt: btest) +. + +Inductive arith_name_arri64 : Type := + | Pmaddil (**r multiply add long *) + | Pcmoveil (bt: btest) + | Pcmoveuil (bt: btest) +. + +Inductive arith_name_arr : Type := + | Pinsf (stop : Z) (start : Z) (**r insert bit field *) + | Pinsfl (stop : Z) (start : Z) (**r insert bit field *) +. + +Inductive ar_instruction : Type := + | PArithR (i: arith_name_r) (rd: ireg) + | PArithRR (i: arith_name_rr) (rd rs: ireg) + | PArithRI32 (i: arith_name_ri32) (rd: ireg) (imm: int) + | PArithRI64 (i: arith_name_ri64) (rd: ireg) (imm: int64) + | PArithRF32 (i: arith_name_rf32) (rd: ireg) (imm: float32) + | PArithRF64 (i: arith_name_rf64) (rd: ireg) (imm: float) + | PArithRRR (i: arith_name_rrr) (rd rs1 rs2: ireg) + | PArithRRI32 (i: arith_name_rri32) (rd rs: ireg) (imm: int) + | PArithRRI64 (i: arith_name_rri64) (rd rs: ireg) (imm: int64) + | PArithARRR (i: arith_name_arrr) (rd rs1 rs2: ireg) + | PArithARR (i: arith_name_arr) (rd rs: ireg) + | PArithARRI32 (i: arith_name_arri32) (rd rs: ireg) (imm: int) + | PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64) +. + +Module PArithCoercions. + +Coercion PArithR: arith_name_r >-> Funclass. +Coercion PArithRR: arith_name_rr >-> Funclass. +Coercion PArithRI32: arith_name_ri32 >-> Funclass. +Coercion PArithRI64: arith_name_ri64 >-> Funclass. +Coercion PArithRF32: arith_name_rf32 >-> Funclass. +Coercion PArithRF64: arith_name_rf64 >-> Funclass. +Coercion PArithRRR: arith_name_rrr >-> Funclass. +Coercion PArithRRI32: arith_name_rri32 >-> Funclass. +Coercion PArithRRI64: arith_name_rri64 >-> Funclass. +Coercion PArithARRR: arith_name_arrr >-> Funclass. +Coercion PArithARR: arith_name_arr >-> Funclass. +Coercion PArithARRI32: arith_name_arri32 >-> Funclass. +Coercion PArithARRI64: arith_name_arri64 >-> Funclass. + +End PArithCoercions. + +(** ** Basic instructions *) + +Inductive basic : Type := + | PArith (i: ar_instruction) + | PLoad (i: ld_instruction) + | PStore (i: st_instruction) + | Pallocframe (sz: Z) (pos: ptrofs) (**r allocate new stack frame *) + | Pfreeframe (sz: Z) (pos: ptrofs) (**r deallocate stack frame and restore previous frame *) + | Pget (rd: ireg) (rs: preg) (**r get system register *) + | Pset (rd: preg) (rs: ireg) (**r set system register *) + | Pnop (**r virtual instruction that does nothing *) +. + +Coercion PLoad: ld_instruction >-> basic. +Coercion PStore: st_instruction >-> basic. +Coercion PArith: ar_instruction >-> basic. + +(** ** Control-flow instructions *) + +Inductive control : Type := + | PExpand (i: ex_instruction) + | PCtlFlow (i: cf_instruction) +. + +Coercion PExpand: ex_instruction >-> control. +Coercion PCtlFlow: cf_instruction >-> control. + + +(** * Definition of a bblock (ie a bundle) *) + +(** A bundle/bblock must contain at least one instruction. + +This choice simplifies the definition of [find_bblock] below: +indeed, each address of a code block identifies at most one bundle +(which depends on the number of instructions in the bundles of lower addresses). + +*) + +Definition non_empty_body (body: list basic): bool := + match body with + | nil => false + | _ => true + end. + +Definition non_empty_exit (exit: option control): bool := + match exit with + | None => false + | _ => true + end. + +Definition non_empty_bblockb (body: list basic) (exit: option control): bool := non_empty_body body || non_empty_exit exit. + + +(** For now, we consider a builtin is alone in a bundle (and a basic block). + Is there a way to avoid that ? (TODO) + *) +Definition builtin_aloneb (body: list basic) (exit: option control) := + match exit with + | Some (PExpand (Pbuiltin _ _ _)) => + match body with + | nil => true + | _ => false + end + | _ => true + end. + +Definition wf_bblockb (body: list basic) (exit: option control) := + (non_empty_bblockb body exit) && (builtin_aloneb body exit). + +(** A bblock is well-formed if he contains at least one instruction, + and if there is a builtin then it must be alone in this bblock. *) + +Record bblock := mk_bblock { + header: list label; + body: list basic; + exit: option control; + correct: Is_true (wf_bblockb body exit) +}. + +(* FIXME? redundant with definition in Machblock *) +Definition length_opt {A} (o: option A) : nat := + match o with + | Some o => 1 + | None => 0 + end. + +(** The notion of size induces the notion of "valid" code address given by [find_bblock] + The result is in Z to be compatible with operations on PC. + + WARNING: this notion of size is not the same than in Machblock ! + We ignore labels here... + +*) +Definition size (b:bblock): Z := Z.of_nat (length (body b) + length_opt (exit b)). + +Definition bblocks := list bblock. + +Fixpoint size_blocks (l: bblocks): Z := + match l with + | nil => 0 + | b :: l => + (size b) + (size_blocks l) + end + . + +Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks }. +Definition fundef := AST.fundef function. +Definition program := AST.program fundef unit. + +(** * Parallel Semantics of bundles *) + +(** The semantics operates over a single mapping from registers + (type [preg]) to values. We maintain + the convention that integer registers are mapped to values of + type [Tint] or [Tlong] (in 64 bit mode), + and float registers to values of type [Tsingle] or [Tfloat]. *) + +Definition regset := Pregmap.t val. + +Definition genv := Genv.t fundef unit. + +Notation "a # b" := (a b) (at level 1, only parsing) : asm. +Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm. + +Open Scope asm. + +(** *** Undefining some registers *) + +Fixpoint undef_regs (l: list preg) (rs: regset) : regset := + match l with + | nil => rs + | r :: l' => undef_regs l' (rs#r <- Vundef) + end. + + +(** *** Assigning a register pair *) +Definition set_pair (p: rpair preg) (v: val) (rs: regset) : regset := + match p with + | One r => rs#r <- v + | Twolong rhi rlo => rs#rhi <- (Val.hiword v) #rlo <- (Val.loword v) + end. + + +(** *** Assigning the result of a builtin *) + +Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset := + match res with + | BR r => rs#r <- v + | BR_none => rs + | BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs) + end. + +Local Open Scope asm. + +Section RELSEM. + +Variable ge: genv. + +(** The parallel semantics on bundles is purely small-step and defined as a relation + from the current state (a register set + a memory state) to either [Next rs' m'] + where [rs'] and [m'] are the updated register set and memory state after execution + of the instruction at [rs#PC], or [Stuck] if the processor is stuck. + + The parallel semantics of each instructions handles two states in input: + - the actual input state of the bundle which is only read + - and the other on which every "write" is performed: + it represents a temporary "writes" buffer, from which the final state + of the bundle is computed. + + NB: the sequential semantics defined in [Asmblock] is derived + from the parallel semantics of each instruction by identifying + the read state and the write state. + +*) + +Inductive outcome: Type := + | Next (rs:regset) (m:mem) + | Stuck +. + +(** *** Arithmetic Expressions (including comparisons) *) + +Inductive signedness: Type := Signed | Unsigned. + +Inductive intsize: Type := Int | Long. + +Definition itest_for_cmp (c: comparison) (s: signedness) := + match c, s with + | Cne, Signed => ITne + | Ceq, Signed => ITeq + | Clt, Signed => ITlt + | Cge, Signed => ITge + | Cle, Signed => ITle + | Cgt, Signed => ITgt + | Cne, Unsigned => ITneu + | Ceq, Unsigned => ITequ + | Clt, Unsigned => ITltu + | Cge, Unsigned => ITgeu + | Cle, Unsigned => ITleu + | Cgt, Unsigned => ITgtu + end. + +Inductive oporder_ftest := + | Normal (ft: ftest) + | Reversed (ft: ftest) +. + +Definition ftest_for_cmp (c: comparison) := + match c with + | Ceq => Normal FToeq + | Cne => Normal FTune + | Clt => Normal FTolt + | Cle => Reversed FToge + | Cgt => Reversed FTolt + | Cge => Normal FToge + end. + +Definition notftest_for_cmp (c: comparison) := + match c with + | Ceq => Normal FTune + | Cne => Normal FToeq + | Clt => Normal FTuge + | Cle => Reversed FTult + | Cgt => Reversed FTuge + | Cge => Normal FTult + end. + +(* **** CoMPare Signed Words to Zero *) +Definition btest_for_cmpswz (c: comparison) := + match c with + | Cne => BTwnez + | Ceq => BTweqz + | Clt => BTwltz + | Cge => BTwgez + | Cle => BTwlez + | Cgt => BTwgtz + end. + +(* **** CoMPare Signed Doubles to Zero *) +Definition btest_for_cmpsdz (c: comparison) := + match c with + | Cne => BTdnez + | Ceq => BTdeqz + | Clt => BTdltz + | Cge => BTdgez + | Cle => BTdlez + | Cgt => BTdgtz + end. + +Definition cmp_for_btest (bt: btest) := + match bt with + | BTwnez => (Some Cne, Int) + | BTweqz => (Some Ceq, Int) + | BTwltz => (Some Clt, Int) + | BTwgez => (Some Cge, Int) + | BTwlez => (Some Cle, Int) + | BTwgtz => (Some Cgt, Int) + + | BTdnez => (Some Cne, Long) + | BTdeqz => (Some Ceq, Long) + | BTdltz => (Some Clt, Long) + | BTdgez => (Some Cge, Long) + | BTdlez => (Some Cle, Long) + | BTdgtz => (Some Cgt, Long) + end. + +Definition cmpu_for_btest (bt: btest) := + match bt with + | BTwnez => (Some Cne, Int) + | BTweqz => (Some Ceq, Int) + | BTdnez => (Some Cne, Long) + | BTdeqz => (Some Ceq, Long) + | _ => (None, Int) + end. + + +(** **** Comparing integers *) +Definition compare_int (t: itest) (v1 v2: val): val := + match t with + | ITne => Val.cmp Cne v1 v2 + | ITeq => Val.cmp Ceq v1 v2 + | ITlt => Val.cmp Clt v1 v2 + | ITge => Val.cmp Cge v1 v2 + | ITle => Val.cmp Cle v1 v2 + | ITgt => Val.cmp Cgt v1 v2 + | ITneu => Val.mxcmpu Cne v1 v2 + | ITequ => Val.mxcmpu Ceq v1 v2 + | ITltu => Val.mxcmpu Clt v1 v2 + | ITgeu => Val.mxcmpu Cge v1 v2 + | ITleu => Val.mxcmpu Cle v1 v2 + | ITgtu => Val.mxcmpu Cgt v1 v2 + end. + +Definition compare_long (t: itest) (v1 v2: val): val := + let res := match t with + | ITne => Val.cmpl Cne v1 v2 + | ITeq => Val.cmpl Ceq v1 v2 + | ITlt => Val.cmpl Clt v1 v2 + | ITge => Val.cmpl Cge v1 v2 + | ITle => Val.cmpl Cle v1 v2 + | ITgt => Val.cmpl Cgt v1 v2 + | ITneu => Some (Val.mxcmplu Cne v1 v2) + | ITequ => Some (Val.mxcmplu Ceq v1 v2) + | ITltu => Some (Val.mxcmplu Clt v1 v2) + | ITgeu => Some (Val.mxcmplu Cge v1 v2) + | ITleu => Some (Val.mxcmplu Cle v1 v2) + | ITgtu => Some (Val.mxcmplu Cgt v1 v2) + end in + match res with + | Some v => v + | None => Vundef + end + . + +Definition compare_single (t: ftest) (v1 v2: val): val := + match t with + | FTone | FTueq => Vundef (* unused *) + | FToeq => Val.cmpfs Ceq v1 v2 + | FTune => Val.cmpfs Cne v1 v2 + | FTolt => Val.cmpfs Clt v1 v2 + | FTuge => Val.notbool (Val.cmpfs Clt v1 v2) + | FToge => Val.cmpfs Cge v1 v2 + | FTult => Val.notbool (Val.cmpfs Cge v1 v2) + end. + +Definition compare_float (t: ftest) (v1 v2: val): val := + match t with + | FTone | FTueq => Vundef (* unused *) + | FToeq => Val.cmpf Ceq v1 v2 + | FTune => Val.cmpf Cne v1 v2 + | FTolt => Val.cmpf Clt v1 v2 + | FTuge => Val.notbool (Val.cmpf Clt v1 v2) + | FToge => Val.cmpf Cge v1 v2 + | FTult => Val.notbool (Val.cmpf Cge v1 v2) + end. + +(** **** Arithmetic evaluators *) + +Definition arith_eval_r n := + match n with + | Ploadsymbol s ofs => Genv.symbol_address ge s ofs + end +. + +Definition arith_eval_rr n v := + match n with + | Pmv => v + | Pnegw => Val.neg v + | Pnegl => Val.negl v + | Pcvtl2w => Val.loword v + | Psxwd => Val.longofint v + | Pzxwd => Val.longofintu v + | Pextfz stop start => extfz stop start v + | Pextfs stop start => extfs stop start v + | Pextfzl stop start => extfzl stop start v + | Pextfsl stop start => extfsl stop start v + | Pfnegd => Val.negf v + | Pfnegw => Val.negfs v + | Pfabsd => Val.absf v + | Pfabsw => Val.absfs v + | Pfinvw => ExtValues.invfs v + | Pfnarrowdw => Val.singleoffloat v + | Pfwidenlwd => Val.floatofsingle v + | Pfloatwrnsz => Val.maketotal (Val.singleofint v) + | Pfloatuwrnsz => Val.maketotal (Val.singleofintu v) + | Pfloatudrnsz => Val.maketotal (Val.floatoflongu v) + | Pfloatdrnsz => Val.maketotal (Val.floatoflong v) + | Pfixedwrzz => Val.maketotal (Val.intofsingle v) + | Pfixeduwrzz => Val.maketotal (Val.intuofsingle v) + | Pfixeddrzz => Val.maketotal (Val.longoffloat v) + | Pfixedudrzz => Val.maketotal (Val.longuoffloat v) + | Pfixeddrzz_i32 => Val.maketotal (Val.intoffloat v) + | Pfixedudrzz_i32 => Val.maketotal (Val.intuoffloat v) + end. + +Definition arith_eval_ri32 n i := + match n with + | Pmake => Vint i + end. + +Definition arith_eval_ri64 n i := + match n with + | Pmakel => Vlong i + end. + +Definition arith_eval_rf32 n i := + match n with + | Pmakefs => Vsingle i + end. + +Definition arith_eval_rf64 n i := + match n with + | Pmakef => Vfloat i + end. + +Definition arith_eval_rrr n v1 v2 := + match n with + | Pcompw c => compare_int c v1 v2 + | Pcompl c => compare_long c v1 v2 + | Pfcompw c => compare_single c v1 v2 + | Pfcompl c => compare_float c v1 v2 + + | Paddw => Val.add v1 v2 + | Psubw => Val.sub v1 v2 + | Pmulw => Val.mul v1 v2 + | Pandw => Val.and v1 v2 + | Pnandw => Val.notint (Val.and v1 v2) + | Porw => Val.or v1 v2 + | Pnorw => Val.notint (Val.or v1 v2) + | Pxorw => Val.xor v1 v2 + | Pnxorw => Val.notint (Val.xor v1 v2) + | Pandnw => Val.and (Val.notint v1) v2 + | Pornw => Val.or (Val.notint v1) v2 + | Psrlw => Val.shru v1 v2 + | Psraw => Val.shr v1 v2 + | Psllw => Val.shl v1 v2 + | Psrxw => ExtValues.val_shrx v1 v2 + + | Paddl => Val.addl v1 v2 + | Psubl => Val.subl v1 v2 + | Pandl => Val.andl v1 v2 + | Pnandl => Val.notl (Val.andl v1 v2) + | Porl => Val.orl v1 v2 + | Pnorl => Val.notl (Val.orl v1 v2) + | Pxorl => Val.xorl v1 v2 + | Pnxorl => Val.notl (Val.xorl v1 v2) + | Pandnl => Val.andl (Val.notl v1) v2 + | Pornl => Val.orl (Val.notl v1) v2 + | Pmull => Val.mull v1 v2 + | Pslll => Val.shll v1 v2 + | Psrll => Val.shrlu v1 v2 + | Psral => Val.shrl v1 v2 + | Psrxl => ExtValues.val_shrxl v1 v2 + + | Pfaddd => Val.addf v1 v2 + | Pfaddw => Val.addfs v1 v2 + | Pfsbfd => Val.subf v1 v2 + | Pfsbfw => Val.subfs v1 v2 + | Pfmuld => Val.mulf v1 v2 + | Pfmulw => Val.mulfs v1 v2 + + | Pfmind => ExtValues.minf v1 v2 + | Pfminw => ExtValues.minfs v1 v2 + | Pfmaxd => ExtValues.maxf v1 v2 + | Pfmaxw => ExtValues.maxfs v1 v2 + + | Paddxw shift => ExtValues.addx (int_of_shift1_4 shift) v1 v2 + | Paddxl shift => ExtValues.addxl (int_of_shift1_4 shift) v1 v2 + + | Prevsubxw shift => ExtValues.revsubx (int_of_shift1_4 shift) v1 v2 + | Prevsubxl shift => ExtValues.revsubxl (int_of_shift1_4 shift) v1 v2 + end. + +Definition arith_eval_rri32 n v i := + match n with + | Pcompiw c => compare_int c v (Vint i) + | Paddiw => Val.add v (Vint i) + | Prevsubiw => Val.sub (Vint i) v + | Pmuliw => Val.mul v (Vint i) + | Pandiw => Val.and v (Vint i) + | Pnandiw => Val.notint (Val.and v (Vint i)) + | Poriw => Val.or v (Vint i) + | Pnoriw => Val.notint (Val.or v (Vint i)) + | Pxoriw => Val.xor v (Vint i) + | Pnxoriw => Val.notint (Val.xor v (Vint i)) + | Pandniw => Val.and (Val.notint v) (Vint i) + | Porniw => Val.or (Val.notint v) (Vint i) + | Psraiw => Val.shr v (Vint i) + | Psrxiw => ExtValues.val_shrx v (Vint i) + | Psrliw => Val.shru v (Vint i) + | Pslliw => Val.shl v (Vint i) + | Proriw => Val.ror v (Vint i) + | Psllil => Val.shll v (Vint i) + | Psrxil => ExtValues.val_shrxl v (Vint i) + | Psrlil => Val.shrlu v (Vint i) + | Psrail => Val.shrl v (Vint i) + | Paddxiw shift => ExtValues.addx (int_of_shift1_4 shift) v (Vint i) + | Prevsubxiw shift => ExtValues.revsubx (int_of_shift1_4 shift) v (Vint i) + end. + +Definition arith_eval_rri64 n v i := + match n with + | Pcompil c => compare_long c v (Vlong i) + | Paddil => Val.addl v (Vlong i) + | Prevsubil => Val.subl (Vlong i) v + | Pmulil => Val.mull v (Vlong i) + | Pandil => Val.andl v (Vlong i) + | Pnandil => Val.notl (Val.andl v (Vlong i)) + | Poril => Val.orl v (Vlong i) + | Pnoril => Val.notl (Val.orl v (Vlong i)) + | Pxoril => Val.xorl v (Vlong i) + | Pnxoril => Val.notl (Val.xorl v (Vlong i)) + | Pandnil => Val.andl (Val.notl v) (Vlong i) + | Pornil => Val.orl (Val.notl v) (Vlong i) + | Paddxil shift => ExtValues.addxl (int_of_shift1_4 shift) v (Vlong i) + | Prevsubxil shift => ExtValues.revsubxl (int_of_shift1_4 shift) v (Vlong i) + end. + +Definition cmove bt v1 v2 v3 := + match cmp_for_btest bt with + | (Some c, Int) => + match Val.cmp_bool c v2 (Vint Int.zero) with + | None => Vundef + | Some true => v3 + | Some false => v1 + end + | (Some c, Long) => + match Val.cmpl_bool c v2 (Vlong Int64.zero) with + | None => Vundef + | Some true => v3 + | Some false => v1 + end + | (None, _) => Vundef + end. + +Definition cmoveu bt v1 v2 v3 := + match cmpu_for_btest bt with + | (Some c, Int) => + match Val.mxcmpu_bool c v2 (Vint Int.zero) with + | None => Vundef + | Some true => v3 + | Some false => v1 + end + | (Some c, Long) => + match Val.mxcmplu_bool c v2 (Vlong Int64.zero) with + | None => Vundef + | Some true => v3 + | Some false => v1 + end + | (None, _) => Vundef + end. + +Definition arith_eval_arrr n v1 v2 v3 := + match n with + | Pmaddw => Val.add v1 (Val.mul v2 v3) + | Pmaddl => Val.addl v1 (Val.mull v2 v3) + | Pmsubw => Val.sub v1 (Val.mul v2 v3) + | Pmsubl => Val.subl v1 (Val.mull v2 v3) + | Pcmove bt => cmove bt v1 v2 v3 + | Pcmoveu bt => cmoveu bt v1 v2 v3 + | Pfmaddfw => ExtValues.fmaddfs v1 v2 v3 + | Pfmaddfl => ExtValues.fmaddf v1 v2 v3 + | Pfmsubfw => ExtValues.fmsubfs v1 v2 v3 + | Pfmsubfl => ExtValues.fmsubf v1 v2 v3 + end. + +Definition arith_eval_arr n v1 v2 := + match n with + | Pinsf stop start => ExtValues.insf stop start v1 v2 + | Pinsfl stop start => ExtValues.insfl stop start v1 v2 + end. + +Definition arith_eval_arri32 n v1 v2 v3 := + match n with + | Pmaddiw => Val.add v1 (Val.mul v2 (Vint v3)) + | Pcmoveiw bt => cmove bt v1 v2 (Vint v3) + | Pcmoveuiw bt => cmoveu bt v1 v2 (Vint v3) + end. + +Definition arith_eval_arri64 n v1 v2 v3 := + match n with + | Pmaddil => Val.addl v1 (Val.mull v2 (Vlong v3)) + | Pcmoveil bt => cmove bt v1 v2 (Vlong v3) + | Pcmoveuil bt => cmoveu bt v1 v2 (Vlong v3) + end. + +Definition parexec_arith_instr (ai: ar_instruction) (rsr rsw: regset): regset := + match ai with + | PArithR n d => rsw#d <- (arith_eval_r n) + + | PArithRR n d s => rsw#d <- (arith_eval_rr n rsr#s) + + | PArithRI32 n d i => rsw#d <- (arith_eval_ri32 n i) + | PArithRI64 n d i => rsw#d <- (arith_eval_ri64 n i) + | PArithRF32 n d i => rsw#d <- (arith_eval_rf32 n i) + | PArithRF64 n d i => rsw#d <- (arith_eval_rf64 n i) + + | PArithRRR n d s1 s2 => rsw#d <- (arith_eval_rrr n rsr#s1 rsr#s2) + | PArithRRI32 n d s i => rsw#d <- (arith_eval_rri32 n rsr#s i) + | PArithRRI64 n d s i => rsw#d <- (arith_eval_rri64 n rsr#s i) + + | PArithARRR n d s1 s2 => rsw#d <- (arith_eval_arrr n rsr#d rsr#s1 rsr#s2) + | PArithARR n d s => rsw#d <- (arith_eval_arr n rsr#d rsr#s) + | PArithARRI32 n d s i => rsw#d <- (arith_eval_arri32 n rsr#d rsr#s i) + | PArithARRI64 n d s i => rsw#d <- (arith_eval_arri64 n rsr#d rsr#s i) + end. + +Definition eval_offset (ofs: offset) : res ptrofs := OK ofs. + +(** *** load/store instructions *) + +Definition parexec_incorrect_load trap d rsw mw := + match trap with + | TRAP => Stuck + | NOTRAP => Next (rsw#d <- Vundef) mw + end. + +Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) := + match (eval_offset ofs) with + | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with + | None => parexec_incorrect_load trap d rsw mw + | Some v => Next (rsw#d <- v) mw + end + | _ => Stuck + end. + +Definition parexec_load_q_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := + let (rd0, rd1) := gpreg_q_expand d in +(* NB: By construction of [gpreg_q], register rd0 and rd1 are distinct, thus, the register writes cannot overlap. + But we do not need to express/prove this in the semantics. +*) + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) ofs) with + | None => Stuck + | Some v0 => + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) with + | None => Stuck + | Some v1 => Next (rsw#rd0 <- v0 #rd1 <- v1) mw + end + end. + +Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a: ireg) (ofs: offset) := + match gpreg_o_expand d with + | (rd0, rd1, rd2, rd3) => +(* NB: By construction of [gpreg_o], the four destination registers are pairwise distinct, thus, the register writes cannot overlap. + But we do not need to express/prove this in the semantics. +*) + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) ofs) with + | None => Stuck + | Some v0 => + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) with + | None => Stuck + | Some v1 => + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 16))) with + | None => Stuck + | Some v2 => + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 24))) with + | None => Stuck + | Some v3 => + Next (rsw#rd0 <- v0 #rd1 <- v1 #rd2 <- v2 #rd3 <- v3) mw + end + end + end + end + end. + +Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := + match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with + | None => parexec_incorrect_load trap d rsw mw + | Some v => Next (rsw#d <- v) mw + end. + +Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := + match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with + | None => parexec_incorrect_load trap d rsw mw + | Some v => Next (rsw#d <- v) mw + end. + +Definition parexec_store_offset (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a: ireg) (ofs: offset) := + match (eval_offset ofs) with + | OK ptr => match Mem.storev chunk mr (Val.offset_ptr (rsr a) ptr) (rsr s) with + | None => Stuck + | Some m' => Next rsw m' + end + | _ => Stuck + end. + +Definition parexec_store_reg + (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) := + match Mem.storev chunk mr (Val.addl (rsr a) (rsr ro)) (rsr s) with + | None => Stuck + | Some m' => Next rsw m' + end. + +Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (s a ro: ireg) := + match Mem.storev chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) (rsr s) with + | None => Stuck + | Some m' => Next rsw m' + end. + +Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := + let (s0, s1) := gpreg_q_expand s in + match Mem.storev Many64 mr (Val.offset_ptr (rsr a) ofs) (rsr s0) with + | None => Stuck + | Some m1 => + match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) (rsr s1) with + | None => Stuck + | Some m2 => Next rsw m2 + end + end. + +Definition parexec_store_o_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_o) (a: ireg) (ofs: offset) := + match gpreg_o_expand s with + | (s0, s1, s2, s3) => + match Mem.storev Many64 mr (Val.offset_ptr (rsr a) ofs) (rsr s0) with + | None => Stuck + | Some m1 => + match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) (rsr s1) with + | None => Stuck + | Some m2 => + match Mem.storev Many64 m2 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 16))) (rsr s2) with + | None => Stuck + | Some m3 => + match Mem.storev Many64 m3 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 24))) (rsr s3) with + | None => Stuck + | Some m4 => Next rsw m4 + end + end + end + end + end. + + +Definition load_chunk n := + match n with + | Plb => Mint8signed + | Plbu => Mint8unsigned + | Plh => Mint16signed + | Plhu => Mint16unsigned + | Plw => Mint32 + | Plw_a => Many32 + | Pld => Mint64 + | Pld_a => Many64 + | Pfls => Mfloat32 + | Pfld => Mfloat64 + end. + +Definition store_chunk n := + match n with + | Psb => Mint8unsigned + | Psh => Mint16unsigned + | Psw => Mint32 + | Psw_a => Many32 + | Psd => Mint64 + | Psd_a => Many64 + | Pfss => Mfloat32 + | Pfsd => Mfloat64 + end. + +(** ** Basic (instruction) step *) + +Definition bstep (bi: basic) (rsr rsw: regset) (mr mw: mem) := + match bi with + | PArith ai => Next (parexec_arith_instr ai rsr rsw) mw + + | PLoad (PLoadRRO trap n d a ofs) => parexec_load_offset trap (load_chunk n) rsr rsw mr mw d a ofs + | PLoad (PLoadRRR trap n d a ro) => parexec_load_reg trap (load_chunk n) rsr rsw mr mw d a ro + | PLoad (PLoadRRRXS trap n d a ro) => parexec_load_regxs trap (load_chunk n) rsr rsw mr mw d a ro + | PLoad (PLoadQRRO d a ofs) => + parexec_load_q_offset rsr rsw mr mw d a ofs + | PLoad (PLoadORRO d a ofs) => + parexec_load_o_offset rsr rsw mr mw d a ofs + + | PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs + | PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro + | PStoreRRRXS n s a ro => parexec_store_regxs (store_chunk n) rsr rsw mr mw s a ro + | PStoreQRRO s a ofs => + parexec_store_q_offset rsr rsw mr mw s a ofs + | PStoreORRO s a ofs => + parexec_store_o_offset rsr rsw mr mw s a ofs + + | Pallocframe sz pos => + let (mw, stk) := Mem.alloc mr 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + match Mem.storev Mptr mw (Val.offset_ptr sp pos) rsr#SP with + | None => Stuck + | Some mw => Next (rsw #FP <- (rsr SP) #SP <- sp #RTMP <- Vundef) mw + end + + | Pfreeframe sz pos => + match Mem.loadv Mptr mr (Val.offset_ptr rsr#SP pos) with + | None => Stuck + | Some v => + match rsr SP with + | Vptr stk ofs => + match Mem.free mr stk 0 sz with + | None => Stuck + | Some mw => Next (rsw#SP <- v #RTMP <- Vundef) mw + end + | _ => Stuck + end + end + | Pget rd ra => + match ra with + | RA => Next (rsw#rd <- (rsr#ra)) mw + | _ => Stuck + end + | Pset ra rd => + match ra with + | RA => Next (rsw#ra <- (rsr#rd)) mw + | _ => Stuck + end + | Pnop => Next rsw mw +end. + +(** *** parexec with writes-in-order *) +Fixpoint parexec_wio_body (body: list basic) (rsr rsw: regset) (mr mw: mem) := + match body with + | nil => Next rsw mw + | bi::body' => + match bstep bi rsr rsw mr mw with + | Next rsw mw => parexec_wio_body body' rsr rsw mr mw + | Stuck => Stuck + end + end. + +(* TODO: redundant w.r.t Machblock ?? *) +Lemma in_dec (lbl: label) (l: list label): { List.In lbl l } + { ~(List.In lbl l) }. +Proof. + apply List.in_dec. + apply Pos.eq_dec. +Qed. + + + +(* Note: copy-paste from Machblock *) +Definition is_label (lbl: label) (bb: bblock) : bool := + if in_dec lbl (header bb) then true else false. + +Lemma is_label_correct_true lbl bb: + List.In lbl (header bb) <-> is_label lbl bb = true. +Proof. + unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition. +Qed. + +Lemma is_label_correct_false lbl bb: + ~(List.In lbl (header bb)) <-> is_label lbl bb = false. +Proof. + unfold is_label; destruct (in_dec lbl (header bb)); cbn; intuition. +Qed. + + + +(** **** convert a label into a position in the code *) +Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z := + match lb with + | nil => None + | b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb' + end. + +Definition par_goto_label (f: function) (lbl: label) (rsr rsw: regset) (mw: mem) := + match label_pos lbl 0 (fn_blocks f) with + | None => Stuck + | Some pos => + match rsr#PC with + | Vptr b ofs => Next (rsw#PC <- (Vptr b (Ptrofs.repr pos))) mw + | _ => Stuck + end + end. + +(** **** Parallel Evaluation of a branch *) + +(** Warning: PC is assumed to be already pointing on the next bundle ! *) + +Definition par_eval_branch (f: function) (l: label) (rsr rsw: regset) (mw: mem) (res: option bool) := + match res with + | Some true => par_goto_label f l rsr rsw mw + | Some false => Next (rsw # PC <- (rsr PC)) mw + | None => Stuck + end. + + +(** **** Parallel execution of a control-flow instruction *) + +(** As above: PC is assumed to be incremented on the next block before the control-flow instruction +*) + +Definition parexec_control (f: function) (oc: option control) (rsr rsw: regset) (mw: mem) := + match oc with + | None => Next (rsw#PC <- (rsr#PC)) mw + | Some ic => (**r Branch Control Unit instructions *) + match ic with + | Pret => + Next (rsw#PC <- (rsr#RA)) mw + | Pcall s => + Next (rsw#RA <- (rsr#PC) #PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw + | Picall r => + Next (rsw#RA <- (rsr#PC) #PC <- (rsr#r)) mw + | Pjumptable r tbl => + match rsr#r with + | Vint n => + match list_nth_z tbl (Int.unsigned n) with + | None => Stuck + | Some lbl => par_goto_label f lbl rsr (rsw #GPR62 <- Vundef #GPR63 <- Vundef) mw + end + | _ => Stuck + end + | Pgoto s => + Next (rsw#PC <- (Genv.symbol_address ge s Ptrofs.zero)) mw + | Pigoto r => + Next (rsw#PC <- (rsr#r)) mw + | Pj_l l => + par_goto_label f l rsr rsw mw + | Pcb bt r l => + match cmp_for_btest bt with + | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.cmp_bool c rsr#r (Vint (Int.repr 0))) + | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.cmpl_bool c rsr#r (Vlong (Int64.repr 0))) + | (None, _) => Stuck + end + | Pcbu bt r l => + match cmpu_for_btest bt with + | (Some c, Int) => par_eval_branch f l rsr rsw mw (Val.mxcmpu_bool c rsr#r (Vint (Int.repr 0))) + | (Some c, Long) => par_eval_branch f l rsr rsw mw (Val.mxcmplu_bool c rsr#r (Vlong (Int64.repr 0))) + | (None, _) => Stuck + end + (**r Pseudo-instructions *) + | Pbuiltin ef args res => + Stuck (**r treated specially below *) + end + end. + + +Definition incrPC size_b (rs: regset) := + rs#PC <- (Val.offset_ptr rs#PC size_b). + +(** parallel execution of the exit instruction of a bundle *) +Definition estep (f: function) ext size_b (rsr rsw: regset) (mw: mem) + := parexec_control f ext (incrPC size_b rsr) rsw mw. + +Definition parexec_wio f bdy ext size_b (rs: regset) (m: mem): outcome := + match parexec_wio_body bdy rs rs m m with + | Next rsw mw => estep f ext size_b rs rsw mw + | Stuck => Stuck + end. + +(** *** non-deterministic (out-of-order writes) parallel execution of bundles *) +Definition parexec_bblock (f: function) (bundle: bblock) (rs: regset) (m: mem) (o: outcome): Prop := + exists bdy1 bdy2, Permutation (bdy1++bdy2) (body bundle) /\ + o=match parexec_wio f bdy1 (exit bundle) (Ptrofs.repr (size bundle)) rs m with + | Next rsw mw => parexec_wio_body bdy2 rs rsw m mw + | Stuck => Stuck + end. + +(** *** deterministic parallel (out-of-order writes) execution of bundles *) +Definition det_parexec (f: function) (bundle: bblock) (rs: regset) (m: mem) rs' m': Prop := + forall o, parexec_bblock f bundle rs m o -> o = Next rs' m'. + + +(** *** Translation of the LTL/Linear/Mach view of machine registers to + the assembly view. Note that no LTL register maps to [X31]. This + register is reserved as temporary, to be used by the generated RV32G + code. *) + + +(** **** Undefine all registers except SP and callee-save registers *) + +Definition undef_caller_save_regs (rs: regset) : regset := + fun r => + if preg_eq r SP + || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs)) + then rs r + else Vundef. + +(** **** Extract the values of the arguments of an external call. + We exploit the calling conventions from module [Conventions], except that + we use assembly registers instead of locations. *) + +Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := + | extcall_arg_reg: forall r, + extcall_arg rs m (R r) (rs (preg_of r)) + | extcall_arg_stack: forall ofs ty bofs v, + bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> + Mem.loadv (chunk_of_type ty) m + (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v -> + extcall_arg rs m (S Outgoing ofs ty) v. + +Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop := + | extcall_arg_one: forall l v, + extcall_arg rs m l v -> + extcall_arg_pair rs m (One l) v + | extcall_arg_twolong: forall hi lo vhi vlo, + extcall_arg rs m hi vhi -> + extcall_arg rs m lo vlo -> + extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo). + +Definition extcall_arguments + (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := + list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args. + + +Definition loc_external_result (sg: signature) : rpair preg := + map_rpair preg_of (loc_result sg). + + +(** ** Looking up bblocks in a code sequence by position. *) +Fixpoint find_bblock (pos: Z) (lb: bblocks) {struct lb} : option bblock := + match lb with + | nil => None + | b :: il => + if zlt pos 0 then None (*r NOTE: It is impossible to branch inside a block *) + else if zeq pos 0 then Some b + else find_bblock (pos - (size b)) il + end. + + +Inductive state: Type := + | State: regset -> mem -> state. + +Definition nextblock (b:bblock) (rs: regset) := + incrPC (Ptrofs.repr (size b)) rs. + +Inductive step: state -> trace -> state -> Prop := + | exec_step_internal: + forall b ofs f bundle rs m rs' m', + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle -> + det_parexec f bundle rs m rs' m' -> + step (State rs m) E0 (State rs' m') + | exec_step_builtin: + forall b ofs f ef args res rs m vargs t vres rs' m' bi, + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) f.(fn_blocks) = Some bi -> + exit bi = Some (PExpand (Pbuiltin ef args res)) -> + eval_builtin_args ge rs (rs SP) m args vargs -> + external_call ef ge vargs m t vres m' -> + rs' = nextblock bi + (set_res res vres + (undef_regs (map preg_of (destroyed_by_builtin ef)) + (rs#RTMP <- Vundef))) -> + step (State rs m) t (State rs' m') + | exec_step_external: + forall b ef args res rs m t rs' m', + rs PC = Vptr b Ptrofs.zero -> + Genv.find_funct_ptr ge b = Some (External ef) -> + external_call ef ge args m t res m' -> + extcall_arguments rs m (ef_sig ef) args -> + rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) -> + step (State rs m) t (State rs' m') + . + + +(** parallel in-order writes execution of bundles *) +Definition parexec_wio_bblock (f: function) (b: bblock) (rs: regset) (m: mem): outcome := + parexec_wio f (body b) (exit b) (Ptrofs.repr (size b)) rs m. + + +Lemma parexec_bblock_write_in_order f b rs m: + parexec_bblock f b rs m (parexec_wio_bblock f b rs m). +Proof. + exists (body b). exists nil. + constructor 1. + - rewrite app_nil_r; auto. + - unfold parexec_wio_bblock. + destruct (parexec_wio f _ _ _); cbn; auto. +Qed. + + +Local Hint Resolve parexec_bblock_write_in_order: core. + +Lemma det_parexec_write_in_order f b rs m rs' m': + det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'. +Proof. + unfold det_parexec; auto. +Qed. + +End RELSEM. + +(** ** Execution of whole programs. *) + +Inductive initial_state (p: program): state -> Prop := + | initial_state_intro: forall m0, + let ge := Genv.globalenv p in + let rs0 := + (Pregmap.init Vundef) + # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero) + # SP <- Vnullptr + # RA <- Vnullptr in + Genv.init_mem p = Some m0 -> + initial_state p (State rs0 m0). + +Inductive final_state: state -> int -> Prop := + | final_state_intro: forall rs m r, + rs PC = Vnullptr -> + rs GPR0 = Vint r -> + final_state (State rs m) r. + +Definition semantics (p: program) := + Semantics step (initial_state p) final_state (Genv.globalenv p). + +Remark extcall_arguments_determ: + forall rs m sg args1 args2, + extcall_arguments rs m sg args1 -> extcall_arguments rs m sg args2 -> args1 = args2. +Proof. + intros until m. + assert (A: forall l v1 v2, + extcall_arg rs m l v1 -> extcall_arg rs m l v2 -> v1 = v2). + { intros. inv H; inv H0; congruence. } + assert (B: forall p v1 v2, + extcall_arg_pair rs m p v1 -> extcall_arg_pair rs m p v2 -> v1 = v2). + { intros. inv H; inv H0. + eapply A; eauto. + f_equal; eapply A; eauto. } + assert (C: forall ll vl1, list_forall2 (extcall_arg_pair rs m) ll vl1 -> + forall vl2, list_forall2 (extcall_arg_pair rs m) ll vl2 -> vl1 = vl2). + { + induction 1; intros vl2 EA; inv EA. + auto. + f_equal; eauto. } + intros. eapply C; eauto. +Qed. + +Lemma semantics_determinate p: determinate (semantics p). +Proof. +Ltac Equalities := + match goal with + | [ H1: ?a = ?b, H2: ?a = ?c |- _ ] => + rewrite H1 in H2; inv H2; Equalities + | _ => idtac + end. +Ltac Det_WIO X := + match goal with + | [ H: det_parexec _ _ _ _ _ _ _ |- _ ] => + exploit det_parexec_write_in_order; [ eapply H | idtac]; clear H; intro X + | _ => idtac + end. + intros; constructor; cbn. +- (* determ *) intros s t1 s1 t2 s2 H H0. inv H; Det_WIO X1; + inv H0; Det_WIO X2; Equalities. + + split. constructor. auto. + + unfold parexec_wio_bblock, parexec_wio in X1. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate. + rewrite H8 in X1. discriminate. + + unfold parexec_wio_bblock, parexec_wio in X2. destruct (parexec_wio_body _ _ _ _ _ _); try discriminate. + rewrite H4 in X2. discriminate. + + assert (vargs0 = vargs) by (eapply eval_builtin_args_determ; eauto). subst vargs0. + exploit external_call_determ. eexact H6. eexact H13. intros [A B]. + split. auto. intros. destruct B; auto. subst. auto. + + assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0. + exploit external_call_determ. eexact H3. eexact H8. intros [A B]. + split. auto. intros. destruct B; auto. subst. auto. +- (* trace length *) + red; intros. inv H; cbn. + lia. + eapply external_call_trace_length; eauto. + eapply external_call_trace_length; eauto. +- (* initial states *) + intros s1 s2 H H0; inv H; inv H0; f_equal; congruence. +- (* final no step *) + intros s r H; assert (NOTNULL: forall b ofs, Vnullptr <> Vptr b ofs). + { intros; unfold Vnullptr; destruct Archi.ptr64; congruence. } + inv H. red; intros; red; intros. + inv H; rewrite H0 in *; eelim NOTNULL; eauto. +- (* final states *) + intros s r1 r2 H H0; inv H; inv H0. congruence. +Qed. diff --git a/kvx/BTL_SEsimplify.v b/kvx/BTL_SEsimplify.v new file mode 120000 index 00000000..f190e6d5 --- /dev/null +++ b/kvx/BTL_SEsimplify.v @@ -0,0 +1 @@ +../aarch64/BTL_SEsimplify.v
\ No newline at end of file diff --git a/kvx/Builtins1.v b/kvx/Builtins1.v new file mode 100644 index 00000000..441345bf --- /dev/null +++ b/kvx/Builtins1.v @@ -0,0 +1,61 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Platform-specific built-in functions *) + +Require Import String Coqlib. +Require Import AST Integers Floats Values ExtFloats. +Require Import Builtins0. + +Inductive platform_builtin : Type := +| BI_fmin +| BI_fmax +| BI_fminf +| BI_fmaxf +| BI_fma +| BI_fmaf. + +Local Open Scope string_scope. + +Definition platform_builtin_table : list (string * platform_builtin) := + ("__builtin_fmin", BI_fmin) + :: ("__builtin_fmax", BI_fmax) + :: ("__builtin_fminf", BI_fminf) + :: ("__builtin_fmaxf", BI_fmaxf) + :: ("__builtin_fma", BI_fma) + :: ("__builtin_fmaf", BI_fmaf) + :: nil. + +Definition platform_builtin_sig (b: platform_builtin) : signature := + match b with + | BI_fmin | BI_fmax => + mksignature (Tfloat :: Tfloat :: nil) Tfloat cc_default + | BI_fminf | BI_fmaxf => + mksignature (Tsingle :: Tsingle :: nil) Tsingle cc_default + | BI_fma => + mksignature (Tfloat :: Tfloat :: Tfloat :: nil) Tfloat cc_default + | BI_fmaf => + mksignature (Tsingle :: Tsingle :: Tsingle :: nil) Tsingle cc_default + end. + +Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (sig_res (platform_builtin_sig b)) := + match b with + | BI_fmin => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.min + | BI_fmax => mkbuiltin_n2t Tfloat Tfloat Tfloat ExtFloat.max + | BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min + | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max + | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma + | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma + end. diff --git a/kvx/CBuiltins.ml b/kvx/CBuiltins.ml new file mode 100644 index 00000000..7398e0f4 --- /dev/null +++ b/kvx/CBuiltins.ml @@ -0,0 +1,145 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* Processor-dependent builtin C functions *) + +open C + +let builtins = { + builtin_typedefs = [ + "__builtin_va_list", TPtr(TVoid [], []) + ]; + (* The builtin list is inspired from the GCC file builtin_kvx.h *) + builtin_functions = [ (* Some builtins are commented out because their opcode is not present (yet?) *) + (* BCU Instructions *) + "__builtin_kvx_await", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_barrier", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_doze", (TVoid [], [], false); (* opcode not supported in assembly, not in documentation *) + "__builtin_kvx_wfxl", (TVoid [], [TInt(IUChar, []); TInt(ILongLong, [])], false); (* DONE *) + "__builtin_kvx_wfxm", (TVoid [], [TInt(IUChar, []); TInt(ILongLong, [])], false); (* DONE *) + "__builtin_kvx_sleep", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_stop", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_syncgroup", (TVoid [], [TInt(IULongLong, [])], false); + "__builtin_kvx_tlbread", (TVoid [], [], false); + "__builtin_kvx_tlbwrite", (TVoid [], [], false); + "__builtin_kvx_tlbprobe", (TVoid [], [], false); + "__builtin_kvx_tlbdinval", (TVoid [], [], false); + "__builtin_kvx_tlbiinval", (TVoid [], [], false); + + "__builtin_kvx_get", (TInt(IULongLong, []), [TInt(IInt, [])], false); (* DONE *) + "__builtin_kvx_set", (TVoid [], [TInt(IInt, []); TInt(IULongLong, [])], false); (* DONE *) + + (* LSU Instructions *) + (* acswapd and acswapw done using headers and assembly *) +(* "__builtin_kvx_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false); + "__builtin_kvx_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false); *) (* see #157 *) + "__builtin_kvx_alclrd", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_alclrw", (TInt(IUInt, []), [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_dinval", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_dinvall", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_dtouchl", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_dzerol", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_fence", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_iinval", (TVoid [], [], false); (* DONE *) + "__builtin_kvx_iinvals", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_itouchl", (TVoid [], [TPtr(TVoid [], [])], false); (* DONE [not supported by assembler but in documentation] *) + "__builtin_kvx_lbsu", (TInt(IChar, []), [TPtr(TVoid [], [])], false); + "__builtin_kvx_lbzu", (TInt(IUChar, []), [TPtr(TVoid [], [])], false); + "__builtin_kvx_ldu", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *) + "__builtin_kvx_lhsu", (TInt(IShort, []), [TPtr(TVoid [], [])], false); + "__builtin_kvx_lhzu", (TInt(IUShort, []), [TPtr(TVoid [], [])], false); + "__builtin_kvx_lwzu", (TInt(IUInt, []), [TPtr(TVoid [], [])], false); + + (* ALU Instructions *) + (* "__builtin_kvx_addhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *) + (* "__builtin_kvx_adds", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *) + (* "__builtin_kvx_bwlu", (TInt(IUInt, []), + [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, []); TInt(IUShort, [])], false); *) + (* "__builtin_kvx_bwluhp", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, [])], false); *) + (* "__builtin_kvx_bwluwp", (TInt(IULongLong, []), + [TInt(IULongLong, []); TInt(IULongLong, []); TInt(IUInt, [])], false); *) + (* "__builtin_kvx_cbs", (TInt(IInt, []), [TInt(IUInt, [])], false); *) + (* "__builtin_kvx_cbsdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *) + (* "__builtin_kvx_clz", (TInt(IInt, []), [TInt(IUInt, [])], false); *) + "__builtin_kvx_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false); + "__builtin_clzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); + "__builtin_kvx_ctzw", (TInt(IInt, []), [TInt(IUInt, [])], false); + "__builtin_ctzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); + (* "__builtin_kvx_clzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *) + (* "__builtin_kvx_cmove", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *) + (* "__builtin_kvx_ctz", (TInt(IInt, []), [TInt(IUInt, [])], false); *) + "__builtin_kvx_ctzw", (TInt(IInt, []), [TInt(IUInt, [])], false); + "__builtin_kvx_ctzd", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); + (* "__builtin_kvx_ctzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *) + (* "__builtin_kvx_extfz", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, []); TInt(IUInt, [])], false); *) + (* "__builtin_kvx_landhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *) + (* "__builtin_kvx_sat", (TInt(IInt, []), [TInt(IInt, []); TInt(IUChar, [])], false); *) + "__builtin_kvx_satd", (TInt(ILongLong, []), [TInt(ILongLong, []); TInt(IUChar, [])], false); + (* "__builtin_kvx_sbfhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *) + "__builtin_kvx_sbmm8", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false); + "__builtin_kvx_sbmmt8", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false); + (* "__builtin_kvx_sllhps", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *) + (* "__builtin_kvx_srahps", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *) + (* "__builtin_kvx_stsu", (TInt(IUInt, []), [TInt(IUInt, []); TInt(IUInt, [])], false); *) + "__builtin_kvx_stsud", (TInt(IULongLong, []), [TInt(IULongLong, []); TInt(IULongLong, [])], false); + + + (* Synchronization *) +(* "__builtin_fence", + (TVoid [], [], false); *) +(* (* Float arithmetic *) + "__builtin_fmadd", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fmsub", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fnmadd", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fnmsub", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); *) + "__builtin_fabsf", + (TFloat(FFloat, []), + [TFloat(FFloat, [])], false); + "__builtin_fmax", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fmin", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fmaxf", + (TFloat(FFloat, []), + [TFloat(FFloat, []); TFloat(FFloat, [])], false); + "__builtin_fminf", + (TFloat(FFloat, []), + [TFloat(FFloat, []); TFloat(FFloat, [])], false); + "__builtin_fma", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fmaf", + (TFloat(FFloat, []), + [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false); +] +} + +let va_list_type = TPtr(TVoid [], []) (* to check! *) +let size_va_list = if Archi.ptr64 then 8 else 4 +let va_list_scalar = true + +(* Expand memory references inside extended asm statements. Used in C2C. *) + +let asm_mem_argument arg = Printf.sprintf "0(%s)" arg diff --git a/kvx/CSE2deps.v b/kvx/CSE2deps.v new file mode 100644 index 00000000..c0deacf0 --- /dev/null +++ b/kvx/CSE2deps.v @@ -0,0 +1,35 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + else true + | (Ainstack ofs), (Ainstack ofs'), _, _ => + negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + | _, _, _, _ => true + end. diff --git a/kvx/CSE2depsproof.v b/kvx/CSE2depsproof.v new file mode 100644 index 00000000..a5f7b317 --- /dev/null +++ b/kvx/CSE2depsproof.v @@ -0,0 +1,146 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296. +Proof. + unfold Ptrofs.modulus. + rewrite ptrofs_size. + destruct Archi.ptr64; reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : ptrofs. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr + \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + cbn in *. + inv ADDRR. + inv ADDRW. + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW]; + rewrite OFSW). + all: try rewrite ptrofs_modulus in *. + all: destruct Archi.ptr64. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + - (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + cbn in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + cbn in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + - (* Ainstack / Ainstack *) + destruct args. 2: discriminate. + destruct args'. 2: discriminate. + cbn in OVERLAP. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + cbn in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. +Qed. + +End SOUNDNESS. diff --git a/kvx/Chunks.v b/kvx/Chunks.v new file mode 100644 index 00000000..86d4f0ac --- /dev/null +++ b/kvx/Chunks.v @@ -0,0 +1,36 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import AST. +Require Import Values. +Require Import Integers. +Require Import Coq.ZArith.BinIntDef. +Require Import BinNums. + +Local Open Scope Z_scope. + +Definition zscale_of_chunk (chunk: memory_chunk) : Z := + match chunk with + | Mint8signed => 0 + | Mint8unsigned => 0 + | Mint16signed => 1 + | Mint16unsigned => 1 + | Mint32 => 2 + | Mint64 => 3 + | Mfloat32 => 2 + | Mfloat64 => 3 + | Many32 => 2 + | Many64 => 3 + end. +Definition scale_of_chunk chunk := Vint (Int.repr (zscale_of_chunk chunk)). diff --git a/kvx/CombineOp.v b/kvx/CombineOp.v new file mode 100644 index 00000000..ff1db3cd --- /dev/null +++ b/kvx/CombineOp.v @@ -0,0 +1,141 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Recognition of combined operations, addressing modes and conditions + during the [CSE] phase. *) + +Require Import Coqlib. +Require Import AST. +Require Import Integers. +Require Import Op. +Require Import CSEdomain. + +Section COMBINE. + +Variable get: valnum -> option rhs. + +Function combine_compimm_ne_0 (x: valnum) : option(condition * list valnum) := + match get x with + | Some(Op (Ocmp c) ys) => Some (c, ys) + | _ => None + end. + +Function combine_compimm_eq_0 (x: valnum) : option(condition * list valnum) := + match get x with + | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys) + | _ => None + end. + +Function combine_compimm_eq_1 (x: valnum) : option(condition * list valnum) := + match get x with + | Some(Op (Ocmp c) ys) => Some (c, ys) + | _ => None + end. + +Function combine_compimm_ne_1 (x: valnum) : option(condition * list valnum) := + match get x with + | Some(Op (Ocmp c) ys) => Some (negate_condition c, ys) + | _ => None + end. + +Function combine_cond (cond: condition) (args: list valnum) : option(condition * list valnum) := + match cond, args with + | Ccompimm Cne n, x::nil => + if Int.eq_dec n Int.zero then combine_compimm_ne_0 x + else if Int.eq_dec n Int.one then combine_compimm_ne_1 x + else None + | Ccompimm Ceq n, x::nil => + if Int.eq_dec n Int.zero then combine_compimm_eq_0 x + else if Int.eq_dec n Int.one then combine_compimm_eq_1 x + else None + | Ccompuimm Cne n, x::nil => + if Int.eq_dec n Int.zero then combine_compimm_ne_0 x + else if Int.eq_dec n Int.one then combine_compimm_ne_1 x + else None + | Ccompuimm Ceq n, x::nil => + if Int.eq_dec n Int.zero then combine_compimm_eq_0 x + else if Int.eq_dec n Int.one then combine_compimm_eq_1 x + else None + | _, _ => None + end. + +Function combine_addr (addr: addressing) (args: list valnum) : option(addressing * list valnum) := + match addr, args with + | Aindexed n, x::nil => + match get x with + | Some(Op (Oaddimm m) ys) => + if Archi.ptr64 then None else Some(Aindexed (Ptrofs.add (Ptrofs.of_int m) n), ys) + | Some(Op (Oaddlimm m) ys) => + if Archi.ptr64 then Some(Aindexed (Ptrofs.add (Ptrofs.of_int64 m) n), ys) else None + | _ => None + end + | _, _ => None + end. + +Function combine_op (op: operation) (args: list valnum) : option(operation * list valnum) := + match op, args with + | Oaddimm n, x :: nil => + match get x with + | Some(Op (Oaddimm m) ys) => Some(Oaddimm (Int.add m n), ys) + | _ => None + end + | Oandimm n, x :: nil => + match get x with + | Some(Op (Oandimm m) ys) => + Some(let p := Int.and m n in + if Int.eq p m then (Omove, x :: nil) else (Oandimm p, ys)) + | _ => None + end + | Oorimm n, x :: nil => + match get x with + | Some(Op (Oorimm m) ys) => Some(Oorimm (Int.or m n), ys) + | _ => None + end + | Oxorimm n, x :: nil => + match get x with + | Some(Op (Oxorimm m) ys) => Some(Oxorimm (Int.xor m n), ys) + | _ => None + end + | Oaddlimm n, x :: nil => + match get x with + | Some(Op (Oaddlimm m) ys) => Some(Oaddlimm (Int64.add m n), ys) + | _ => None + end + | Oandlimm n, x :: nil => + match get x with + | Some(Op (Oandlimm m) ys) => + Some(let p := Int64.and m n in + if Int64.eq p m then (Omove, x :: nil) else (Oandlimm p, ys)) + | _ => None + end + | Oorlimm n, x :: nil => + match get x with + | Some(Op (Oorlimm m) ys) => Some(Oorlimm (Int64.or m n), ys) + | _ => None + end + | Oxorlimm n, x :: nil => + match get x with + | Some(Op (Oxorlimm m) ys) => Some(Oxorlimm (Int64.xor m n), ys) + | _ => None + end + | Ocmp cond, _ => + match combine_cond cond args with + | Some(cond', args') => Some(Ocmp cond', args') + | None => None + end + | _, _ => None + end. + +End COMBINE. diff --git a/kvx/CombineOpproof.v b/kvx/CombineOpproof.v new file mode 100644 index 00000000..5dffc565 --- /dev/null +++ b/kvx/CombineOpproof.v @@ -0,0 +1,176 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Recognition of combined operations, addressing modes and conditions + during the [CSE] phase. *) + +Require Import FunInd. +Require Import Coqlib. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Memory. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import CSEdomain. +Require Import CombineOp. + +Section COMBINE. + +Variable ge: genv. +Variable sp: val. +Variable m: mem. +Variable get: valnum -> option rhs. +Variable valu: valnum -> val. +Hypothesis get_sound: forall v rhs, get v = Some rhs -> rhs_eval_to valu ge sp m rhs (valu v). + +Lemma get_op_sound: + forall v op vl, get v = Some (Op op vl) -> eval_operation ge sp op (map valu vl) m = Some (valu v). +Proof. + intros. exploit get_sound; eauto. intros REV; inv REV; auto. +Qed. + +Ltac UseGetSound := + match goal with + | [ H: get _ = Some _ |- _ ] => + let x := fresh "EQ" in (generalize (get_op_sound _ _ _ H); intros x; cbn in x; FuncInv) + end. + +Lemma combine_compimm_ne_0_sound: + forall x cond args, + combine_compimm_ne_0 get x = Some(cond, args) -> + eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.zero) /\ + eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.zero). +Proof. + intros until args. functional induction (combine_compimm_ne_0 get x); intros EQ; inv EQ. + (* of cmp *) + UseGetSound. rewrite <- H. + destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto. +Qed. + +Lemma combine_compimm_eq_0_sound: + forall x cond args, + combine_compimm_eq_0 get x = Some(cond, args) -> + eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.zero) /\ + eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.zero). +Proof. + intros until args. functional induction (combine_compimm_eq_0 get x); intros EQ; inv EQ. + (* of cmp *) + UseGetSound. rewrite <- H. + rewrite eval_negate_condition. + destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto. +Qed. + +Lemma combine_compimm_eq_1_sound: + forall x cond args, + combine_compimm_eq_1 get x = Some(cond, args) -> + eval_condition cond (map valu args) m = Val.cmp_bool Ceq (valu x) (Vint Int.one) /\ + eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Ceq (valu x) (Vint Int.one). +Proof. + intros until args. functional induction (combine_compimm_eq_1 get x); intros EQ; inv EQ. + (* of cmp *) + UseGetSound. rewrite <- H. + destruct (eval_condition cond (map valu args) m); cbn; auto. destruct b; auto. +Qed. + +Lemma combine_compimm_ne_1_sound: + forall x cond args, + combine_compimm_ne_1 get x = Some(cond, args) -> + eval_condition cond (map valu args) m = Val.cmp_bool Cne (valu x) (Vint Int.one) /\ + eval_condition cond (map valu args) m = Val.cmpu_bool (Mem.valid_pointer m) Cne (valu x) (Vint Int.one). +Proof. + intros until args. functional induction (combine_compimm_ne_1 get x); intros EQ; inv EQ. + (* of cmp *) + UseGetSound. rewrite <- H. + rewrite eval_negate_condition. + destruct (eval_condition c (map valu args) m); cbn; auto. destruct b; auto. +Qed. + +Theorem combine_cond_sound: + forall cond args cond' args', + combine_cond get cond args = Some(cond', args') -> + eval_condition cond' (map valu args') m = eval_condition cond (map valu args) m. +Proof. + intros. functional inversion H; subst. + (* compimm ne zero *) + - cbn; eapply combine_compimm_ne_0_sound; eauto. + (* compimm ne one *) + - cbn; eapply combine_compimm_ne_1_sound; eauto. + (* compimm eq zero *) + - cbn; eapply combine_compimm_eq_0_sound; eauto. + (* compimm eq one *) + - cbn; eapply combine_compimm_eq_1_sound; eauto. + (* compuimm ne zero *) + - cbn; eapply combine_compimm_ne_0_sound; eauto. + (* compuimm ne one *) + - cbn; eapply combine_compimm_ne_1_sound; eauto. + (* compuimm eq zero *) + - cbn; eapply combine_compimm_eq_0_sound; eauto. + (* compuimm eq one *) + - cbn; eapply combine_compimm_eq_1_sound; eauto. +Qed. + +Theorem combine_addr_sound: + forall addr args addr' args', + combine_addr get addr args = Some(addr', args') -> + eval_addressing ge sp addr' (map valu args') = eval_addressing ge sp addr (map valu args). +Proof. + intros. functional inversion H; subst. +- (* indexed - addimm *) + UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn. + rewrite Ptrofs.add_assoc. auto. +- (* indexed - addimml *) + UseGetSound. cbn. rewrite <- H0. destruct v; auto. cbn; rewrite H7; cbn. + rewrite Ptrofs.add_assoc. auto. +Qed. + +Theorem combine_op_sound: + forall op args op' args', + combine_op get op args = Some(op', args') -> + eval_operation ge sp op' (map valu args') m = eval_operation ge sp op (map valu args) m. +Proof. + intros. functional inversion H; subst. + (* addimm - addimm *) + - UseGetSound. FuncInv. cbn. + rewrite <- H0. rewrite Val.add_assoc. auto. + (* andimm - andimm *) + - UseGetSound; cbn. + generalize (Int.eq_spec p m0); rewrite H7; intros. + rewrite <- H0. rewrite Val.and_assoc. cbn. fold p. rewrite H1. auto. + - UseGetSound; cbn. + rewrite <- H0. rewrite Val.and_assoc. auto. + (* orimm - orimm *) + - UseGetSound. cbn. rewrite <- H0. rewrite Val.or_assoc. auto. + (* xorimm - xorimm *) + - UseGetSound. cbn. rewrite <- H0. rewrite Val.xor_assoc. auto. + (* addlimm - addlimm *) + - UseGetSound. FuncInv. cbn. + rewrite <- H0. rewrite Val.addl_assoc. auto. + (* andlimm - andlimm *) + - UseGetSound; cbn. + generalize (Int64.eq_spec p m0); rewrite H7; intros. + rewrite <- H0. rewrite Val.andl_assoc. cbn. fold p. rewrite H1. auto. + - UseGetSound; cbn. + rewrite <- H0. rewrite Val.andl_assoc. auto. + (* orlimm - orlimm *) + - UseGetSound. cbn. rewrite <- H0. rewrite Val.orl_assoc. auto. + (* xorlimm - xorlimm *) + - UseGetSound. cbn. rewrite <- H0. rewrite Val.xorl_assoc. auto. + (* cmp *) + - cbn. decEq; decEq. eapply combine_cond_sound; eauto. +Qed. + +End COMBINE. diff --git a/kvx/ConstpropOp.vp b/kvx/ConstpropOp.vp new file mode 100644 index 00000000..2a428020 --- /dev/null +++ b/kvx/ConstpropOp.vp @@ -0,0 +1,312 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Strength reduction for operators and conditions. + This is the machine-dependent part of [Constprop]. *) + +Require Archi. +Require Import Coqlib Compopts. +Require Import AST Integers Floats. +Require Import Op Registers. +Require Import ValueDomain. + +(** * Converting known values to constants *) + +Definition const_for_result (a: aval) : option operation := + match a with + | I n => Some(Ointconst n) + | L n => if Archi.ptr64 then Some(Olongconst n) else None + | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None + | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None + | Ptr(Gl id ofs) => Some(Oaddrsymbol id ofs) + | Ptr(Stk ofs) => Some(Oaddrstack ofs) + | _ => None + end. + +(** * Operator strength reduction *) + +(** We now define auxiliary functions for strength reduction of + operators and addressing modes: replacing an operator with a cheaper + one if some of its arguments are statically known. These are again + large pattern-matchings expressed in indirect style. *) + +Nondetfunction cond_strength_reduction + (cond: condition) (args: list reg) (vl: list aval) := + match cond, args, vl with + | Ccomp c, r1 :: r2 :: nil, I n1 :: v2 :: nil => + (Ccompimm (swap_comparison c) n1, r2 :: nil) + | Ccomp c, r1 :: r2 :: nil, v1 :: I n2 :: nil => + (Ccompimm c n2, r1 :: nil) + | Ccompu c, r1 :: r2 :: nil, I n1 :: v2 :: nil => + (Ccompuimm (swap_comparison c) n1, r2 :: nil) + | Ccompu c, r1 :: r2 :: nil, v1 :: I n2 :: nil => + (Ccompuimm c n2, r1 :: nil) + | Ccompl c, r1 :: r2 :: nil, L n1 :: v2 :: nil => + (Ccomplimm (swap_comparison c) n1, r2 :: nil) + | Ccompl c, r1 :: r2 :: nil, v1 :: L n2 :: nil => + (Ccomplimm c n2, r1 :: nil) + | Ccomplu c, r1 :: r2 :: nil, L n1 :: v2 :: nil => + (Ccompluimm (swap_comparison c) n1, r2 :: nil) + | Ccomplu c, r1 :: r2 :: nil, v1 :: L n2 :: nil => + (Ccompluimm c n2, r1 :: nil) + | _, _, _ => + (cond, args) + end. + +Definition make_cmp_base (c: condition) (args: list reg) (vl: list aval) := + let (c', args') := cond_strength_reduction c args vl in (Ocmp c', args'). + +Definition make_cmp_imm_eq (c: condition) (args: list reg) (vl: list aval) + (n: int) (r1: reg) (v1: aval) := + if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil) + else if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil) + else make_cmp_base c args vl. + +Definition make_cmp_imm_ne (c: condition) (args: list reg) (vl: list aval) + (n: int) (r1: reg) (v1: aval) := + if Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1) then (Omove, r1 :: nil) + else if Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1) then (Oxorimm Int.one, r1 :: nil) + else make_cmp_base c args vl. + +Nondetfunction make_cmp (c: condition) (args: list reg) (vl: list aval) := + match c, args, vl with + | Ccompimm Ceq n, r1 :: nil, v1 :: nil => + make_cmp_imm_eq c args vl n r1 v1 + | Ccompimm Cne n, r1 :: nil, v1 :: nil => + make_cmp_imm_ne c args vl n r1 v1 + | Ccompuimm Ceq n, r1 :: nil, v1 :: nil => + make_cmp_imm_eq c args vl n r1 v1 + | Ccompuimm Cne n, r1 :: nil, v1 :: nil => + make_cmp_imm_ne c args vl n r1 v1 + | _, _, _ => + make_cmp_base c args vl + end. + +Definition make_addimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Oaddimm n, r :: nil). + +Definition make_shlimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int.iwordsize then (Oshlimm n, r1 :: nil) + else (Oshl, r1 :: r2 :: nil). + +Definition make_shrimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int.iwordsize then (Oshrimm n, r1 :: nil) + else (Oshr, r1 :: r2 :: nil). + +Definition make_shruimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int.iwordsize then (Oshruimm n, r1 :: nil) + else (Oshru, r1 :: r2 :: nil). + +Definition make_mulimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then + (Ointconst Int.zero, nil) + else if Int.eq n Int.one then + (Omove, r1 :: nil) + else + match Int.is_power2 n with + | Some l => (Oshlimm l, r1 :: nil) + | None => (Omul, r1 :: r2 :: nil) + end. + +Definition make_andimm (n: int) (r: reg) (a: aval) := + if Int.eq n Int.zero then (Ointconst Int.zero, nil) + else if Int.eq n Int.mone then (Omove, r :: nil) + else if match a with Uns _ m => Int.eq (Int.zero_ext m (Int.not n)) Int.zero + | _ => false end + then (Omove, r :: nil) + else (Oandimm n, r :: nil). + +Definition make_orimm (n: int) (r: reg) := + if Int.eq n Int.zero then (Omove, r :: nil) + else if Int.eq n Int.mone then (Ointconst Int.mone, nil) + else (Oorimm n, r :: nil). + +Definition make_xorimm (n: int) (r: reg) := + if Int.eq n Int.zero then (Omove, r :: nil) + else (Oxorimm n, r :: nil). + +Definition make_divimm n (r1 r2: reg) := + if Int.eq n Int.one then + (Omove, r1 :: nil) + else + match Int.is_power2 n with + | Some l => if Int.ltu l (Int.repr 31) + then (Oshrximm l, r1 :: nil) + else (Odiv, r1 :: r2 :: nil) + | None => (Odiv, r1 :: r2 :: nil) + end. + +Definition make_divuimm n (r1 r2: reg) := + if Int.eq n Int.one then + (Omove, r1 :: nil) + else + match Int.is_power2 n with + | Some l => (Oshruimm l, r1 :: nil) + | None => (Odivu, r1 :: r2 :: nil) + end. + +Definition make_moduimm n (r1 r2: reg) := + match Int.is_power2 n with + | Some l => (Oandimm (Int.sub n Int.one), r1 :: nil) + | None => (Omodu, r1 :: r2 :: nil) + end. + +Definition make_addlimm (n: int64) (r: reg) := + if Int64.eq n Int64.zero + then (Omove, r :: nil) + else (Oaddlimm n, r :: nil). + +Definition make_shllimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int64.iwordsize' then (Oshllimm n, r1 :: nil) + else (Oshll, r1 :: r2 :: nil). + +Definition make_shrlimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int64.iwordsize' then (Oshrlimm n, r1 :: nil) + else (Oshrl, r1 :: r2 :: nil). + +Definition make_shrluimm (n: int) (r1 r2: reg) := + if Int.eq n Int.zero then (Omove, r1 :: nil) + else if Int.ltu n Int64.iwordsize' then (Oshrluimm n, r1 :: nil) + else (Oshrlu, r1 :: r2 :: nil). + +Definition make_mullimm (n: int64) (r1 r2: reg) := + if Int64.eq n Int64.zero then + (Olongconst Int64.zero, nil) + else if Int64.eq n Int64.one then + (Omove, r1 :: nil) + else + match Int64.is_power2' n with + | Some l => (Oshllimm l, r1 :: nil) + | None => (Omull, r1 :: r2 :: nil) + end. + +Definition make_andlimm (n: int64) (r: reg) (a: aval) := + if Int64.eq n Int64.zero then (Olongconst Int64.zero, nil) + else if Int64.eq n Int64.mone then (Omove, r :: nil) + else (Oandlimm n, r :: nil). + +Definition make_orlimm (n: int64) (r: reg) := + if Int64.eq n Int64.zero then (Omove, r :: nil) + else if Int64.eq n Int64.mone then (Olongconst Int64.mone, nil) + else (Oorlimm n, r :: nil). + +Definition make_xorlimm (n: int64) (r: reg) := + if Int64.eq n Int64.zero then (Omove, r :: nil) + else (Oxorlimm n, r :: nil). + +Definition make_divlimm n (r1 r2: reg) := + match Int64.is_power2' n with + | Some l => if Int.ltu l (Int.repr 63) + then (Oshrxlimm l, r1 :: nil) + else (Odivl, r1 :: r2 :: nil) + | None => (Odivl, r1 :: r2 :: nil) + end. + +Definition make_divluimm n (r1 r2: reg) := + match Int64.is_power2' n with + | Some l => (Oshrluimm l, r1 :: nil) + | None => (Odivlu, r1 :: r2 :: nil) + end. + +Definition make_modluimm n (r1 r2: reg) := + match Int64.is_power2 n with + | Some l => (Oandlimm (Int64.sub n Int64.one), r1 :: nil) + | None => (Omodlu, r1 :: r2 :: nil) + end. + +Definition make_mulfimm (n: float) (r r1 r2: reg) := + if Float.eq_dec n (Float.of_int (Int.repr 2)) + then (Oaddf, r :: r :: nil) + else (Omulf, r1 :: r2 :: nil). + +Definition make_mulfsimm (n: float32) (r r1 r2: reg) := + if Float32.eq_dec n (Float32.of_int (Int.repr 2)) + then (Oaddfs, r :: r :: nil) + else (Omulfs, r1 :: r2 :: nil). + +Definition make_cast8signed (r: reg) (a: aval) := + if vincl a (Sgn Ptop 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil). +Definition make_cast16signed (r: reg) (a: aval) := + if vincl a (Sgn Ptop 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil). + +Nondetfunction op_strength_reduction + (op: operation) (args: list reg) (vl: list aval) := + match op, args, vl with + | Ocast8signed, r1 :: nil, v1 :: nil => make_cast8signed r1 v1 + | Ocast16signed, r1 :: nil, v1 :: nil => make_cast16signed r1 v1 + | Oadd, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_addimm n1 r2 + | Oadd, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm n2 r1 + | Osub, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg n2) r1 + | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2 r1 + | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1 r2 + | Odiv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divimm n2 r1 r2 + | Odivu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divuimm n2 r1 r2 + | Omodu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_moduimm n2 r1 r2 + | Oand, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_andimm n1 r2 v2 + | Oand, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm n2 r1 v1 + | Oandimm n, r1 :: nil, v1 :: nil => make_andimm n r1 v1 + | Oor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_orimm n1 r2 + | Oor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm n2 r1 + | Oxor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_xorimm n1 r2 + | Oxor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm n2 r1 + | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2 + | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2 + | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2 + | Oaddl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_addlimm n1 r2 + | Oaddl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm n2 r1 + | Osubl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg n2) r1 + | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2 r1 + | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1 r2 + | Odivl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divlimm n2 r1 r2 + | Odivlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divluimm n2 r1 r2 + | Omodlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_modluimm n2 r1 r2 + | Oandl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_andlimm n1 r2 v2 + | Oandl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_andlimm n2 r1 v1 + | Oandlimm n, r1 :: nil, v1 :: nil => make_andlimm n r1 v1 + | Oorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_orlimm n1 r2 + | Oorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_orlimm n2 r1 + | Oxorl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_xorlimm n1 r2 + | Oxorl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_xorlimm n2 r1 + | Oshll, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shllimm n2 r1 r2 + | Oshrl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrlimm n2 r1 r2 + | Oshrlu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrluimm n2 r1 r2 + | Ocmp c, args, vl => make_cmp c args vl + | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2 + | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2 + | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2 + | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2 + | _, _, _ => (op, args) + end. + +Nondetfunction addr_strength_reduction + (addr: addressing) (args: list reg) (vl: list aval) := + match addr, args, vl with + | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil => + if (orb (Archi.pic_code tt) (negb (Compopts.optim_globaladdrtmp tt))) + then (addr, args) + else (Aglobal symb (Ptrofs.add n1 n), nil) + | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil => + (Ainstack (Ptrofs.add n1 n), nil) + | _, _, _ => + (addr, args) + end. + diff --git a/kvx/ConstpropOpproof.v b/kvx/ConstpropOpproof.v new file mode 100644 index 00000000..f67b8a4e --- /dev/null +++ b/kvx/ConstpropOpproof.v @@ -0,0 +1,749 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Correctness proof for operator strength reduction. *) + +Require Import Coqlib Compopts. +Require Import Integers Floats Values Memory Globalenvs Events. +Require Import Op Registers RTL ValueDomain. +Require Import ConstpropOp. +Require Import Lia. + +Section STRENGTH_REDUCTION. + +Variable bc: block_classification. +Variable ge: genv. +Hypothesis GENV: genv_match bc ge. +Variable sp: block. +Hypothesis STACK: bc sp = BCstack. +Variable ae: AE.t. +Variable e: regset. +Variable m: mem. +Hypothesis MATCH: ematch bc e ae. + +Lemma match_G: + forall r id ofs, + AE.get r ae = Ptr(Gl id ofs) -> Val.lessdef e#r (Genv.symbol_address ge id ofs). +Proof. + intros. apply vmatch_ptr_gl with bc; auto. rewrite <- H. apply MATCH. +Qed. + +Lemma match_S: + forall r ofs, + AE.get r ae = Ptr(Stk ofs) -> Val.lessdef e#r (Vptr sp ofs). +Proof. + intros. apply vmatch_ptr_stk with bc; auto. rewrite <- H. apply MATCH. +Qed. + +Ltac InvApproxRegs := + match goal with + | [ H: _ :: _ = _ :: _ |- _ ] => + injection H; clear H; intros; InvApproxRegs + | [ H: ?v = AE.get ?r ae |- _ ] => + generalize (MATCH r); rewrite <- H; clear H; intro; InvApproxRegs + | _ => idtac + end. + +Ltac SimplVM := + match goal with + | [ H: vmatch _ ?v (I ?n) |- _ ] => + let E := fresh in + assert (E: v = Vint n) by (inversion H; auto); + rewrite E in *; clear H; SimplVM + | [ H: vmatch _ ?v (L ?n) |- _ ] => + let E := fresh in + assert (E: v = Vlong n) by (inversion H; auto); + rewrite E in *; clear H; SimplVM + | [ H: vmatch _ ?v (F ?n) |- _ ] => + let E := fresh in + assert (E: v = Vfloat n) by (inversion H; auto); + rewrite E in *; clear H; SimplVM + | [ H: vmatch _ ?v (FS ?n) |- _ ] => + let E := fresh in + assert (E: v = Vsingle n) by (inversion H; auto); + rewrite E in *; clear H; SimplVM + | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] => + let E := fresh in + assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); + clear H; SimplVM + | [ H: vmatch _ ?v (Ptr(Stk ?ofs)) |- _ ] => + let E := fresh in + assert (E: Val.lessdef v (Vptr sp ofs)) by (eapply vmatch_ptr_stk; eauto); + clear H; SimplVM + | _ => idtac + end. + +Lemma const_for_result_correct: + forall a op v, + const_for_result a = Some op -> + vmatch bc v a -> + exists v', eval_operation ge (Vptr sp Ptrofs.zero) op nil m = Some v' /\ Val.lessdef v v'. +Proof. + unfold const_for_result. generalize Archi.ptr64; intros ptr64; intros. + destruct a; inv H; SimplVM. +- (* integer *) + exists (Vint n); auto. +- (* long *) + destruct ptr64; inv H2. exists (Vlong n); auto. +- (* float *) + destruct (Compopts.generate_float_constants tt); inv H2. exists (Vfloat f); auto. +- (* single *) + destruct (Compopts.generate_float_constants tt); inv H2. exists (Vsingle f); auto. +- (* pointer *) + destruct p; try discriminate; SimplVM. + + (* global *) + inv H2. exists (Genv.symbol_address ge id ofs); auto. + + (* stack *) + inv H2. exists (Vptr sp ofs); split; auto. cbn. rewrite Ptrofs.add_zero_l; auto. +Qed. + +Lemma cond_strength_reduction_correct: + forall cond args vl, + vl = map (fun r => AE.get r ae) args -> + let (cond', args') := cond_strength_reduction cond args vl in + eval_condition cond' e##args' m = eval_condition cond e##args m. +Proof. + intros until vl. unfold cond_strength_reduction. + case (cond_strength_reduction_match cond args vl); cbn; intros; InvApproxRegs; SimplVM. +- apply Val.swap_cmp_bool. +- auto. +- apply Val.swap_cmpu_bool. +- auto. +- apply Val.swap_cmpl_bool. +- auto. +- apply Val.swap_cmplu_bool. +- auto. +- auto. +Qed. + +Lemma make_cmp_base_correct: + forall c args vl, + vl = map (fun r => AE.get r ae) args -> + let (op', args') := make_cmp_base c args vl in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v + /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v. +Proof. + intros. unfold make_cmp_base. + generalize (cond_strength_reduction_correct c args vl H). + destruct (cond_strength_reduction c args vl) as [c' args']. intros EQ. + econstructor; split. cbn; eauto. rewrite EQ. auto. +Qed. + +Lemma make_cmp_correct: + forall c args vl, + vl = map (fun r => AE.get r ae) args -> + let (op', args') := make_cmp c args vl in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some v + /\ Val.lessdef (Val.of_optbool (eval_condition c e##args m)) v. +Proof. + intros c args vl. + assert (Y: forall r, vincl (AE.get r ae) (Uns Ptop 1) = true -> + e#r = Vundef \/ e#r = Vint Int.zero \/ e#r = Vint Int.one). + { intros. apply vmatch_Uns_1 with bc Ptop. eapply vmatch_ge. eapply vincl_ge; eauto. apply MATCH. } + unfold make_cmp. case (make_cmp_match c args vl); intros. +- unfold make_cmp_imm_eq. + destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1. ++ cbn in H; inv H. InvBooleans. subst n. + exists (e#r1); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. ++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0. +* cbn in H; inv H. InvBooleans. subst n. + exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. +* apply make_cmp_base_correct; auto. +- unfold make_cmp_imm_ne. + destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0. ++ cbn in H; inv H. InvBooleans. subst n. + exists (e#r1); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. ++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1. +* cbn in H; inv H. InvBooleans. subst n. + exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. +* apply make_cmp_base_correct; auto. +- unfold make_cmp_imm_eq. + destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1. ++ cbn in H; inv H. InvBooleans. subst n. + exists (e#r1); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. ++ destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0. +* cbn in H; inv H. InvBooleans. subst n. + exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. +* apply make_cmp_base_correct; auto. +- unfold make_cmp_imm_ne. + destruct (Int.eq_dec n Int.zero && vincl v1 (Uns Ptop 1)) eqn:E0. ++ cbn in H; inv H. InvBooleans. subst n. + exists (e#r1); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. ++ destruct (Int.eq_dec n Int.one && vincl v1 (Uns Ptop 1)) eqn:E1. +* cbn in H; inv H. InvBooleans. subst n. + exists (Val.xor e#r1 (Vint Int.one)); split; auto. cbn. + exploit Y; eauto. intros [A | [A | A]]; rewrite A; cbn; auto. +* apply make_cmp_base_correct; auto. +- apply make_cmp_base_correct; auto. +Qed. + +Lemma make_addimm_correct: + forall n r, + let (op, args) := make_addimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.add e#r (Vint n)) v. +Proof. + intros. unfold make_addimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. + subst. exists (e#r); split; auto. + destruct (e#r); cbn; auto; rewrite ?Int.add_zero, ?Ptrofs.add_zero; auto. + exists (Val.add e#r (Vint n)); split; auto. +Qed. + +Lemma make_shlimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shlimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shl e#r1 (Vint n)) v. +Proof. + intros; unfold make_shlimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shl_zero. auto. + destruct (Int.ltu n Int.iwordsize). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_shrimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shrimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shr e#r1 (Vint n)) v. +Proof. + intros; unfold make_shrimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shr_zero. auto. + destruct (Int.ltu n Int.iwordsize). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_shruimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shruimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shru e#r1 (Vint n)) v. +Proof. + intros; unfold make_shruimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.shru_zero. auto. + destruct (Int.ltu n Int.iwordsize). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_mulimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_mulimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mul e#r1 (Vint n)) v. +Proof. + intros; unfold make_mulimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (Vint Int.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_zero; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int.mul_one; auto. + destruct (Int.is_power2 n) eqn:?; intros. + rewrite (Val.mul_pow2 e#r1 _ _ Heqo). econstructor; split. cbn; eauto. auto. + econstructor; split; eauto. cbn. rewrite H; auto. +Qed. + +Lemma make_divimm_correct: + forall n r1 r2 v, + Val.divs e#r1 e#r2 = Some v -> + e#r2 = Vint n -> + let (op, args) := make_divimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_divimm. + predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. + destruct (e#r1) eqn:?; + try (rewrite Val.divs_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto); + inv H; auto. + destruct (Int.is_power2 n) eqn:?. + destruct (Int.ltu i (Int.repr 31)) eqn:?. + exists v; split; auto. cbn. + erewrite Val.divs_pow2; eauto. reflexivity. congruence. + exists v; auto. + exists v; auto. +Qed. + +Lemma make_divuimm_correct: + forall n r1 r2 v, + Val.divu e#r1 e#r2 = Some v -> + e#r2 = Vint n -> + let (op, args) := make_divuimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_divuimm. + predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. + destruct (e#r1) eqn:?; + try (rewrite Val.divu_one in H; exists (Vint i); split; cbn; try rewrite Heqv0; auto); + inv H; auto. + destruct (Int.is_power2 n) eqn:?. + econstructor; split. cbn; eauto. + rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto. + exists v; auto. +Qed. + +Lemma make_moduimm_correct: + forall n r1 r2 v, + Val.modu e#r1 e#r2 = Some v -> + e#r2 = Vint n -> + let (op, args) := make_moduimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_moduimm. + destruct (Int.is_power2 n) eqn:?. + exists v; split; auto. cbn. decEq. eapply Val.modu_pow2; eauto. congruence. + exists v; auto. +Qed. + +Lemma make_andimm_correct: + forall n r x, + vmatch bc e#r x -> + let (op, args) := make_andimm n r x in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.and e#r (Vint n)) v. +Proof. + intros; unfold make_andimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. + subst n. exists (Vint Int.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.and_mone; auto. + destruct (match x with Uns _ k => Int.eq (Int.zero_ext k (Int.not n)) Int.zero + | _ => false end) eqn:UNS. + destruct x; try congruence. + exists (e#r); split; auto. + inv H; auto. cbn. replace (Int.and i n) with i; auto. + generalize (Int.eq_spec (Int.zero_ext n0 (Int.not n)) Int.zero); rewrite UNS; intro EQ. + Int.bit_solve. destruct (zlt i0 n0). + replace (Int.testbit n i0) with (negb (Int.testbit Int.zero i0)). + rewrite Int.bits_zero. cbn. rewrite andb_true_r. auto. + rewrite <- EQ. rewrite Int.bits_zero_ext by lia. rewrite zlt_true by auto. + rewrite Int.bits_not by auto. apply negb_involutive. + rewrite H6 by auto. auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_orimm_correct: + forall n r, + let (op, args) := make_orimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.or e#r (Vint n)) v. +Proof. + intros; unfold make_orimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone; intros. + subst n. exists (Vint Int.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int.or_mone; auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_xorimm_correct: + forall n r, + let (op, args) := make_xorimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xor e#r (Vint n)) v. +Proof. + intros; unfold make_xorimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int.xor_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone; intros. + subst n. exists (Val.notint e#r); split; auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_addlimm_correct: + forall n r, + let (op, args) := make_addlimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.addl e#r (Vlong n)) v. +Proof. + intros. unfold make_addlimm. + predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. + subst. exists (e#r); split; auto. + destruct (e#r); cbn; auto; rewrite ? Int64.add_zero, ? Ptrofs.add_zero; auto. + exists (Val.addl e#r (Vlong n)); split; auto. +Qed. + +Lemma make_shllimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shllimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shll e#r1 (Vint n)) v. +Proof. + intros; unfold make_shllimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. + unfold Int64.shl'. rewrite Z.shiftl_0_r, Int64.repr_unsigned. auto. + destruct (Int.ltu n Int64.iwordsize'). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_shrlimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shrlimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrl e#r1 (Vint n)) v. +Proof. + intros; unfold make_shrlimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. + unfold Int64.shr'. rewrite Z.shiftr_0_r, Int64.repr_signed. auto. + destruct (Int.ltu n Int64.iwordsize'). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_shrluimm_correct: + forall n r1 r2, + e#r2 = Vint n -> + let (op, args) := make_shrluimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.shrlu e#r1 (Vint n)) v. +Proof. + intros; unfold make_shrluimm. + predSpec Int.eq Int.eq_spec n Int.zero; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. + unfold Int64.shru'. rewrite Z.shiftr_0_r, Int64.repr_unsigned. auto. + destruct (Int.ltu n Int64.iwordsize'). + econstructor; split. cbn. eauto. auto. + econstructor; split. cbn. eauto. rewrite H; auto. +Qed. + +Lemma make_mullimm_correct: + forall n r1 r2, + e#r2 = Vlong n -> + let (op, args) := make_mullimm n r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mull e#r1 (Vlong n)) v. +Proof. + intros; unfold make_mullimm. + predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. subst. + exists (Vlong Int64.zero); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.one; intros. subst. + exists (e#r1); split; auto. destruct (e#r1); cbn; auto. rewrite Int64.mul_one; auto. + destruct (Int64.is_power2' n) eqn:?; intros. + exists (Val.shll e#r1 (Vint i)); split; auto. + destruct (e#r1); cbn; auto. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.mul_pow2' by eauto. auto. + econstructor; split; eauto. cbn; rewrite H; auto. +Qed. + +Lemma make_divlimm_correct: + forall n r1 r2 v, + Val.divls e#r1 e#r2 = Some v -> + e#r2 = Vlong n -> + let (op, args) := make_divlimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_divlimm. + destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. + rewrite H0 in H. econstructor; split. cbn; eauto. + erewrite Val.divls_pow2; eauto. auto. + exists v; auto. + exists v; auto. +Qed. + +Lemma make_divluimm_correct: + forall n r1 r2 v, + Val.divlu e#r1 e#r2 = Some v -> + e#r2 = Vlong n -> + let (op, args) := make_divluimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_divluimm. + destruct (Int64.is_power2' n) eqn:?. + econstructor; split. cbn; eauto. + rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. + cbn. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.divu_pow2' by eauto. auto. + exists v; auto. +Qed. + +Lemma make_modluimm_correct: + forall n r1 r2 v, + Val.modlu e#r1 e#r2 = Some v -> + e#r2 = Vlong n -> + let (op, args) := make_modluimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_modluimm. + destruct (Int64.is_power2 n) eqn:?. + exists v; split; auto. cbn. decEq. + rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. + cbn. erewrite Int64.modu_and by eauto. auto. + exists v; auto. +Qed. + +Lemma make_andlimm_correct: + forall n r x, + let (op, args) := make_andlimm n r x in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.andl e#r (Vlong n)) v. +Proof. + intros; unfold make_andlimm. + predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. + subst n. exists (Vlong Int64.zero); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.mone; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.and_mone; auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_orlimm_correct: + forall n r, + let (op, args) := make_orlimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.orl e#r (Vlong n)) v. +Proof. + intros; unfold make_orlimm. + predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.mone; intros. + subst n. exists (Vlong Int64.mone); split; auto. destruct (e#r); cbn; auto. rewrite Int64.or_mone; auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_xorlimm_correct: + forall n r, + let (op, args) := make_xorlimm n r in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.xorl e#r (Vlong n)) v. +Proof. + intros; unfold make_xorlimm. + predSpec Int64.eq Int64.eq_spec n Int64.zero; intros. + subst n. exists (e#r); split; auto. destruct (e#r); cbn; auto. rewrite Int64.xor_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.mone; intros. + subst n. exists (Val.notl e#r); split; auto. + econstructor; split; eauto. auto. +Qed. + +Lemma make_mulfimm_correct: + forall n r1 r2, + e#r2 = Vfloat n -> + let (op, args) := make_mulfimm n r1 r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v. +Proof. + intros; unfold make_mulfimm. + destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. + cbn. econstructor; split. eauto. rewrite H; subst n. + destruct (e#r1); cbn; auto. rewrite Float.mul2_add; auto. + cbn. econstructor; split; eauto. +Qed. + +Lemma make_mulfimm_correct_2: + forall n r1 r2, + e#r1 = Vfloat n -> + let (op, args) := make_mulfimm n r2 r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v. +Proof. + intros; unfold make_mulfimm. + destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. + cbn. econstructor; split. eauto. rewrite H; subst n. + destruct (e#r2); cbn; auto. rewrite Float.mul2_add; auto. + rewrite Float.mul_commut; auto. + cbn. econstructor; split; eauto. +Qed. + +Lemma make_mulfsimm_correct: + forall n r1 r2, + e#r2 = Vsingle n -> + let (op, args) := make_mulfsimm n r1 r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v. +Proof. + intros; unfold make_mulfsimm. + destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. + cbn. econstructor; split. eauto. rewrite H; subst n. + destruct (e#r1); cbn; auto. rewrite Float32.mul2_add; auto. + cbn. econstructor; split; eauto. +Qed. + +Lemma make_mulfsimm_correct_2: + forall n r1 r2, + e#r1 = Vsingle n -> + let (op, args) := make_mulfsimm n r2 r1 r2 in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v. +Proof. + intros; unfold make_mulfsimm. + destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. + cbn. econstructor; split. eauto. rewrite H; subst n. + destruct (e#r2); cbn; auto. rewrite Float32.mul2_add; auto. + rewrite Float32.mul_commut; auto. + cbn. econstructor; split; eauto. +Qed. + +Lemma make_cast8signed_correct: + forall r x, + vmatch bc e#r x -> + let (op, args) := make_cast8signed r x in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.sign_ext 8 e#r) v. +Proof. + intros; unfold make_cast8signed. destruct (vincl x (Sgn Ptop 8)) eqn:INCL. + exists e#r; split; auto. + assert (V: vmatch bc e#r (Sgn Ptop 8)). + { eapply vmatch_ge; eauto. apply vincl_ge; auto. } + inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto. + econstructor; split; cbn; eauto. +Qed. + +Lemma make_cast16signed_correct: + forall r x, + vmatch bc e#r x -> + let (op, args) := make_cast16signed r x in + exists v, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v /\ Val.lessdef (Val.sign_ext 16 e#r) v. +Proof. + intros; unfold make_cast16signed. destruct (vincl x (Sgn Ptop 16)) eqn:INCL. + exists e#r; split; auto. + assert (V: vmatch bc e#r (Sgn Ptop 16)). + { eapply vmatch_ge; eauto. apply vincl_ge; auto. } + inv V; cbn; auto. rewrite is_sgn_sign_ext in H4 by auto. rewrite H4; auto. + econstructor; split; cbn; eauto. +Qed. + +Lemma op_strength_reduction_correct: + forall op args vl v, + vl = map (fun r => AE.get r ae) args -> + eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some v -> + let (op', args') := op_strength_reduction op args vl in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op' e##args' m = Some w /\ Val.lessdef v w. +Proof. + intros until v; unfold op_strength_reduction; + case (op_strength_reduction_match op args vl); cbn; intros. +- (* cast8signed *) + InvApproxRegs; SimplVM; inv H0. apply make_cast8signed_correct; auto. +- (* cast16signed *) + InvApproxRegs; SimplVM; inv H0. apply make_cast16signed_correct; auto. +- (* add 1 *) + rewrite Val.add_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto. +- (* add 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_addimm_correct; auto. +- (* sub *) + InvApproxRegs; SimplVM; inv H0. rewrite Val.sub_add_opp. apply make_addimm_correct; auto. +- (* mul 1 *) + rewrite Val.mul_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. +- (* mul 2*) + InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. +- (* divs *) + assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_divimm_correct; auto. +- (* divu *) + assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_divuimm_correct; auto. +- (* modu *) + assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_moduimm_correct; auto. +- (* and 1 *) + rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. +- (* and 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. +- (* andimm *) + inv H; inv H0. apply make_andimm_correct; auto. +- (* or 1 *) + rewrite Val.or_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto. +- (* or 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_orimm_correct; auto. +- (* xor 1 *) + rewrite Val.xor_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto. +- (* xor 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_xorimm_correct; auto. +- (* shl *) + InvApproxRegs; SimplVM; inv H0. apply make_shlimm_correct; auto. +- (* shr *) + InvApproxRegs; SimplVM; inv H0. apply make_shrimm_correct; auto. +- (* shru *) + InvApproxRegs; SimplVM; inv H0. apply make_shruimm_correct; auto. +- (* addl 1 *) + rewrite Val.addl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto. +- (* addl 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_addlimm_correct; auto. +- (* subl *) + InvApproxRegs; SimplVM; inv H0. + replace (Val.subl e#r1 (Vlong n2)) with (Val.addl e#r1 (Vlong (Int64.neg n2))). + apply make_addlimm_correct; auto. + unfold Val.addl, Val.subl. destruct Archi.ptr64 eqn:SF, e#r1; auto. + rewrite Int64.sub_add_opp; auto. + rewrite Ptrofs.sub_add_opp. do 2 f_equal. auto with ptrofs. + rewrite Int64.sub_add_opp; auto. +- (* mull 1 *) + rewrite Val.mull_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. +- (* mull 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. +- (* divl *) + assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_divlimm_correct; auto. +- (* divlu *) + assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_divluimm_correct; auto. +- (* modlu *) + assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_modluimm_correct; auto. +- (* andl 1 *) + rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. +- (* andl 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. +- (* andlimm *) + inv H; inv H0. apply make_andlimm_correct; auto. +- (* orl 1 *) + rewrite Val.orl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto. +- (* orl 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_orlimm_correct; auto. +- (* xorl 1 *) + rewrite Val.xorl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto. +- (* xorl 2 *) + InvApproxRegs; SimplVM; inv H0. apply make_xorlimm_correct; auto. +- (* shll *) + InvApproxRegs; SimplVM; inv H0. apply make_shllimm_correct; auto. +- (* shrl *) + InvApproxRegs; SimplVM; inv H0. apply make_shrlimm_correct; auto. +- (* shrlu *) + InvApproxRegs; SimplVM; inv H0. apply make_shrluimm_correct; auto. +- (* cond *) + inv H0. apply make_cmp_correct; auto. +- (* mulf 1 *) + InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto. +- (* mulf 2 *) + InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2). + rewrite <- H2. apply make_mulfimm_correct_2; auto. +- (* mulfs 1 *) + InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto. +- (* mulfs 2 *) + InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2). + rewrite <- H2. apply make_mulfsimm_correct_2; auto. +- (* default *) + exists v; auto. +Qed. + +Lemma addr_strength_reduction_correct: + forall addr args vl res, + vl = map (fun r => AE.get r ae) args -> + eval_addressing ge (Vptr sp Ptrofs.zero) addr e##args = Some res -> + let (addr', args') := addr_strength_reduction addr args vl in + exists res', eval_addressing ge (Vptr sp Ptrofs.zero) addr' e##args' = Some res' /\ Val.lessdef res res'. +Proof. + intros until res. unfold addr_strength_reduction. + destruct (addr_strength_reduction_match addr args vl); cbn; + intros VL EA; InvApproxRegs; SimplVM; try (inv EA). +- destruct (orb _ _). ++ exists (Val.offset_ptr e#r1 n); auto. ++ cbn. rewrite Genv.shift_symbol_address. econstructor; split; eauto. + inv H0; cbn; auto. +- rewrite Ptrofs.add_zero_l. econstructor; split; eauto. + change (Vptr sp (Ptrofs.add n1 n)) with (Val.offset_ptr (Vptr sp n1) n). + inv H0; cbn; auto. +- exists res; auto. +Qed. + +End STRENGTH_REDUCTION. diff --git a/kvx/Conventions1.v b/kvx/Conventions1.v new file mode 100644 index 00000000..d8eff34e --- /dev/null +++ b/kvx/Conventions1.v @@ -0,0 +1,431 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Function calling conventions and other conventions regarding the use of + machine registers and stack slots. *) + +Require Import Coqlib Decidableplus. +Require Import AST Machregs Locations. + +(** * Classification of machine registers *) + +(** Machine registers (type [mreg] in module [Locations]) are divided in + the following groups: +- Callee-save registers, whose value is preserved across a function call. +- Caller-save registers that can be modified during a function call. + + We follow the RISC-V application binary interface (ABI) in our choice + of callee- and caller-save registers. +*) + +Definition is_callee_save (r: mreg) : bool := + match r with + (* | R15 | R16 | R17 *) | R18 | R19 | R20 | R21 | R22 + | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 => true + | _ => false + end. + +Definition int_caller_save_regs := + R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 + :: R10 :: R11 :: R15 (* :: R16 *) :: R17 + (* :: R32 *) :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39 :: R40 :: R41 + :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49 :: R50 :: R51 + :: R52 :: R53 :: R54 :: R55 :: R56 :: R57 :: R58 :: R59 :: R60 :: R61 + :: R62 :: R63 :: nil. + +Definition float_caller_save_regs : list mreg := nil. + +Definition int_callee_save_regs := + (* R15 :: R16 :: R17 :: *)R18 :: R19 :: R20 :: R21 :: R22 + :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil. + +Definition float_callee_save_regs : list mreg := nil. + +Definition destroyed_at_call := + List.filter (fun r => negb (is_callee_save r)) all_mregs. + +Definition dummy_int_reg := R63. (**r Used in [Coloring]. *) +Definition dummy_float_reg := R62. (**r Used in [Coloring]. *) + +Definition callee_save_type := mreg_type. + +Definition is_float_reg (r: mreg) := false. + +(** * Function calling conventions *) + +(** The functions in this section determine the locations (machine registers + and stack slots) used to communicate arguments and results between the + caller and the callee during function calls. These locations are functions + of the signature of the function and of the call instruction. + Agreement between the caller and the callee on the locations to use + is guaranteed by our dynamic semantics for Cminor and RTL, which demand + that the signature of the call instruction is identical to that of the + called function. + + Calling conventions are largely arbitrary: they must respect the properties + proved in this section (such as no overlapping between the locations + of function arguments), but this leaves much liberty in choosing actual + locations. To ensure binary interoperability of code generated by our + compiler with libraries compiled by another compiler, we + implement the standard RISC-V conventions. *) + +(** ** Location of function result *) + +(** The result value of a function is passed back to the caller in + registers [R10] or [F10] or [R10,R11], depending on the type of the + returned value. We treat a function without result as a function + with one integer result. *) + + +Definition loc_result (s: signature) : rpair mreg := + match s.(sig_res) with + | Tvoid => One R0 + | Tint8signed => One R0 + | Tint8unsigned => One R0 + | Tint16signed => One R0 + | Tint16unsigned => One R0 + | Tint | Tany32 => One R0 + | Tfloat | Tsingle | Tany64 => One R0 + | Tlong => if Archi.ptr64 then One R0 else One R0 + end. + +(** The result registers have types compatible with that given in the signature. *) + +Lemma loc_result_type: + forall sig, + subtype (proj_sig_res sig) (typ_rpair mreg_type (loc_result sig)) = true. +Proof. + intros. unfold proj_sig_res, loc_result, mreg_type. + destruct (sig_res sig); try destruct Archi.ptr64; cbn; trivial; destruct t; trivial. +Qed. + +(** The result locations are caller-save registers *) + +Lemma loc_result_caller_save: + forall (s: signature), + forall_rpair (fun r => is_callee_save r = false) (loc_result s). +Proof. + intros. unfold loc_result, is_callee_save; + destruct (sig_res s); cbn; auto; try destruct Archi.ptr64; cbn; auto; try destruct t; cbn; auto. +Qed. + +(** If the result is in a pair of registers, those registers are distinct and have type [Tint] at least. *) + +Lemma loc_result_pair: + forall sg, + match loc_result sg with + | One _ => True + | Twolong r1 r2 => + r1 <> r2 /\ proj_sig_res sg = Tlong + /\ subtype Tint (mreg_type r1) = true /\ subtype Tint (mreg_type r2) = true + /\ Archi.ptr64 = false + end. +Proof. + intros. + unfold loc_result; destruct (sig_res sg); auto; + unfold mreg_type; try destruct Archi.ptr64; auto; + destruct t; auto. +Qed. + +(** The location of the result depends only on the result part of the signature *) + +Lemma loc_result_exten: + forall s1 s2, s1.(sig_res) = s2.(sig_res) -> loc_result s1 = loc_result s2. +Proof. + intros. unfold loc_result. rewrite H; auto. +Qed. + +(** ** Location of function arguments *) + +(** The RISC-V ABI states the following convention for passing arguments + to a function: + +- Arguments are passed in registers when possible. + +- Up to eight integer registers (ai: int_param_regs) and up to eight + floating-point registers (fai: float_param_regs) are used for this + purpose. + +- If the arguments to a function are conceptualized as fields of a C + struct, each with pointer alignment, the argument registers are a + shadow of the first eight pointer-words of that struct. If argument + i < 8 is a floating-point type, it is passed in floating-point + register fa_i; otherwise, it is passed in integer register a_i. + +- When primitive arguments twice the size of a pointer-word are passed + on the stack, they are naturally aligned. When they are passed in the + integer registers, they reside in an aligned even-odd register pair, + with the even register holding the least-significant bits. + +- Floating-point arguments to variadic functions (except those that + are explicitly named in the parameter list) are passed in integer + registers. + +- The portion of the conceptual struct that is not passed in argument + registers is passed on the stack. The stack pointer sp points to the + first argument not passed in a register. + +The bit about variadic functions doesn't quite fit CompCert's model. +We do our best by passing the FP arguments in registers, as usual, +and reserving the corresponding integer registers, so that fixup +code can be introduced in the Asmexpand pass. +*) + +Definition param_regs := + R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: nil. + +Definition one_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ) + (rec: Z -> Z -> list (rpair loc)) := + match list_nth_z regs rn with + | Some r => + One(R r) :: rec (rn + 1) ofs + | None => + let ofs := align ofs (typealign ty) in + One(S Outgoing ofs ty) :: rec rn (ofs + (if Archi.ptr64 then 2 else typesize ty)) + end. + +Definition two_args (regs: list mreg) (rn: Z) (ofs: Z) + (rec: Z -> Z -> list (rpair loc)) := + let rn := align rn 2 in + match list_nth_z regs rn, list_nth_z regs (rn + 1) with + | Some r1, Some r2 => + Twolong (R r2) (R r1) :: rec (rn + 2) ofs + | _, _ => + let ofs := align ofs 2 in + Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) :: + rec rn (ofs + 2) + end. + +Definition hybrid_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ) + (rec: Z -> Z -> list (rpair loc)) := + let rn := align rn 2 in + match list_nth_z regs rn with + | Some r => + One (R r) :: rec (rn + 2) ofs + | None => + let ofs := align ofs 2 in + One (S Outgoing ofs ty) :: rec rn (ofs + 2) + end. + +Fixpoint loc_arguments_rec (va: bool) + (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) := + match tyl with + | nil => nil + | ty :: tys => one_arg param_regs r ofs ty (loc_arguments_rec va tys) +(* + | (Tint | Tany32) as ty :: tys => + one_arg int_param_regs r ofs ty (loc_arguments_rec va tys) + | Tsingle as ty :: tys => + one_arg float_param_regs r ofs ty (loc_arguments_rec va tys) + | Tlong as ty :: tys => + if Archi.ptr64 + then one_arg int_param_regs r ofs ty (loc_arguments_rec va tys) + else two_args int_param_regs r ofs (loc_arguments_rec va tys) + | (Tfloat | Tany64) as ty :: tys => + if va && negb Archi.ptr64 + then hybrid_arg float_param_regs r ofs ty (loc_arguments_rec va tys) + else one_arg float_param_regs r ofs ty (loc_arguments_rec va tys) +*) + end. + +(* FIX Sylvain: not sure to understand what I have done... *) +Definition has_va (s: signature) : bool := + match s.(sig_cc).(cc_vararg) with + | Some n => true + | None => false + end. + +(** [loc_arguments s] returns the list of locations where to store arguments + when calling a function with signature [s]. *) + +Definition loc_arguments (s: signature) : list (rpair loc) := + loc_arguments_rec (has_va s) s.(sig_args) 0 0. + +(** [size_arguments s] returns the number of [Outgoing] slots used + to call a function with signature [s]. *) + +Definition max_outgoing_1 (accu: Z) (l: loc) : Z := + match l with + | S Outgoing ofs ty => Z.max accu (ofs + typesize ty) + | _ => accu + end. + +Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z := + match rl with + | One l => max_outgoing_1 accu l + | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2 + end. + +Definition size_arguments (s: signature) : Z := + List.fold_left max_outgoing_2 (loc_arguments s) 0. + +(** Argument locations are either non-temporary registers or [Outgoing] + stack slots at nonnegative offsets. *) + +Definition loc_argument_acceptable (l: loc) : Prop := + match l with + | R r => is_callee_save r = false + | S Outgoing ofs ty => ofs >= 0 /\ (typealign ty | ofs) + | _ => False + end. + +Lemma loc_arguments_rec_charact: + forall va tyl rn ofs p, + ofs >= 0 -> + In p (loc_arguments_rec va tyl rn ofs) -> forall_rpair loc_argument_acceptable p. +Proof. + set (OK := fun (l: list (rpair loc)) => + forall p, In p l -> forall_rpair loc_argument_acceptable p). + set (OKF := fun (f: Z -> Z -> list (rpair loc)) => + forall rn ofs, ofs >= 0 -> OK (f rn ofs)). + set (OKREGS := fun (l: list mreg) => forall r, In r l -> is_callee_save r = false). + assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typealign ty) >= 0). + { intros. + assert (ofs <= align ofs (typealign ty)) by (apply align_le; apply typealign_pos). + lia. } + assert (SK: (if Archi.ptr64 then 2 else 1) > 0). + { destruct Archi.ptr64; lia. } + assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0). + { intros. destruct Archi.ptr64. lia. apply typesize_pos. } + assert (A: forall regs rn ofs ty f, + OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)). + { intros until f; intros OR OF OO; red; unfold one_arg; intros. + destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H. + - subst p; cbn. apply OR. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - subst p; cbn. auto using align_divides, typealign_pos. + - eapply OF; [idtac|eauto]. + generalize (AL ofs ty OO) (SKK ty); lia. + } + assert (B: forall regs rn ofs f, + OKREGS regs -> OKF f -> ofs >= 0 -> OK (two_args regs rn ofs f)). + { intros until f; intros OR OF OO; unfold two_args. + set (rn' := align rn 2). + set (ofs' := align ofs 2). + assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto). + assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint) + :: f rn' (ofs' + 2))). + { red; cbn; intros. destruct H. + - subst p; cbn. + repeat split; auto using Z.divide_1_l. lia. + - eapply OF; [idtac|eauto]. lia. + } + destruct (list_nth_z regs rn') as [r1|] eqn:NTH1; + destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2; + try apply DFL. + red; cbn; intros; destruct H. + - subst p; cbn. split; apply OR; eauto using list_nth_z_in. + - eapply OF; [idtac|eauto]. auto. + } + assert (C: forall regs rn ofs ty f, + OKREGS regs -> OKF f -> ofs >= 0 -> typealign ty = 1 -> OK (hybrid_arg regs rn ofs ty f)). + { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros. + set (rn' := align rn 2) in *. + destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H. + - subst p; cbn. apply OR. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - subst p; cbn. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l. + - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); cbn; lia. + } + assert (D: OKREGS param_regs). + { red. decide_goal. } + assert (E: OKREGS param_regs). + { red. decide_goal. } + + cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)). + unfold OK. eauto. + induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; cbn. + - red; cbn; tauto. + - destruct ty1. ++ (* int *) apply A; auto. ++ (* float *) + apply A; auto. ++ (* long *) + apply A; auto. ++ (* single *) + apply A; auto. ++ (* any32 *) + apply A; auto. ++ (* any64 *) + apply A; auto. +Qed. + +Lemma loc_arguments_acceptable: + forall (s: signature) (p: rpair loc), + In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p. +Proof. + unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. lia. +Qed. + +(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) + +Remark fold_max_outgoing_above: + forall l n, fold_left max_outgoing_2 l n >= n. +Proof. + assert (A: forall n l, max_outgoing_1 n l >= n). + { intros; unfold max_outgoing_1. destruct l as [_ | []]; lia. } + induction l; cbn; intros. + - lia. + - eapply Zge_trans. eauto. + destruct a; cbn. apply A. eapply Zge_trans; eauto. +Qed. + +Lemma size_arguments_above: + forall s, size_arguments s >= 0. +Proof. + intros. apply fold_max_outgoing_above. +Qed. + +Lemma loc_arguments_bounded: + forall (s: signature) (ofs: Z) (ty: typ), + In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> + ofs + typesize ty <= size_arguments s. +Proof. + intros until ty. + assert (A: forall n l, n <= max_outgoing_1 n l). + { intros; unfold max_outgoing_1. destruct l as [_ | []]; lia. } + assert (B: forall p n, + In (S Outgoing ofs ty) (regs_of_rpair p) -> + ofs + typesize ty <= max_outgoing_2 n p). + { intros. destruct p; cbn in H; intuition; subst; cbn. + - lia. + - eapply Z.le_trans. 2: apply A. lia. + - lia. } + assert (C: forall l n, + In (S Outgoing ofs ty) (regs_of_rpairs l) -> + ofs + typesize ty <= fold_left max_outgoing_2 l n). + { induction l; cbn; intros. + - contradiction. + - rewrite in_app_iff in H. destruct H. + + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above. + + apply IHl; auto. + } + apply C. +Qed. + +Lemma loc_arguments_main: + loc_arguments signature_main = nil. +Proof. + reflexivity. +Qed. + + +(** ** Normalization of function results and parameters *) + +(** No normalization needed. *) + +Definition return_value_needs_normalization (t: rettype): bool := false. +Definition parameter_needs_normalization (t: rettype): bool := false. + diff --git a/kvx/DecBoolOps.v b/kvx/DecBoolOps.v new file mode 100644 index 00000000..1e0a6187 --- /dev/null +++ b/kvx/DecBoolOps.v @@ -0,0 +1,30 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Set Implicit Arguments. + +Theorem and_dec : forall A B C D : Prop, + { A } + { B } -> { C } + { D } -> + { A /\ C } + { (B /\ C) \/ (B /\ D) \/ (A /\ D) }. +Proof. + intros A B C D AB CD. + destruct AB; destruct CD. + - left. tauto. + - right. tauto. + - right. tauto. + - right. tauto. +Qed. + + diff --git a/kvx/DuplicateOpcodeHeuristic.ml b/kvx/DuplicateOpcodeHeuristic.ml new file mode 100644 index 00000000..38702e1b --- /dev/null +++ b/kvx/DuplicateOpcodeHeuristic.ml @@ -0,0 +1,41 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* open Camlcoq *) +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml new file mode 120000 index 00000000..ee2674bf --- /dev/null +++ b/kvx/ExpansionOracle.ml @@ -0,0 +1 @@ +../aarch64/ExpansionOracle.ml
\ No newline at end of file diff --git a/kvx/ExtFloats.v b/kvx/ExtFloats.v new file mode 100644 index 00000000..b08503a5 --- /dev/null +++ b/kvx/ExtFloats.v @@ -0,0 +1,54 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Floats Integers ZArith. + +Module ExtFloat. +(** TODO check with the actual KVX; + this is what happens on x86 and may be inappropriate. *) + +Definition min (x : float) (y : float) : float := + match Float.compare x y with + | Some Eq | Some Lt => x + | Some Gt | None => y + end. + +Definition max (x : float) (y : float) : float := + match Float.compare x y with + | Some Eq | Some Gt => x + | Some Lt | None => y + end. +End ExtFloat. + +Module ExtFloat32. +(** TODO check with the actual KVX *) + +Definition min (x : float32) (y : float32) : float32 := + match Float32.compare x y with + | Some Eq | Some Lt => x + | Some Gt | None => y + end. + +Definition max (x : float32) (y : float32) : float32 := + match Float32.compare x y with + | Some Eq | Some Gt => x + | Some Lt | None => y + end. + +Definition one := Float32.of_int (Int.repr (1%Z)). +Definition inv (x : float32) : float32 := + Float32.div one x. + +End ExtFloat32. diff --git a/kvx/ExtValues.v b/kvx/ExtValues.v new file mode 100644 index 00000000..b4e14898 --- /dev/null +++ b/kvx/ExtValues.v @@ -0,0 +1,756 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib. +Require Import Integers. +Require Import Values. +Require Import Floats ExtFloats. +Require Import Lia. + +Open Scope Z_scope. + +Definition abs_diff (x y : Z) := Z.abs (x - y). +Definition abs_diff2 (x y : Z) := + if x <=? y then y - x else x - y. +Lemma abs_diff2_correct : + forall x y : Z, (abs_diff x y) = (abs_diff2 x y). +Proof. + intros. + unfold abs_diff, abs_diff2. + unfold Z.leb. + pose proof (Z.compare_spec x y) as Hspec. + inv Hspec. + - rewrite Z.abs_eq; lia. + - rewrite Z.abs_neq; lia. + - rewrite Z.abs_eq; lia. +Qed. + +Inductive shift1_4 : Type := +| SHIFT1 | SHIFT2 | SHIFT3 | SHIFT4. + +Definition z_of_shift1_4 (x : shift1_4) := + match x with + | SHIFT1 => 1 + | SHIFT2 => 2 + | SHIFT3 => 3 + | SHIFT4 => 4 + end. + +Definition shift1_4_of_z (x : Z) := + if Z.eq_dec x 1 then Some SHIFT1 + else if Z.eq_dec x 2 then Some SHIFT2 + else if Z.eq_dec x 3 then Some SHIFT3 + else if Z.eq_dec x 4 then Some SHIFT4 + else None. + +Lemma shift1_4_of_z_correct : + forall z, + match shift1_4_of_z z with + | Some x => z_of_shift1_4 x = z + | None => True + end. +Proof. + intro. unfold shift1_4_of_z. + destruct (Z.eq_dec _ _); cbn; try congruence. + destruct (Z.eq_dec _ _); cbn; try congruence. + destruct (Z.eq_dec _ _); cbn; try congruence. + destruct (Z.eq_dec _ _); cbn; try congruence. + trivial. +Qed. + +Definition int_of_shift1_4 (x : shift1_4) := + Int.repr (z_of_shift1_4 x). + +Definition is_bitfield stop start := + (Z.leb start stop) + && (Z.geb start Z.zero) + && (Z.ltb stop Int.zwordsize). + +Definition extfz stop start v := + if is_bitfield stop start + then + let stop' := Z.add stop Z.one in + match v with + | Vint w => + Vint (Int.shru (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start)))) + | _ => Vundef + end + else Vundef. + + +Definition extfs stop start v := + if is_bitfield stop start + then + let stop' := Z.add stop Z.one in + match v with + | Vint w => + Vint (Int.shr (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start)))) + | _ => Vundef + end + else Vundef. + +Definition zbitfield_mask stop start := + (Z.shiftl 1 (Z.succ stop)) - (Z.shiftl 1 start). + +Definition bitfield_mask stop start := + Vint(Int.repr (zbitfield_mask stop start)). + +Definition bitfield_maskl stop start := + Vlong(Int64.repr (zbitfield_mask stop start)). + +Definition insf stop start prev fld := + let mask := bitfield_mask stop start in + if is_bitfield stop start + then + Val.or (Val.and prev (Val.notint mask)) + (Val.and (Val.shl fld (Vint (Int.repr start))) mask) + else Vundef. + +Definition is_bitfieldl stop start := + (Z.leb start stop) + && (Z.geb start Z.zero) + && (Z.ltb stop Int64.zwordsize). + +Definition extfzl stop start v := + if is_bitfieldl stop start + then + let stop' := Z.add stop Z.one in + match v with + | Vlong w => + Vlong (Int64.shru' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start)))) + | _ => Vundef + end + else Vundef. + + +Definition extfsl stop start v := + if is_bitfieldl stop start + then + let stop' := Z.add stop Z.one in + match v with + | Vlong w => + Vlong (Int64.shr' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start)))) + | _ => Vundef + end + else Vundef. + +Definition insfl stop start prev fld := + let mask := bitfield_maskl stop start in + if is_bitfieldl stop start + then + Val.orl (Val.andl prev (Val.notl mask)) + (Val.andl (Val.shll fld (Vint (Int.repr start))) mask) + else Vundef. + +Fixpoint highest_bit (x : Z) (n : nat) : Z := + match n with + | O => 0 + | S n1 => + let n' := Z.of_N (N_of_nat n) in + if Z.testbit x n' + then n' + else highest_bit x n1 + end. + +Definition int_highest_bit (x : int) : Z := + highest_bit (Int.unsigned x) (31%nat). + + +Definition int64_highest_bit (x : int64) : Z := + highest_bit (Int64.unsigned x) (63%nat). + +Definition val_shrx (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 31) + then Vint(Int.shrx n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition val_shrxl (v1 v2: val): val := + match v1, v2 with + | Vlong n1, Vint n2 => + if Int.ltu n2 (Int.repr 63) + then Vlong(Int64.shrx' n1 n2) + else Vundef + | _, _ => Vundef + end. + +Remark modulus_fits_64: Int.modulus < Int64.max_unsigned. +Proof. + compute. + trivial. +Qed. + +Remark unsigned64_repr : + forall i, + -1 < i < Int.modulus -> + Int64.unsigned (Int64.repr i) = i. +Proof. + intros i H. + destruct H as [Hlow Hhigh]. + apply Int64.unsigned_repr. + split. { lia. } + pose proof modulus_fits_64. + lia. +Qed. + +Theorem divu_is_divlu: forall v1 v2 : val, + Val.divu v1 v2 = + match Val.divlu (Val.longofintu v1) (Val.longofintu v2) with + | None => None + | Some q => Some (Val.loword q) + end. +Proof. + intros. + destruct v1; cbn; trivial. + destruct v2; cbn; trivial. + destruct i as [i_val i_range]. + destruct i0 as [i0_val i0_range]. + cbn. + unfold Int.eq, Int64.eq, Int.zero, Int64.zero. + cbn. + rewrite Int.unsigned_repr by (compute; split; discriminate). + rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate). + rewrite (unsigned64_repr i0_val) by assumption. + destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial. + f_equal. f_equal. + unfold Int.divu, Int64.divu. cbn. + rewrite (unsigned64_repr i_val) by assumption. + rewrite (unsigned64_repr i0_val) by assumption. + unfold Int64.loword. + rewrite Int64.unsigned_repr. + reflexivity. + destruct (Z.eq_dec i0_val 1). + {subst i0_val. + pose proof modulus_fits_64. + rewrite Zdiv_1_r. + lia. + } + destruct (Z.eq_dec i_val 0). + { subst i_val. compute. + split; + intro ABSURD; + discriminate ABSURD. } + assert ((i_val / i0_val) < i_val). + { apply Z_div_lt; lia. } + split. + { apply Z_div_pos; lia. } + pose proof modulus_fits_64. + lia. +Qed. + +Theorem modu_is_modlu: forall v1 v2 : val, + Val.modu v1 v2 = + match Val.modlu (Val.longofintu v1) (Val.longofintu v2) with + | None => None + | Some q => Some (Val.loword q) + end. +Proof. + intros. + destruct v1; cbn; trivial. + destruct v2; cbn; trivial. + destruct i as [i_val i_range]. + destruct i0 as [i0_val i0_range]. + cbn. + unfold Int.eq, Int64.eq, Int.zero, Int64.zero. + cbn. + rewrite Int.unsigned_repr by (compute; split; discriminate). + rewrite (Int64.unsigned_repr 0) by (compute; split; discriminate). + rewrite (unsigned64_repr i0_val) by assumption. + destruct (zeq i0_val 0) as [ | Hnot0]; cbn; trivial. + f_equal. f_equal. + unfold Int.modu, Int64.modu. cbn. + rewrite (unsigned64_repr i_val) by assumption. + rewrite (unsigned64_repr i0_val) by assumption. + unfold Int64.loword. + rewrite Int64.unsigned_repr. + reflexivity. + assert((i_val mod i0_val) < i0_val). + apply Z_mod_lt. + lia. + split. + { apply Z_mod_lt. + lia. } + pose proof modulus_fits_64. + lia. +Qed. + +Remark if_zlt_0_half_modulus : + forall T : Type, + forall x y: T, + (if (zlt 0 Int.half_modulus) then x else y) = x. +Proof. + reflexivity. +Qed. + +Remark if_zlt_mone_half_modulus : + forall T : Type, + forall x y: T, + (if (zlt (Int.unsigned Int.mone) Int.half_modulus) then x else y) = y. +Proof. + reflexivity. +Qed. + +Remark if_zlt_min_signed_half_modulus : + forall T : Type, + forall x y: T, + (if (zlt (Int.unsigned (Int.repr Int.min_signed)) + Int.half_modulus) + then x + else y) = y. +Proof. + reflexivity. +Qed. + +Lemma repr_unsigned64_repr: + forall x, Int.repr (Int64.unsigned (Int64.repr x)) = Int.repr x. +Proof. + intros. + apply Int.eqm_samerepr. + unfold Int.eqm. + unfold Zbits.eqmod. + pose proof (Int64.eqm_unsigned_repr x) as H64. + unfold Int64.eqm in H64. + unfold Zbits.eqmod in H64. + destruct H64 as [k64 H64]. + change Int64.modulus with 18446744073709551616 in *. + change Int.modulus with 4294967296. + exists (-4294967296 * k64). + set (y := Int64.unsigned (Int64.repr x)) in *. + rewrite H64. + clear H64. + lia. +Qed. + +(* +Theorem divs_is_divls: forall v1 v2 : val, + match Val.divs v1 v2 with + | Some q => + match Val.divls (Val.longofint v1) (Val.longofint v2) with + | None => False + | Some q' => q = Val.loword q' + end + | None => True + end. +Proof. + intros. + destruct v1; cbn; trivial. + destruct v2; cbn; trivial. + destruct i as [i_val i_range]. + destruct i0 as [i0_val i0_range]. + cbn. + unfold Int.eq, Int64.eq, Int.zero, Int64.zero. + cbn. + replace (Int.unsigned (Int.repr 0)) with 0 in * by reflexivity. + destruct (zeq _ _) as [H0' | Hnot0]; cbn; trivial. + destruct (zeq i_val (Int.unsigned (Int.repr Int.min_signed))) as [Hmin | Hnotmin]; cbn. + { subst. + destruct (zeq i0_val (Int.unsigned Int.mone)) as [Hmone | Hnotmone]; trivial. + unfold Int.signed. cbn. + replace (Int64.unsigned (Int64.repr 0)) with 0 in * by reflexivity. + rewrite if_zlt_min_signed_half_modulus. + replace (if + zeq + (Int64.unsigned + (Int64.repr + (Int.unsigned (Int.repr Int.min_signed) - Int.modulus))) + (Int64.unsigned (Int64.repr Int64.min_signed)) + then true + else false) with false by reflexivity. + cbn. + rewrite orb_false_r. + destruct (zlt i0_val Int.half_modulus) as [Hlt_half | Hge_half]. + { + replace Int.half_modulus with 2147483648 in * by reflexivity. + rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia). + destruct (zeq _ _) as [ | Hneq0]; try lia. clear Hneq0. + unfold Val.loword. + f_equal. + unfold Int64.divs, Int.divs, Int64.loword. + unfold Int.signed, Int64.signed. cbn. + rewrite if_zlt_min_signed_half_modulus. + change Int.half_modulus with 2147483648 in *. + destruct (zlt _ _) as [discard|]; try lia. clear discard. + change (Int64.unsigned + (Int64.repr + (Int.unsigned (Int.repr Int.min_signed) - Int.modulus))) + with 18446744071562067968. + change Int64.half_modulus with 9223372036854775808. + change Int64.modulus with 18446744073709551616. + cbn. + rewrite (Int64.unsigned_repr i0_val) by (change Int64.max_unsigned with 18446744073709551615; lia). + destruct (zlt i0_val 9223372036854775808) as [discard |]; try lia. + clear discard. + change (Int.unsigned (Int.repr Int.min_signed) - Int.modulus) with (-2147483648). + destruct (Z.eq_dec i0_val 1) as [H1 | Hnot1]. + { subst. + rewrite Z.quot_1_r. + apply Int.eqm_samerepr. + unfold Int.eqm. + change (Int64.unsigned (Int64.repr (-2147483648))) with 18446744071562067968. + unfold Zbits.eqmod. + change Int.modulus with 4294967296. + exists (-4294967296). + compute. + reflexivity. + } + change (-2147483648) with (-(2147483648)). + rewrite Z.quot_opp_l by assumption. + rewrite repr_unsigned64_repr. + reflexivity. + } + destruct (zeq _ _) as [Hmod|Hnmod]. + { + rewrite Int64.unsigned_repr_eq in Hmod. + set (delta := (i0_val - Int.modulus)) in *. + assert (delta = Int64.modulus*(delta/Int64.modulus)) as Hdelta. + { apply Z_div_exact_full_2. + compute. lia. + assumption. } + set (k := (delta / Int64.modulus)) in *. + change Int64.modulus with 18446744073709551616 in *. + change Int.modulus with 4294967296 in *. + change Int.half_modulus with 2147483648 in *. + change (Int.unsigned Int.mone) with 4294967295 in *. + lia. + } + unfold Int.divs, Int64.divs, Val.loword, Int64.loword. + change (Int.unsigned (Int.repr Int.min_signed)) with 2147483648. + change Int.modulus with 4294967296. + change (Int64.signed (Int64.repr (2147483648 - 4294967296))) with (-2147483648). + f_equal. + change (Int.signed {| Int.intval := 2147483648; Int.intrange := i_range |}) + with (-2147483648). + rewrite Int64.signed_repr. + { + replace (Int.signed {| Int.intval := i0_val; Int.intrange := i0_range |}) with (i0_val - 4294967296). + { rewrite repr_unsigned64_repr. + reflexivity. + } + *) + +Lemma big_unsigned_signed: + forall x, + (Int.unsigned x >= Int.half_modulus) -> + (Int.signed x) = (Int.unsigned x) - Int.modulus. +Proof. + destruct x as [xval xrange]. + intro BIG. + unfold Int.signed, Int.unsigned in *. cbn in *. + destruct (zlt _ _). + lia. + trivial. +Qed. + +(* +Lemma signed_0_eqb : + forall x, (Z.eqb (Int.signed x) 0) = Int.eq x Int.zero. +Qed. + *) + +Lemma Z_quot_le: forall a b, + 0 <= a -> 1 <= b -> Z.quot a b <= a. +Proof. + intros a b Ha Hb. + destruct (Z.eq_dec b 1) as [Hb1 | Hb1]. + { (* b=1 *) + subst. + rewrite Z.quot_1_r. + auto with zarith. + } + destruct (Z.eq_dec a 0) as [Ha0 | Ha0]. + { (* a=0 *) + subst. + rewrite Z.quot_0_l. + auto with zarith. + lia. + } + assert ((Z.quot a b) < a). + { + apply Z.quot_lt; lia. + } + auto with zarith. +Qed. + +(* +Lemma divs_is_quot: forall v1 v2 : val, + Val.divs v1 v2 = + match v1, v2 with + | (Vint w1), (Vint w2) => + let q := Z.quot (Int.signed w1) (Int.signed w2) in + if (negb (Z.eqb (Int.signed w2) 0)) + && (Z.geb q Int.min_signed) && (Z.leb q Int.max_signed) + then Some (Vint (Int.repr q)) + else None + | _, _ => None + end. + +Proof. + destruct v1; destruct v2; cbn; trivial. + unfold Int.divs. + rewrite signed_0_eqb. + destruct (Int.eq i0 Int.zero) eqn:Eeq0; cbn; trivial. + destruct (Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone) eqn:EXCEPTION. + { replace (Int.signed i0) with (-1). + replace (Int.signed i) with Int.min_signed. + change Int.min_signed with (-2147483648). + change Int.max_signed with (2147483647). + compute. + reflexivity. + { unfold Int.eq in EXCEPTION. + destruct (zeq _ _) as [Hmin | ] in EXCEPTION; try discriminate. + change Int.min_signed with (-2147483648). + change (Int.unsigned (Int.repr Int.min_signed)) with (2147483648) in *. + rewrite big_unsigned_signed. + change Int.modulus with 4294967296. + lia. + change Int.half_modulus with 2147483648. + lia. + } + unfold Int.eq in EXCEPTION. + destruct (zeq _ _) in EXCEPTION; try discriminate. + destruct (zeq _ _) as [Hmone | ] in EXCEPTION; try discriminate. + destruct i0 as [i0val i0range]; unfold Int.signed in *; cbn in *. + rewrite Hmone. + reflexivity. + } + replace (Int.signed i ÷ Int.signed i0 >=? Int.min_signed) with true. + replace (Int.signed i ÷ Int.signed i0 <=? Int.max_signed) with true. + reflexivity. + { assert (Int.signed i ÷ Int.signed i0 <= Int.max_signed). + { + destruct (Z_lt_le_dec (Int.signed i) 0). + { + apply Z.le_trans with (m:=0). + rewrite <- (Z.quot_0_l (Int.signed i0)). + Require Import Coq.ZArith.Zquot. + apply Z_quot_monotone. + } + assert ( Int.signed i ÷ Int.signed i0 <= Int.signed i). + apply Z_quot_le. + } + } + + *) + +Require Import Coq.ZArith.Zquot. +Lemma Z_quot_pos_pos_bound: forall a b m, + 0 <= a <= m -> 1 <= b -> 0 <= Z.quot a b <= m. +Proof. + intros. + split. + { rewrite <- (Z.quot_0_l b) by lia. + apply Z_quot_monotone; lia. + } + apply Z.le_trans with (m := a). + { + apply Z_quot_le; tauto. + } + tauto. +Qed. +Lemma Z_quot_neg_pos_bound: forall a b m, + m <= a <= 0 -> 1 <= b -> m <= Z.quot a b <= 0. + intros. + assert (0 <= - (a ÷ b) <= -m). + { + rewrite <- Z.quot_opp_l by lia. + apply Z_quot_pos_pos_bound; lia. + } + lia. +Qed. + +Lemma Z_quot_signed_pos_bound: forall a b, + Int.min_signed <= a <= Int.max_signed -> 1 <= b -> + Int.min_signed <= Z.quot a b <= Int.max_signed. +Proof. + intros. + destruct (Z_lt_ge_dec a 0). + { + split. + { apply Z_quot_neg_pos_bound; lia. } + { eapply Z.le_trans with (m := 0). + { apply Z_quot_neg_pos_bound with (m := Int.min_signed); trivial. + split. tauto. auto with zarith. + } + discriminate. + } + } + { split. + { eapply Z.le_trans with (m := 0). + discriminate. + apply Z_quot_pos_pos_bound with (m := Int.max_signed); trivial. + split. lia. tauto. + } + { apply Z_quot_pos_pos_bound; lia. + } + } +Qed. + +Lemma Z_quot_signed_neg_bound: forall a b, + Int.min_signed <= a <= Int.max_signed -> b < -1 -> + Int.min_signed <= Z.quot a b <= Int.max_signed. +Proof. + change Int.min_signed with (-2147483648). + change Int.max_signed with 2147483647. + intros. + + replace b with (-(-b)) by auto with zarith. + rewrite Z.quot_opp_r by lia. + assert (-2147483647 <= (a ÷ - b) <= 2147483648). + 2: lia. + + destruct (Z_lt_ge_dec a 0). + { + replace a with (-(-a)) by auto with zarith. + rewrite Z.quot_opp_l by lia. + assert (-2147483648 <= - a ÷ - b <= 2147483647). + 2: lia. + split. + { + rewrite Z.quot_opp_l by lia. + assert (a ÷ - b <= 2147483648). + 2: lia. + { + apply Z.le_trans with (m := 0). + rewrite <- (Z.quot_0_l (-b)) by lia. + apply Z_quot_monotone; lia. + discriminate. + } + } + assert (- a ÷ - b < -a ). + 2: lia. + apply Z_quot_lt; lia. + } + { + split. + { apply Z.le_trans with (m := 0). + discriminate. + rewrite <- (Z.quot_0_l (-b)) by lia. + apply Z_quot_monotone; lia. + } + { apply Z.le_trans with (m := a). + apply Z_quot_le. + all: lia. + } + } +Qed. + +Lemma sub_add_neg : + forall x y, Val.sub x y = Val.add x (Val.neg y). +Proof. + destruct x; destruct y; cbn; trivial. + f_equal. + apply Int.sub_add_opp. +Qed. + +Lemma neg_mul_distr_r : + forall x y, Val.neg (Val.mul x y) = Val.mul x (Val.neg y). +Proof. + destruct x; destruct y; cbn; trivial. + f_equal. + apply Int.neg_mul_distr_r. +Qed. + +(* pointer diff +Lemma sub_addl_negl : + forall x y, Val.subl x y = Val.addl x (Val.negl y). +Proof. + destruct x; destruct y; cbn; trivial. + + f_equal. apply Int64.sub_add_opp. + + destruct (Archi.ptr64) eqn:ARCHI64; trivial. + f_equal. rewrite Ptrofs.sub_add_opp. + pose (Ptrofs.agree64_neg ARCHI64 (Ptrofs.of_int64 i0) i0) as Hagree. + unfold Ptrofs.agree64 in Hagree. + unfold Ptrofs.add. + f_equal. f_equal. + rewrite Hagree. + pose (Ptrofs.agree64_of_int ARCHI64 (Int64.neg i0)) as Hagree2. + rewrite Hagree2. + reflexivity. + exact (Ptrofs.agree64_of_int ARCHI64 i0). + + destruct (Archi.ptr64) eqn:ARCHI64; cbn; trivial. + destruct (eq_block _ _); cbn; trivial. +Qed. + *) + +Lemma negl_mull_distr_r : + forall x y, Val.negl (Val.mull x y) = Val.mull x (Val.negl y). +Proof. + destruct x; destruct y; cbn; trivial. + f_equal. + apply Int64.neg_mul_distr_r. +Qed. + +Definition addx sh v1 v2 := + Val.add v2 (Val.shl v1 (Vint sh)). + +Definition addxl sh v1 v2 := + Val.addl v2 (Val.shll v1 (Vint sh)). + +Definition revsubx sh v1 v2 := + Val.sub v2 (Val.shl v1 (Vint sh)). + +Definition revsubxl sh v1 v2 := + Val.subl v2 (Val.shll v1 (Vint sh)). + +Definition minf v1 v2 := + match v1, v2 with + | (Vfloat f1), (Vfloat f2) => Vfloat (ExtFloat.min f1 f2) + | _, _ => Vundef + end. + +Definition maxf v1 v2 := + match v1, v2 with + | (Vfloat f1), (Vfloat f2) => Vfloat (ExtFloat.max f1 f2) + | _, _ => Vundef + end. + +Definition minfs v1 v2 := + match v1, v2 with + | (Vsingle f1), (Vsingle f2) => Vsingle (ExtFloat32.min f1 f2) + | _, _ => Vundef + end. + +Definition maxfs v1 v2 := + match v1, v2 with + | (Vsingle f1), (Vsingle f2) => Vsingle (ExtFloat32.max f1 f2) + | _, _ => Vundef + end. + +Definition invfs v1 := + match v1 with + | (Vsingle f1) => Vsingle (ExtFloat32.inv f1) + | _ => Vundef + end. + +Definition triple_op_float f v1 v2 v3 := + match v1, v2, v3 with + | (Vfloat f1), (Vfloat f2), (Vfloat f3) => Vfloat (f f1 f2 f3) + | _, _, _ => Vundef + end. + +Definition triple_op_single f v1 v2 v3 := + match v1, v2, v3 with + | (Vsingle f1), (Vsingle f2), (Vsingle f3) => Vsingle (f f1 f2 f3) + | _, _, _ => Vundef + end. + +Definition fmaddf := triple_op_float (fun f1 f2 f3 => Float.fma f2 f3 f1). +Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1). + +Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1). +Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1). diff --git a/kvx/Machregs.v b/kvx/Machregs.v new file mode 100644 index 00000000..02fa4e6b --- /dev/null +++ b/kvx/Machregs.v @@ -0,0 +1,245 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import String. +Require Import Coqlib. +Require Import Decidableplus. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Op. + +(** ** Machine registers *) + +(** The following type defines the machine registers that can be referenced + as locations. These include: +- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). +- Floating-point registers that can be allocated to RTL pseudo-registers + ([Fxx]). + + The type [mreg] does not include reserved machine registers such as + the zero register (x0), the link register (x1), the stack pointer + (x2), the global pointer (x3), and the thread pointer (x4). + Finally, register x31 is reserved for use as a temporary by the + assembly-code generator [Asmgen]. +*) + +Inductive mreg: Type := + (* Allocatable General Purpose regs. *) + | R0 | R1 | R2 | R3 | R4 | R5 | R6 | R7 | R8 | R9 + | R10 | R11 (* | R12 | R13 | R14 *) | R15 (* | R16 *) | R17 | R18 | R19 + | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | R29 + | R30 | R31 (* | R32 *) | R33 | R34 | R35 | R36 | R37 | R38 | R39 + | R40 | R41 | R42 | R43 | R44 | R45 | R46 | R47 | R48 | R49 + | R50 | R51 | R52 | R53 | R54 | R55 | R56 | R57 | R58 | R59 + | R60 | R61 | R62 | R63. + +Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. +Proof. decide equality. Defined. +Global Opaque mreg_eq. + +Definition all_mregs := + R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 + :: R10 :: R11 (* :: R12 :: R13 :: R14 *) :: R15 (* :: R16 *) :: R17 :: R18 :: R19 + :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 + :: R30 :: R31 (* :: R32 *) :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39 + :: R40 :: R41 :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49 + :: R50 :: R51 :: R52 :: R53 :: R54 :: R55 :: R56 :: R57 :: R58 :: R59 + :: R60 :: R61 :: R62 :: R63 :: nil. + +Lemma all_mregs_complete: + forall (r: mreg), In r all_mregs. +Proof. + assert (forall r, proj_sumbool (In_dec mreg_eq r all_mregs) = true) by (destruct r; reflexivity). + intros. specialize (H r). InvBooleans. auto. +Qed. + +Instance Decidable_eq_mreg : forall (x y: mreg), Decidable (eq x y) := Decidable_eq mreg_eq. + +Instance Finite_mreg : Finite mreg := { + Finite_elements := all_mregs; + Finite_elements_spec := all_mregs_complete +}. + +Definition mreg_type (r: mreg): typ := Tany64. + +Open Scope positive_scope. + +Module IndexedMreg <: INDEXED_TYPE. + Definition t := mreg. + Definition eq := mreg_eq. + Definition index (r: mreg): positive := + match r with + | R0 => 1 | R1 => 2 | R2 => 3 | R3 => 4 | R4 => 5 + | R5 => 6 | R6 => 7 | R7 => 8 | R8 => 9 | R9 => 10 + | R10 => 11 | R11 => 12 (* | R12 => 13 | R13 => 14 | R14 => 15 *) + | R15 => 16 (* | R16 => 17 *) | R17 => 18 | R18 => 19 | R19 => 20 + | R20 => 21 | R21 => 22 | R22 => 23 | R23 => 24 | R24 => 25 + | R25 => 26 | R26 => 27 | R27 => 28 | R28 => 29 | R29 => 30 + | R30 => 31 | R31 => 32 (* | R32 => 33 *) | R33 => 34 | R34 => 35 + | R35 => 36 | R36 => 37 | R37 => 38 | R38 => 39 | R39 => 40 + | R40 => 41 | R41 => 42 | R42 => 43 | R43 => 44 | R44 => 45 + | R45 => 46 | R46 => 47 | R47 => 48 | R48 => 49 | R49 => 50 + | R50 => 51 | R51 => 52 | R52 => 53 | R53 => 54 | R54 => 55 + | R55 => 56 | R56 => 57 | R57 => 58 | R58 => 59 | R59 => 60 + | R60 => 61 | R61 => 62 | R62 => 63 | R63 => 64 + end. + + Lemma index_inj: + forall r1 r2, index r1 = index r2 -> r1 = r2. + Proof. + decide_goal. + Qed. +End IndexedMreg. + +Definition is_stack_reg (r: mreg) : bool := false. + +(** ** Names of registers *) + +Local Open Scope string_scope. + +Definition register_names := + ("R0" , R0) :: ("R1" , R1) :: ("R2" , R2) :: ("R3" , R3) :: ("R4" , R4) + :: ("R5" , R5) :: ("R6" , R6) :: ("R7" , R7) :: ("R8" , R8) :: ("R9" , R9) + :: ("R10", R10) :: ("R11", R11) (* :: ("R12", R12) :: ("R13", R13) :: ("R14", R14) *) + :: ("R15", R15) (* :: ("R16", R16) *) :: ("R17", R17) :: ("R18", R18) :: ("R19", R19) + :: ("R20", R20) :: ("R21", R21) :: ("R22", R22) :: ("R23", R23) :: ("R24", R24) + :: ("R25", R25) :: ("R26", R26) :: ("R27", R27) :: ("R28", R28) :: ("R29", R29) + :: ("R30", R30) :: ("R31", R31) (* :: ("R32", R32) *) :: ("R33", R33) :: ("R34", R34) + :: ("R35", R35) :: ("R36", R36) :: ("R37", R37) :: ("R38", R38) :: ("R39", R39) + :: ("R40", R40) :: ("R41", R41) :: ("R42", R42) :: ("R43", R43) :: ("R44", R44) + :: ("R45", R45) :: ("R46", R46) :: ("R47", R47) :: ("R48", R48) :: ("R49", R49) + :: ("R50", R50) :: ("R51", R51) :: ("R52", R52) :: ("R53", R53) :: ("R54", R54) + :: ("R55", R55) :: ("R56", R56) :: ("R57", R57) :: ("R58", R58) :: ("R59", R59) + :: ("R60", R60) :: ("R61", R61) :: ("R62", R62) :: ("R63", R63) :: nil. + +Definition register_by_name (s: string) : option mreg := + let fix assoc (l: list (string * mreg)) : option mreg := + match l with + | nil => None + | (s1, r1) :: l' => if string_dec s s1 then Some r1 else assoc l' + end + in assoc register_names. + +(** ** Destroyed registers, preferred registers *) + +Definition destroyed_by_op (op: operation): list mreg := nil. +(*match op with + | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle + | Olongoffloat | Olonguoffloat | Olongofsingle | Olonguofsingle + => F6 :: nil + | _ => nil + end. +*) + +Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := nil. + +Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil. + +Definition destroyed_by_cond (cond: condition): list mreg := nil. + +Definition destroyed_by_jumptable: list mreg := R62 :: R63 :: nil. + +Fixpoint destroyed_by_clobber (cl: list string): list mreg := + match cl with + | nil => nil + | c1 :: cl => + match register_by_name c1 with + | Some r => r :: destroyed_by_clobber cl + | None => destroyed_by_clobber cl + end + end. + +Definition destroyed_by_builtin (ef: external_function): list mreg := + match ef with + | EF_inline_asm txt sg clob => destroyed_by_clobber clob + | EF_memcpy sz al => + if Z.leb sz 15 + then R62 :: R63 :: R61 :: nil + else R62 :: R63 :: R61 :: R60 :: nil + | EF_profiling _ _ => R62 :: R63 ::nil + | _ => nil + end. + +Definition destroyed_by_setstack (ty: typ): list mreg := nil. + +Definition destroyed_at_function_entry: list mreg := R17 :: nil. + +Definition temp_for_parent_frame: mreg := R17. (* Temporary used to store the parent frame, where the arguments are *) + +Definition destroyed_at_indirect_call: list mreg := nil. + (* R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil. *) + +Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := (nil, None). + +(* FIXME DMonniaux this seems to be the place for preferred registers for arguments *) +Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(option mreg) := (nil, nil). + + (* match ef with + | EF_builtin name sg => + if (negb Archi.ptr64) && string_dec name "__builtin_bswap64" then + (Some R6 :: Some R5 :: nil, Some R5 :: Some R6 :: nil) + else + (nil, nil) + | _ => + (nil, nil) + end. *) + +Global Opaque + destroyed_by_op destroyed_by_load destroyed_by_store + destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin + destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame + mregs_for_operation mregs_for_builtin. + +(** Two-address operations. Return [true] if the first argument and + the result must be in the same location *and* are unconstrained + by [mregs_for_operation]. There are two: the pseudo [Ocast32signed], + because it expands to a no-op owing to the representation of 32-bit + integers as their 64-bit sign extension; and [Ocast32unsigned], + because it builds on the same magic no-op. *) + +Definition two_address_op (op: operation) : bool := + match op with + | Ofmaddf | Ofmaddfs + | Ofmsubf | Ofmsubfs + | Omadd | Omaddimm _ + | Omaddl | Omaddlimm _ + | Omsub | Omsubl + | Osel _ _ | Oselimm _ _ | Osellimm _ _ + | Oinsf _ _ | Oinsfl _ _ => true + | _ => false + end. + +(** Constraints on constant propagation for builtins *) + +Definition builtin_constraints (ef: external_function) : + list builtin_arg_constraint := + match ef with + | EF_builtin id sg => + if string_dec id "__builtin_kvx_get" then OK_const :: nil + else if string_dec id "__builtin_kvx_set" + then OK_const :: OK_default :: nil + else if string_dec id "__builtin_kvx_wfxl" + then OK_const :: OK_default :: nil + else if string_dec id "__builtin_kvx_wfxm" + then OK_const :: OK_default :: nil + else nil + | EF_vload _ => OK_addressing :: nil + | EF_vstore _ => OK_addressing :: OK_default :: nil + | EF_memcpy _ _ => OK_addrstack :: OK_addrstack :: nil + | EF_annot kind txt targs => map (fun _ => OK_all) targs + | EF_debug kind txt targs => map (fun _ => OK_all) targs + | _ => nil + end. diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml new file mode 100644 index 00000000..dbb89727 --- /dev/null +++ b/kvx/Machregsaux.ml @@ -0,0 +1,35 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Auxiliary functions on machine registers *) + +open Camlcoq +open Machregs + +let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31 + +let _ = + List.iter + (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s)) + Machregs.register_names + +let is_scratch_register r = false + +let class_of_type = function + | AST.Tint | AST.Tlong + | AST.Tfloat | AST.Tsingle -> 0 + | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli new file mode 100644 index 00000000..23ac1c9a --- /dev/null +++ b/kvx/Machregsaux.mli @@ -0,0 +1,20 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Auxiliary functions on machine registers *) + +val is_scratch_register: string -> bool + +val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/kvx/NeedOp.v b/kvx/NeedOp.v new file mode 100644 index 00000000..4578b4e8 --- /dev/null +++ b/kvx/NeedOp.v @@ -0,0 +1,415 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs. +Require Import Op RTL. +Require Import NeedDomain. +Require Import Lia. + +(** Neededness analysis for RISC-V operators *) + +Definition op1 (nv: nval) := nv :: nil. +Definition op2 (nv: nval) := nv :: nv :: nil. +Definition op3 (nv: nval) := nv :: nv :: nv :: nil. + +Definition needs_of_condition (cond: condition): list nval := nil. +Definition needs_of_condition0 (cond0: condition0): list nval := nil. + +Definition needs_of_operation (op: operation) (nv: nval): list nval := + match op with + | Omove => op1 nv + | Ointconst n => nil + | Olongconst n => nil + | Ofloatconst n => nil + | Osingleconst n => nil + | Oaddrsymbol id ofs => nil + | Oaddrstack ofs => nil + | Ocast8signed => op1 (sign_ext 8 nv) + | Ocast16signed => op1 (sign_ext 16 nv) + | Oadd => op2 (modarith nv) + | Oaddimm n => op1 (modarith nv) + | Oaddx _ => op2 (default nv) + | Oaddximm _ _ => op1 (default nv) + | Oneg => op1 (modarith nv) + | Osub => op2 (default nv) + | Orevsubimm _ => op1 (default nv) + | Orevsubx _ => op2 (default nv) + | Orevsubximm _ _ => op1 (default nv) + | Omul => op2 (modarith nv) + | Omulimm _ => op1 (modarith nv) + | Omulhs | Omulhu | Odiv | Odivu | Omod | Omodu => op2 (default nv) + | Oand => op2 (bitwise nv) + | Oandimm n => op1 (andimm nv n) + | Onand => op2 (bitwise nv) + | Onandimm n => op1 (andimm nv n) + | Oor => op2 (bitwise nv) + | Oorimm n => op1 (orimm nv n) + | Onor => op2 (bitwise nv) + | Onorimm n => op1 (orimm nv n) + | Oxor => op2 (bitwise nv) + | Oxorimm n => op1 (bitwise nv) + | Onxor => op2 (bitwise nv) + | Onxorimm n => op1 (bitwise nv) + | Onot => op1 (bitwise nv) + | Oandn => op2 (bitwise nv) + | Oandnimm n => op1 (andimm nv n) + | Oorn => op2 (bitwise nv) + | Oornimm n => op1 (orimm nv n) + | Oshl | Oshr | Oshru => op2 (default nv) + | Oshlimm n => op1 (shlimm nv n) + | Oshrimm n => op1 (shrimm nv n) + | Ororimm n => op1 (ror nv n) + | Oshruimm n => op1 (shruimm nv n) + | Oshrximm n => op1 (default nv) + | Omadd => op3 (modarith nv) + | Omaddimm n => op2 (modarith nv) + | Omsub => op3 (modarith nv) + | Omakelong => op2 (default nv) + | Olowlong | Ohighlong => op1 (default nv) + | Ocast32signed => op1 (default nv) + | Ocast32unsigned => op1 (default nv) + | Oaddl => op2 (default nv) + | Oaddlimm n => op1 (default nv) + | Oaddxl _ => op2 (default nv) + | Oaddxlimm _ _ => op1 (default nv) + | Orevsublimm _ => op1 (default nv) + | Orevsubxl _ => op2 (default nv) + | Orevsubxlimm _ _ => op1 (default nv) + | Onegl => op1 (default nv) + | Osubl => op2 (default nv) + | Omull => op2 (default nv) + | Omullimm _ => op1 (default nv) + | Omullhs | Omullhu | Odivl | Odivlu | Omodl | Omodlu => op2 (default nv) + | Oandl => op2 (default nv) + | Oandlimm n => op1 (default nv) + | Onandl => op2 (default nv) + | Onandlimm n => op1 (default nv) + | Oorl => op2 (default nv) + | Oorlimm n => op1 (default nv) + | Onorl => op2 (default nv) + | Onorlimm n => op1 (default nv) + | Oxorl => op2 (default nv) + | Oxorlimm n => op1 (default nv) + | Onxorl => op2 (default nv) + | Onxorlimm n => op1 (default nv) + | Onotl => op1 (default nv) + | Oandnl => op2 (default nv) + | Oandnlimm n => op1 (default nv) + | Oornl => op2 (default nv) + | Oornlimm n => op1 (default nv) + | Oshll | Oshrl | Oshrlu => op2 (default nv) + | Oshllimm n => op1 (default nv) + | Oshrlimm n => op1 (default nv) + | Oshrluimm n => op1 (default nv) + | Oshrxlimm n => op1 (default nv) + | Omaddl => op3 (default nv) + | Omaddlimm n => op2 (default nv) + | Omsubl => op3 (default nv) + | Onegf | Oabsf => op1 (default nv) + | Oaddf | Osubf | Omulf | Odivf | Ominf | Omaxf => op2 (default nv) + | Ofmaddf | Ofmsubf => op3 (default nv) + | Onegfs | Oabsfs => op1 (default nv) + | Oaddfs | Osubfs | Omulfs | Odivfs | Ominfs | Omaxfs => op2 (default nv) + | Oinvfs => op1 (default nv) + | Ofmaddfs | Ofmsubfs => op3 (default nv) + | Ofloatofsingle | Osingleoffloat => op1 (default nv) + | Ointoffloat | Ointuoffloat => op1 (default nv) + | Olongoffloat | Olonguoffloat | Ofloatoflong | Ofloatoflongu => op1 (default nv) + | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv) + | Olongofsingle | Olonguofsingle | Osingleoflong | Osingleoflongu => op1 (default nv) + | Ocmp c => needs_of_condition c + | Oextfz _ _ | Oextfs _ _ | Oextfzl _ _ | Oextfsl _ _ => op1 (default nv) + | Oinsf _ _ | Oinsfl _ _ => op2 (default nv) + | Osel c ty => nv :: nv :: needs_of_condition0 c + | Oselimm c imm + | Osellimm c imm => nv :: needs_of_condition0 c + end. + +Definition operation_is_redundant (op: operation) (nv: nval): bool := + match op with + | Ocast8signed => sign_ext_redundant 8 nv + | Ocast16signed => sign_ext_redundant 16 nv + | Oandimm n => andimm_redundant nv n + | Oorimm n => orimm_redundant nv n + | _ => false + end. + +Ltac InvAgree := + match goal with + | [H: vagree_list nil _ _ |- _ ] => inv H; InvAgree + | [H: vagree_list (_::_) _ _ |- _ ] => inv H; InvAgree + | _ => idtac + end. + +Ltac TrivialExists := + match goal with + | [ |- exists v, Some ?x = Some v /\ _ ] => exists x; split; auto + | _ => idtac + end. + +Section SOUNDNESS. + +Variable ge: genv. +Variable sp: block. +Variables m1 m2: mem. +Hypothesis PERM: forall b ofs k p, Mem.perm m1 b ofs k p -> Mem.perm m2 b ofs k p. + +Lemma needs_of_condition_sound: + forall cond args b args', + eval_condition cond args m1 = Some b -> + vagree_list args args' (needs_of_condition cond) -> + eval_condition cond args' m2 = Some b. +Proof. + intros. unfold needs_of_condition in H0. + eapply default_needs_of_condition_sound; eauto. +Qed. + +Let valid_pointer_inj: + forall b1 ofs b2 delta, + inject_id b1 = Some(b2, delta) -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. +Proof. + unfold inject_id; intros. inv H. rewrite Ptrofs.add_zero. + rewrite Mem.valid_pointer_nonempty_perm in *. eauto. +Qed. + +Let weak_valid_pointer_inj: + forall b1 ofs b2 delta, + inject_id b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. +Proof. + unfold inject_id; intros. inv H. rewrite Ptrofs.add_zero. + rewrite Mem.weak_valid_pointer_spec in *. + rewrite ! Mem.valid_pointer_nonempty_perm in *. + destruct H0; [left|right]; eauto. +Qed. + +Let weak_valid_pointer_no_overflow: + forall b1 ofs b2 delta, + inject_id b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned. +Proof. + unfold inject_id; intros. inv H. rewrite Z.add_0_r. apply Ptrofs.unsigned_range_2. +Qed. + +Let valid_different_pointers_inj: + forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2, + b1 <> b2 -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true -> + Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true -> + inject_id b1 = Some (b1', delta1) -> + inject_id b2 = Some (b2', delta2) -> + b1' <> b2' \/ + Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned (Ptrofs.add ofs2 (Ptrofs.repr delta2)). +Proof. + unfold inject_id; intros. left; congruence. +Qed. + +Lemma needs_of_condition0_sound: + forall cond arg1 b arg2, + eval_condition0 cond arg1 m1 = Some b -> + vagree arg1 arg2 All -> + eval_condition0 cond arg2 m2 = Some b. +Proof. + intros until arg2. + intros Hcond Hagree. + apply eval_condition0_inj with (f := inject_id) (m1 := m1) (v1 := arg1); cbn; auto. + apply val_inject_lessdef. apply lessdef_vagree. assumption. +Qed. + +Lemma addl_sound: + forall v1 w1 v2 w2 x, + vagree v1 w1 (default x) -> vagree v2 w2 (default x) -> + vagree (Val.addl v1 v2) (Val.addl w1 w2) x. +Proof. + unfold default; intros. + destruct x; cbn in *; trivial. + - unfold Val.addl. + destruct v1; destruct v2; trivial; destruct Archi.ptr64; trivial. + - apply Val.addl_lessdef; trivial. +Qed. + +Lemma subl_lessdef: + forall v1 v1' v2 v2', + Val.lessdef v1 v1' -> Val.lessdef v2 v2' -> Val.lessdef (Val.subl v1 v2) (Val.subl v1' v2'). +Proof. + intros. inv H. inv H0. auto. destruct v1'; cbn; auto. cbn; auto. +Qed. + +Lemma subl_sound: + forall v1 w1 v2 w2 x, + vagree v1 w1 (default x) -> vagree v2 w2 (default x) -> + vagree (Val.subl v1 v2) (Val.subl w1 w2) x. +Proof. + unfold default; intros. + destruct x; cbn in *; trivial. + - unfold Val.subl. + destruct v1; destruct v2; trivial; destruct Archi.ptr64; cbn; trivial. + destruct (eq_block _ _) ; cbn; trivial. + - apply subl_lessdef; trivial. +Qed. + + +Lemma mull_sound: + forall v1 w1 v2 w2 x, + vagree v1 w1 (default x) -> vagree v2 w2 (default x) -> + vagree (Val.mull v1 v2) (Val.mull w1 w2) x. +Proof. + unfold default; intros. + destruct x; cbn in *; trivial. + - unfold Val.mull. + destruct v1; destruct v2; trivial. + - unfold Val.mull. + destruct v1; destruct v2; trivial. + inv H. inv H0. + trivial. +Qed. + + +Remark default_idem: forall nv, default (default nv) = default nv. +Proof. + destruct nv; cbn; trivial. +Qed. + +Lemma vagree_triple_op_float : + forall f a b c x y z nv, + (vagree a x (default nv)) -> + (vagree b y (default nv)) -> + (vagree c z (default nv)) -> + (vagree (ExtValues.triple_op_float f a b c) + (ExtValues.triple_op_float f x y z) nv). +Proof. + induction nv; + intros Hax Hby Hcz. + - trivial. + - cbn in *. destruct a; cbn; trivial. + destruct b; cbn; trivial. + destruct c; cbn; trivial. + - cbn in *. destruct a; cbn; trivial. + destruct b; cbn; trivial. + destruct c; cbn; trivial. + inv Hax. inv Hby. inv Hcz. + cbn. + constructor. +Qed. + +Lemma vagree_triple_op_single : + forall f a b c x y z nv, + (vagree a x (default nv)) -> + (vagree b y (default nv)) -> + (vagree c z (default nv)) -> + (vagree (ExtValues.triple_op_single f a b c) + (ExtValues.triple_op_single f x y z) nv). +Proof. + induction nv; + intros Hax Hby Hcz. + - trivial. + - cbn in *. destruct a; cbn; trivial. + destruct b; cbn; trivial. + destruct c; cbn; trivial. + - cbn in *. destruct a; cbn; trivial. + destruct b; cbn; trivial. + destruct c; cbn; trivial. + inv Hax. inv Hby. inv Hcz. + cbn. + constructor. +Qed. + +Hint Resolve vagree_triple_op_float vagree_triple_op_single : na. + +Lemma needs_of_operation_sound: + forall op args v nv args', + eval_operation ge (Vptr sp Ptrofs.zero) op args m1 = Some v -> + vagree_list args args' (needs_of_operation op nv) -> + nv <> Nothing -> + exists v', + eval_operation ge (Vptr sp Ptrofs.zero) op args' m2 = Some v' + /\ vagree v v' nv. +Proof. + unfold needs_of_operation; intros; destruct op; try (eapply default_needs_of_operation_sound; eauto; fail); + cbn in *; FuncInv; InvAgree; TrivialExists. +- apply sign_ext_sound; auto. compute; auto. +- apply sign_ext_sound; auto. compute; auto. +- apply add_sound; auto. +- apply add_sound; auto with na. +- apply neg_sound; auto. +- apply mul_sound; auto. +- apply mul_sound; auto with na. +- apply and_sound; auto. +- apply andimm_sound; auto. +- apply notint_sound; apply and_sound; auto. +- apply notint_sound; apply andimm_sound; auto. +- apply or_sound; auto. +- apply orimm_sound; auto. +- apply notint_sound; apply or_sound; auto. +- apply notint_sound; apply orimm_sound; auto. +- apply xor_sound; auto. +- apply xor_sound; auto with na. +- apply notint_sound; apply xor_sound; auto. +- apply notint_sound; apply xor_sound; auto with na. +- apply notint_sound; auto. +- apply and_sound; try apply notint_sound; auto with na. +- apply andimm_sound; try apply notint_sound; auto with na. +- apply or_sound; try apply notint_sound; auto with na. +- apply orimm_sound; try apply notint_sound; auto with na. +- apply shlimm_sound; auto. +- apply shrimm_sound; auto. +- apply shruimm_sound; auto. +- apply ror_sound; auto. + (* madd *) +- apply add_sound; try apply mul_sound; auto with na; rewrite modarith_idem; assumption. +- apply add_sound; try apply mul_sound; auto with na; rewrite modarith_idem; assumption. +- repeat rewrite ExtValues.sub_add_neg. + apply add_sound; trivial. + apply neg_sound; trivial. + rewrite modarith_idem. + apply mul_sound; + rewrite modarith_idem; trivial. +- destruct (eval_condition0 _ _ _) as [b|] eqn:EC. + erewrite needs_of_condition0_sound by eauto. + apply select_sound; auto. + cbn; auto with na. + (* select imm *) +- destruct (eval_condition0 _ _ _) as [b|] eqn:EC. + { erewrite needs_of_condition0_sound by eauto. + apply select_sound; auto with na. } + cbn; auto with na. + (* select long imm *) +- destruct (eval_condition0 _ _ _) as [b|] eqn:EC. + { erewrite needs_of_condition0_sound by eauto. + apply select_sound; auto with na. } + cbn; auto with na. +Qed. + +Lemma operation_is_redundant_sound: + forall op nv arg1 args v arg1' args', + operation_is_redundant op nv = true -> + eval_operation ge (Vptr sp Ptrofs.zero) op (arg1 :: args) m1 = Some v -> + vagree_list (arg1 :: args) (arg1' :: args') (needs_of_operation op nv) -> + vagree v arg1' nv. +Proof. + intros. destruct op; cbn in *; try discriminate; inv H1; FuncInv; subst. +- apply sign_ext_redundant_sound; auto. lia. +- apply sign_ext_redundant_sound; auto. lia. +- apply andimm_redundant_sound; auto. +- apply orimm_redundant_sound; auto. +Qed. + +End SOUNDNESS. diff --git a/kvx/Op.v b/kvx/Op.v new file mode 100644 index 00000000..4458adb3 --- /dev/null +++ b/kvx/Op.v @@ -0,0 +1,2014 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Operators and addressing modes. The abstract syntax and dynamic + semantics for the CminorSel, RTL, LTL and Mach languages depend on the + following types, defined in this library: +- [condition]: boolean conditions for conditional branches; +- [operation]: arithmetic and logical operations; +- [addressing]: addressing modes for load and store operations. + + These types are processor-specific and correspond roughly to what the + processor can compute in one instruction. In other terms, these + types reflect the state of the program after instruction selection. + For a processor-independent set of operations, see the abstract + syntax and dynamic semantics of the Cminor language. +*) + +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values ExtValues Memory Globalenvs Events. + +Set Implicit Arguments. + +(** Conditions (boolean-valued operators). *) + +Inductive condition : Type := + | Ccomp (c: comparison) (**r signed integer comparison *) + | Ccompu (c: comparison) (**r unsigned integer comparison *) + | Ccompimm (c: comparison) (n: int) (**r signed integer comparison with a constant *) + | Ccompuimm (c: comparison) (n: int) (**r unsigned integer comparison with a constant *) + | Ccompl (c: comparison) (**r signed 64-bit integer comparison *) + | Ccomplu (c: comparison) (**r unsigned 64-bit integer comparison *) + | Ccomplimm (c: comparison) (n: int64) (**r signed 64-bit integer comparison with a constant *) + | Ccompluimm (c: comparison) (n: int64) (**r unsigned 64-bit integer comparison with a constant *) + | Ccompf (c: comparison) (**r 64-bit floating-point comparison *) + | Cnotcompf (c: comparison) (**r negation of a floating-point comparison *) + | Ccompfs (c: comparison) (**r 32-bit floating-point comparison *) + | Cnotcompfs (c: comparison). (**r negation of a floating-point comparison *) + +Inductive condition0 : Type := + | Ccomp0 (c: comparison) (**r signed integer comparison with 0 *) + | Ccompu0 (c: comparison) (**r unsigned integer comparison with 0 *) + | Ccompl0 (c: comparison) (**r signed 64-bit integer comparison with 0 *) + | Ccomplu0 (c: comparison). (**r unsigned 64-bit integer comparison with 0 *) + +Definition arg_type_of_condition0 (cond: condition0) := + match cond with + | Ccomp0 _ | Ccompu0 _ => Tint + | Ccompl0 _ | Ccomplu0 _ => Tlong + end. + +(** Arithmetic and logical operations. In the descriptions, [rd] is the + result of the operation and [r1], [r2], etc, are the arguments. *) + +Inductive operation : Type := + | Omove (**r [rd = r1] *) + | Ointconst (n: int) (**r [rd] is set to the given integer constant *) + | Olongconst (n: int64) (**r [rd] is set to the given integer constant *) + | Ofloatconst (n: float) (**r [rd] is set to the given float constant *) + | Osingleconst (n: float32)(**r [rd] is set to the given float constant *) + | Oaddrsymbol (id: ident) (ofs: ptrofs) (**r [rd] is set to the address of the symbol plus the given offset *) + | Oaddrstack (ofs: ptrofs) (**r [rd] is set to the stack pointer plus the given offset *) +(*c 32-bit integer arithmetic: *) + | Ocast8signed (**r [rd] is 8-bit sign extension of [r1] *) + | Ocast16signed (**r [rd] is 16-bit sign extension of [r1] *) + | Oadd (**r [rd = r1 + r2] *) + | Oaddimm (n: int) (**r [rd = r1 + n] *) + | Oaddx (shift: shift1_4) (**r [rd = r1 << shift + r2] *) + | Oaddximm (shift: shift1_4) (n: int) (**r [rd = r1 << shift + n] *) + | Oneg (**r [rd = - r1] *) + | Osub (**r [rd = r1 - r2] *) + | Orevsubimm (n: int) (**r [rd = n - r1] *) + | Orevsubx (shift: shift1_4) (**r [rd = r2 -r1 << shift] *) + | Orevsubximm (shift: shift1_4) (n: int) (**r [rd = n -r1 << shift] *) + | Omul (**r [rd = r1 * r2] *) + | Omulimm (n: int) (**r [rd = r1 * n] *) + | Omulhs (**r [rd = high part of r1 * r2, signed] *) + | Omulhu (**r [rd = high part of r1 * r2, unsigned] *) + | Odiv (**r [rd = r1 / r2] (signed) *) + | Odivu (**r [rd = r1 / r2] (unsigned) *) + | Omod (**r [rd = r1 % r2] (signed) *) + | Omodu (**r [rd = r1 % r2] (unsigned) *) + | Oand (**r [rd = r1 & r2] *) + | Oandimm (n: int) (**r [rd = r1 & n] *) + | Onand (**r [rd = ~(r1 & r2)] *) + | Onandimm (n: int) (**r [rd = ~(r1 & n)] *) + | Oor (**r [rd = r1 | r2] *) + | Oorimm (n: int) (**r [rd = r1 | n] *) + | Onor (**r [rd = ~(r1 | r2)] *) + | Onorimm (n: int) (**r [rd = ~(r1 | n)] *) + | Oxor (**r [rd = r1 ^ r2] *) + | Oxorimm (n: int) (**r [rd = r1 ^ n] *) + | Onxor (**r [rd = ~(r1 ^ r2)] *) + | Onxorimm (n: int) (**r [rd = ~(r1 ^ n)] *) + | Onot (**r [rd = ~r1] *) + | Oandn (**r [rd = (~r1) & r2] *) + | Oandnimm (n: int) (**r [rd = (~r1) & n] *) + | Oorn (**r [rd = (~r1) | r2] *) + | Oornimm (n: int) (**r [rd = (~r1) | n] *) + | Oshl (**r [rd = r1 << r2] *) + | Oshlimm (n: int) (**r [rd = r1 << n] *) + | Oshr (**r [rd = r1 >>s r2] (signed) *) + | Oshrimm (n: int) (**r [rd = r1 >>s n] (signed) *) + | Oshru (**r [rd = r1 >>u r2] (unsigned) *) + | Oshruimm (n: int) (**r [rd = r1 >>x n] (unsigned) *) + | Oshrximm (n: int) (**r [rd = r1 / 2^n] (signed) *) + | Ororimm (n: int) (**r rotate right immediate *) + | Omadd (**r [rd = rd + r1 * r2] *) + | Omaddimm (n: int) (**r [rd = rd + r1 * imm] *) + | Omsub (**r [rd = rd - r1 * r2] *) +(*c 64-bit integer arithmetic: *) + | Omakelong (**r [rd = r1 << 32 | r2] *) + | Olowlong (**r [rd = low-word(r1)] *) + | Ohighlong (**r [rd = high-word(r1)] *) + | Ocast32signed (**r [rd] is 32-bit sign extension of [r1] *) + | Ocast32unsigned (**r [rd] is 32-bit zero extension of [r1] *) + | Oaddl (**r [rd = r1 + r2] *) + | Oaddlimm (n: int64) (**r [rd = r1 + n] *) + | Oaddxl (shift: shift1_4) (**r [rd = r1 << shift + r2] *) + | Oaddxlimm (shift: shift1_4) (n: int64) (**r [rd = r1 << shift + n] *) + | Orevsublimm (n: int64) (**r [rd = n - r1] *) + | Orevsubxl (shift: shift1_4) (**r [rd = r2 -r1 << shift] *) + | Orevsubxlimm (shift: shift1_4) (n: int64) (**r [rd = n -r1 << shift] *) + | Onegl (**r [rd = - r1] *) + | Osubl (**r [rd = r1 - r2] *) + | Omull (**r [rd = r1 * r2] *) + | Omullimm (n: int64) (**r [rd = r1 * n] *) + | Omullhs (**r [rd = high part of r1 * r2, signed] *) + | Omullhu (**r [rd = high part of r1 * r2, unsigned] *) + | Odivl (**r [rd = r1 / r2] (signed) *) + | Odivlu (**r [rd = r1 / r2] (unsigned) *) + | Omodl (**r [rd = r1 % r2] (signed) *) + | Omodlu (**r [rd = r1 % r2] (unsigned) *) + | Oandl (**r [rd = r1 & r2] *) + | Oandlimm (n: int64) (**r [rd = r1 & n] *) + | Onandl (**r [rd = ~(r1 & r2)] *) + | Onandlimm (n: int64) (**r [rd = ~(r1 & n)] *) + | Oorl (**r [rd = r1 | r2] *) + | Oorlimm (n: int64) (**r [rd = r1 | n] *) + | Onorl (**r [rd = ~(r1 | r2)] *) + | Onorlimm (n: int64) (**r [rd = ~(r1 | n)] *) + | Oxorl (**r [rd = r1 ^ r2] *) + | Oxorlimm (n: int64) (**r [rd = r1 ^ n] *) + | Onxorl (**r [rd = ~(r1 ^ r2)] *) + | Onxorlimm (n: int64) (**r [rd = ~(r1 ^ n)] *) + | Onotl (**r [rd = ~r1] *) + | Oandnl (**r [rd = (~r1) & r2] *) + | Oandnlimm (n: int64) (**r [rd = (~r1) & n] *) + | Oornl (**r [rd = (~r1) | r2] *) + | Oornlimm (n: int64) (**r [rd = (~r1) | n] *) + | Oshll (**r [rd = r1 << r2] *) + | Oshllimm (n: int) (**r [rd = r1 << n] *) + | Oshrl (**r [rd = r1 >> r2] (signed) *) + | Oshrlimm (n: int) (**r [rd = r1 >> n] (signed) *) + | Oshrlu (**r [rd = r1 >> r2] (unsigned) *) + | Oshrluimm (n: int) (**r [rd = r1 >> n] (unsigned) *) + | Oshrxlimm (n: int) (**r [rd = r1 / 2^n] (signed) *) + | Omaddl (**r [rd = rd + r1 * r2] *) + | Omaddlimm (n: int64) (**r [rd = rd + r1 * imm] *) + | Omsubl (**r [rd = rd - r1 * r2] *) +(*c Floating-point arithmetic: *) + | Onegf (**r [rd = - r1] *) + | Oabsf (**r [rd = abs(r1)] *) + | Oaddf (**r [rd = r1 + r2] *) + | Osubf (**r [rd = r1 - r2] *) + | Omulf (**r [rd = r1 * r2] *) + | Odivf (**r [rd = r1 / r2] *) + | Ominf + | Omaxf + | Ofmaddf + | Ofmsubf + | Onegfs (**r [rd = - r1] *) + | Oabsfs (**r [rd = abs(r1)] *) + | Oaddfs (**r [rd = r1 + r2] *) + | Osubfs (**r [rd = r1 - r2] *) + | Omulfs (**r [rd = r1 * r2] *) + | Odivfs (**r [rd = r1 / r2] *) + | Ominfs + | Omaxfs + | Oinvfs + | Ofmaddfs + | Ofmsubfs + | Osingleoffloat (**r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (**r [rd] is [r1] extended to double-precision float *) +(*c Conversions between int and float: *) + | Ointoffloat (**r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (**r [rd = unsigned_int_of_float64(r1)] *) + | Ointofsingle (**r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (**r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (**r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (**r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (**r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (**r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (**r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (**r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (**r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (**r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (**r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (**r [rd = float32_of_unsigned_int(r1)] *) +(*c Boolean tests: *) + | Ocmp (cond: condition) (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) + | Oextfz (stop : Z) (start : Z) + | Oextfs (stop : Z) (start : Z) + | Oextfzl (stop : Z) (start : Z) + | Oextfsl (stop : Z) (start : Z) + | Oinsf (stop : Z) (start : Z) + | Oinsfl (stop : Z) (start : Z) + | Osel (c0 : condition0) (ty : typ) + | Oselimm (c0 : condition0) (imm: int) + | Osellimm (c0 : condition0) (imm: int64). + +(** Addressing modes. [r1], [r2], etc, are the arguments to the + addressing. *) + +Inductive addressing: Type := + | Aindexed2XS (scale : Z) : addressing (**r Address is [r1 + r2 << scale] *) + | Aindexed2 : addressing (**r Address is [r1 + r2] *) + | Aindexed: ptrofs -> addressing (**r Address is [r1 + offset] *) + | Aglobal: ident -> ptrofs -> addressing (**r Address is global plus offset *) + | Ainstack: ptrofs -> addressing. (**r Address is [stack_pointer + offset] *) + +(** Comparison functions (used in modules [CSE] and [Allocation]). *) + +Definition eq_condition (x y: condition) : {x=y} + {x<>y}. +Proof. + generalize Int.eq_dec Int64.eq_dec; intro. + assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality. + decide equality. +Defined. + +Definition eq_condition0 (x y: condition0) : {x=y} + {x<>y}. +Proof. + generalize Int.eq_dec Int64.eq_dec; intro. + assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality. + decide equality. +Defined. + +Definition eq_addressing (x y: addressing) : {x=y} + {x<>y}. +Proof. + generalize ident_eq Ptrofs.eq_dec Z.eq_dec; intros. + decide equality. +Defined. + +Definition eq_shift1_4 (x y : shift1_4): {x=y} + {x<>y}. +Proof. + decide equality. +Defined. + +Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}. +Proof. + generalize typ_eq Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition eq_condition0 Z.eq_dec eq_shift1_4; intros. + decide equality. +Defined. + +(* Alternate definition: +Definition beq_operation: forall (x y: operation), bool. +Proof. + generalize Int.eq_dec Int64.eq_dec Ptrofs.eq_dec Float.eq_dec Float32.eq_dec ident_eq eq_condition; boolean_equality. +Defined. + +Definition eq_operation: forall (x y: operation), {x=y} + {x<>y}. +Proof. + decidable_equality_from beq_operation. +Defined. +*) + +Global Opaque eq_condition eq_addressing eq_operation. + +(** * Evaluation functions *) + +(** Evaluation of conditions, operators and addressing modes applied + to lists of values. Return [None] when the computation can trigger an + error, e.g. integer division by zero. [eval_condition] returns a boolean, + [eval_operation] and [eval_addressing] return a value. *) + +Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool := + match cond, vl with + | Ccomp c, v1 :: v2 :: nil => Val.cmp_bool c v1 v2 + | Ccompu c, v1 :: v2 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 v2 + | Ccompimm c n, v1 :: nil => Val.cmp_bool c v1 (Vint n) + | Ccompuimm c n, v1 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint n) + | Ccompl c, v1 :: v2 :: nil => Val.cmpl_bool c v1 v2 + | Ccomplu c, v1 :: v2 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 v2 + | Ccomplimm c n, v1 :: nil => Val.cmpl_bool c v1 (Vlong n) + | Ccompluimm c n, v1 :: nil => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong n) + | Ccompf c, v1 :: v2 :: nil => Val.cmpf_bool c v1 v2 + | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2) + | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2 + | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2) + | _, _ => None + end. + +Definition eval_condition0 (cond: condition0) (v1: val) (m: mem): option bool := + match cond with + | Ccomp0 c => Val.cmp_bool c v1 (Vint Int.zero) + | Ccompu0 c => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint Int.zero) + | Ccompl0 c => Val.cmpl_bool c v1 (Vlong Int64.zero) + | Ccomplu0 c => Val.cmplu_bool (Mem.valid_pointer m) c v1 (Vlong Int64.zero) + end. + +Definition negate_condition0 (cond0 : condition0) : condition0 := + match cond0 with + | Ccomp0 c => Ccomp0 (negate_comparison c) + | Ccompu0 c => Ccompu0 (negate_comparison c) + | Ccompl0 c => Ccompl0 (negate_comparison c) + | Ccomplu0 c => Ccomplu0 (negate_comparison c) + end. + +Definition eval_operation + (F V: Type) (genv: Genv.t F V) (sp: val) + (op: operation) (vl: list val) (m: mem): option val := + match op, vl with + | Omove, v1::nil => Some v1 + | Ointconst n, nil => Some (Vint n) + | Olongconst n, nil => Some (Vlong n) + | Ofloatconst n, nil => Some (Vfloat n) + | Osingleconst n, nil => Some (Vsingle n) + | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs) + | Oaddrstack ofs, nil => Some (Val.offset_ptr sp ofs) + | Ocast8signed, v1 :: nil => Some (Val.sign_ext 8 v1) + | Ocast16signed, v1 :: nil => Some (Val.sign_ext 16 v1) + | Oadd, v1 :: v2 :: nil => Some (Val.add v1 v2) + | Oaddimm n, v1 :: nil => Some (Val.add v1 (Vint n)) + | Oaddx s14, v1 :: v2 :: nil => Some (addx (int_of_shift1_4 s14) v1 v2) + | Oaddximm s14 n, v1 :: nil => Some (addx (int_of_shift1_4 s14) v1 (Vint n)) + | Oneg, v1 :: nil => Some (Val.neg v1) + | Osub, v1 :: v2 :: nil => Some (Val.sub v1 v2) + | Orevsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1) + | Orevsubx shift, v1 :: v2 :: nil => Some (ExtValues.revsubx (int_of_shift1_4 shift) v1 v2) + | Orevsubximm shift n, v1 :: nil => Some (ExtValues.revsubx (int_of_shift1_4 shift) v1 (Vint n)) + | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) + | Omulimm n, v1 :: nil => Some (Val.mul v1 (Vint n)) + | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2) + | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2) + | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 + | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 + | Omod, v1 :: v2 :: nil => Val.mods v1 v2 + | Omodu, v1 :: v2 :: nil => Val.modu v1 v2 + | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) + | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n)) + | Onand, v1 :: v2 :: nil => Some (Val.notint (Val.and v1 v2)) + | Onandimm n, v1 :: nil => Some (Val.notint (Val.and v1 (Vint n))) + | Oor, v1 :: v2 :: nil => Some (Val.or v1 v2) + | Oorimm n, v1 :: nil => Some (Val.or v1 (Vint n)) + | Onor, v1 :: v2 :: nil => Some (Val.notint (Val.or v1 v2)) + | Onorimm n, v1 :: nil => Some (Val.notint (Val.or v1 (Vint n))) + | Oxor, v1 :: v2 :: nil => Some (Val.xor v1 v2) + | Oxorimm n, v1 :: nil => Some (Val.xor v1 (Vint n)) + | Onxor, v1 :: v2 :: nil => Some (Val.notint (Val.xor v1 v2)) + | Onxorimm n, v1 :: nil => Some (Val.notint (Val.xor v1 (Vint n))) + | Onot, v1 :: nil => Some (Val.notint v1) + | Oandn, v1 :: v2 :: nil => Some (Val.and (Val.notint v1) v2) + | Oandnimm n, v1 :: nil => Some (Val.and (Val.notint v1) (Vint n)) + | Oorn, v1 :: v2 :: nil => Some (Val.or (Val.notint v1) v2) + | Oornimm n, v1 :: nil => Some (Val.or (Val.notint v1) (Vint n)) + | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2) + | Oshlimm n, v1 :: nil => Some (Val.shl v1 (Vint n)) + | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2) + | Oshrimm n, v1 :: nil => Some (Val.shr v1 (Vint n)) + | Ororimm n, v1 :: nil => Some (Val.ror v1 (Vint n)) + | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2) + | Oshruimm n, v1 :: nil => Some (Val.shru v1 (Vint n)) + | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n))) + | Omadd, v1::v2::v3::nil => Some (Val.add v1 (Val.mul v2 v3)) + | (Omaddimm n), v1::v2::nil => Some (Val.add v1 (Val.mul v2 (Vint n))) + | Omsub, v1::v2::v3::nil => Some (Val.sub v1 (Val.mul v2 v3)) + + | Omakelong, v1::v2::nil => Some (Val.longofwords v1 v2) + | Olowlong, v1::nil => Some (Val.loword v1) + | Ohighlong, v1::nil => Some (Val.hiword v1) + | Ocast32signed, v1 :: nil => Some (Val.longofint v1) + | Ocast32unsigned, v1 :: nil => Some (Val.longofintu v1) + | Oaddl, v1 :: v2 :: nil => Some (Val.addl v1 v2) + | Oaddlimm n, v1::nil => Some (Val.addl v1 (Vlong n)) + | Oaddxl s14, v1 :: v2 :: nil => Some (addxl (int_of_shift1_4 s14) v1 v2) + | Oaddxlimm s14 n, v1 :: nil => Some (addxl (int_of_shift1_4 s14) v1 (Vlong n)) + | Onegl, v1::nil => Some (Val.negl v1) + | Osubl, v1::v2::nil => Some (Val.subl v1 v2) + | Orevsublimm n, v1 :: nil => Some (Val.subl (Vlong n) v1) + | Orevsubxl shift, v1 :: v2 :: nil => Some (ExtValues.revsubxl (int_of_shift1_4 shift) v1 v2) + | Orevsubxlimm shift n, v1 :: nil => Some (ExtValues.revsubxl (int_of_shift1_4 shift) v1 (Vlong n)) + | Omull, v1::v2::nil => Some (Val.mull v1 v2) + | Omullimm n, v1::nil => Some (Val.mull v1 (Vlong n)) + | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) + | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) + | Odivl, v1::v2::nil => Val.divls v1 v2 + | Odivlu, v1::v2::nil => Val.divlu v1 v2 + | Omodl, v1::v2::nil => Val.modls v1 v2 + | Omodlu, v1::v2::nil => Val.modlu v1 v2 + | Oandl, v1::v2::nil => Some(Val.andl v1 v2) + | Oandlimm n, v1::nil => Some (Val.andl v1 (Vlong n)) + | Onandl, v1::v2::nil => Some(Val.notl (Val.andl v1 v2)) + | Onandlimm n, v1::nil => Some(Val.notl (Val.andl v1 (Vlong n))) + | Oorl, v1::v2::nil => Some(Val.orl v1 v2) + | Oorlimm n, v1::nil => Some (Val.orl v1 (Vlong n)) + | Onorl, v1::v2::nil => Some(Val.notl (Val.orl v1 v2)) + | Onorlimm n, v1::nil => Some(Val.notl (Val.orl v1 (Vlong n))) + | Oxorl, v1::v2::nil => Some(Val.xorl v1 v2) + | Oxorlimm n, v1::nil => Some (Val.xorl v1 (Vlong n)) + | Onxorl, v1::v2::nil => Some(Val.notl (Val.xorl v1 v2)) + | Onxorlimm n, v1::nil => Some(Val.notl (Val.xorl v1 (Vlong n))) + | Onotl, v1 :: nil => Some (Val.notl v1) + | Oandnl, v1 :: v2 :: nil => Some (Val.andl (Val.notl v1) v2) + | Oandnlimm n, v1 :: nil => Some (Val.andl (Val.notl v1) (Vlong n)) + | Oornl, v1 :: v2 :: nil => Some (Val.orl (Val.notl v1) v2) + | Oornlimm n, v1 :: nil => Some (Val.orl (Val.notl v1) (Vlong n)) + | Oshll, v1::v2::nil => Some (Val.shll v1 v2) + | Oshllimm n, v1::nil => Some (Val.shll v1 (Vint n)) + | Oshrl, v1::v2::nil => Some (Val.shrl v1 v2) + | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n)) + | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2) + | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n)) + | Oshrxlimm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n))) + | Omaddl, v1::v2::v3::nil => Some (Val.addl v1 (Val.mull v2 v3)) + | (Omaddlimm n), v1::v2::nil => Some (Val.addl v1 (Val.mull v2 (Vlong n))) + | Omsubl, v1::v2::v3::nil => Some (Val.subl v1 (Val.mull v2 v3)) + + | Onegf, v1::nil => Some (Val.negf v1) + | Oabsf, v1::nil => Some (Val.absf v1) + | Oaddf, v1::v2::nil => Some (Val.addf v1 v2) + | Osubf, v1::v2::nil => Some (Val.subf v1 v2) + | Omulf, v1::v2::nil => Some (Val.mulf v1 v2) + | Odivf, v1::v2::nil => Some (Val.divf v1 v2) + | Ominf, v1::v2::nil => Some (ExtValues.minf v1 v2) + | Omaxf, v1::v2::nil => Some (ExtValues.maxf v1 v2) + | Ofmaddf, v1::v2::v3::nil => Some (ExtValues.fmaddf v1 v2 v3) + | Ofmsubf, v1::v2::v3::nil => Some (ExtValues.fmsubf v1 v2 v3) + + | Onegfs, v1::nil => Some (Val.negfs v1) + | Oabsfs, v1::nil => Some (Val.absfs v1) + | Oaddfs, v1::v2::nil => Some (Val.addfs v1 v2) + | Osubfs, v1::v2::nil => Some (Val.subfs v1 v2) + | Omulfs, v1::v2::nil => Some (Val.mulfs v1 v2) + | Odivfs, v1::v2::nil => Some (Val.divfs v1 v2) + | Ominfs, v1::v2::nil => Some (ExtValues.minfs v1 v2) + | Omaxfs, v1::v2::nil => Some (ExtValues.maxfs v1 v2) + | Oinvfs, v1::nil => Some (ExtValues.invfs v1) + | Ofmaddfs, v1::v2::v3::nil => Some (ExtValues.fmaddfs v1 v2 v3) + | Ofmsubfs, v1::v2::v3::nil => Some (ExtValues.fmsubfs v1 v2 v3) + + | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1) + | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1) + | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1)) + | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1)) + | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1)) + | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1)) + | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1)) + | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1)) + | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1)) + | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1)) + | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1)) + | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1)) + | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1)) + | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) + | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) + | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) + | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) + | (Oextfz stop start), v0::nil => Some (extfz stop start v0) + | (Oextfs stop start), v0::nil => Some (extfs stop start v0) + | (Oextfzl stop start), v0::nil => Some (extfzl stop start v0) + | (Oextfsl stop start), v0::nil => Some (extfsl stop start v0) + | (Oinsf stop start), v0::v1::nil => Some (insf stop start v0 v1) + | (Oinsfl stop start), v0::v1::nil => Some (insfl stop start v0 v1) + | Osel c ty, v1::v2::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 v2 ty) + | Oselimm c imm, v1::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 (Vint imm) Tint) + | Osellimm c imm, v1::vc::nil => Some(Val.select (eval_condition0 c vc m) v1 (Vlong imm) Tlong) + | _, _ => None + end. + +Definition eval_addressing + (F V: Type) (genv: Genv.t F V) (sp: val) + (addr: addressing) (vl: list val) : option val := + match addr, vl with + | Aindexed2XS scale, v1 :: v2 :: nil => Some (Val.addl v1 (Val.shll v2 (Vint (Int.repr scale)))) + | Aindexed2, v1 :: v2 :: nil => Some (Val.addl v1 v2) + | Aindexed n, v1 :: nil => Some (Val.offset_ptr v1 n) + | Aglobal s ofs, nil => Some (Genv.symbol_address genv s ofs) + | Ainstack n, nil => Some (Val.offset_ptr sp n) + | _, _ => None + end. + +Remark eval_addressing_Ainstack: + forall (F V: Type) (genv: Genv.t F V) sp ofs, + eval_addressing genv sp (Ainstack ofs) nil = Some (Val.offset_ptr sp ofs). +Proof. + intros. reflexivity. +Qed. + +Remark eval_addressing_Ainstack_inv: + forall (F V: Type) (genv: Genv.t F V) sp ofs vl v, + eval_addressing genv sp (Ainstack ofs) vl = Some v -> vl = nil /\ v = Val.offset_ptr sp ofs. +Proof. + unfold eval_addressing; intros; destruct vl; inv H; auto. +Qed. + +Ltac FuncInv := + match goal with + | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ => + destruct x; cbn in H; FuncInv + | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => + destruct v; cbn in H; FuncInv + | H: (if Archi.ptr64 then _ else _) = Some _ |- _ => + destruct Archi.ptr64 eqn:?; FuncInv + | H: (Some _ = Some _) |- _ => + injection H; intros; clear H; FuncInv + | H: (None = Some _) |- _ => + discriminate H + | _ => + idtac + end. + +(** * Static typing of conditions, operators and addressing modes. *) + +Definition type_of_condition (c: condition) : list typ := + match c with + | Ccomp _ => Tint :: Tint :: nil + | Ccompu _ => Tint :: Tint :: nil + | Ccompimm _ _ => Tint :: nil + | Ccompuimm _ _ => Tint :: nil + | Ccompl _ => Tlong :: Tlong :: nil + | Ccomplu _ => Tlong :: Tlong :: nil + | Ccomplimm _ _ => Tlong :: nil + | Ccompluimm _ _ => Tlong :: nil + | Ccompf _ => Tfloat :: Tfloat :: nil + | Cnotcompf _ => Tfloat :: Tfloat :: nil + | Ccompfs _ => Tsingle :: Tsingle :: nil + | Cnotcompfs _ => Tsingle :: Tsingle :: nil + end. + +Definition type_of_operation (op: operation) : list typ * typ := + match op with + | Omove => (nil, Tint) (* treated specially *) + | Ointconst _ => (nil, Tint) + | Olongconst _ => (nil, Tlong) + | Ofloatconst f => (nil, Tfloat) + | Osingleconst f => (nil, Tsingle) + | Oaddrsymbol _ _ => (nil, Tptr) + | Oaddrstack _ => (nil, Tptr) + | Ocast8signed => (Tint :: nil, Tint) + | Ocast16signed => (Tint :: nil, Tint) + | Oadd => (Tint :: Tint :: nil, Tint) + | Oaddimm _ => (Tint :: nil, Tint) + | Oaddx _ => (Tint :: Tint :: nil, Tint) + | Oaddximm _ _ => (Tint :: nil, Tint) + | Oneg => (Tint :: nil, Tint) + | Osub => (Tint :: Tint :: nil, Tint) + | Orevsubimm _ => (Tint :: nil, Tint) + | Orevsubx _ => (Tint :: Tint :: nil, Tint) + | Orevsubximm _ _ => (Tint :: nil, Tint) + | Omul => (Tint :: Tint :: nil, Tint) + | Omulimm _ => (Tint :: nil, Tint) + | Omulhs => (Tint :: Tint :: nil, Tint) + | Omulhu => (Tint :: Tint :: nil, Tint) + | Odiv => (Tint :: Tint :: nil, Tint) + | Odivu => (Tint :: Tint :: nil, Tint) + | Omod => (Tint :: Tint :: nil, Tint) + | Omodu => (Tint :: Tint :: nil, Tint) + | Oand => (Tint :: Tint :: nil, Tint) + | Oandimm _ => (Tint :: nil, Tint) + | Onand => (Tint :: Tint :: nil, Tint) + | Onandimm _ => (Tint :: nil, Tint) + | Oor => (Tint :: Tint :: nil, Tint) + | Oorimm _ => (Tint :: nil, Tint) + | Onor => (Tint :: Tint :: nil, Tint) + | Onorimm _ => (Tint :: nil, Tint) + | Oxor => (Tint :: Tint :: nil, Tint) + | Oxorimm _ => (Tint :: nil, Tint) + | Onxor => (Tint :: Tint :: nil, Tint) + | Onxorimm _ => (Tint :: nil, Tint) + | Onot => (Tint :: nil, Tint) + | Oandn => (Tint :: Tint :: nil, Tint) + | Oandnimm _ => (Tint :: nil, Tint) + | Oorn => (Tint :: Tint :: nil, Tint) + | Oornimm _ => (Tint :: nil, Tint) + | Oshl => (Tint :: Tint :: nil, Tint) + | Oshlimm _ => (Tint :: nil, Tint) + | Oshr => (Tint :: Tint :: nil, Tint) + | Oshrimm _ => (Tint :: nil, Tint) + | Oshru => (Tint :: Tint :: nil, Tint) + | Oshruimm _ => (Tint :: nil, Tint) + | Oshrximm _ => (Tint :: nil, Tint) + | Ororimm _ => (Tint :: nil, Tint) + | Omadd => (Tint :: Tint :: Tint :: nil, Tint) + | Omaddimm _ => (Tint :: Tint :: nil, Tint) + | Omsub => (Tint :: Tint :: Tint :: nil, Tint) + + | Omakelong => (Tint :: Tint :: nil, Tlong) + | Olowlong => (Tlong :: nil, Tint) + | Ohighlong => (Tlong :: nil, Tint) + | Ocast32signed => (Tint :: nil, Tlong) + | Ocast32unsigned => (Tint :: nil, Tlong) + | Oaddl => (Tlong :: Tlong :: nil, Tlong) + | Oaddlimm _ => (Tlong :: nil, Tlong) + | Oaddxl _ => (Tlong :: Tlong :: nil, Tlong) + | Oaddxlimm _ _ => (Tlong :: nil, Tlong) + | Orevsublimm _ => (Tlong :: nil, Tlong) + | Orevsubxl _ => (Tlong :: Tlong :: nil, Tlong) + | Orevsubxlimm _ _ => (Tlong :: nil, Tlong) + | Onegl => (Tlong :: nil, Tlong) + | Osubl => (Tlong :: Tlong :: nil, Tlong) + | Omull => (Tlong :: Tlong :: nil, Tlong) + | Omullimm _ => (Tlong :: nil, Tlong) + | Omullhs => (Tlong :: Tlong :: nil, Tlong) + | Omullhu => (Tlong :: Tlong :: nil, Tlong) + | Odivl => (Tlong :: Tlong :: nil, Tlong) + | Odivlu => (Tlong :: Tlong :: nil, Tlong) + | Omodl => (Tlong :: Tlong :: nil, Tlong) + | Omodlu => (Tlong :: Tlong :: nil, Tlong) + | Oandl => (Tlong :: Tlong :: nil, Tlong) + | Oandlimm _ => (Tlong :: nil, Tlong) + | Onandl => (Tlong :: Tlong :: nil, Tlong) + | Onandlimm _ => (Tlong :: nil, Tlong) + | Oorl => (Tlong :: Tlong :: nil, Tlong) + | Oorlimm _ => (Tlong :: nil, Tlong) + | Onorl => (Tlong :: Tlong :: nil, Tlong) + | Onorlimm _ => (Tlong :: nil, Tlong) + | Oxorl => (Tlong :: Tlong :: nil, Tlong) + | Oxorlimm _ => (Tlong :: nil, Tlong) + | Onxorl => (Tlong :: Tlong :: nil, Tlong) + | Onxorlimm _ => (Tlong :: nil, Tlong) + | Onotl => (Tlong :: nil, Tlong) + | Oandnl => (Tlong :: Tlong :: nil, Tlong) + | Oandnlimm _ => (Tlong :: nil, Tlong) + | Oornl => (Tlong :: Tlong :: nil, Tlong) + | Oornlimm _ => (Tlong :: nil, Tlong) + | Oshll => (Tlong :: Tint :: nil, Tlong) + | Oshllimm _ => (Tlong :: nil, Tlong) + | Oshrl => (Tlong :: Tint :: nil, Tlong) + | Oshrlimm _ => (Tlong :: nil, Tlong) + | Oshrlu => (Tlong :: Tint :: nil, Tlong) + | Oshrluimm _ => (Tlong :: nil, Tlong) + | Oshrxlimm _ => (Tlong :: nil, Tlong) + | Omaddl => (Tlong :: Tlong :: Tlong :: nil, Tlong) + | Omaddlimm _ => (Tlong :: Tlong :: nil, Tlong) + | Omsubl => (Tlong :: Tlong :: Tlong :: nil, Tlong) + + | Onegf => (Tfloat :: nil, Tfloat) + | Oabsf => (Tfloat :: nil, Tfloat) + | Oaddf + | Osubf + | Omulf + | Odivf + | Ominf + | Omaxf => (Tfloat :: Tfloat :: nil, Tfloat) + | Ofmaddf | Ofmsubf => (Tfloat :: Tfloat :: Tfloat :: nil, Tfloat) + + | Onegfs => (Tsingle :: nil, Tsingle) + | Oabsfs => (Tsingle :: nil, Tsingle) + | Oaddfs + | Osubfs + | Omulfs + | Odivfs + | Ominfs + | Omaxfs => (Tsingle :: Tsingle :: nil, Tsingle) + | Oinvfs => (Tsingle :: nil, Tsingle) + | Ofmaddfs | Ofmsubfs => (Tsingle :: Tsingle :: Tsingle :: nil, Tsingle) + + | Osingleoffloat => (Tfloat :: nil, Tsingle) + | Ofloatofsingle => (Tsingle :: nil, Tfloat) + | Ointoffloat => (Tfloat :: nil, Tint) + | Ointuoffloat => (Tfloat :: nil, Tint) + | Ointofsingle => (Tsingle :: nil, Tint) + | Ointuofsingle => (Tsingle :: nil, Tint) + | Osingleofint => (Tint :: nil, Tsingle) + | Osingleofintu => (Tint :: nil, Tsingle) + | Olongoffloat => (Tfloat :: nil, Tlong) + | Olonguoffloat => (Tfloat :: nil, Tlong) + | Ofloatoflong => (Tlong :: nil, Tfloat) + | Ofloatoflongu => (Tlong :: nil, Tfloat) + | Olongofsingle => (Tsingle :: nil, Tlong) + | Olonguofsingle => (Tsingle :: nil, Tlong) + | Osingleoflong => (Tlong :: nil, Tsingle) + | Osingleoflongu => (Tlong :: nil, Tsingle) + | Ocmp c => (type_of_condition c, Tint) + | Oextfz _ _ | Oextfs _ _ => (Tint :: nil, Tint) + | Oextfzl _ _ | Oextfsl _ _ => (Tlong :: nil, Tlong) + | Oinsf _ _ => (Tint :: Tint :: nil, Tint) + | Oinsfl _ _ => (Tlong :: Tlong :: nil, Tlong) + | Osel c ty => (ty :: ty :: arg_type_of_condition0 c :: nil, ty) + | Oselimm c ty => (Tint :: arg_type_of_condition0 c :: nil, Tint) + | Osellimm c ty => (Tlong :: arg_type_of_condition0 c :: nil, Tlong) + end. + +(* FIXME: two Tptr ?! *) +Definition type_of_addressing (addr: addressing) : list typ := + match addr with + | Aindexed2XS _ => Tptr :: Tptr :: nil + | Aindexed2 => Tptr :: Tptr :: nil + | Aindexed _ => Tptr :: nil + | Aglobal _ _ => nil + | Ainstack _ => nil + end. + +(** Weak type soundness results for [eval_operation]: + the result values, when defined, are always of the type predicted + by [type_of_operation]. *) + +Section SOUNDNESS. + +Variable A V: Type. +Variable genv: Genv.t A V. + +Remark type_add: + forall v1 v2, Val.has_type (Val.add v1 v2) Tint. +Proof. + intros. unfold Val.has_type, Val.add. destruct Archi.ptr64, v1, v2; auto. +Qed. + +Remark type_addl: + forall v1 v2, Val.has_type (Val.addl v1 v2) Tlong. +Proof. + intros. unfold Val.has_type, Val.addl. destruct Archi.ptr64, v1, v2; auto. +Qed. + +Remark type_sub: + forall v1 v2, Val.has_type (Val.sub v1 v2) Tint. +Proof. + intros. unfold Val.has_type, Val.sub. destruct Archi.ptr64, v1, v2; cbn; auto. + destruct (eq_block _ _); auto. +Qed. + +Remark type_subl: + forall v1 v2, Val.has_type (Val.subl v1 v2) Tlong. +Proof. + intros. unfold Val.has_type, Val.subl. destruct Archi.ptr64, v1, v2; cbn; auto. + destruct (eq_block _ _); auto. +Qed. + +Remark type_shl: + forall v1 v2, Val.has_type (Val.shl v1 v2) Tint. +Proof. + destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial. +Qed. + +Remark type_shll: + forall v1 v2, Val.has_type (Val.shll v1 v2) Tlong. +Proof. + destruct v1, v2; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial. +Qed. + +Lemma type_of_operation_sound: + forall op vl sp v m, + op <> Omove -> + eval_operation genv sp op vl m = Some v -> + Val.has_type v (snd (type_of_operation op)). +Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). + intros. + destruct op; cbn; cbn in H0; FuncInv; subst; cbn. + (* move *) + - congruence. + (* intconst, longconst, floatconst, singleconst *) + - exact I. + - exact I. + - exact I. + - exact I. + (* addrsymbol *) + - unfold Genv.symbol_address. destruct (Genv.find_symbol genv id)... + (* addrstack *) + - destruct sp... + (* castsigned *) + - destruct v0... + - destruct v0... + (* add, addimm *) + - apply type_add. + - apply type_add. + (* addx, addximm *) + - apply type_add. + - destruct v0; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + (* neg, sub *) + - destruct v0... + - apply type_sub. + (* revsubimm, revsubx, revsubximm *) + - destruct v0... + - apply type_sub. + - destruct v0; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + (* mul, mulimm, mulhs, mulhu *) + - destruct v0; destruct v1... + - destruct v0... + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* div, divu *) + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (_ || _); inv H2... + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (Int.eq i0 Int.zero); inv H2... + (* mod, modu *) + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (_ || _); inv H2... + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (Int.eq i0 Int.zero); inv H2... + (* and, andimm *) + - destruct v0; destruct v1... + - destruct v0... + (* nand, nandimm *) + - destruct v0; destruct v1... + - destruct v0... + (* or, orimm *) + - destruct v0; destruct v1... + - destruct v0... + (* nor, norimm *) + - destruct v0; destruct v1... + - destruct v0... + (* xor, xorimm *) + - destruct v0; destruct v1... + - destruct v0... + (* nxor, nxorimm *) + - destruct v0; destruct v1... + - destruct v0... + (* not *) + - destruct v0... + (* andn, andnimm *) + - destruct v0; destruct v1... + - destruct v0... + (* orn, ornimm *) + - destruct v0; destruct v1... + - destruct v0... + (* shl, shlimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)... + - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)... + (* shr, shrimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)... + - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)... + (* shru, shruimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int.iwordsize)... + - destruct v0; cbn... destruct (Int.ltu n Int.iwordsize)... + (* shrx *) + - destruct v0; cbn... destruct (Int.ltu n (Int.repr 31)); cbn; trivial. + (* shrimm *) + - destruct v0; cbn... + (* madd *) + - apply type_add. + - apply type_add. + (* msub *) + - apply type_sub. + (* makelong, lowlong, highlong *) + - destruct v0; destruct v1... + - destruct v0... + - destruct v0... + (* cast32 *) + - destruct v0... + - destruct v0... + (* addl, addlimm *) + - apply type_addl. + - apply type_addl. + (* addxl addxlimm *) + - apply type_addl. + - destruct v0; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + (* negl, subl *) + - destruct v0... + - apply type_subl. + - destruct v0; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + - destruct v0... + - apply type_subl. + (* mull, mullhs, mullhu *) + - destruct v0; destruct v1... + - destruct v0... + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* divl, divlu *) + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (_ || _); inv H2... + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (Int64.eq i0 Int64.zero); inv H2... + (* modl, modlu *) + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (_ || _); inv H2... + - destruct v0; destruct v1; cbn in *; inv H0. + destruct (Int64.eq i0 Int64.zero); inv H2... + (* andl, andlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* nandl, nandlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* orl, orlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* norl, norlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* xorl, xorlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* nxorl, nxorlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* notl *) + - destruct v0... + (* andnl, andnlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* ornl, ornlimm *) + - destruct v0; destruct v1... + - destruct v0... + (* shll, shllimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')... + - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')... + (* shr, shrimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')... + - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')... + (* shru, shruimm *) + - destruct v0; destruct v1; cbn... destruct (Int.ltu i0 Int64.iwordsize')... + - destruct v0; cbn... destruct (Int.ltu n Int64.iwordsize')... + (* shrxl *) + - destruct v0; cbn... destruct (Int.ltu n (Int.repr 63)); cbn; trivial. + (* maddl, maddlim *) + - apply type_addl. + - apply type_addl. + (* msubl *) + - apply type_subl. + (* negf, absf *) + - destruct v0... + - destruct v0... + (* addf, subf *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* mulf, divf *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* minf, maxf *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* fmaddf, fmsubf *) + - destruct v0; destruct v1; destruct v2... + - destruct v0; destruct v1; destruct v2... + (* negfs, absfs *) + - destruct v0... + - destruct v0... + (* addfs, subfs *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* mulfs, divfs *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* minfs, maxfs *) + - destruct v0; destruct v1... + - destruct v0; destruct v1... + (* invfs *) + - destruct v0... + (* fmaddfs, fmsubfs *) + - destruct v0; destruct v1; destruct v2... + - destruct v0; destruct v1; destruct v2... + (* singleoffloat, floatofsingle *) + - destruct v0... + - destruct v0... + (* intoffloat, intuoffloat *) + - destruct v0; cbn... destruct (Float.to_int f); cbn; trivial. + - destruct v0; cbn... destruct (Float.to_intu f); cbn; trivial. + (* intofsingle, intuofsingle *) + - destruct v0; cbn... destruct (Float32.to_int f); cbn; trivial. + - destruct v0; cbn... destruct (Float32.to_intu f); cbn; trivial. + (* singleofint, singleofintu *) + - destruct v0; cbn... + - destruct v0; cbn... + (* longoffloat, longuoffloat *) + - destruct v0; cbn... destruct (Float.to_long f); cbn; trivial. + - destruct v0; cbn... destruct (Float.to_longu f); cbn; trivial. + (* floatoflong, floatoflongu *) + - destruct v0; cbn... + - destruct v0; cbn... + (* longofsingle, longuofsingle *) + - destruct v0; cbn... destruct (Float32.to_long f); cbn; trivial. + - destruct v0; cbn... destruct (Float32.to_longu f); cbn; trivial. + (* singleoflong, singleoflongu *) + - destruct v0; cbn... + - destruct v0; cbn... + (* cmp *) + - destruct (eval_condition cond vl m)... destruct b... + (* extfz *) + - unfold extfz. + destruct (is_bitfield _ _). + + destruct v0; cbn; trivial. + + constructor. + (* extfs *) + - unfold extfs. + destruct (is_bitfield _ _). + + destruct v0; cbn; trivial. + + constructor. + (* extfzl *) + - unfold extfzl. + destruct (is_bitfieldl _ _). + + destruct v0; cbn; trivial. + + constructor. + (* extfsl *) + - unfold extfsl. + destruct (is_bitfieldl _ _). + + destruct v0; cbn; trivial. + + constructor. + (* insf *) + - unfold insf, bitfield_mask. + destruct (is_bitfield _ _). + + destruct v0; destruct v1; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + + constructor. + (* insf *) + - unfold insfl, bitfield_mask. + destruct (is_bitfieldl _ _). + + destruct v0; destruct v1; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + + constructor. + (* Osel *) + - unfold Val.select. destruct (eval_condition0 _ _ m). + + apply Val.normalize_type. + + constructor. + (* Oselimm *) + - unfold Val.select. destruct (eval_condition0 _ _ m). + + apply Val.normalize_type. + + constructor. + (* Osellimm *) + - unfold Val.select. destruct (eval_condition0 _ _ m). + + apply Val.normalize_type. + + constructor. +Qed. + +Definition is_trapping_op (op : operation) := + match op with + | Odiv | Odivl | Odivu | Odivlu + | Omod | Omodl | Omodu | Omodlu => true + | _ => false + end. + +Definition args_of_operation op := + if eq_operation op Omove + then 1%nat + else List.length (fst (type_of_operation op)). + +Lemma is_trapping_op_sound: + forall op vl sp m, + is_trapping_op op = false -> + (List.length vl) = args_of_operation op -> + eval_operation genv sp op vl m <> None. +Proof. + unfold args_of_operation. + destruct op; destruct eq_operation; intros; cbn in *; try congruence. + all: try (destruct vl as [ | vh1 vl1]; try discriminate). + all: try (destruct vl1 as [ | vh2 vl2]; try discriminate). + all: try (destruct vl2 as [ | vh3 vl3]; try discriminate). + all: try (destruct vl3 as [ | vh4 vl4]; try discriminate). +Qed. +End SOUNDNESS. + +(** * Manipulating and transforming operations *) + +(** Recognition of move operations. *) + +Definition is_move_operation + (A: Type) (op: operation) (args: list A) : option A := + match op, args with + | Omove, arg :: nil => Some arg + | _, _ => None + end. + +Lemma is_move_operation_correct: + forall (A: Type) (op: operation) (args: list A) (a: A), + is_move_operation op args = Some a -> + op = Omove /\ args = a :: nil. +Proof. + intros until a. unfold is_move_operation; destruct op; + try (intros; discriminate). + destruct args. intros; discriminate. + destruct args. intros. intuition congruence. + intros; discriminate. +Qed. + +(** [negate_condition cond] returns a condition that is logically + equivalent to the negation of [cond]. *) + +Definition negate_condition (cond: condition): condition := + match cond with + | Ccomp c => Ccomp(negate_comparison c) + | Ccompu c => Ccompu(negate_comparison c) + | Ccompimm c n => Ccompimm (negate_comparison c) n + | Ccompuimm c n => Ccompuimm (negate_comparison c) n + | Ccompl c => Ccompl(negate_comparison c) + | Ccomplu c => Ccomplu(negate_comparison c) + | Ccomplimm c n => Ccomplimm (negate_comparison c) n + | Ccompluimm c n => Ccompluimm (negate_comparison c) n + | Ccompf c => Cnotcompf c + | Cnotcompf c => Ccompf c + | Ccompfs c => Cnotcompfs c + | Cnotcompfs c => Ccompfs c + end. + +Lemma eval_negate_condition: + forall cond vl m, + eval_condition (negate_condition cond) vl m = option_map negb (eval_condition cond vl m). +Proof. + intros. destruct cond; cbn. + repeat (destruct vl; auto). apply Val.negate_cmp_bool. + repeat (destruct vl; auto). apply Val.negate_cmpu_bool. + repeat (destruct vl; auto). apply Val.negate_cmp_bool. + repeat (destruct vl; auto). apply Val.negate_cmpu_bool. + repeat (destruct vl; auto). apply Val.negate_cmpl_bool. + repeat (destruct vl; auto). apply Val.negate_cmplu_bool. + repeat (destruct vl; auto). apply Val.negate_cmpl_bool. + repeat (destruct vl; auto). apply Val.negate_cmplu_bool. + repeat (destruct vl; auto). + repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto. + repeat (destruct vl; auto). + repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto. +Qed. + +(** Shifting stack-relative references. This is used in [Stacking]. *) + +Definition shift_stack_addressing (delta: Z) (addr: addressing) := + match addr with + | Ainstack ofs => Ainstack (Ptrofs.add ofs (Ptrofs.repr delta)) + | _ => addr + end. + +Definition shift_stack_operation (delta: Z) (op: operation) := + match op with + | Oaddrstack ofs => Oaddrstack (Ptrofs.add ofs (Ptrofs.repr delta)) + | _ => op + end. + +Lemma type_shift_stack_addressing: + forall delta addr, type_of_addressing (shift_stack_addressing delta addr) = type_of_addressing addr. +Proof. + intros. destruct addr; auto. +Qed. + +Lemma type_shift_stack_operation: + forall delta op, type_of_operation (shift_stack_operation delta op) = type_of_operation op. +Proof. + intros. destruct op; auto. +Qed. + +Lemma eval_shift_stack_addressing: + forall F V (ge: Genv.t F V) sp addr vl delta, + eval_addressing ge (Vptr sp Ptrofs.zero) (shift_stack_addressing delta addr) vl = + eval_addressing ge (Vptr sp (Ptrofs.repr delta)) addr vl. +Proof. + intros. destruct addr; cbn; auto. destruct vl; auto. + rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto. +Qed. + +Lemma eval_shift_stack_operation: + forall F V (ge: Genv.t F V) sp op vl m delta, + eval_operation ge (Vptr sp Ptrofs.zero) (shift_stack_operation delta op) vl m = + eval_operation ge (Vptr sp (Ptrofs.repr delta)) op vl m. +Proof. + intros. destruct op; cbn; auto. destruct vl; auto. + rewrite Ptrofs.add_zero_l, Ptrofs.add_commut; auto. +Qed. + +(** Offset an addressing mode [addr] by a quantity [delta], so that + it designates the pointer [delta] bytes past the pointer designated + by [addr]. May be undefined, in which case [None] is returned. *) + +Definition offset_addressing (addr: addressing) (delta: Z) : option addressing := + match addr with + | Aindexed2 | Aindexed2XS _ => None + | Aindexed n => Some(Aindexed (Ptrofs.add n (Ptrofs.repr delta))) + | Aglobal id n => Some(Aglobal id (Ptrofs.add n (Ptrofs.repr delta))) + | Ainstack n => Some(Ainstack (Ptrofs.add n (Ptrofs.repr delta))) + end. + +Lemma eval_offset_addressing: + forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v, + offset_addressing addr delta = Some addr' -> + eval_addressing ge sp addr args = Some v -> + Archi.ptr64 = false -> + eval_addressing ge sp addr' args = Some(Val.add v (Vint (Int.repr delta))). +Proof. + intros. + assert (A: forall x n, + Val.offset_ptr x (Ptrofs.add n (Ptrofs.repr delta)) = + Val.add (Val.offset_ptr x n) (Vint (Int.repr delta))). + { intros; destruct x; cbn; auto. rewrite H1. + rewrite Ptrofs.add_assoc. f_equal; f_equal; f_equal. symmetry; auto with ptrofs. } + destruct addr; cbn in H; inv H; cbn in *; FuncInv; subst. +- rewrite A; auto. +- unfold Genv.symbol_address. destruct (Genv.find_symbol ge i); auto. + cbn. rewrite H1. f_equal; f_equal; f_equal. symmetry; auto with ptrofs. +- rewrite A; auto. +Qed. + +(** Operations that are so cheap to recompute that CSE should not factor them out. *) + +Definition is_trivial_op (op: operation) : bool := + match op with + | Omove => true + | Ointconst n => Int.eq (Int.sign_ext 12 n) n + | Olongconst n => Int64.eq (Int64.sign_ext 12 n) n + | Oaddrstack _ => true + | _ => false + end. + +(** Operations that depend on the memory state. *) + +Definition cond_depends_on_memory (c: condition) : bool := + match c with + | Ccompu _ | Ccompuimm _ _ => negb Archi.ptr64 + | Ccomplu _ | Ccompluimm _ _ => Archi.ptr64 + | _ => false + end. + +Lemma cond_depends_on_memory_correct: + forall c args m1 m2, + cond_depends_on_memory c = false -> + eval_condition c args m1 = eval_condition c args m2. +Proof. + intros; destruct c; cbn; discriminate || reflexivity. +Qed. + +Definition op_depends_on_memory (op: operation) : bool := + match op with + | Ocmp (Ccompu _ | Ccompuimm _ _) => negb Archi.ptr64 + | Ocmp (Ccomplu _ | Ccompluimm _ _) => Archi.ptr64 + + | Osel (Ccompu0 _) _ | Oselimm (Ccompu0 _) _ | Osellimm (Ccompu0 _) _ => negb Archi.ptr64 + | Osel (Ccomplu0 _) _ | Oselimm (Ccomplu0 _) _ | Osellimm (Ccomplu0 _) _ => Archi.ptr64 + + | _ => false + end. + +Lemma op_depends_on_memory_correct: + forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2, + op_depends_on_memory op = false -> + eval_operation ge sp op args m1 = eval_operation ge sp op args m2. +Proof. + intros until m2. destruct op; cbn; try congruence. + - destruct cond; cbn; try congruence; + intros SF; auto; rewrite ? negb_false_iff in SF; + unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity. + - destruct c0; cbn; try congruence; + intros SF; auto; rewrite ? negb_false_iff in SF; + unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity. + - destruct c0; cbn; try congruence; + intros SF; auto; rewrite ? negb_false_iff in SF; + unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity. + - destruct c0; cbn; try congruence; + intros SF; auto; rewrite ? negb_false_iff in SF; + unfold Val.cmpu_bool, Val.cmplu_bool; rewrite SF; reflexivity. +Qed. + +Lemma cond_valid_pointer_eq: + forall cond args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_condition cond args m1 = eval_condition cond args m2. +Proof. + intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence. + all: repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + +Lemma op_valid_pointer_eq: + forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_operation ge sp op args m1 = eval_operation ge sp op args m2. +Proof. + intros until m2. destruct op; cbn; try congruence. + - intros MEM; destruct cond; cbn; try congruence; + repeat (destruct args; cbn; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. + - intros MEM; destruct c0; cbn; try congruence; + repeat (destruct args; cbn; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. + - intros MEM; destruct c0; cbn; try congruence; + repeat (destruct args; cbn; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. + - intros MEM; destruct c0; cbn; try congruence; + repeat (destruct args; cbn; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + +(** Global variables mentioned in an operation or addressing mode *) + +Definition globals_addressing (addr: addressing) : list ident := + match addr with + | Aglobal s ofs => s :: nil + | _ => nil + end. + +Definition globals_operation (op: operation) : list ident := + match op with + | Oaddrsymbol s ofs => s :: nil + | _ => nil + end. + +(** * Invariance and compatibility properties. *) + +(** [eval_operation] and [eval_addressing] depend on a global environment + for resolving references to global symbols. We show that they give + the same results if a global environment is replaced by another that + assigns the same addresses to the same symbols. *) + +Section GENV_TRANSF. + +Variable F1 F2 V1 V2: Type. +Variable ge1: Genv.t F1 V1. +Variable ge2: Genv.t F2 V2. +Hypothesis agree_on_symbols: + forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s. + +Lemma eval_addressing_preserved: + forall sp addr vl, + eval_addressing ge2 sp addr vl = eval_addressing ge1 sp addr vl. +Proof. + intros. + unfold eval_addressing; destruct addr; auto. destruct vl; auto. + unfold Genv.symbol_address. rewrite agree_on_symbols; auto. +Qed. + +Lemma eval_operation_preserved: + forall sp op vl m, + eval_operation ge2 sp op vl m = eval_operation ge1 sp op vl m. +Proof. + intros. + unfold eval_operation; destruct op; auto. destruct vl; auto. + unfold Genv.symbol_address. rewrite agree_on_symbols; auto. +Qed. + +End GENV_TRANSF. + +(** Compatibility of the evaluation functions with value injections. *) + +Section EVAL_COMPAT. + +Variable F1 F2 V1 V2: Type. +Variable ge1: Genv.t F1 V1. +Variable ge2: Genv.t F2 V2. +Variable f: meminj. + +Variable m1: mem. +Variable m2: mem. + +Hypothesis valid_pointer_inj: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. + +Hypothesis weak_valid_pointer_inj: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. + +Hypothesis weak_valid_pointer_no_overflow: + forall b1 ofs b2 delta, + f b1 = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned. + +Hypothesis valid_different_pointers_inj: + forall b1 ofs1 b2 ofs2 b1' delta1 b2' delta2, + b1 <> b2 -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true -> + Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true -> + f b1 = Some (b1', delta1) -> + f b2 = Some (b2', delta2) -> + b1' <> b2' \/ + Ptrofs.unsigned (Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned (Ptrofs.add ofs2 (Ptrofs.repr delta2)). + +Ltac InvInject := + match goal with + | [ H: Val.inject _ (Vint _) _ |- _ ] => + inv H; InvInject + | [ H: Val.inject _ (Vfloat _) _ |- _ ] => + inv H; InvInject + | [ H: Val.inject _ (Vptr _ _) _ |- _ ] => + inv H; InvInject + | [ H: Val.inject_list _ nil _ |- _ ] => + inv H; InvInject + | [ H: Val.inject_list _ (_ :: _) _ |- _ ] => + inv H; InvInject + | _ => idtac + end. + +Lemma eval_condition_inj: + forall cond vl1 vl2 b, + Val.inject_list f vl1 vl2 -> + eval_condition cond vl1 m1 = Some b -> + eval_condition cond vl2 m2 = Some b. +Proof. + intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto. +- inv H3; inv H2; cbn in H0; inv H0; auto. +- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies. +- inv H3; cbn in H0; inv H0; auto. +- eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies. +- inv H3; inv H2; cbn in H0; inv H0; auto. +- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies. +- inv H3; cbn in H0; inv H0; auto. +- eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies. +- inv H3; inv H2; cbn in H0; inv H0; auto. +- inv H3; inv H2; cbn in H0; inv H0; auto. +- inv H3; inv H2; cbn in H0; inv H0; auto. +- inv H3; inv H2; cbn in H0; inv H0; auto. +Qed. + +Lemma eval_condition0_inj: + forall cond v1 v2 b, + Val.inject f v1 v2 -> + eval_condition0 cond v1 m1 = Some b -> + eval_condition0 cond v2 m2 = Some b. +Proof. + intros. destruct cond; cbn in H0; FuncInv; InvInject; cbn; auto. + - inv H; cbn in *; congruence. + - eauto 3 using Val.cmpu_bool_inject, Mem.valid_pointer_implies. + - inv H; cbn in *; congruence. + - eauto 3 using Val.cmplu_bool_inject, Mem.valid_pointer_implies. +Qed. + +Ltac TrivialExists := + match goal with + | [ |- exists v2, Some ?v1 = Some v2 /\ Val.inject _ _ v2 ] => + exists v1; split; auto + | _ => idtac + end. + +Lemma eval_operation_inj: + forall op sp1 vl1 sp2 vl2 v1, + (forall id ofs, + In id (globals_operation op) -> + Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) -> + Val.inject f sp1 sp2 -> + Val.inject_list f vl1 vl2 -> + eval_operation ge1 sp1 op vl1 m1 = Some v1 -> + exists v2, eval_operation ge2 sp2 op vl2 m2 = Some v2 /\ Val.inject f v1 v2. +Proof. + intros until v1; intros GL; intros. destruct op; cbn in H1; cbn; FuncInv; InvInject; TrivialExists. + (* addrsymbol *) + - apply GL; cbn; auto. + (* addrstack *) + - apply Val.offset_ptr_inject; auto. + (* castsigned *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* add, addimm *) + - apply Val.add_inject; auto. + - apply Val.add_inject; auto. + (* addx, addximm *) + - apply Val.add_inject; trivial. + inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto. + - inv H4; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + (* neg, sub *) + - inv H4; cbn; auto. + - apply Val.sub_inject; auto. + (* revsubimm, revsubx, revsubximm *) + - inv H4; cbn; trivial. + - apply Val.sub_inject; trivial. + inv H4; inv H2; cbn; try destruct (Int.ltu _ _); cbn; auto. + - inv H4; cbn; try destruct (Int.ltu _ _); cbn; auto. + (* mul, mulimm, mulhs, mulhu *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* div, divu *) + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (_ || _); inv H2. + TrivialExists. + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + (* mod, modu *) + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (_ || _); inv H2. + TrivialExists. + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + (* and, andimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* nand, nandimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* or, orimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* nor, norimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* xor, xorimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* nxor, nxorimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* not *) + - inv H4; cbn; auto. + (* andn, andnimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* orn, ornimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* shl, shlimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto. + (* shr, shrimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto. + (* shru, shruimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int.iwordsize); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int.iwordsize); auto. + (* shrx *) + - inv H4; cbn; auto. + destruct (Int.ltu n (Int.repr 31)); inv H; cbn; auto. + (* rorimm *) + - inv H4; cbn; auto. + (* madd, maddim *) + - inv H2; inv H3; inv H4; cbn; auto. + - inv H2; inv H4; cbn; auto. + (* msub *) + - apply Val.sub_inject; auto. + inv H3; inv H2; cbn; auto. + (* makelong, highlong, lowlong *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* cast32 *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* addl, addlimm *) + - apply Val.addl_inject; auto. + - apply Val.addl_inject; auto. + (* addxl, addxlimm *) + - apply Val.addl_inject; auto. + inv H4; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + - inv H4; cbn; trivial. + destruct (Int.ltu _ _); cbn; trivial. + (* negl, subl *) + - inv H4; cbn; auto. + - apply Val.subl_inject; auto. + inv H4; inv H2; cbn; trivial; + destruct (Int.ltu _ _); cbn; trivial. + - inv H4; cbn; trivial; + destruct (Int.ltu _ _); cbn; trivial. + - inv H4; cbn; auto. + - apply Val.subl_inject; auto. + (* mull, mullhs, mullhu *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* divl, divlu *) + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (_ || _); inv H2. + TrivialExists. + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + (* modl, modlu *) + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (_ || _); inv H2. + TrivialExists. + - inv H4; inv H3; cbn in H1; inv H1. cbn. + destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + (* andl, andlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* nandl, nandlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* orl, orlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* norl, norlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* xorl, xorlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* nxorl, nxorlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* notl *) + - inv H4; cbn; auto. + (* andnl, andnlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* ornl, ornlimm *) + - inv H4; inv H2; cbn; auto. + - inv H4; cbn; auto. + (* shll, shllimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto. + (* shr, shrimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto. + (* shru, shruimm *) + - inv H4; inv H2; cbn; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. + - inv H4; cbn; auto. destruct (Int.ltu n Int64.iwordsize'); auto. + (* shrx *) + - inv H4; cbn; auto. + destruct (Int.ltu n (Int.repr 63)); cbn; auto. + + (* maddl, maddlimm *) + - apply Val.addl_inject; auto. + inv H2; inv H3; inv H4; cbn; auto. + - apply Val.addl_inject; auto. + inv H4; inv H2; cbn; auto. + (* msubl, msublimm *) + - apply Val.subl_inject; auto. + inv H2; inv H3; inv H4; cbn; auto. + + (* negf, absf *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* addf, subf *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* mulf, divf *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* minf, maxf *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* fmaddf, fmsubf *) + - inv H4; inv H3; inv H2; cbn; auto. + - inv H4; inv H3; inv H2; cbn; auto. + (* negfs, absfs *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* addfs, subfs *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* mulfs, divfs *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* minfs, maxfs *) + - inv H4; inv H2; cbn; auto. + - inv H4; inv H2; cbn; auto. + (* invfs *) + - inv H4; cbn; auto. + (* fmaddfs, fmsubfs *) + - inv H4; inv H3; inv H2; cbn; auto. + - inv H4; inv H3; inv H2; cbn; auto. + (* singleoffloat, floatofsingle *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* intoffloat, intuoffloat *) + - inv H4; cbn; auto. destruct (Float.to_int f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float.to_intu f0); cbn; auto. + (* intofsingle, intuofsingle *) + - inv H4; cbn; auto. destruct (Float32.to_int f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float32.to_intu f0); cbn; auto. + (* singleofint, singleofintu *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* longoffloat, longuoffloat *) + - inv H4; cbn; auto. destruct (Float.to_long f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float.to_longu f0); cbn; auto. + (* floatoflong, floatoflongu *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* longofsingle, longuofsingle *) + - inv H4; cbn; auto. destruct (Float32.to_long f0); cbn; auto. + - inv H4; cbn; auto. destruct (Float32.to_longu f0); cbn; auto. + (* singleoflong, singleoflongu *) + - inv H4; cbn; auto. + - inv H4; cbn; auto. + (* cmp *) + - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. + exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. + destruct b; cbn; constructor. + cbn; constructor. + + (* extfz *) + - unfold extfz. + destruct (is_bitfield _ _). + + inv H4; trivial. + + trivial. + + (* extfs *) + - unfold extfs. + destruct (is_bitfield _ _). + + inv H4; trivial. + + trivial. + + (* extfzl *) + - unfold extfzl. + destruct (is_bitfieldl _ _). + + inv H4; trivial. + + trivial. + + (* extfsl *) + - unfold extfsl. + destruct (is_bitfieldl _ _). + + inv H4; trivial. + + trivial. + + (* insf *) + - unfold insf. + destruct (is_bitfield _ _). + + inv H4; inv H2; trivial. + cbn. destruct (Int.ltu _ _); trivial. + cbn. trivial. + + trivial. + + (* insfl *) + - unfold insfl. + destruct (is_bitfieldl _ _). + + inv H4; inv H2; trivial. + cbn. destruct (Int.ltu _ _); trivial. + cbn. trivial. + + trivial. + + (* Osel *) + - apply Val.select_inject; trivial. + destruct (eval_condition0 c0 v2 m1) eqn:Hcond. + + right. + symmetry. + eapply eval_condition0_inj; eassumption. + + left. trivial. + + (* Oselimm *) + - apply Val.select_inject; trivial. + destruct (eval_condition0 _ _ _) eqn:Hcond. + + right. + symmetry. + eapply eval_condition0_inj; eassumption. + + left. trivial. + + (* Osellimm *) + - apply Val.select_inject; trivial. + destruct (eval_condition0 _ _ _) eqn:Hcond. + + right. + symmetry. + eapply eval_condition0_inj; eassumption. + + left. trivial. +Qed. + +Lemma eval_addressing_inj: + forall addr sp1 vl1 sp2 vl2 v1, + (forall id ofs, + In id (globals_addressing addr) -> + Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) -> + Val.inject f sp1 sp2 -> + Val.inject_list f vl1 vl2 -> + eval_addressing ge1 sp1 addr vl1 = Some v1 -> + exists v2, eval_addressing ge2 sp2 addr vl2 = Some v2 /\ Val.inject f v1 v2. +Proof. + intros. destruct addr; cbn in H2; cbn; FuncInv; InvInject; TrivialExists. + - apply Val.addl_inject; trivial. + destruct v0; destruct v'0; cbn; trivial; destruct (Int.ltu _ _); cbn; trivial; inv H3. + apply Val.inject_long. + - apply Val.addl_inject; auto. + - apply Val.offset_ptr_inject; auto. + - apply H; cbn; auto. + - apply Val.offset_ptr_inject; auto. +Qed. + +Lemma eval_addressing_inj_none: + forall addr sp1 vl1 sp2 vl2, + (forall id ofs, + In id (globals_addressing addr) -> + Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) -> + Val.inject f sp1 sp2 -> + Val.inject_list f vl1 vl2 -> + eval_addressing ge1 sp1 addr vl1 = None -> + eval_addressing ge2 sp2 addr vl2 = None. +Proof. + intros until vl2. intros Hglobal Hinjsp Hinjvl. + destruct addr; cbn in *. + 1,2: inv Hinjvl; trivial; + inv H0; trivial; + inv H2; trivial; + discriminate. + 2,3: inv Hinjvl; trivial; discriminate. + inv Hinjvl; trivial; inv H0; trivial; + inv H; trivial; discriminate. +Qed. + +End EVAL_COMPAT. + +(** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *) + +Section EVAL_LESSDEF. + +Variable F V: Type. +Variable genv: Genv.t F V. + +Remark valid_pointer_extends: + forall m1 m2, Mem.extends m1 m2 -> + forall b1 ofs b2 delta, + Some(b1, 0) = Some(b2, delta) -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. +Proof. + intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.valid_pointer_extends; eauto. +Qed. + +Remark weak_valid_pointer_extends: + forall m1 m2, Mem.extends m1 m2 -> + forall b1 ofs b2 delta, + Some(b1, 0) = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + Mem.weak_valid_pointer m2 b2 (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr delta))) = true. +Proof. + intros. inv H0. rewrite Ptrofs.add_zero. eapply Mem.weak_valid_pointer_extends; eauto. +Qed. + +Remark weak_valid_pointer_no_overflow_extends: + forall m1 b1 ofs b2 delta, + Some(b1, 0) = Some(b2, delta) -> + Mem.weak_valid_pointer m1 b1 (Ptrofs.unsigned ofs) = true -> + 0 <= Ptrofs.unsigned ofs + Ptrofs.unsigned (Ptrofs.repr delta) <= Ptrofs.max_unsigned. +Proof. + intros. inv H. rewrite Z.add_0_r. apply Ptrofs.unsigned_range_2. +Qed. + +Remark valid_different_pointers_extends: + forall m1 b1 ofs1 b2 ofs2 b1' delta1 b2' delta2, + b1 <> b2 -> + Mem.valid_pointer m1 b1 (Ptrofs.unsigned ofs1) = true -> + Mem.valid_pointer m1 b2 (Ptrofs.unsigned ofs2) = true -> + Some(b1, 0) = Some (b1', delta1) -> + Some(b2, 0) = Some (b2', delta2) -> + b1' <> b2' \/ + Ptrofs.unsigned(Ptrofs.add ofs1 (Ptrofs.repr delta1)) <> Ptrofs.unsigned(Ptrofs.add ofs2 (Ptrofs.repr delta2)). +Proof. + intros. inv H2; inv H3. auto. +Qed. + +Lemma eval_condition_lessdef: + forall cond vl1 vl2 b m1 m2, + Val.lessdef_list vl1 vl2 -> + Mem.extends m1 m2 -> + eval_condition cond vl1 m1 = Some b -> + eval_condition cond vl2 m2 = Some b. +Proof. + intros. eapply eval_condition_inj with (f := fun b => Some(b, 0)) (m1 := m1). + apply valid_pointer_extends; auto. + apply weak_valid_pointer_extends; auto. + apply weak_valid_pointer_no_overflow_extends. + apply valid_different_pointers_extends; auto. + rewrite <- val_inject_list_lessdef. eauto. auto. +Qed. + +Lemma eval_operation_lessdef: + forall sp op vl1 vl2 v1 m1 m2, + Val.lessdef_list vl1 vl2 -> + Mem.extends m1 m2 -> + eval_operation genv sp op vl1 m1 = Some v1 -> + exists v2, eval_operation genv sp op vl2 m2 = Some v2 /\ Val.lessdef v1 v2. +Proof. + intros. rewrite val_inject_list_lessdef in H. + assert (exists v2 : val, + eval_operation genv sp op vl2 m2 = Some v2 + /\ Val.inject (fun b => Some(b, 0)) v1 v2). + eapply eval_operation_inj with (m1 := m1) (sp1 := sp). + apply valid_pointer_extends; auto. + apply weak_valid_pointer_extends; auto. + apply weak_valid_pointer_no_overflow_extends. + apply valid_different_pointers_extends; auto. + intros. apply val_inject_lessdef. auto. + apply val_inject_lessdef; auto. + eauto. + auto. + destruct H2 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto. +Qed. + +Lemma eval_addressing_lessdef: + forall sp addr vl1 vl2 v1, + Val.lessdef_list vl1 vl2 -> + eval_addressing genv sp addr vl1 = Some v1 -> + exists v2, eval_addressing genv sp addr vl2 = Some v2 /\ Val.lessdef v1 v2. +Proof. + intros. rewrite val_inject_list_lessdef in H. + assert (exists v2 : val, + eval_addressing genv sp addr vl2 = Some v2 + /\ Val.inject (fun b => Some(b, 0)) v1 v2). + eapply eval_addressing_inj with (sp1 := sp). + intros. rewrite <- val_inject_lessdef; auto. + rewrite <- val_inject_lessdef; auto. + eauto. auto. + destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto. +Qed. + + +Lemma eval_addressing_lessdef_none: + forall sp addr vl1 vl2, + Val.lessdef_list vl1 vl2 -> + eval_addressing genv sp addr vl1 = None -> + eval_addressing genv sp addr vl2 = None. +Proof. + intros until vl2. intros Hlessdef Heval1. + destruct addr; cbn in *. + 1, 2, 4, 5: inv Hlessdef; trivial; + inv H0; trivial; + inv H2; trivial; + discriminate. + inv Hlessdef; trivial. + inv H0; trivial. + discriminate. +Qed. + +End EVAL_LESSDEF. + +(** Compatibility of the evaluation functions with memory injections. *) + +Section EVAL_INJECT. + +Variable F V: Type. +Variable genv: Genv.t F V. +Variable f: meminj. +Hypothesis globals: meminj_preserves_globals genv f. +Variable sp1: block. +Variable sp2: block. +Variable delta: Z. +Hypothesis sp_inj: f sp1 = Some(sp2, delta). + +Remark symbol_address_inject: + forall id ofs, Val.inject f (Genv.symbol_address genv id ofs) (Genv.symbol_address genv id ofs). +Proof. + intros. unfold Genv.symbol_address. destruct (Genv.find_symbol genv id) eqn:?; auto. + exploit (proj1 globals); eauto. intros. + econstructor; eauto. rewrite Ptrofs.add_zero; auto. +Qed. + +Lemma eval_condition_inject: + forall cond vl1 vl2 b m1 m2, + Val.inject_list f vl1 vl2 -> + Mem.inject f m1 m2 -> + eval_condition cond vl1 m1 = Some b -> + eval_condition cond vl2 m2 = Some b. +Proof. + intros. eapply eval_condition_inj with (f := f) (m1 := m1); eauto. + intros; eapply Mem.valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. + intros; eapply Mem.different_pointers_inject; eauto. +Qed. + +Lemma eval_addressing_inject: + forall addr vl1 vl2 v1, + Val.inject_list f vl1 vl2 -> + eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = Some v1 -> + exists v2, + eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = Some v2 + /\ Val.inject f v1 v2. +Proof. + intros. + rewrite eval_shift_stack_addressing. + eapply eval_addressing_inj with (sp1 := Vptr sp1 Ptrofs.zero); eauto. + intros. apply symbol_address_inject. + econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. +Qed. + +Lemma eval_addressing_inject_none: + forall addr vl1 vl2, + Val.inject_list f vl1 vl2 -> + eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None -> + eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None. +Proof. + intros. + rewrite eval_shift_stack_addressing. + eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto. + intros. apply symbol_address_inject. + econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. +Qed. + +Lemma eval_operation_inject: + forall op vl1 vl2 v1 m1 m2, + Val.inject_list f vl1 vl2 -> + Mem.inject f m1 m2 -> + eval_operation genv (Vptr sp1 Ptrofs.zero) op vl1 m1 = Some v1 -> + exists v2, + eval_operation genv (Vptr sp2 Ptrofs.zero) (shift_stack_operation delta op) vl2 m2 = Some v2 + /\ Val.inject f v1 v2. +Proof. + intros. + rewrite eval_shift_stack_operation. cbn. + eapply eval_operation_inj with (sp1 := Vptr sp1 Ptrofs.zero) (m1 := m1); eauto. + intros; eapply Mem.valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_val; eauto. + intros; eapply Mem.weak_valid_pointer_inject_no_overflow; eauto. + intros; eapply Mem.different_pointers_inject; eauto. + intros. apply symbol_address_inject. + econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. +Qed. + +End EVAL_INJECT. + +(** * Handling of builtin arguments *) + +Definition builtin_arg_ok_1 + (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) := + match c, ba with + | OK_all, _ => true + | OK_const, (BA_int _ | BA_long _ | BA_float _ | BA_single _) => true + | OK_addrstack, BA_addrstack _ => true + | OK_addressing, BA_addrstack _ => true + | OK_addressing, BA_addptr (BA _) (BA_int _) => true + | OK_addressing, BA_addptr (BA _) (BA_long _) => true + | _, _ => false + end. + +Definition builtin_arg_ok + (A: Type) (ba: builtin_arg A) (c: builtin_arg_constraint) := + match ba with + | (BA _ | BA_splitlong (BA _) (BA _)) => true + | _ => builtin_arg_ok_1 ba c + end. diff --git a/kvx/OpWeights.ml b/kvx/OpWeights.ml new file mode 100644 index 00000000..23c2e5d3 --- /dev/null +++ b/kvx/OpWeights.ml @@ -0,0 +1,115 @@ +open Op;; +open PostpassSchedulingOracle;; +open PrepassSchedulingOracleDeps;; + +module KV3 = + struct +let resource_bounds = PostpassSchedulingOracle.resource_bounds;; +let nr_non_pipelined_units = 0;; + +let rec nlist_rec x l = function + | 0 -> l + | n when n > 0 -> nlist_rec x (x :: l) (n-1) + | _ -> failwith "nlist_rec";; +let nlist x n = nlist_rec x [] n;; + +let bogus_register = Machregs.R0;; +let bogus_inputs n = nlist bogus_register n;; + +let insns_of_op (op : operation) (nargs : int) = + match Asmblockgen.transl_op op + (bogus_inputs nargs) bogus_register [] with + | Errors.Error msg -> failwith "OpWeights.insns_of_op" + | Errors.OK insns -> insns;; + +let insn_of_op op nargs = + match insns_of_op op nargs with + | [] -> failwith "OpWeights.insn_of_op" + | h::_ -> h;; + +let insns_of_cond (cond : condition) (nargs : int) = + match Asmblockgen.transl_cond_op cond + Asmvliw.GPR0 (bogus_inputs nargs) [] with + | Errors.Error msg -> failwith "OpWeights.insns_of_cond" + | Errors.OK insns -> insns;; + +let insn_of_cond cond nargs = + match insns_of_cond cond nargs with + | [] -> failwith "OpWeights.insn_of_cond" + | h::_ -> h;; + +let insns_of_load trap chunk addressing (nargs : int) = + match Asmblockgen.transl_load trap chunk addressing + (bogus_inputs nargs) bogus_register [] with + | Errors.Error msg -> failwith "OpWeights.insns_of_load" + | Errors.OK insns -> insns;; + +let insn_of_load trap chunk addressing nargs = + match insns_of_load trap chunk addressing nargs with + | [] -> failwith "OpWeights.insn_of_load" + | h::_ -> h;; + +let insns_of_store chunk addressing (nargs : int) = + match Asmblockgen.transl_store chunk addressing + (bogus_inputs nargs) bogus_register [] with + | Errors.Error msg -> failwith "OpWeights.insns_of_store" + | Errors.OK insns -> insns;; + +let insn_of_store chunk addressing nargs = + match insns_of_store chunk addressing nargs with + | [] -> failwith "OpWeights.insn_of_store" + | h::_ -> h;; + +let latency_of_op (op : operation) (nargs : int) = + let insn = insn_of_op op nargs in + let record = basic_rec insn in + let latency = real_inst_to_latency record.inst in + latency;; + +let resources_of_op (op : operation) (nargs : int) = + let insn = insn_of_op op nargs in + let record = basic_rec insn in + rec_to_usage record;; + +let non_pipelined_resources_of_op (op : operation) (nargs : int) = [| |] + +let resources_of_cond (cond : condition) (nargs : int) = + let insn = insn_of_cond cond nargs in + let record = basic_rec insn in + rec_to_usage record;; + +let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; +let latency_of_call _ _ = 6;; + +let resources_of_load trap chunk addressing nargs = + let insn = insn_of_load trap chunk addressing nargs in + let record = basic_rec insn in + rec_to_usage record;; + +let resources_of_store chunk addressing nargs = + let insn = insn_of_store chunk addressing nargs in + let record = basic_rec insn in + rec_to_usage record;; + +let resources_of_call _ _ = resource_bounds;; +let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "kv3" | "" -> + { + pipelined_resource_bounds = KV3.resource_bounds; + nr_non_pipelined_units = KV3.nr_non_pipelined_units; + latency_of_op = KV3.latency_of_op; + resources_of_op = KV3.resources_of_op; + non_pipelined_resources_of_op = KV3.non_pipelined_resources_of_op; + latency_of_load = KV3.latency_of_load; + resources_of_load = KV3.resources_of_load; + resources_of_store = KV3.resources_of_store; + resources_of_cond = KV3.resources_of_cond; + latency_of_call = KV3.latency_of_call; + resources_of_call = KV3.resources_of_call; + resources_of_builtin = KV3.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; diff --git a/kvx/Peephole.v b/kvx/Peephole.v new file mode 100644 index 00000000..5adb823b --- /dev/null +++ b/kvx/Peephole.v @@ -0,0 +1,158 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib. +Require Import Asmvliw. +Require Import Values. +Require Import Integers. +Require Import AST. +Require Compopts. + +Definition gpreg_q_list : list gpreg_q := +R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9 +:: R10R11 :: R12R13 :: R14R15 :: R16R17 :: R18R19 +:: R20R21 :: R22R23 :: R24R25 :: R26R27 :: R28R29 +:: R30R31 :: R32R33 :: R34R35 :: R36R37 :: R38R39 +:: R40R41 :: R42R43 :: R44R45 :: R46R47 :: R48R49 +:: R50R51 :: R52R53 :: R54R55 :: R56R57 :: R58R59 +:: R60R61 :: R62R63 :: nil. + +Definition gpreg_o_list : list gpreg_o := +R0R1R2R3 :: R4R5R6R7 :: R8R9R10R11 :: R12R13R14R15 +:: R16R17R18R19 :: R20R21R22R23 :: R24R25R26R27 :: R28R29R30R31 +:: R32R33R34R35 :: R36R37R38R39 :: R40R41R42R43 :: R44R45R46R47 +:: R48R49R50R51 :: R52R53R54R55 :: R56R57R58R59 :: R60R61R62R63 :: nil. + +Fixpoint gpreg_q_search_rec r0 r1 l := + match l with + | h :: t => + let (s0, s1) := gpreg_q_expand h in + if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) + then Some h + else gpreg_q_search_rec r0 r1 t + | nil => None + end. + +Fixpoint gpreg_o_search_rec r0 r1 r2 r3 l := + match l with + | h :: t => + match gpreg_o_expand h with + | (((s0, s1), s2), s3) => + if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) && + (gpreg_eq r2 s2) && (gpreg_eq r3 s3) + then Some h + else gpreg_o_search_rec r0 r1 r2 r3 t + end + | nil => None + end. + +Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q := + gpreg_q_search_rec r0 r1 gpreg_q_list. + +Definition gpreg_o_search r0 r1 r2 r3 : option gpreg_o := + gpreg_o_search_rec r0 r1 r2 r3 gpreg_o_list. + +Parameter print_found_store: forall A, Z -> A -> A. + +Definition coalesce_octuples := true. + +Fixpoint coalesce_mem (insns : list basic) : list basic := + match insns with + | nil => nil + | h0 :: t0 => + match t0 with + | h1 :: t1 => + match h0, h1 with + | (PStoreRRO Psd_a rs0 ra0 ofs0), + (PStoreRRO Psd_a rs1 ra1 ofs1) => + match gpreg_q_search rs0 rs1 with + | Some rs0rs1 => + let zofs0 := Ptrofs.signed ofs0 in + let zofs1 := Ptrofs.signed ofs1 in + if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) + then + if coalesce_octuples + then + match t1 with + | (PStoreRRO Psd_a rs2 ra2 ofs2) :: + (PStoreRRO Psd_a rs3 ra3 ofs3) :: t3 => + match gpreg_o_search rs0 rs1 rs2 rs3 with + | Some octuple => + let zofs2 := Ptrofs.signed ofs2 in + let zofs3 := Ptrofs.signed ofs3 in + if (zofs2 =? zofs0 + 16) && (ireg_eq ra0 ra2) && + (zofs3 =? zofs0 + 24) && (ireg_eq ra0 ra3) + then (PStore (PStoreORRO octuple ra0 ofs0)) :: Pnop :: Pnop :: Pnop :: (coalesce_mem t3) + else (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + | None => (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + end + | _ => (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + end + else (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + else h0 :: (coalesce_mem t0) + | None => h0 :: (coalesce_mem t0) + end + + | (PLoad (PLoadRRO TRAP Pld_a rd0 ra0 ofs0)), + (PLoad (PLoadRRO TRAP Pld_a rd1 ra1 ofs1)) => + match gpreg_q_search rd0 rd1 with + | Some rd0rd1 => + let zofs0 := Ptrofs.signed ofs0 in + let zofs1 := Ptrofs.signed ofs1 in + if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) && negb (ireg_eq ra0 rd0) + then + if coalesce_octuples + then + match t1 with + | (PLoad (PLoadRRO TRAP Pld_a rd2 ra2 ofs2)) :: + (PLoad (PLoadRRO TRAP Pld_a rd3 ra3 ofs3)) :: t3 => + match gpreg_o_search rd0 rd1 rd2 rd3 with + | Some octuple => + let zofs2 := Ptrofs.signed ofs2 in + let zofs3 := Ptrofs.signed ofs3 in + if (zofs2 =? zofs0 + 16) && (ireg_eq ra0 ra2) && + (zofs3 =? zofs0 + 24) && (ireg_eq ra0 ra3) && + negb (ireg_eq ra0 rd1) && negb (ireg_eq ra0 rd2) + then (PLoad (PLoadORRO octuple ra0 ofs0)) :: Pnop :: Pnop :: Pnop :: (coalesce_mem t3) + else (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + | None => (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + end + | _ => (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + end + else (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + else h0 :: (coalesce_mem t0) + | None => h0 :: (coalesce_mem t0) + end + | _, _ => h0 :: (coalesce_mem t0) + end + | nil => h0 :: nil + end + end. + +Definition optimize_body (insns : list basic) := + if Compopts.optim_coalesce_mem tt + then coalesce_mem insns + else insns. + +Program Definition optimize_bblock (bb : bblock) := + let optimized := optimize_body (body bb) in + let wf_ok := wf_bblockb optimized (exit bb) in + {| header := header bb; + body := if wf_ok then optimized else (body bb); + exit := exit bb |}. +Next Obligation. + destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf. + - rewrite Rwf. cbn. trivial. + - exact (correct bb). +Qed. diff --git a/kvx/PostpassScheduling.v b/kvx/PostpassScheduling.v new file mode 100644 index 00000000..08e640c6 --- /dev/null +++ b/kvx/PostpassScheduling.v @@ -0,0 +1,526 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Implementation (and basic properties) of the verified postpass scheduler *) + +Require Import Coqlib Errors AST Integers. +Require Import Asmblock Axioms Memory Globalenvs. +Require Import Asmblockdeps Asmblockgenproof0 Asmblockprops. +Require Peephole. +Require Import Lia. + +Local Open Scope error_monad_scope. + +(** * Oracle taking as input a basic block, + returns a scheduled list of bundles *) +Axiom schedule: bblock -> (list (list basic)) * option control. + +Extract Constant schedule => "PostpassSchedulingOracle.schedule". + +(** * Concat all bundles into one big basic block *) + +(* Lemmas necessary for defining concat_all *) +Lemma app_nonil {A: Type} (l l': list A) : l <> nil -> l ++ l' <> nil. +Proof. + intros. destruct l; simpl. + - contradiction. + - discriminate. +Qed. + +Lemma app_nonil2 {A: Type} : forall (l l': list A), l' <> nil -> l ++ l' <> nil. +Proof. + destruct l. + - intros. simpl; auto. + - intros. rewrite <- app_comm_cons. discriminate. +Qed. + +Definition check_size bb := + if zlt Ptrofs.max_unsigned (size bb) + then Error (msg "PostpassSchedulingproof.check_size") + else OK tt. + +Program Definition concat2 (bb bb': bblock) : res bblock := + do ch <- check_size bb; + do ch' <- check_size bb'; + match (exit bb) with + | None => + match (header bb') with + | nil => + match (exit bb') with + | Some (PExpand (Pbuiltin _ _ _)) => Error (msg "PostpassSchedulingproof.concat2: builtin not alone") + | _ => OK {| header := header bb; body := body bb ++ body bb'; exit := exit bb' |} + end + | _ => Error (msg "PostpassSchedulingproof.concat2") + end + | _ => Error (msg "PostpassSchedulingproof.concat2") + end. +Next Obligation. + apply wf_bblock_refl. constructor. + - destruct bb' as [hd' bdy' ex' WF']. destruct bb as [hd bdy ex WF]. simpl in *. + apply wf_bblock_refl in WF'. apply wf_bblock_refl in WF. + inversion_clear WF'. inversion_clear WF. clear H1 H3. + inversion H2; inversion H0. + + left. apply app_nonil. auto. + + right. auto. + + left. apply app_nonil2. auto. + + right. auto. + - unfold builtin_alone. intros. rewrite H0 in H. + assert (Some (PExpand (Pbuiltin ef args res)) <> Some (PExpand (Pbuiltin ef args res))). + apply (H ef args res). contradict H1. auto. +Defined. + +Lemma concat2_zlt_size: + forall a b bb, + concat2 a b = OK bb -> + size a <= Ptrofs.max_unsigned + /\ size b <= Ptrofs.max_unsigned. +Proof. + intros. monadInv H. + split. + - unfold check_size in EQ. destruct (zlt Ptrofs.max_unsigned (size a)); monadInv EQ. lia. + - unfold check_size in EQ1. destruct (zlt Ptrofs.max_unsigned (size b)); monadInv EQ1. lia. +Qed. + +Lemma concat2_noexit: + forall a b bb, + concat2 a b = OK bb -> + exit a = None. +Proof. + intros. destruct a as [hd bdy ex WF]; simpl in *. + destruct ex as [e|]; simpl in *; auto. + unfold concat2 in H. simpl in H. monadInv H. +Qed. + +Lemma concat2_decomp: + forall a b bb, + concat2 a b = OK bb -> + body bb = body a ++ body b + /\ exit bb = exit b. +Proof. + intros. exploit concat2_noexit; eauto. intros. + destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bd ex WF]; simpl in *. + subst exa. + unfold concat2 in H; simpl in H. + destruct hdb. + - destruct exb. + + destruct c. + * destruct i; monadInv H; split; auto. + * monadInv H. split; auto. + + monadInv H. split; auto. + - monadInv H. +Qed. + +Lemma concat2_size: + forall a b bb, concat2 a b = OK bb -> size bb = size a + size b. +Proof. + intros. unfold concat2 in H. + destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bdy ex WF]; simpl in *. + destruct exa; monadInv H. destruct hdb; try (monadInv EQ2). destruct exb; try (monadInv EQ2). + - destruct c. + + destruct i; monadInv EQ2; + unfold size; simpl; rewrite app_length; rewrite Nat.add_0_r; rewrite <- Nat2Z.inj_add; rewrite Nat.add_assoc; reflexivity. + + monadInv EQ2. unfold size; simpl. rewrite app_length. rewrite Nat.add_0_r. rewrite <- Nat2Z.inj_add. rewrite Nat.add_assoc. reflexivity. + - unfold size; simpl. rewrite app_length. repeat (rewrite Nat.add_0_r). rewrite <- Nat2Z.inj_add. reflexivity. +Qed. + +Lemma concat2_header: + forall bb bb' tbb, + concat2 bb bb' = OK tbb -> header bb = header tbb. +Proof. + intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *. + unfold concat2 in H. simpl in H. monadInv H. + destruct ex; try discriminate. destruct hd'; try discriminate. destruct ex'. + - destruct c. + + destruct i; try discriminate; congruence. + + congruence. + - congruence. +Qed. + +Lemma concat2_no_header_in_middle: + forall bb bb' tbb, + concat2 bb bb' = OK tbb -> + header bb' = nil. +Proof. + intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *. + unfold concat2 in H. simpl in H. monadInv H. + destruct ex; try discriminate. destruct hd'; try discriminate. reflexivity. +Qed. + + + +Fixpoint concat_all (lbb: list bblock) : res bblock := + match lbb with + | nil => Error (msg "PostpassSchedulingproof.concatenate: empty list") + | bb::nil => OK bb + | bb::lbb => + do bb' <- concat_all lbb; + concat2 bb bb' + end. + +Lemma concat_all_size : + forall lbb a bb bb', + concat_all (a :: lbb) = OK bb -> + concat_all lbb = OK bb' -> + size bb = size a + size bb'. +Proof. + intros. unfold concat_all in H. fold concat_all in H. + destruct lbb; try discriminate. + monadInv H. rewrite H0 in EQ. inv EQ. + apply concat2_size. assumption. +Qed. + +Lemma concat_all_header: + forall lbb bb tbb, + concat_all (bb::lbb) = OK tbb -> header bb = header tbb. +Proof. + destruct lbb. + - intros. simpl in H. congruence. + - intros. simpl in H. destruct lbb. + + inv H. eapply concat2_header; eassumption. + + monadInv H. eapply concat2_header; eassumption. +Qed. + +Lemma concat_all_no_header_in_middle: + forall lbb tbb, + concat_all lbb = OK tbb -> + Forall (fun b => header b = nil) (tail lbb). +Proof. + induction lbb; intros; try constructor. + simpl. simpl in H. destruct lbb. + - constructor. + - monadInv H. simpl tl in IHlbb. constructor. + + apply concat2_no_header_in_middle in EQ0. apply concat_all_header in EQ. congruence. + + apply IHlbb in EQ. assumption. +Qed. + +Inductive is_concat : bblock -> list bblock -> Prop := + | mk_is_concat: forall tbb lbb, concat_all lbb = OK tbb -> is_concat tbb lbb. + +(** * Remainder of the verified scheduler *) + +Definition verify_schedule (bb bb' : bblock) : res unit := + match bblock_simub bb bb' with + | true => OK tt + | false => Error (msg "PostpassScheduling.verify_schedule") + end. + + +Definition verify_size bb lbb := if (Z.eqb (size bb) (size_blocks lbb)) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size"). + +Lemma verify_size_size: + forall bb lbb, verify_size bb lbb = OK tt -> size bb = size_blocks lbb. +Proof. + intros. unfold verify_size in H. destruct (size bb =? size_blocks lbb) eqn:SIZE; try discriminate. + apply Z.eqb_eq. assumption. +Qed. + +Lemma verify_schedule_no_header: + forall bb bb', + verify_schedule (no_header bb) bb' = verify_schedule bb bb'. +Proof. + intros. unfold verify_schedule. unfold bblock_simub. unfold pure_bblock_simu_test, bblock_simu_test. rewrite trans_block_noheader_inv. + reflexivity. +Qed. + + +Lemma stick_header_verify_schedule: + forall hd bb' hbb' bb, + stick_header hd bb' = hbb' -> + verify_schedule bb bb' = verify_schedule bb hbb'. +Proof. + intros. unfold verify_schedule. unfold bblock_simub, pure_bblock_simu_test, bblock_simu_test. + rewrite <- H. rewrite trans_block_header_inv. reflexivity. +Qed. + +Lemma check_size_stick_header: + forall bb hd, + check_size bb = check_size (stick_header hd bb). +Proof. + intros. unfold check_size. rewrite stick_header_size. reflexivity. +Qed. + +Lemma stick_header_concat2: + forall bb bb' hd tbb, + concat2 bb bb' = OK tbb -> + concat2 (stick_header hd bb) bb' = OK (stick_header hd tbb). +Proof. + intros. monadInv H. erewrite check_size_stick_header in EQ. + unfold concat2. rewrite EQ. rewrite EQ1. simpl. + destruct bb as [hdr bdy ex COR]; destruct bb' as [hdr' bdy' ex' COR']; simpl in *. + destruct ex; try discriminate. destruct hdr'; try discriminate. destruct ex'. + - destruct c. + + destruct i; try discriminate; inv EQ2; unfold stick_header; simpl; reflexivity. + + inv EQ2. unfold stick_header; simpl. reflexivity. + - inv EQ2. unfold stick_header; simpl. reflexivity. +Qed. + +Lemma stick_header_concat_all: + forall bb c tbb hd, + concat_all (bb :: c) = OK tbb -> + concat_all (stick_header hd bb :: c) = OK (stick_header hd tbb). +Proof. + intros. simpl in *. destruct c; try congruence. + monadInv H. rewrite EQ. simpl. + apply stick_header_concat2. assumption. +Qed. + + + +Definition stick_header_code (h : list label) (lbb : list bblock) := + match (head lbb) with + | None => Error (msg "PostpassScheduling.stick_header: empty schedule") + | Some fst => OK ((stick_header h fst) :: tail lbb) + end. + +Lemma stick_header_code_no_header: + forall bb c, + stick_header_code (header bb) (no_header bb :: c) = OK (bb :: c). +Proof. + intros. unfold stick_header_code. simpl. rewrite stick_header_no_header. reflexivity. +Qed. + +Lemma hd_tl_size: + forall lbb bb, hd_error lbb = Some bb -> size_blocks lbb = size bb + size_blocks (tl lbb). +Proof. + destruct lbb. + - intros. simpl in H. discriminate. + - intros. simpl in H. inv H. simpl. reflexivity. +Qed. + +Lemma stick_header_code_size: + forall h lbb lbb', stick_header_code h lbb = OK lbb' -> size_blocks lbb = size_blocks lbb'. +Proof. + intros. unfold stick_header_code in H. destruct (hd_error lbb) eqn:HD; try discriminate. + inv H. simpl. rewrite stick_header_size. erewrite hd_tl_size; eauto. +Qed. + +Lemma stick_header_code_no_header_in_middle: + forall c h lbb, + stick_header_code h c = OK lbb -> + Forall (fun b => header b = nil) (tl c) -> + Forall (fun b => header b = nil) (tl lbb). +Proof. + destruct c; intros. + - unfold stick_header_code in H. simpl in H. discriminate. + - unfold stick_header_code in H. simpl in H. inv H. simpl in H0. + simpl. assumption. +Qed. + +Lemma stick_header_code_concat_all: + forall hd lbb hlbb tbb, + stick_header_code hd lbb = OK hlbb -> + concat_all lbb = OK tbb -> + exists htbb, + concat_all hlbb = OK htbb + /\ stick_header hd tbb = htbb. +Proof. + intros. exists (stick_header hd tbb). split; auto. + destruct lbb. + - unfold stick_header_code in H. simpl in H. discriminate. + - unfold stick_header_code in H. simpl in H. inv H. + apply stick_header_concat_all. assumption. +Qed. + +Program Definition make_bblock_from_basics lb := + match lb with + | nil => Error (msg "PostpassScheduling.make_bblock_from_basics") + | b :: lb => OK {| header := nil; body := b::lb; exit := None |} + end. + +Fixpoint schedule_to_bblocks_nocontrol llb := + match llb with + | nil => OK nil + | lb :: llb => do bb <- make_bblock_from_basics lb; + do lbb <- schedule_to_bblocks_nocontrol llb; + OK (bb :: lbb) + end. + +Program Definition make_bblock_from_basics_and_control lb c := + match c with + | PExpand (Pbuiltin _ _ _) => Error (msg "PostpassScheduling.make_bblock_from_basics_and_control") + | PCtlFlow cf => OK {| header := nil; body := lb; exit := Some (PCtlFlow cf) |} + end. +Next Obligation. + apply wf_bblock_refl. constructor. + - right. discriminate. + - discriminate. +Qed. + +Fixpoint schedule_to_bblocks_wcontrol llb c := + match llb with + | nil => OK ((bblock_single_inst (PControl c)) :: nil) + | lb :: nil => do bb <- make_bblock_from_basics_and_control lb c; OK (bb :: nil) + | lb :: llb => do bb <- make_bblock_from_basics lb; + do lbb <- schedule_to_bblocks_wcontrol llb c; + OK (bb :: lbb) + end. + +Definition schedule_to_bblocks (llb: list (list basic)) (oc: option control) : res (list bblock) := + match oc with + | None => schedule_to_bblocks_nocontrol llb + | Some c => schedule_to_bblocks_wcontrol llb c + end. + +Definition do_schedule (bb: bblock) : res (list bblock) := + if (Z.eqb (size bb) 1) then OK (bb::nil) + else match (schedule bb) with (llb, oc) => schedule_to_bblocks llb oc end. + +Definition verify_par_bblock (bb: bblock) : res unit := + if (bblock_para_check bb) then OK tt else Error (msg "PostpassScheduling.verify_par_bblock"). + +Fixpoint verify_par (lbb: list bblock) := + match lbb with + | nil => OK tt + | bb :: lbb => do res <- verify_par_bblock bb; verify_par lbb + end. + +Definition verified_schedule_nob (bb : bblock) : res (list bblock) := + let bb' := no_header bb in + let bb'' := Peephole.optimize_bblock bb' in + do lbb <- do_schedule bb''; + do tbb <- concat_all lbb; + do sizecheck <- verify_size bb lbb; + do schedcheck <- verify_schedule bb' tbb; + do res <- stick_header_code (header bb) lbb; + do parcheck <- verify_par res; + OK res. + +Lemma verified_schedule_nob_size: + forall bb lbb, verified_schedule_nob bb = OK lbb -> size bb = size_blocks lbb. +Proof. + intros. monadInv H. erewrite <- stick_header_code_size; eauto. + apply verify_size_size. + destruct x1; try discriminate. assumption. +Qed. + +Lemma verified_schedule_nob_no_header_in_middle: + forall lbb bb, + verified_schedule_nob bb = OK lbb -> + Forall (fun b => header b = nil) (tail lbb). +Proof. + intros. monadInv H. eapply stick_header_code_no_header_in_middle; eauto. + eapply concat_all_no_header_in_middle. eassumption. +Qed. + +Lemma verified_schedule_nob_header: + forall bb tbb lbb, + verified_schedule_nob bb = OK (tbb :: lbb) -> + header bb = header tbb + /\ Forall (fun b => header b = nil) lbb. +Proof. + intros. split. + - monadInv H. unfold stick_header_code in EQ3. destruct (hd_error _); try discriminate. inv EQ3. + simpl. reflexivity. + - apply verified_schedule_nob_no_header_in_middle in H. assumption. +Qed. + + +Definition verified_schedule (bb : bblock) : res (list bblock) := + match exit bb with + | Some (PExpand (Pbuiltin ef args res)) => OK (bb::nil) (* Special case for ensuring the lemma verified_schedule_builtin_idem *) + | _ => verified_schedule_nob bb + end. + +Lemma verified_schedule_size: + forall bb lbb, verified_schedule bb = OK lbb -> size bb = size_blocks lbb. +Proof. + intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i. + all: try (apply verified_schedule_nob_size; auto; fail). + inv H. simpl. lia. +Qed. + +Lemma verified_schedule_no_header_in_middle: + forall lbb bb, + verified_schedule bb = OK lbb -> + Forall (fun b => header b = nil) (tail lbb). +Proof. + intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i. + all: try (eapply verified_schedule_nob_no_header_in_middle; eauto; fail). + inv H. simpl. auto. +Qed. + +Lemma verified_schedule_header: + forall bb tbb lbb, + verified_schedule bb = OK (tbb :: lbb) -> + header bb = header tbb + /\ Forall (fun b => header b = nil) lbb. +Proof. + intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i. + all: try (eapply verified_schedule_nob_header; eauto; fail). + inv H. split; simpl; auto. +Qed. + + +Lemma verified_schedule_nob_correct: + forall ge f bb lbb, + verified_schedule_nob bb = OK lbb -> + exists tbb, + is_concat tbb lbb + /\ bblock_simu ge f bb tbb. +Proof. + intros. monadInv H. + exploit stick_header_code_concat_all; eauto. + intros (tbb & CONC & STH). + exists tbb. split; auto. constructor; auto. + rewrite verify_schedule_no_header in EQ2. erewrite stick_header_verify_schedule in EQ2; eauto. + eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ2. + destruct (bblock_simub _ _); auto; try discriminate. +Qed. + +Theorem verified_schedule_correct: + forall ge f bb lbb, + verified_schedule bb = OK lbb -> + exists tbb, + is_concat tbb lbb + /\ bblock_simu ge f bb tbb. +Proof. + intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i. + all: try (eapply verified_schedule_nob_correct; eauto; fail). + inv H. eexists. split; simpl; auto. constructor; auto. simpl; auto. constructor; auto. +Qed. + +Lemma verified_schedule_builtin_idem: + forall bb ef args res lbb, + exit bb = Some (PExpand (Pbuiltin ef args res)) -> + verified_schedule bb = OK lbb -> + lbb = bb :: nil. +Proof. + intros. unfold verified_schedule in H0. rewrite H in H0. inv H0. reflexivity. +Qed. + + +Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) := + match lbb with + | nil => OK nil + | (cons bb lbb) => + do tlbb <- transf_blocks lbb; + do tbb <- verified_schedule bb; + OK (tbb ++ tlbb) + end. + +Definition transl_function (f: function) : res function := + do lb <- transf_blocks (fn_blocks f); + OK (mkfunction (fn_sig f) lb). + +Definition transf_function (f: function) : res function := + do tf <- transl_function f; + if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks)) + then Error (msg "code size exceeded") + else OK tf. + +Definition transf_fundef (f: fundef) : res fundef := + transf_partial_fundef transf_function f. + +Definition transf_program (p: program) : res program := + transform_partial_program transf_fundef p. diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml new file mode 100644 index 00000000..3f4520a6 --- /dev/null +++ b/kvx/PostpassSchedulingOracle.ml @@ -0,0 +1,1036 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Asmvliw +open Asmblock +open Printf +open Camlcoq +open InstructionScheduler +open TargetPrinter.Target + +let debug = false + +(** + * Extracting infos from Asmvliw instructions + *) + +type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset + +type location = Reg of preg | Mem + +type real_instruction = + (* ALU *) + | Addw | Andw | Compw | Mulw | Orw | Sbfw | Sbfxw | Sraw | Srlw | Sllw | Srsw | Rorw | Xorw + | Addd | Andd | Compd | Muld | Ord | Sbfd | Sbfxd | Srad | Srld | Slld | Srsd | Xord + | Nandw | Norw | Nxorw | Nandd | Nord | Nxord | Andnw | Ornw | Andnd | Ornd + | Maddw | Maddd | Msbfw | Msbfd | Cmoved + | Make | Nop | Extfz | Extfs | Insf + | Addxw | Addxd + (* LSU *) + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo + | Sb | Sh | Sw | Sd | Sq | So + (* BCU *) + | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set + (* FPU *) + | Fabsd | Fabsw | Fnegw | Fnegd + | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw + | Fmind | Fminw | Fmaxd | Fmaxw | Finvw + | Ffmaw | Ffmad | Ffmsw | Ffmsd + | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz + | Fcompw | Fcompd + +type ab_inst_rec = { + inst: real_instruction; + write_locs : location list; + read_locs : location list; + read_at_id : location list; (* Must be contained in read_locs *) + read_at_e1 : location list; (* idem *) + imm : immediate option; + is_control : bool; +} + +(** Asmvliw constructor to real instructions *) + +exception OpaqueInstruction + +let arith_rr_real = function + | Pcvtl2w -> Addw + | Pmv -> Addd + | Pnegw -> Sbfw + | Pnegl -> Sbfd + | Psxwd -> Extfs + | Pzxwd -> Extfz + | Pextfz(_,_) -> Extfz + | Pextfs(_,_) -> Extfs + | Pextfzl(_,_) -> Extfz + | Pextfsl(_,_) -> Extfs + | Pfabsw -> Fabsw + | Pfabsd -> Fabsd + | Pfnegw -> Fnegw + | Pfnegd -> Fnegd + | Pfinvw -> Finvw + | Pfnarrowdw -> Fnarrowdw + | Pfwidenlwd -> Fwidenlwd + | Pfloatwrnsz -> Floatwz + | Pfloatuwrnsz -> Floatuwz + | Pfloatudrnsz -> Floatudz + | Pfloatdrnsz -> Floatdz + | Pfixedwrzz -> Fixedwz + | Pfixeduwrzz -> Fixeduwz + | Pfixeddrzz -> Fixeddz + | Pfixedudrzz -> Fixedudz + | Pfixeddrzz_i32 -> Fixeddz + | Pfixedudrzz_i32 -> Fixedudz + +let arith_rrr_real = function + | Pcompw it -> Compw + | Pcompl it -> Compd + | Pfcompw ft -> Fcompw + | Pfcompl ft -> Fcompd + | Paddw -> Addw + | Paddxw _ -> Addxw + | Psubw -> Sbfw + | Prevsubxw _ -> Sbfxw + | Pmulw -> Mulw + | Pandw -> Andw + | Pnandw -> Nandw + | Porw -> Orw + | Pnorw -> Norw + | Pxorw -> Xorw + | Pnxorw -> Nxorw + | Pandnw -> Andnw + | Pornw -> Ornw + | Psraw -> Sraw + | Psrlw -> Srlw + | Psrxw -> Srsw + | Psllw -> Sllw + | Paddl -> Addd + | Paddxl _ -> Addxd + | Psubl -> Sbfd + | Prevsubxl _ -> Sbfxd + | Pandl -> Andd + | Pnandl -> Nandd + | Porl -> Ord + | Pnorl -> Nord + | Pxorl -> Xord + | Pnxorl -> Nxord + | Pandnl -> Andnd + | Pornl -> Ornd + | Pmull -> Muld + | Pslll -> Slld + | Psrll -> Srld + | Psrxl -> Srsd + | Psral -> Srad + | Pfaddd -> Faddd + | Pfaddw -> Faddw + | Pfsbfd -> Fsbfd + | Pfsbfw -> Fsbfw + | Pfmuld -> Fmuld + | Pfmulw -> Fmulw + | Pfmind -> Fmind + | Pfminw -> Fminw + | Pfmaxd -> Fmaxd + | Pfmaxw -> Fmaxw + +let arith_rri32_real = function + | Pcompiw it -> Compw + | Paddiw -> Addw + | Paddxiw _ -> Addxw + | Prevsubiw -> Sbfw + | Prevsubxiw _ -> Sbfxw + | Pmuliw -> Mulw + | Pandiw -> Andw + | Pnandiw -> Nandw + | Poriw -> Orw + | Pnoriw -> Norw + | Pxoriw -> Xorw + | Pnxoriw -> Nxorw + | Pandniw -> Andnw + | Porniw -> Ornw + | Psraiw -> Sraw + | Psrxiw -> Srsw + | Psrliw -> Srlw + | Pslliw -> Sllw + | Proriw -> Rorw + | Psllil -> Slld + | Psrlil -> Srld + | Psrail -> Srad + | Psrxil -> Srsd + +let arith_rri64_real = function + | Pcompil it -> Compd + | Paddil -> Addd + | Prevsubil -> Sbfd + | Paddxil _ -> Addxd + | Prevsubxil _ -> Sbfxd + | Pmulil -> Muld + | Pandil -> Andd + | Pnandil -> Nandd + | Poril -> Ord + | Pnoril -> Nord + | Pxoril -> Xord + | Pnxoril -> Nxord + | Pandnil -> Andnd + | Pornil -> Ornd + + +let arith_arr_real = function + | Pinsf (_, _) -> Insf + | Pinsfl (_, _) -> Insf + +let arith_arrr_real = function + | Pfmaddfw -> Ffmaw + | Pfmaddfl -> Ffmad + | Pfmsubfw -> Ffmsw + | Pfmsubfl -> Ffmsd + | Pmaddw -> Maddw + | Pmaddl -> Maddd + | Pmsubw -> Msbfw + | Pmsubl -> Msbfd + | Pcmove _ -> Cmoved + | Pcmoveu _ -> Cmoved + +let arith_arri32_real = function + | Pmaddiw -> Maddw + | Pcmoveiw _ -> Cmoved + | Pcmoveuiw _ -> Cmoved + +let arith_arri64_real = function + | Pmaddil -> Maddd + | Pcmoveil _ -> Cmoved + | Pcmoveuil _ -> Cmoved + +let arith_ri32_real = Make + +let arith_ri64_real = Make + +let arith_rf32_real = Make + +let arith_rf64_real = Make + +let store_real = function + | Psb -> Sb + | Psh -> Sh + | Psw -> Sw + | Psw_a -> Sw + | Psd -> Sd + | Psd_a -> Sd + | Pfss -> Sw + | Pfsd -> Sd + +let load_real = function + | Plb -> Lbs + | Plbu -> Lbz + | Plh -> Lhs + | Plhu -> Lhz + | Plw -> Lws + | Plw_a -> Lws + | Pld -> Ld + | Pld_a -> Ld + | Pfls -> Lws + | Pfld -> Ld + +let set_real = Set +let get_real = Get +let nop_real = Nop +let loadsymbol_real = Make +let loadqrro_real = Lq +let loadorro_real = Lo +let storeqrro_real = Sq +let storeorro_real = So + +let ret_real = Ret +let call_real = Call +let icall_real = Icall +let goto_real = Goto +let igoto_real = Igoto +let jl_real = Goto +let cb_real = Cb +let cbu_real = Cb + +let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let arith_arri32_rec i rd rs imm32 = + let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> [] + in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false; + read_at_id = [] ; read_at_e1 = rae1 } + +let arith_arri64_rec i rd rs imm64 = + let rae1 = match i with Pmaddil -> [Reg rd] | _ -> [] + in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false; + read_at_id = []; read_at_e1 = rae1 } + +let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let arith_arrr_rec i rd rs1 rs2 = + let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> [] + in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false; + read_at_id = []; read_at_e1 = rae1 } + +let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let arith_r_rec i rd = match i with + (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *) + | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed); + is_control = false; read_at_id = []; read_at_e1 = [] } + +let arith_rec i = + match i with + | PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32)) + | PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64)) + | PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2) + | PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs) + (* Seems like single constant constructor types are elided *) + | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32)) + | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64)) + | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2) + | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false; + read_at_id = []; read_at_e1 = [] } + | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false; + read_at_id = []; read_at_e1 = [] } + | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = []; + imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []} + | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = []; + imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []} + | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs) + | PArithR (i, rd) -> arith_r_rec i (IR rd) + +let load_rec i = match i with + | PLoadRRO (trap, i, rs1, rs2, imm) -> + { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false; + read_at_id = []; read_at_e1 = [] } + | PLoadQRRO(rs, ra, imm) -> + let (rs0, rs1) = gpreg_q_expand rs in + { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false; + read_at_id = []; read_at_e1 = [] } + | PLoadORRO(rs, ra, imm) -> + let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in + { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)]; + imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []} + | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) -> + { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let store_rec i = match i with + | PStoreRRO (i, rs, ra, imm) -> + { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm)); + read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false} + | PStoreQRRO (rs, ra, imm) -> + let (rs0, rs1) = gpreg_q_expand rs in + { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm)); + read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false} + | PStoreORRO (rs, ra, imm) -> + let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in + { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)]; + imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false} + | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) -> + { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None; + read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false} + +let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false; + read_at_id = []; read_at_e1 = [] } + +let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false; + read_at_id = [Reg (IR rs)]; read_at_e1 = [] } + +let basic_rec i = + match i with + | PArith i -> arith_rec i + | PLoad i -> load_rec i + | PStore i -> store_rec i + | Pallocframe (_, _) -> raise OpaqueInstruction + | Pfreeframe (_, _) -> raise OpaqueInstruction + | Pget (rd, rs) -> get_rec rd rs + | Pset (rd, rs) -> set_rec rd rs + | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []} + +let expand_rec = function + | Pbuiltin _ -> raise OpaqueInstruction + +let ctl_flow_rec = function + | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []} + | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} + | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true; + read_at_id = [Reg (IR r)]; read_at_e1 = [] } + | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} + | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true; + read_at_id = [Reg (IR r)]; read_at_e1 = [] } + | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []} + | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true; + read_at_id = [Reg (IR rs)]; read_at_e1 = [] } + | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true; + read_at_id = [Reg (IR rs)]; read_at_e1 = [] } + | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *) + +let control_rec i = + match i with + | PExpand i -> expand_rec i + | PCtlFlow i -> ctl_flow_rec i + +let rec basic_recs body = match body with + | [] -> [] + | bi :: body -> (basic_rec bi) :: (basic_recs body) + +let exit_rec exit = match exit with + | None -> [] + | Some ex -> [control_rec ex] + +let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit) + +(** + * Providing informations relative to the real instructions + *) + +(** Abstraction providing all the necessary informations for solving the scheduling problem *) +type inst_info = { + write_locs : location list; + read_locs : location list; + reads_at_id : bool; + reads_at_e1 : bool; + is_control : bool; + usage: int array; (* resources consumed by the instruction *) + latency: int; +} + +(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *) +type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10 + +let rec pow a = function + | 0 -> Int64.one + | 1 -> Int64.of_int a + | n -> let b = pow a (n/2) in + Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a)) + +let signed_interval n : (int64 * int64) = begin + assert (n > 0); + let min = Int64.neg @@ pow 2 (n-1) + and max = Int64.sub (pow 2 (n-1)) Int64.one + in (min, max) +end + +let within i interv = match interv with (min, max) -> (i >= min && i <= max) + +let signed_length (i:int64) = + let rec f (i:int64) n = + let interv = signed_interval n + in if (within i interv) then n else f i (n+1) + in f i 1 + +let unsigned_length (i:int64) = (signed_length i) - 1 + +let encode_imm (imm:int64) = + if (Int64.compare imm Int64.zero < 0) then + let length = signed_length imm + in if length <= 10 then S10 + else if length <= 32 then U27L5 + else if length <= 37 then U27L10 + else if length <= 64 then E27U27L10 + else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm + else + let length = unsigned_length imm + in if length <= 6 then U6 + else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *) + else if length <= 32 then U27L5 + else if length <= 37 then U27L10 + else if length <= 64 then E27U27L10 + else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm + +(** Resources *) +type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop + +let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop] + +let rec find_index elt l = + match l with + | [] -> raise Not_found + | e::l -> if (e == elt) then 0 + else 1 + find_index elt l + +let resource_id resource : int = find_index resource resource_names + +let resource_bound resource : int = + match resource with + | Rissue -> 8 + | Rtiny -> 4 + | Rlite -> 2 + | Rfull -> 1 + | Rlsu -> 1 + | Rmau -> 1 + | Rbcu -> 1 + | Rtca -> 1 + | Rauxr -> 1 + | Rauxw -> 1 + | Rcrrp -> 1 + | Rcrwl -> 1 + | Rcrwh -> 1 + | Rnop -> 4 + +let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names) + +(** Reservation tables *) +let alu_full : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_lite : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_lite_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_lite_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_nop : int array = let resmap = fun r -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_tiny : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_tiny_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let alu_tiny_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let bcu : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rbcu -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxr : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxr_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxr_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxw : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxw_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let lsu_auxw_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau_auxr : int array = let resmap = fun r -> match r with + | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau_auxr_x : int array = let resmap = fun r -> match r with + | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +let mau_auxr_y : int array = let resmap = fun r -> match r with + | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0 + in Array.of_list (List.map resmap resource_names) + +(** Real instructions *) + +exception InvalidEncoding + +let rec_to_usage r = + let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i) + | Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr) + + in match r.inst with + | Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw + | Nxorw | Andnw | Ornw -> + (match encoding with None | Some U6 | Some S10 -> alu_tiny + | Some U27L5 | Some U27L10 -> alu_tiny_x + | _ -> raise InvalidEncoding) + | Sbfxw | Sbfxd -> + (match encoding with None -> alu_lite + | Some U6 | Some S10 | Some U27L5 -> alu_lite_x + | _ -> raise InvalidEncoding) + | Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord + | Nxord | Andnd | Ornd -> + (match encoding with None | Some U6 | Some S10 -> alu_tiny + | Some U27L5 | Some U27L10 -> alu_tiny_x + | Some E27U27L10 -> alu_tiny_y) + |Cmoved -> + (match encoding with None | Some U6 | Some S10 -> alu_lite + | Some U27L5 | Some U27L10 -> alu_lite_x + | Some E27U27L10 -> alu_lite_y) + | Addxw -> + (match encoding with None | Some U6 | Some S10 -> alu_lite + | Some U27L5 | Some U27L10 -> alu_lite_x + | _ -> raise InvalidEncoding) + | Addxd -> + (match encoding with None | Some U6 | Some S10 -> alu_lite + | Some U27L5 | Some U27L10 -> alu_lite_x + | Some E27U27L10 -> alu_lite_y) + | Compw -> (match encoding with None -> alu_tiny + | Some U6 | Some S10 | Some U27L5 -> alu_tiny_x + | _ -> raise InvalidEncoding) + | Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny + | Some U27L5 | Some U27L10 -> alu_tiny_x + | Some E27U27L10 -> alu_tiny_y) + | Fcompw -> (match encoding with None -> alu_lite + | Some U6 | Some S10 | Some U27L5 -> alu_lite_x + | _ -> raise InvalidEncoding) + | Fcompd -> (match encoding with None -> alu_lite + | Some U6 | Some S10 | Some U27L5 -> alu_lite_x + | _ -> raise InvalidEncoding) + | Make -> (match encoding with Some U6 | Some S10 -> alu_tiny + | Some U27L5 | Some U27L10 -> alu_tiny_x + | Some E27U27L10 -> alu_tiny_y + | _ -> raise InvalidEncoding) + | Maddw | Msbfw -> (match encoding with None -> mau_auxr + | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x + | _ -> raise InvalidEncoding) + | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr + | Some U27L5 | Some U27L10 -> mau_auxr_x + | Some E27U27L10 -> mau_auxr_y) + | Mulw -> (match encoding with None -> mau + | Some U6 | Some S10 | Some U27L5 -> mau_x + | _ -> raise InvalidEncoding) + | Muld -> (match encoding with None | Some U6 | Some S10 -> mau + | Some U27L5 | Some U27L10 -> mau_x + | Some E27U27L10 -> mau_y) + | Nop -> alu_nop + | Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding) + (* TODO: check *) + | Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding) + | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding) + | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> + (match encoding with None | Some U6 | Some S10 -> lsu_auxw + | Some U27L5 | Some U27L10 -> lsu_auxw_x + | Some E27U27L10 -> lsu_auxw_y) + | Sb | Sh | Sw | Sd | Sq | So -> + (match encoding with None | Some U6 | Some S10 -> lsu_auxr + | Some U27L5 | Some U27L10 -> lsu_auxr_x + | Some E27U27L10 -> lsu_auxr_y) + | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu + | Get -> bcu_tiny_tiny_mau_xnop + | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd + | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite + | Fnarrowdw -> alu_full + | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw + | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau + + +let inst_info_to_dlatency i = + begin + assert (not (i.reads_at_id && i.reads_at_e1)); + match i.reads_at_id with + | true -> +1 + | false -> (match i.reads_at_e1 with + | true -> -1 + | false -> 0) + end + +let real_inst_to_latency = function + | Nop -> 0 (* Only goes through ID *) + | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw + (* TODO check rorw *) + | Rorw | Nandw | Norw | Nxorw | Ornw | Andnw + | Nandd | Nord | Nxord | Ornd | Andnd + | Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make + | Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd + | Fmind | Fmaxd | Fminw | Fmaxw + -> 1 + | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4 + | Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *) + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3 + | Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *) + | Get -> 1 + | Set -> 4 (* According to the manual should be 3, but I measured 4 *) + | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *) + | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1 + | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw + | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4 + | Finvw -> 15 + +let rec empty_inter la = function + | [] -> true + | b::lb -> if (List.mem b la) then false else empty_inter la lb + +let rec_to_info r : inst_info = + let usage = rec_to_usage r + and latency = real_inst_to_latency r.inst + and reads_at_id = not (empty_inter r.read_locs r.read_at_id) + and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1) + in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control; + reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 } + +let instruction_infos bb = List.map rec_to_info (instruction_recs bb) + +let instruction_usages bb = + let usages = List.map (fun info -> info.usage) (instruction_infos bb) + in Array.of_list usages + +(** + * Latency constraints building + *) + +(* type access = { inst: int; loc: location } *) + +let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr + +let loc2int = function + | Mem -> 1 + | Reg pr -> preg2int pr + +(* module HashedLoc = struct + type t = { loc: location; key: int } + let equal l1 l2 = (l1.key = l2.key) + let hash l = l.key + let create (l:location) : t = { loc=l; key = loc2int l } +end *) + +(* module LocHash = Hashtbl.Make(HashedLoc) *) +module LocHash = Hashtbl + +(* Hash table : location => list of instruction ids *) + +let rec intlist n = + if n < 0 then failwith "intlist: n < 0" + else if n = 0 then [] + else (n-1) :: (intlist (n-1)) + +let find_in_hash hashloc loc = + match LocHash.find_opt hashloc loc with + | Some idl -> idl + | None -> [] + +(* Returns a list of instruction ids *) +let rec get_accesses hashloc (ll: location list) = match ll with + | [] -> [] + | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs) + +let compute_latency (ifrom: inst_info) (ito: inst_info) = + let dlat = inst_info_to_dlatency ito + in let lat = ifrom.latency + dlat + in assert (lat >= 0); if (lat == 0) then 1 else lat + +let latency_constraints bb = + let written = LocHash.create 70 + and read = LocHash.create 70 + and count = ref 0 + and constraints = ref [] + and instr_infos = instruction_infos bb + in let step (i: inst_info) = + let raw = get_accesses written i.read_locs + and waw = get_accesses written i.write_locs + and war = get_accesses read i.write_locs + in begin + List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; + latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw; + List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; + latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw; + List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war; + if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count); + (* Updating "read" and "written" hashmaps *) + List.iter (fun loc -> + begin + LocHash.replace written loc [!count]; + LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *) + end) i.write_locs; + List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs; + count := !count + 1 + end + in (List.iter step instr_infos; !constraints) + +(** + * Using the InstructionScheduler + *) + +let build_problem bb = +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} + +let rec find_min_opt (l: int option list) = + match l with + | [] -> None + | e :: l -> + begin match find_min_opt l with + | None -> e + | Some m -> + begin match e with + | None -> Some m + | Some n -> if n < m then Some n else Some m + end + end + +let rec filter_indexes predicate = function + | [] -> [] + | e :: l -> if (predicate e) then e :: (filter_indexes predicate l) else filter_indexes predicate l + +let get_from_indexes indexes l = List.map (List.nth l) indexes + +let is_basic = function PBasic _ -> true | _ -> false +let is_control = function PControl _ -> true | _ -> false +let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found" +let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found" + +let bundlize li hd = + let last = List.nth li (List.length li - 1) + in if is_control last then + let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) + in let bli = List.map to_basic cut_li + in { header = hd; body = bli; exit = Some (to_control last) } + else + let bli = List.map to_basic li + in { header = hd; body = bli; exit = None } + +let apply_pbasic b = PBasic b +let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found" + +let rec find_min = function + | [] -> None + | e :: l -> + match find_min l with + | None -> Some e + | Some m -> if (e < m) then Some e else Some m + +let rec remove_all m = function + | [] -> [] + | e :: l -> if m=e then remove_all m l + else e :: (remove_all m l) + +let rec find_mins l = match find_min l with + | None -> [] + | Some m -> m :: find_mins (remove_all m l) + +let find_all_indices m l = + let rec find m off = function + | [] -> [] + | e :: l -> if m=e then off :: find m (off+1) l + else find m (off+1) l + in find m 0 l + +module TimeHash = Hashtbl + +(* Hash table : time => list of instruction ids *) + +let hashtbl2list h maxint = + let rec f i = match TimeHash.find_opt h i with + | None -> if (i > maxint) then [] else (f (i+1)) + | Some bund -> bund :: (f (i+1)) + in f 0 + +let find_max l = + let rec f = function + | [] -> None + | e :: l -> match f l with + | None -> Some e + | Some m -> if (e > m) then Some e else Some m + in match (f l) with + | None -> raise Not_found + | Some m -> m + +(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *) +let minpack_list (l: int list) = + let timehash = TimeHash.create (List.length l) + in let rec f i = function + | [] -> () + | t::l -> begin + (match TimeHash.find_opt timehash t with + | None -> TimeHash.add timehash t [i] + | Some bund -> TimeHash.replace timehash t (bund @ [i])); + f (i+1) l + end + in begin + f 0 l; + hashtbl2list timehash (find_max l) + end;; + +(* let minpack_list l = + let mins = find_mins l + in List.map (fun m -> find_all_indices m l) mins + *) + +let bb_to_instrs bb = (List.map apply_pbasic bb.body) @ (match bb.exit with None -> [] | Some e -> [PControl e]) + +let bundlize_solution bb sol = + let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1)) + in let packs = minpack_list tmp + and instrs = bb_to_instrs bb + in let rec bund hd = function + | [] -> [] + | pack :: packs -> bundlize (get_from_indexes pack instrs) hd :: (bund [] packs) + in bund bb.header packs + +let print_inst oc = function + | Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n" + | Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n" + | Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n" + | Asm.Pcvtl2w(rd, rs) -> fprintf oc " Pcvtl2w %a = %a\n" ireg rd ireg rs + | i -> print_instruction oc i + +let print_bb oc bb = + let asm_instructions = Asm.unfold_bblock bb + in List.iter (print_inst oc) asm_instructions + +let print_schedule sched = + print_string "[ "; + Array.iter (fun x -> Printf.printf "%d; " x) sched; + print_endline "]";; + +let do_schedule bb = + let problem = build_problem bb in + (if debug then print_problem stdout problem); + let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem + in match solution with + | None -> failwith "Could not find a valid schedule" + | Some sol -> + ((if debug then print_schedule sol); + let bundles = bundlize_solution bb sol in + (if debug then + begin + Printf.eprintf "Scheduling the following group of instructions:\n"; + print_bb stderr bb; + Printf.eprintf "Gave the following solution:\n"; + List.iter (print_bb stderr) bundles; + Printf.eprintf "--------------------------------\n" + end; + bundles)) + +(** + * Dumb schedule if the above doesn't work + *) + +let bundlize_label l = + match l with + | [] -> [] + | l -> [{ header = l; body = []; exit = None }] + +let rec bundlize_basic l = + match l with + | [] -> [] + | b :: l -> { header = []; body = [b]; exit = None } :: bundlize_basic l + +let bundlize_exit e = + match e with + | Some e -> [{ header = []; body = []; exit = Some e }] + | None -> [] + +let dumb_schedule (bb : bblock) : bblock list = bundlize_label bb.header @ bundlize_basic bb.body @ bundlize_exit bb.exit + +(** + * Separates the opaque instructions such as Pfreeframe and Pallocframe + *) + +let is_opaque = function + | PBasic (Pallocframe _) | PBasic (Pfreeframe _) | PControl (PExpand (Pbuiltin _)) -> true + | _ -> false + +(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *) +let rec biggest_wo_opaque = function + | [] -> ([], [], None) + | i :: li -> if is_opaque i then ([], li, Some i) + else let big, rem, opaque = biggest_wo_opaque li in (i :: big, rem, opaque);; + +let separate_opaque bb = + let instrs = bb_to_instrs bb + in let rec f hd li = + match li with + | [] -> [] + | li -> let big, rem, opaque = biggest_wo_opaque li in + match opaque with + | Some i -> + (match big with + | [] -> (bundlize [i] hd) :: (f [] rem) + | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem) + ) + | None -> (bundlize big hd) :: (f [] rem) + in f bb.header instrs + +let smart_schedule bb = + let lbb = separate_opaque bb + in let rec f = function + | [] -> [] + | bb :: lbb -> + let bundles = + try do_schedule bb + with OpaqueInstruction -> dumb_schedule bb + | e -> + let msg = Printexc.to_string e + and stack = Printexc.get_backtrace () + in begin + Printf.eprintf "In regards to this group of instructions:\n"; + print_bb stderr bb; + Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack; + failwith "Invalid schedule" + (* + Printf.eprintf "Issuing one instruction per bundle instead\n\n"; + dumb_schedule bb + *) + end + in bundles @ (f lbb) + in f lbb + +let bblock_to_bundles bb = + if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb); + (* print_problem (build_problem bb); *) + if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb + +(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *) + +let rec bundles_to_coq_schedule = function + | [] -> ([], None) + | bb :: [] -> ([bb.body], bb.exit) + | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc) + +(** Called schedule function from Coq *) + +let schedule_notime bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto +let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) schedule_notime bb diff --git a/kvx/PostpassSchedulingproof.v b/kvx/PostpassSchedulingproof.v new file mode 100644 index 00000000..937b3be6 --- /dev/null +++ b/kvx/PostpassSchedulingproof.v @@ -0,0 +1,690 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Errors. +Require Import Integers Floats AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations Machblock Conventions Asmblock. +Require Import Asmblockgenproof0 Asmblockprops. +Require Import PostpassScheduling. +Require Import Asmblockgenproof. +Require Import Axioms. +Require Import Lia. + +Local Open Scope error_monad_scope. + +Definition match_prog (p tp: Asmvliw.program) := + match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall p tp, transf_program p = OK tp -> match_prog p tp. +Proof. + intros. eapply match_transform_partial_program; eauto. +Qed. + +Lemma regset_double_set_id: + forall r (rs: regset) v1 v2, + (rs # r <- v1 # r <- v2) = (rs # r <- v2). +Proof. + intros. apply functional_extensionality. intros. destruct (preg_eq r x). + - subst r. repeat (rewrite Pregmap.gss; auto). + - repeat (rewrite Pregmap.gso); auto. +Qed. + +Lemma exec_body_pc_var: + forall l ge rs m rs' m' v, + exec_body ge l rs m = Next rs' m' -> + exec_body ge l (rs # PC <- v) m = Next (rs' # PC <- v) m'. +Proof. + induction l. + - intros. simpl. simpl in H. inv H. auto. + - intros. simpl in *. + destruct (exec_basic_instr ge a rs m) eqn:EXEBI; try discriminate. + erewrite exec_basic_instr_pc_var; eauto. +Qed. + +Lemma pc_set_add: + forall rs v r x y, + 0 <= x <= Ptrofs.max_unsigned -> + 0 <= y <= Ptrofs.max_unsigned -> + rs # r <- (Val.offset_ptr v (Ptrofs.repr (x + y))) = rs # r <- (Val.offset_ptr (rs # r <- (Val.offset_ptr v (Ptrofs.repr x)) r) (Ptrofs.repr y)). +Proof. + intros. apply functional_extensionality. intros r0. destruct (preg_eq r r0). + - subst. repeat (rewrite Pregmap.gss); auto. + destruct v; simpl; auto. + rewrite Ptrofs.add_assoc. + enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto. + unfold Ptrofs.add. + enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto. + repeat (rewrite Ptrofs.unsigned_repr); auto. + - repeat (rewrite Pregmap.gso; auto). +Qed. + +Lemma concat2_straight: + forall a b bb rs m rs'' m'' f ge, + concat2 a b = OK bb -> + exec_bblock ge f bb rs m = Next rs'' m'' -> + exists rs' m', + exec_bblock ge f a rs m = Next rs' m' + /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a)) + /\ exec_bblock ge f b rs' m' = Next rs'' m''. +Proof. + intros until ge. intros CONC2 EXEB. + exploit concat2_zlt_size; eauto. intros (LTA & LTB). + exploit concat2_noexit; eauto. intros EXA. + exploit concat2_decomp; eauto. intros. inv H. + unfold exec_bblock in EXEB. destruct (exec_body ge (body bb) rs m) eqn:EXEB'; try discriminate. + rewrite H0 in EXEB'. apply exec_body_app in EXEB'. destruct EXEB' as (rs1 & m1 & EXEB1 & EXEB2). + eexists; eexists. split. + unfold exec_bblock. rewrite EXEB1. rewrite EXA. simpl. eauto. + split. + exploit exec_body_pc. eapply EXEB1. intros. rewrite <- H. auto. + unfold exec_bblock. unfold nextblock, incrPC. rewrite regset_same_assign. erewrite exec_body_pc_var; eauto. + rewrite <- H1. unfold nextblock in EXEB. rewrite regset_double_set_id. + assert (size bb = size a + size b). + { unfold size. rewrite H0. rewrite H1. rewrite app_length. rewrite EXA. simpl. rewrite Nat.add_0_r. + repeat (rewrite Nat2Z.inj_add). lia. } + clear EXA H0 H1. rewrite H in EXEB. + assert (rs1 PC = rs0 PC). { apply exec_body_pc in EXEB2. auto. } + rewrite H0. rewrite <- pc_set_add; auto. + exploit size_positive. instantiate (1 := a). intro. lia. + exploit size_positive. instantiate (1 := b). intro. lia. +Qed. + +Lemma concat_all_exec_bblock (ge: Genv.t fundef unit) (f: function) : + forall a bb rs m lbb rs'' m'', + lbb <> nil -> + concat_all (a :: lbb) = OK bb -> + exec_bblock ge f bb rs m = Next rs'' m'' -> + exists bb' rs' m', + concat_all lbb = OK bb' + /\ exec_bblock ge f a rs m = Next rs' m' + /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a)) + /\ exec_bblock ge f bb' rs' m' = Next rs'' m''. +Proof. + intros until m''. intros Hnonil CONC EXEB. + simpl in CONC. + destruct lbb as [|b lbb]; try contradiction. clear Hnonil. + monadInv CONC. exploit concat2_straight; eauto. intros (rs' & m' & EXEB1 & PCeq & EXEB2). + exists x. repeat econstructor. all: eauto. +Qed. + +Lemma ptrofs_add_repr : + forall a b, + Ptrofs.unsigned (Ptrofs.add (Ptrofs.repr a) (Ptrofs.repr b)) = Ptrofs.unsigned (Ptrofs.repr (a + b)). +Proof. + intros a b. + rewrite Ptrofs.add_unsigned. repeat (rewrite Ptrofs.unsigned_repr_eq). + rewrite <- Zplus_mod. auto. +Qed. + +Section PRESERVATION_ASMBLOCK. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma transf_function_no_overflow: + forall f tf, + transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned. +Proof. + intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. + lia. +Qed. + +Lemma symbols_preserved: + forall id, + Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof (Genv.find_symbol_match TRANSL). + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof (Genv.senv_match TRANSL). + +Lemma functions_translated: + forall v f, + Genv.find_funct ge v = Some f -> + exists tf, + Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_transf_partial TRANSL). + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_ptr_transf_partial TRANSL). + +Lemma functions_transl: + forall fb f tf, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transf_function f = OK tf -> + Genv.find_funct_ptr tge fb = Some (Internal tf). +Proof. + intros. exploit function_ptr_translated; eauto. + intros (tf' & A & B). monadInv B. rewrite H0 in EQ. inv EQ. auto. +Qed. + +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s1 s2, s1 = s2 -> match_states s1 s2. + +Lemma prog_main_preserved: + prog_main tprog = prog_main prog. +Proof (match_program_main TRANSL). + +Lemma prog_main_address_preserved: + (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) = + (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero). +Proof. + unfold Genv.symbol_address. rewrite symbols_preserved. + rewrite prog_main_preserved. auto. +Qed. + +Lemma transf_initial_states: + forall st1, initial_state prog st1 -> + exists st2, initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inv H. + econstructor; split. + - eapply initial_state_intro. + eapply (Genv.init_mem_transf_partial TRANSL); eauto. + - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> final_state st1 r -> final_state st2 r. +Proof. + intros. inv H0. inv H. econstructor; eauto. +Qed. + +Lemma tail_find_bblock: + forall lbb pos bb, + find_bblock pos lbb = Some bb -> + exists c, code_tail pos lbb (bb::c). +Proof. + induction lbb. + - intros. simpl in H. inv H. + - intros. simpl in H. + destruct (zlt pos 0); try (inv H; fail). + destruct (zeq pos 0). + + inv H. exists lbb. constructor; auto. + + apply IHlbb in H. destruct H as (c & TAIL). exists c. + enough (pos = pos - size a + size a) as ->. + apply code_tail_S; auto. + lia. +Qed. + +Lemma code_tail_head_app: + forall l pos c1 c2, + code_tail pos c1 c2 -> + code_tail (pos + size_blocks l) (l++c1) c2. +Proof. + induction l. + - intros. simpl. rewrite Z.add_0_r. auto. + - intros. apply IHl in H. simpl. rewrite (Z.add_comm (size a)). rewrite Z.add_assoc. apply code_tail_S. assumption. +Qed. + +Lemma transf_blocks_verified: + forall c tc pos bb c', + transf_blocks c = OK tc -> + code_tail pos c (bb::c') -> + exists lbb, + verified_schedule bb = OK lbb + /\ exists tc', code_tail pos tc (lbb ++ tc'). +Proof. + induction c; intros. + - simpl in H. inv H. inv H0. + - inv H0. + + monadInv H. exists x0. + split; simpl; auto. eexists; eauto. econstructor; eauto. + + unfold transf_blocks in H. fold transf_blocks in H. monadInv H. + exploit IHc; eauto. + intros (lbb & TRANS & tc' & TAIL). +(* monadInv TRANS. *) + repeat eexists; eauto. + erewrite verified_schedule_size; eauto. + apply code_tail_head_app. + eauto. +Qed. + +Lemma transf_find_bblock: + forall ofs f bb tf, + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb -> + transf_function f = OK tf -> + exists lbb, + verified_schedule bb = OK lbb + /\ exists c, code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c). +Proof. + intros. + monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0. + monadInv EQ. apply tail_find_bblock in H. destruct H as (c & TAIL). + eapply transf_blocks_verified; eauto. +Qed. + +Lemma symbol_address_preserved: + forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs. +Proof. + intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity. +Qed. + +Lemma head_tail {A: Type}: + forall (l: list A) hd, hd::l = hd :: (tail (hd::l)). +Proof. + intros. simpl. auto. +Qed. + +Lemma verified_schedule_not_empty: + forall bb lbb, + verified_schedule bb = OK lbb -> lbb <> nil. +Proof. + intros. apply verified_schedule_size in H. + pose (size_positive bb). assert (size_blocks lbb > 0) by lia. clear H g. + destruct lbb; simpl in *; discriminate. +Qed. + +Lemma header_nil_label_pos_none: + forall lbb l p, + Forall (fun b => header b = nil) lbb -> label_pos l p lbb = None. +Proof. + induction lbb. + - intros. simpl. auto. + - intros. inv H. simpl. unfold is_label. rewrite H2. destruct (in_dec l nil). { inv i. } + auto. +Qed. + +Lemma verified_schedule_label: + forall bb tbb lbb l, + verified_schedule bb = OK (tbb :: lbb) -> + is_label l bb = is_label l tbb + /\ label_pos l 0 lbb = None. +Proof. + intros. exploit verified_schedule_header; eauto. + intros (HdrEq & HdrNil). + split. + - unfold is_label. rewrite HdrEq. reflexivity. + - apply header_nil_label_pos_none. assumption. +Qed. + +Lemma label_pos_app_none: + forall c c' l p p', + label_pos l p c = None -> + label_pos l (p' + size_blocks c) c' = label_pos l p' (c ++ c'). +Proof. + induction c. + - intros. simpl in *. rewrite Z.add_0_r. reflexivity. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLABEL. + + discriminate. + + eapply IHc in H. rewrite Z.add_assoc. eauto. +Qed. + +Remark label_pos_pvar_none_add: + forall tc l p p' k, + label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None. +Proof. + induction tc. + - intros. simpl. auto. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + discriminate. + + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar_none: + forall tc l p p', + label_pos l p tc = None -> label_pos l p' tc = None. +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_none_add; eauto. +Qed. + +Remark label_pos_pvar_some_add_add: + forall tc l p p' k k', + label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k). +Proof. + induction tc. + - intros. simpl in H. discriminate. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + inv H. assert (k = k') by lia. subst. reflexivity. + + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar_some_add: + forall tc l p p' k, + label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k). +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_some_add_add; eauto. +Qed. + +Remark label_pos_pvar_add: + forall c tc l p p' k, + label_pos l (p+k) c = label_pos l p tc -> + label_pos l (p'+k) c = label_pos l p' tc. +Proof. + induction c. + - intros. simpl in *. + exploit label_pos_pvar_none; eauto. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + exploit label_pos_pvar_some_add; eauto. + + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar: + forall c tc l p p', + label_pos l p c = label_pos l p tc -> + label_pos l p' c = label_pos l p' tc. +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_add; eauto. +Qed. + +Lemma label_pos_head_app: + forall c bb lbb l tc p, + verified_schedule bb = OK lbb -> + label_pos l p c = label_pos l p tc -> + label_pos l p (bb :: c) = label_pos l p (lbb ++ tc). +Proof. + intros. simpl. destruct lbb as [|tbb lbb]. + - apply verified_schedule_not_empty in H. contradiction. + - simpl. exploit verified_schedule_label; eauto. intros (ISLBL & LBLPOS). + rewrite ISLBL. + destruct (is_label l tbb) eqn:ISLBL'; simpl; auto. + eapply label_pos_pvar in H0. erewrite H0. + erewrite verified_schedule_size; eauto. simpl size_blocks. rewrite Z.add_assoc. + erewrite label_pos_app_none; eauto. +Qed. + +Lemma label_pos_preserved: + forall c tc l, + transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc. +Proof. + induction c. + - intros. simpl in *. inv H. reflexivity. + - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H. eapply IHc in EQ. + eapply label_pos_head_app; eauto. +Qed. + +Lemma label_pos_preserved_blocks: + forall l f tf, + transf_function f = OK tf -> + label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf). +Proof. + intros. monadInv H. monadInv EQ. + destruct (zlt Ptrofs.max_unsigned _); try discriminate. + monadInv EQ0. simpl. eapply label_pos_preserved; eauto. +Qed. + +Lemma transf_exec_control: + forall f tf ex rs m, + transf_function f = OK tf -> + exec_control ge f ex rs m = exec_control tge tf ex rs m. +Proof. + intros. destruct ex; simpl; auto. + assert (ge = Genv.globalenv prog). auto. + assert (tge = Genv.globalenv tprog). auto. + pose symbol_address_preserved. + exploreInst; simpl; auto; try congruence; + unfold par_goto_label; unfold par_eval_branch; unfold par_goto_label; erewrite label_pos_preserved_blocks; eauto. +Qed. + +Lemma transf_exec_basic_instr: + forall i rs m, exec_basic_instr ge i rs m = exec_basic_instr tge i rs m. +Proof. + intros. pose symbol_address_preserved. + unfold exec_basic_instr. unfold bstep. exploreInst; simpl; auto; try congruence. + unfold parexec_arith_instr; unfold arith_eval_r; exploreInst; simpl; auto; try congruence. +Qed. + +Lemma transf_exec_body: + forall bdy rs m, exec_body ge bdy rs m = exec_body tge bdy rs m. +Proof. + induction bdy; intros. + - simpl. reflexivity. + - simpl. rewrite transf_exec_basic_instr. + destruct (exec_basic_instr _ _ _); auto. +Qed. + +Lemma transf_exec_bblock: + forall f tf bb rs m, + transf_function f = OK tf -> + exec_bblock ge f bb rs m = exec_bblock tge tf bb rs m. +Proof. + intros. unfold exec_bblock. rewrite transf_exec_body. destruct (exec_body _ _ _ _); auto. + eapply transf_exec_control; eauto. +Qed. + +Lemma transf_step_simu: + forall tf b lbb ofs c tbb rs m rs' m', + Genv.find_funct_ptr tge b = Some (Internal tf) -> + size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned -> + rs PC = Vptr b ofs -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c) -> + concat_all lbb = OK tbb -> + exec_bblock tge tf tbb rs m = Next rs' m' -> + plus step tge (State rs m) E0 (State rs' m'). +Proof. + induction lbb. + - intros until m'. simpl. intros. discriminate. + - intros until m'. intros GFIND SIZE PCeq TAIL CONC EXEB. + destruct lbb. + + simpl in *. clear IHlbb. inv CONC. eapply plus_one. econstructor; eauto. eapply find_bblock_tail; eauto. + + exploit concat_all_exec_bblock; eauto; try discriminate. + intros (tbb0 & rs0 & m0 & CONC0 & EXEB0 & PCeq' & EXEB1). + eapply plus_left. + econstructor. + 3: eapply find_bblock_tail. rewrite <- app_comm_cons in TAIL. 3: eauto. + all: eauto. + eapply plus_star. eapply IHlbb; eauto. rewrite PCeq in PCeq'. simpl in PCeq'. all: eauto. + eapply code_tail_next_int; eauto. +Qed. + +Theorem transf_step_correct: + forall s1 t s2, step ge s1 t s2 -> + forall s1' (MS: match_states s1 s1'), + (exists s2', plus step tge s1' t s2' /\ match_states s2 s2'). +Proof. + induction 1; intros; inv MS. + - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF. + exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL). + exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ). inv CONC. rename H3 into CONC. + assert (NOOV: size_blocks x.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + + erewrite transf_exec_bblock in H2; eauto. + unfold bblock_simu in BBEQ. rewrite BBEQ in H2; try congruence. + exists (State rs' m'). split; try (constructor; auto). + eapply transf_step_simu; eauto. + + - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF. + exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL). + exploit verified_schedule_builtin_idem; eauto. intros. subst lbb. + + remember (State (nextblock _ _) _) as s'. exists s'. + split; try constructor; auto. + eapply plus_one. subst s'. + eapply exec_step_builtin. + 3: eapply find_bblock_tail. simpl in TAIL. 3: eauto. + all: eauto. + eapply eval_builtin_args_preserved with (ge1 := ge). exact symbols_preserved. eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + + - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF. + remember (State _ m') as s'. exists s'. split; try constructor; auto. + subst s'. eapply plus_one. eapply exec_step_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. +Qed. + +Theorem transf_program_correct_Asmblock: + forward_simulation (Asmblock.semantics prog) (Asmblock.semantics tprog). +Proof. + eapply forward_simulation_plus. + - apply senv_preserved. + - apply transf_initial_states. + - apply transf_final_states. + - apply transf_step_correct. +Qed. + +End PRESERVATION_ASMBLOCK. + +Require Import Asmvliw. + +Lemma verified_par_checks_alls_bundles lb x: forall bundle, + verify_par lb = OK x -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + induction lb; simpl; try tauto. + intros bundle H; monadInv H. + destruct 1; subst; eauto. + destruct x0; auto. +Qed. + +Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle: + verified_schedule_nob bb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + unfold verified_schedule_nob. intros H; + monadInv H. destruct x4. + intros; eapply verified_par_checks_alls_bundles; eauto. +Qed. + +Lemma verify_par_bblock_PExpand bb i: + exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt. +Proof. + destruct bb as [h bdy ext H]; simpl. + intros; subst. destruct i. + generalize H. + rewrite <- wf_bblock_refl in H. + destruct H as [H H0]. + unfold builtin_alone in H0. erewrite H0; eauto. +Qed. + +Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core. + +Lemma verified_schedule_checks_alls_bundles bb lb bundle: + verified_schedule bb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + unfold verified_schedule. remember (exit bb) as exb. + destruct exb as [c|]; eauto. + destruct c as [i|]; eauto. + destruct i; intros H. inversion_clear H; simpl. + intuition subst. + intros; eapply verify_par_bblock_PExpand; eauto. +Qed. + +Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle, + transf_blocks lbb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + induction lbb; simpl. + - intros lb bundle H; inversion_clear H. simpl; try tauto. + - intros lb bundle H0. + monadInv H0. + rewrite in_app. destruct 1; eauto. + eapply verified_schedule_checks_alls_bundles; eauto. +Qed. + +Lemma find_bblock_Some_in lb: + forall ofs b, find_bblock ofs lb = Some b -> List.In b lb. +Proof. + induction lb; simpl; try congruence. + intros ofs b. + destruct (zlt ofs 0); try congruence. + destruct (zeq ofs 0); eauto. + intros X; inversion X; eauto. +Qed. + +Section PRESERVATION_ASMVLIW. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma all_bundles_are_checked b ofs f bundle: + Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) -> + find_bblock ofs (fn_blocks f) = Some bundle -> + verify_par_bblock bundle = OK tt. +Proof. + unfold match_prog, match_program in TRANSL. + unfold Genv.find_funct_ptr; simpl; intros X. + destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence. + destruct y as [tf0|]; try congruence. + inversion X as [H1]. subst. clear X. + remember (@Gfun fundef unit (Internal f)) as f2. + destruct H as [ctx' f1 f2 H0|]; try congruence. + inversion Heqf2 as [H2]. subst; clear Heqf2. + unfold transf_fundef, transf_partial_fundef in H. + destruct f1 as [f1|f1]; try congruence. + unfold transf_function, transl_function in H. + monadInv H. monadInv EQ. + destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence. + injection EQ1; intros; subst. + monadInv EQ0. simpl in * |-. + intros; exploit transf_blocks_checks_all_bundles; eauto. + intros; eapply find_bblock_Some_in; eauto. +Qed. + +Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m': + exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' -> + verify_par_bblock bundle = OK tt -> + det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'. +Proof. + intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0. + simpl in H. + eapply Asmblockdeps.bblock_para_check_correct; eauto. +Qed. + +Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m': + Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle -> + exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' -> + det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'. +Proof. + intros; eapply checked_bundles_are_parexec_equiv; eauto. + eapply all_bundles_are_checked; eauto. +Qed. + +Theorem transf_program_correct_Asmvliw: + forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog). +Proof. + eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto. + - intros; subst; auto. + - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto. + + eapply exec_step_internal; eauto. + intros; eapply seqexec_parexec_equiv; eauto. + + eapply exec_step_builtin; eauto. + + eapply exec_step_external; eauto. +Qed. + +End PRESERVATION_ASMVLIW. + +Section PRESERVATION. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Theorem transf_program_correct: + forward_simulation (Asmblock.semantics prog) (Asmvliw.semantics tprog). +Proof. + eapply compose_forward_simulations. + eapply transf_program_correct_Asmblock; eauto. + eapply transf_program_correct_Asmvliw; eauto. +Qed. + +End PRESERVATION. diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml new file mode 120000 index 00000000..912e9ffa --- /dev/null +++ b/kvx/PrepassSchedulingOracle.ml @@ -0,0 +1 @@ +../aarch64/PrepassSchedulingOracle.ml
\ No newline at end of file diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml new file mode 120000 index 00000000..1e955b85 --- /dev/null +++ b/kvx/PrepassSchedulingOracleDeps.ml @@ -0,0 +1 @@ +../aarch64/PrepassSchedulingOracleDeps.ml
\ No newline at end of file diff --git a/kvx/PrintOp.ml b/kvx/PrintOp.ml new file mode 100644 index 00000000..da7d6c32 --- /dev/null +++ b/kvx/PrintOp.ml @@ -0,0 +1,229 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Pretty-printing of operators, conditions, addressing modes *) + +open Printf +open Camlcoq +open Integers +open Op +open ExtValues + +let comparison_name = function + | Ceq -> "==" + | Cne -> "!=" + | Clt -> "<" + | Cle -> "<=" + | Cgt -> ">" + | Cge -> ">=" + +let print_condition reg pp = function + | (Ccomp c, [r1;r2]) -> + fprintf pp "%a %ss %a" reg r1 (comparison_name c) reg r2 + | (Ccompu c, [r1;r2]) -> + fprintf pp "%a %su %a" reg r1 (comparison_name c) reg r2 + | (Ccompimm(c, n), [r1]) -> + fprintf pp "%a %ss %ld" reg r1 (comparison_name c) (camlint_of_coqint n) + | (Ccompuimm(c, n), [r1]) -> + fprintf pp "%a %su %ld" reg r1 (comparison_name c) (camlint_of_coqint n) + | (Ccompf c, [r1;r2]) -> + fprintf pp "%a %sf %a" reg r1 (comparison_name c) reg r2 + | (Ccompl c, [r1;r2]) -> + fprintf pp "%a %sls %a" reg r1 (comparison_name c) reg r2 + | (Ccomplu c, [r1;r2]) -> + fprintf pp "%a %slu %a" reg r1 (comparison_name c) reg r2 + | (Ccomplimm(c, n), [r1]) -> + fprintf pp "%a %sls %Ld" reg r1 (comparison_name c) (camlint64_of_coqint n) + | (Ccompluimm(c, n), [r1]) -> + fprintf pp "%a %slu %Lu" reg r1 (comparison_name c) (camlint64_of_coqint n) + | (Cnotcompf c, [r1;r2]) -> + fprintf pp "%a not(%sf) %a" reg r1 (comparison_name c) reg r2 + | (Ccompfs c, [r1;r2]) -> + fprintf pp "%a %sfs %a" reg r1 (comparison_name c) reg r2 + | (Cnotcompfs c, [r1;r2]) -> + fprintf pp "%a not(%sfs) %a" reg r1 (comparison_name c) reg r2 + | _ -> + fprintf pp "<bad condition>" + +let print_condition0 reg pp cond0 rc = + match cond0 with + | Ccomp0 c -> fprintf pp "%a %ss 0" reg rc (comparison_name c) + | Ccompu0 c -> fprintf pp "%a %su 0" reg rc (comparison_name c) + | Ccompl0 c -> fprintf pp "%a %ss 0" reg rc (comparison_name c) + | Ccomplu0 c -> fprintf pp "%a %su 0" reg rc (comparison_name c) + +let int_of_s14 = function + | SHIFT1 -> 1 + | SHIFT2 -> 2 + | SHIFT3 -> 3 + | SHIFT4 -> 4 + +let print_operation reg pp op = match op with + | Omove, [r1] -> reg pp r1 + | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n) + | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n) + | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n) + | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n) + | Oaddrsymbol(id, ofs), [] -> + fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs) + | Oaddrstack ofs, [] -> + fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs) + | Ocast8signed, [r1] -> fprintf pp "int8signed(%a)" reg r1 + | Ocast16signed, [r1] -> fprintf pp "int16signed(%a)" reg r1 + | Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2 + | Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n) + | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2 + | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm) + | Oneg, [r1] -> fprintf pp "-(%a)" reg r1 + | Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2 + | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1 + | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14) + | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14) + | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2 + | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm) + | Omulhs, [r1;r2] -> fprintf pp "%a *hs %a" reg r1 reg r2 + | Omulhu, [r1;r2] -> fprintf pp "%a *hu %a" reg r1 reg r2 + | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2 + | Odivu, [r1;r2] -> fprintf pp "%a /u %a" reg r1 reg r2 + | Omod, [r1;r2] -> fprintf pp "%a %%s %a" reg r1 reg r2 + | Omodu, [r1;r2] -> fprintf pp "%a %%u %a" reg r1 reg r2 + | Oand, [r1;r2] -> fprintf pp "%a & %a" reg r1 reg r2 + | Oandimm n, [r1] -> fprintf pp "%a & %ld" reg r1 (camlint_of_coqint n) + | Oor, [r1;r2] -> fprintf pp "%a | %a" reg r1 reg r2 + | Oorimm n, [r1] -> fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n) + | Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2 + | Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n) + | Onxor, [r1;r2] -> fprintf pp "~(%a ^ %a)" reg r1 reg r2 + | Onxorimm n, [r1] -> fprintf pp "~(%a ^ %ld)" reg r1 (camlint_of_coqint n) + | Onot, [r1] -> fprintf pp "~%a" reg r1 + | Oandn, [r1; r2] -> fprintf pp "(~%a) & %a" reg r1 reg r2 + | Oandnimm n, [r1] -> fprintf pp "(~%a) & %ld" reg r1 (camlint_of_coqint n) + | Oorn, [r1;r2] -> fprintf pp "(~%a) | %a" reg r1 reg r2 + | Oornimm n, [r1] -> fprintf pp "(~%a) | %ld" reg r1 (camlint_of_coqint n) + | Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2 + | Oshlimm n, [r1] -> fprintf pp "%a << %ld" reg r1 (camlint_of_coqint n) + | Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2 + | Oshrimm n, [r1] -> fprintf pp "%a >>s %ld" reg r1 (camlint_of_coqint n) + | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2 + | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n) + | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n) + | Ororimm n, [r1] -> fprintf pp "(%a ror %ld)" reg r1 (camlint_of_coqint n) + | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3 + | Omaddimm imm, [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm) + | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3 + + | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 + | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 + | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 + | Ocast32signed, [r1] -> fprintf pp "long32signed(%a)" reg r1 + | Ocast32unsigned, [r1] -> fprintf pp "long32unsigned(%a)" reg r1 + | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2 + | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n) + | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2 + | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm) + | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1 + | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1 (int_of_s14 s14) + | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14) + | Onegl, [r1] -> fprintf pp "-l (%a)" reg r1 + | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2 + | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2 + | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm) + | Omullhs, [r1;r2] -> fprintf pp "%a *lhs %a" reg r1 reg r2 + | Omullhu, [r1;r2] -> fprintf pp "%a *lhu %a" reg r1 reg r2 + | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2 + | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2 + | Omodl, [r1;r2] -> fprintf pp "%a %%ls %a" reg r1 reg r2 + | Omodlu, [r1;r2] -> fprintf pp "%a %%lu %a" reg r1 reg r2 + | Oandl, [r1;r2] -> fprintf pp "%a &l %a" reg r1 reg r2 + | Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n) + | Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2 + | Oorlimm n, [r1] -> fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n) + | Onorl, [r1; r2] -> fprintf pp "~(%a |l %a)" reg r1 reg r2 + | Onorlimm n, [r1] -> fprintf pp "~(%a |l %Ld)" reg r1 (camlint64_of_coqint n) + | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2 + | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n) + | Onxorl, [r1;r2] -> fprintf pp "~(%a ^l %a)" reg r1 reg r2 + | Onxorlimm n, [r1] -> fprintf pp "~(%a ^l %Ld)" reg r1 (camlint64_of_coqint n) + | Onotl, [r1] -> fprintf pp "~%a" reg r1 + | Oandnl, [r1;r2] -> fprintf pp "(~%a) &l %a" reg r1 reg r2 + | Oandnlimm n, [r1] -> fprintf pp "(~%a) &l %Ld" reg r1 (camlint64_of_coqint n) + | Oornl, [r1;r2] -> fprintf pp "(~%a) |l %a" reg r1 reg r2 + | Oornlimm n, [r1;r2] -> fprintf pp "(~%a) |l %Ld" reg r1 (camlint64_of_coqint n) + | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2 + | Oshllimm n, [r1] -> fprintf pp "%a <<l %Ld" reg r1 (camlint64_of_coqint n) + | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2 + | Oshrlimm n, [r1] -> fprintf pp "%a >>ls %ld" reg r1 (camlint_of_coqint n) + | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2 + | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n) + | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n) + | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3 + | Omaddlimm imm, [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm) + | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3 + + | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1 + | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1 + | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2 + | Osubf, [r1;r2] -> fprintf pp "%a -f %a" reg r1 reg r2 + | Omulf, [r1;r2] -> fprintf pp "%a *f %a" reg r1 reg r2 + | Odivf, [r1;r2] -> fprintf pp "%a /f %a" reg r1 reg r2 + | Onegfs, [r1] -> fprintf pp "negfs(%a)" reg r1 + | Oabsfs, [r1] -> fprintf pp "absfs(%a)" reg r1 + | Oaddfs, [r1;r2] -> fprintf pp "%a +fs %a" reg r1 reg r2 + | Osubfs, [r1;r2] -> fprintf pp "%a -fs %a" reg r1 reg r2 + | Omulfs, [r1;r2] -> fprintf pp "%a *fs %a" reg r1 reg r2 + | Odivfs, [r1;r2] -> fprintf pp "%a /fs %a" reg r1 reg r2 + | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1 + | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1 + | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1 + | Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1 + | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1 + | Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1 + | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1 + | Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1 + | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1 + | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1 + | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1 + | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1 + | Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1 + | Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1 + | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1 + | Osingleoflongu, [r1] -> fprintf pp "singleoflongu(%a)" reg r1 + | Ocmp c, args -> print_condition reg pp (c, args) + + | Oextfz(stop, start), [r1] -> fprintf pp "extfz(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 + | Oextfs(stop, start), [r1] -> fprintf pp "extfs(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 + | Oextfzl(stop, start), [r1] -> fprintf pp "extfzl(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 + | Oextfsl(stop, start), [r1] -> fprintf pp "extfsl(%ld, %ld, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 + | Oinsf(stop, start), [r1; r2] -> fprintf pp "insf(%ld, %ld, %a, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 reg r2 + | Oinsfl(stop, start), [r1; r2] -> fprintf pp "insfl(%ld, %ld, %a, %a)" (camlint_of_coqint stop) (camlint_of_coqint start) reg r1 reg r2 + | Osel(cond0, ty), [r1; r2; rc] -> + print_condition0 reg pp cond0 rc; + fprintf pp " ? %a : %a" reg r1 reg r2 + | Oselimm(cond0, imm), [r1; rc] -> + print_condition0 reg pp cond0 rc; + fprintf pp " ? %a : %ld" reg r1 (camlint_of_coqint imm) + | Osellimm(cond0, imm), [r1; rc] -> + print_condition0 reg pp cond0 rc; + fprintf pp " ? %a :l %Ld" reg r1 (camlint64_of_coqint imm) + | _, _ -> fprintf pp "<bad operator>" + +let print_addressing reg pp = function + | Aindexed2XS scale, [r1;r2] -> fprintf pp "%a + (%a << %ld)" reg r1 reg r2 (camlint_of_coqint scale) + | Aindexed2, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2 + | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n) + | Aglobal(id, ofs), [] -> + fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs) + | Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs) + | _ -> fprintf pp "<bad addressing>" diff --git a/kvx/SelectLong.vp b/kvx/SelectLong.vp new file mode 100644 index 00000000..b3638eca --- /dev/null +++ b/kvx/SelectLong.vp @@ -0,0 +1,463 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Instruction selection for 64-bit integer operations *) + +Require Import Coqlib. +Require Import Compopts. +Require Import AST Integers Floats. +Require Import Op CminorSel. +Require Import OpHelpers. +Require Import SelectOp SplitLong. +Require Import ExtValues. +Require Import DecBoolOps. + +Local Open Scope cminorsel_scope. +Local Open Scope string_scope. + +Section SELECT. + +Context {hf: helper_functions}. + +Definition longconst (n: int64) : expr := + if Archi.splitlong then SplitLong.longconst n else Eop (Olongconst n) Enil. + +Definition is_longconst (e: expr) := + if Archi.splitlong then SplitLong.is_longconst e else + match e with + | Eop (Olongconst n) Enil => Some n + | _ => None + end. + +Definition intoflong (e: expr) := + if Archi.splitlong then SplitLong.intoflong e else + match is_longconst e with + | Some n => Eop (Ointconst (Int.repr (Int64.unsigned n))) Enil + | None => Eop Olowlong (e ::: Enil) + end. + +Definition longofint (e: expr) := + if Archi.splitlong then SplitLong.longofint e else + match is_intconst e with + | Some n => longconst (Int64.repr (Int.signed n)) + | None => Eop Ocast32signed (e ::: Enil) + end. + +Definition longofintu (e: expr) := + if Archi.splitlong then SplitLong.longofintu e else + match is_intconst e with + | Some n => longconst (Int64.repr (Int.unsigned n)) + | None => Eop Ocast32unsigned (e ::: Enil) + end. + +(** ** Integer addition and pointer addition *) + +Definition addlimm_shllimm sh k2 e1 := + if Compopts.optim_addx tt + then + match shift1_4_of_z (Int.unsigned sh) with + | Some s14 => Eop (Oaddxlimm s14 k2) (e1:::Enil) + | None => Eop (Oaddlimm k2) ((Eop (Oshllimm sh) (e1:::Enil)):::Enil) + end + else Eop (Oaddlimm k2) ((Eop (Oshllimm sh) (e1:::Enil)):::Enil). + +Nondetfunction addlimm (n: int64) (e: expr) := + if Int64.eq n Int64.zero then e else + match e with + | Eop (Olongconst m) Enil => longconst (Int64.add n m) + | Eop (Oaddrsymbol s m) Enil => + (if Compopts.optim_globaladdroffset tt + then Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil + else Eop (Oaddlimm n) (e ::: Enil)) + | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n) m)) Enil + | Eop (Oaddlimm m) (t ::: Enil) => Eop (Oaddlimm(Int64.add n m)) (t ::: Enil) + | Eop (Oaddxlimm sh m) (t ::: Enil) => Eop (Oaddxlimm sh (Int64.add n m)) (t ::: Enil) + | Eop (Oshllimm sh) (t1:::Enil) => addlimm_shllimm sh n t1 + | _ => Eop (Oaddlimm n) (e ::: Enil) + end. + +Definition addl_shllimm n e1 e2 := + if Compopts.optim_addx tt + then + match shift1_4_of_z (Int.unsigned n) with + | Some s14 => Eop (Oaddxl s14) (e1:::e2:::Enil) + | None => Eop Oaddl (e2:::(Eop (Oshllimm n) (e1:::Enil)):::Enil) + end + else Eop Oaddl (e2:::(Eop (Oshllimm n) (e1:::Enil)):::Enil). + +Nondetfunction addl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.addl e1 e2 else + match e1, e2 with + | Eop (Olongconst n1) Enil, t2 => addlimm n1 t2 + | t1, Eop (Olongconst n2) Enil => addlimm n2 t1 + | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) => + addlimm (Int64.add n1 n2) (Eop Oaddl (t1:::t2:::Enil)) + | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil => + Eop Oaddl (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int64 n1) n2)) Enil ::: t1 ::: Enil) + | Eop (Oaddrstack n1) Enil, Eop (Oaddlimm n2) (t2:::Enil) => + Eop Oaddl (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int64 n2))) Enil ::: t2 ::: Enil) + | Eop (Oaddlimm n1) (t1:::Enil), t2 => + addlimm n1 (Eop Oaddl (t1:::t2:::Enil)) + | t1, Eop (Oaddlimm n2) (t2:::Enil) => + addlimm n2 (Eop Oaddl (t1:::t2:::Enil)) + | t1, (Eop Omull (t2:::t3:::Enil)) => + Eop Omaddl (t1:::t2:::t3:::Enil) + | (Eop Omull (t2:::t3:::Enil)), t1 => + Eop Omaddl (t1:::t2:::t3:::Enil) + | t1, (Eop (Omullimm n) (t2:::Enil)) => + Eop (Omaddlimm n) (t1:::t2:::Enil) + | (Eop (Omullimm n) (t2:::Enil)), t1 => + Eop (Omaddlimm n) (t1:::t2:::Enil) + | (Eop (Oshllimm n) (t1:::Enil)), t2 => + addl_shllimm n t1 t2 + | t2, (Eop (Oshllimm n) (t1:::Enil)) => + addl_shllimm n t1 t2 + | _, _ => Eop Oaddl (e1:::e2:::Enil) + end. + +(** ** Integer and pointer subtraction *) + +Nondetfunction subl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.subl e1 e2 else + match e1, e2 with + | t1, Eop (Olongconst n2) Enil => + addlimm (Int64.neg n2) t1 + | Eop (Oaddlimm n1) (t1:::Enil), Eop (Oaddlimm n2) (t2:::Enil) => + addlimm (Int64.sub n1 n2) (Eop Osubl (t1:::t2:::Enil)) + | Eop (Oaddlimm n1) (t1:::Enil), t2 => + addlimm n1 (Eop Osubl (t1:::t2:::Enil)) + | t1, Eop (Oaddlimm n2) (t2:::Enil) => + addlimm (Int64.neg n2) (Eop Osubl (t1:::t2:::Enil)) + | t1, (Eop Omull (t2:::t3:::Enil)) => + Eop Omsubl (t1:::t2:::t3:::Enil) + | t1, (Eop (Omullimm n) (t2:::Enil)) => + Eop (Omaddlimm (Int64.neg n)) (t1:::t2:::Enil) + | _, _ => Eop Osubl (e1:::e2:::Enil) + end. + +Definition negl (e: expr) := + if Archi.splitlong then SplitLong.negl e else + match is_longconst e with + | Some n => longconst (Int64.neg n) + | None => Eop Onegl (e ::: Enil) + end. + +(** ** Immediate shifts *) + +Nondetfunction shllimm (e1: expr) (n: int) := + if Archi.splitlong then SplitLong.shllimm e1 n else + if Int.eq n Int.zero then + e1 + else if negb (Int.ltu n Int64.iwordsize') then + Eop Oshll (e1 ::: Eop (Ointconst n) Enil ::: Enil) + else match e1 with + | Eop (Olongconst n1) Enil => + longconst (Int64.shl' n1 n) + | Eop (Oshllimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int64.iwordsize' + then Eop (Oshllimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshllimm n) (e1:::Enil) + | _ => + Eop (Oshllimm n) (e1:::Enil) + end. + +Nondetfunction shrluimm (e1: expr) (n: int) := + if Archi.splitlong then SplitLong.shrluimm e1 n else + if Int.eq n Int.zero then e1 else + if negb (Int.ltu n Int64.iwordsize') then + Eop Oshrlu (e1:::Eop (Ointconst n) Enil:::Enil) + else + match e1 with + | Eop (Olongconst n1) Enil => + longconst (Int64.shru' n1 n) + | Eop (Oshrluimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int64.iwordsize' + then Eop (Oshrluimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshrluimm n) (e1:::Enil) + | Eop (Oshllimm n1) (t1:::Enil) => + let stop := Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one) in + let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int64.zwordsize in + if is_bitfieldl stop start + then Eop (Oextfzl stop start) (t1:::Enil) + else Eop (Oshrluimm n) (e1:::Enil) + | _ => + Eop (Oshrluimm n) (e1:::Enil) + end. + +Nondetfunction shrlimm (e1: expr) (n: int) := + if Archi.splitlong then SplitLong.shrlimm e1 n else + if Int.eq n Int.zero then e1 else + if negb (Int.ltu n Int64.iwordsize') then + Eop Oshrl (e1:::Eop (Ointconst n) Enil:::Enil) + else + match e1 with + | Eop (Olongconst n1) Enil => + longconst (Int64.shr' n1 n) + | Eop (Oshrlimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int64.iwordsize' + then Eop (Oshrlimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshrlimm n) (e1:::Enil) + | Eop (Oshllimm n1) (t1:::Enil) => + let stop := Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one) in + let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int64.zwordsize in + if is_bitfieldl stop start + then Eop (Oextfsl stop start) (t1:::Enil) + else Eop (Oshrlimm n) (e1:::Enil) + | _ => + Eop (Oshrlimm n) (e1:::Enil) + end. + +(** ** General shifts *) + +Definition shll (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.shll e1 e2 else + match is_intconst e2 with + | Some n2 => shllimm e1 n2 + | None => Eop Oshll (e1:::e2:::Enil) + end. + +Definition shrl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.shrl e1 e2 else + match is_intconst e2 with + | Some n2 => shrlimm e1 n2 + | None => Eop Oshrl (e1:::e2:::Enil) + end. + +Definition shrlu (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.shrlu e1 e2 else + match is_intconst e2 with + | Some n2 => shrluimm e1 n2 + | _ => Eop Oshrlu (e1:::e2:::Enil) + end. + +(** ** Integer multiply *) + +Definition mullimm_base (n1: int64) (e2: expr) := + match Int64.one_bits' n1 with + | i :: nil => + shllimm e2 i + | i :: j :: nil => + Elet e2 (addl (shllimm (Eletvar 0) i) (shllimm (Eletvar 0) j)) + | _ => + Eop (Omullimm n1) (e2 ::: Enil) + end. + +Nondetfunction mullimm (n1: int64) (e2: expr) := + if Archi.splitlong then SplitLong.mullimm n1 e2 + else if Int64.eq n1 Int64.zero then longconst Int64.zero + else if Int64.eq n1 Int64.one then e2 + else match e2 with + | Eop (Olongconst n2) Enil => longconst (Int64.mul n1 n2) + | Eop (Oaddlimm n2) (t2:::Enil) => addlimm (Int64.mul n1 n2) (mullimm_base n1 t2) + | _ => mullimm_base n1 e2 + end. + +Nondetfunction mull (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.mull e1 e2 else + match e1, e2 with + | Eop (Olongconst n1) Enil, t2 => mullimm n1 t2 + | t1, Eop (Olongconst n2) Enil => mullimm n2 t1 + | _, _ => Eop Omull (e1:::e2:::Enil) + end. + +Definition mullhu (e1: expr) (n2: int64) := + if Archi.splitlong then SplitLong.mullhu e1 n2 else + Eop Omullhu (e1 ::: longconst n2 ::: Enil). + +Definition mullhs (e1: expr) (n2: int64) := + if Archi.splitlong then SplitLong.mullhs e1 n2 else + Eop Omullhs (e1 ::: longconst n2 ::: Enil). + +(** ** Bitwise and, or, xor *) + +Nondetfunction andlimm (n1: int64) (e2: expr) := + if Int64.eq n1 Int64.zero then longconst Int64.zero else + if Int64.eq n1 Int64.mone then e2 else + match e2 with + | Eop (Olongconst n2) Enil => + longconst (Int64.and n1 n2) + | Eop (Oandlimm n2) (t2:::Enil) => + Eop (Oandlimm (Int64.and n1 n2)) (t2:::Enil) + | Eop Onotl (t2:::Enil) => Eop (Oandnlimm n1) (t2:::Enil) + | _ => + Eop (Oandlimm n1) (e2:::Enil) + end. + +Nondetfunction andl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.andl e1 e2 else + match e1, e2 with + | Eop (Olongconst n1) Enil, t2 => andlimm n1 t2 + | t1, Eop (Olongconst n2) Enil => andlimm n2 t1 + | (Eop Onotl (t1:::Enil)), t2 => Eop Oandnl (t1:::t2:::Enil) + | t1, (Eop Onotl (t2:::Enil)) => Eop Oandnl (t2:::t1:::Enil) + | _, _ => Eop Oandl (e1:::e2:::Enil) + end. + +Nondetfunction orlimm (n1: int64) (e2: expr) := + if Int64.eq n1 Int64.zero then e2 else + if Int64.eq n1 Int64.mone then longconst Int64.mone else + match e2 with + | Eop (Olongconst n2) Enil => longconst (Int64.or n1 n2) + | Eop (Oorlimm n2) (t2:::Enil) => Eop (Oorlimm (Int64.or n1 n2)) (t2:::Enil) + | Eop Onotl (t2:::Enil) => Eop (Oornlimm n1) (t2:::Enil) + | _ => Eop (Oorlimm n1) (e2:::Enil) + end. + +Nondetfunction orl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.orl e1 e2 else + match e1, e2 with + | Eop (Olongconst n1) Enil, t2 => orlimm n1 t2 + | t1, Eop (Olongconst n2) Enil => orlimm n2 t1 + | (Eop Onotl (t1:::Enil)), t2 => Eop Oornl (t1:::t2:::Enil) + | t1, (Eop Onotl (t2:::Enil)) => Eop Oornl (t2:::t1:::Enil) + | (Eop (Oandlimm nmask) (prev:::Enil)), + (Eop (Oandlimm mask) + ((Eop (Oshllimm start) (fld:::Enil)):::Enil)) => + let zstart := Int.unsigned start in + let zstop := int64_highest_bit mask in + if is_bitfieldl zstop zstart + then + let mask' := Int64.repr (zbitfield_mask zstop zstart) in + if and_dec (Int64.eq_dec mask mask') + (Int64.eq_dec nmask (Int64.not mask')) + then Eop (Oinsfl zstop zstart) (prev:::fld:::Enil) + else Eop Oorl (e1:::e2:::Enil) + else Eop Oorl (e1:::e2:::Enil) + | (Eop (Oandlimm nmask) (prev:::Enil)), + (Eop (Oandlimm mask) (fld:::Enil)) => + let zstart := 0 in + let zstop := int64_highest_bit mask in + if is_bitfieldl zstop zstart + then + let mask' := Int64.repr (zbitfield_mask zstop zstart) in + if and_dec (Int64.eq_dec mask mask') + (Int64.eq_dec nmask (Int64.not mask')) + then Eop (Oinsfl zstop zstart) (prev:::fld:::Enil) + else Eop Oorl (e1:::e2:::Enil) + else Eop Oorl (e1:::e2:::Enil) + | _, _ => Eop Oorl (e1:::e2:::Enil) + end. + +Nondetfunction xorlimm (n1: int64) (e2: expr) := + if Int64.eq n1 Int64.zero then e2 else + if Int64.eq n1 Int64.mone + then Eop Onotl (e2:::Enil) + else + match e2 with + | Eop (Olongconst n2) Enil => longconst (Int64.xor n1 n2) + | Eop (Oxorlimm n2) (t2:::Enil) => + let n := Int64.xor n1 n2 in + if Int64.eq n Int64.zero then t2 else Eop (Oxorlimm n) (t2:::Enil) + | _ => Eop (Oxorlimm n1) (e2:::Enil) + end. + +Nondetfunction xorl (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.xorl e1 e2 else + match e1, e2 with + | Eop (Olongconst n1) Enil, t2 => xorlimm n1 t2 + | t1, Eop (Olongconst n2) Enil => xorlimm n2 t1 + | _, _ => Eop Oxorl (e1:::e2:::Enil) + end. + +(** ** Integer logical negation *) + +Nondetfunction notl (e: expr) := + match e with + | Eop Oandl (e1:::e2:::Enil) => Eop Onandl (e1:::e2:::Enil) + | Eop (Oandlimm n) (e1:::Enil) => Eop (Onandlimm n) (e1:::Enil) + | Eop Oorl (e1:::e2:::Enil) => Eop Onorl (e1:::e2:::Enil) + | Eop (Oorlimm n) (e1:::Enil) => Eop (Onorlimm n) (e1:::Enil) + | Eop Oxorl (e1:::e2:::Enil) => Eop Onxorl (e1:::e2:::Enil) + | Eop (Oxorlimm n) (e1:::Enil) => Eop (Onxorlimm n) (e1:::Enil) + | Eop Onandl (e1:::e2:::Enil) => Eop Oandl (e1:::e2:::Enil) + | Eop (Onandlimm n) (e1:::Enil) => Eop (Oandlimm n) (e1:::Enil) + | Eop Onorl (e1:::e2:::Enil) => Eop Oorl (e1:::e2:::Enil) + | Eop (Onorlimm n) (e1:::Enil) => Eop (Oorlimm n) (e1:::Enil) + | Eop Onxorl (e1:::e2:::Enil) => Eop Oxorl (e1:::e2:::Enil) + | Eop (Onxorlimm n) (e1:::Enil) => Eop (Oxorlimm n) (e1:::Enil) + | Eop Oandnl (e1:::e2:::Enil) => Eop Oornl (e2:::e1:::Enil) + | Eop (Oandnlimm n) (e1:::Enil) => Eop (Oorlimm (Int64.not n)) (e1:::Enil) + | Eop Oornl (e1:::e2:::Enil) => Eop Oandnl (e2:::e1:::Enil) + | Eop (Oornlimm n) (e1:::Enil) => Eop (Oandlimm (Int64.not n)) (e1:::Enil) + | Eop Onotl (e1:::Enil) => e1 + | Eop (Olongconst k) Enil => Eop (Olongconst (Int64.not k)) Enil + | _ => Eop Onotl (e:::Enil) + end. +(* old: if Archi.splitlong then SplitLong.notl e else xorlimm Int64.mone e. *) + +(** ** Integer division and modulus *) + +Definition divlu_base (e1: expr) (e2: expr) := SplitLong.divlu_base e1 e2. +Definition modlu_base (e1: expr) (e2: expr) := SplitLong.modlu_base e1 e2. +Definition divls_base (e1: expr) (e2: expr) := SplitLong.divls_base e1 e2. +Definition modls_base (e1: expr) (e2: expr) := SplitLong.modls_base e1 e2. + +Definition shrxlimm (e: expr) (n: int) := + if Archi.splitlong then SplitLong.shrxlimm e n else + if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil). + +(** ** Comparisons *) + +Definition cmplu (c: comparison) (e1 e2: expr) := + if Archi.splitlong then SplitLong.cmplu c e1 e2 else + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => + Eop (Ointconst (if Int64.cmpu c n1 n2 then Int.one else Int.zero)) Enil + | Some n1, None => Eop (Ocmp (Ccompluimm (swap_comparison c) n1)) (e2:::Enil) + | None, Some n2 => Eop (Ocmp (Ccompluimm c n2)) (e1:::Enil) + | None, None => Eop (Ocmp (Ccomplu c)) (e1:::e2:::Enil) + end. + +Definition cmpl (c: comparison) (e1 e2: expr) := + if Archi.splitlong then SplitLong.cmpl c e1 e2 else + match is_longconst e1, is_longconst e2 with + | Some n1, Some n2 => + Eop (Ointconst (if Int64.cmp c n1 n2 then Int.one else Int.zero)) Enil + | Some n1, None => Eop (Ocmp (Ccomplimm (swap_comparison c) n1)) (e2:::Enil) + | None, Some n2 => Eop (Ocmp (Ccomplimm c n2)) (e1:::Enil) + | None, None => Eop (Ocmp (Ccompl c)) (e1:::e2:::Enil) + end. + +(** ** Floating-point conversions *) + +Definition longoffloat (e: expr) := + if Archi.splitlong then SplitLong.longoffloat e else + Eop Olongoffloat (e:::Enil). + +Definition longuoffloat (e: expr) := + if Archi.splitlong then SplitLong.longuoffloat e else + Eop Olonguoffloat (e:::Enil). + +Definition floatoflong (e: expr) := + if Archi.splitlong then SplitLong.floatoflong e else + Eop Ofloatoflong (e:::Enil). + +Definition floatoflongu (e: expr) := + if Archi.splitlong then SplitLong.floatoflongu e else + Eop Ofloatoflongu (e:::Enil). + +Definition longofsingle (e: expr) := longoffloat (floatofsingle e). + +Definition longuofsingle (e: expr) := longuoffloat (floatofsingle e). + +Definition singleoflong (e: expr) := SplitLong.singleoflong e. + +Definition singleoflongu (e: expr) := SplitLong.singleoflongu e. + +End SELECT. + +(* Local Variables: *) +(* mode: coq *) +(* End: *) diff --git a/kvx/SelectLongproof.v b/kvx/SelectLongproof.v new file mode 100644 index 00000000..c3abdbc7 --- /dev/null +++ b/kvx/SelectLongproof.v @@ -0,0 +1,951 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Correctness of instruction selection for 64-bit integer operations *) + +Require Import String Coqlib Maps Integers Floats Errors. +Require Archi. +Require Import AST Values ExtValues Memory Globalenvs Events. +Require Import Cminor Op CminorSel. +Require Import OpHelpers OpHelpersproof. +Require Import SelectOp SelectOpproof SplitLong SplitLongproof. +Require Import SelectLong. +Require Import DecBoolOps. +Require Import Lia. + +Local Open Scope cminorsel_scope. +Local Open Scope string_scope. + +(** * Correctness of the instruction selection functions for 64-bit operators *) + +Section CMCONSTR. + +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. +Variable sp: val. +Variable e: env. +Variable m: mem. + +Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop := + forall le a x, + eval_expr ge sp e m le a x -> + exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v. + +Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop := + forall le a x b y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v. + +Definition partial_unary_constructor_sound (cstr: expr -> expr) (sem: val -> option val) : Prop := + forall le a x y, + eval_expr ge sp e m le a x -> + sem x = Some y -> + exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef y v. + +Definition partial_binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> option val) : Prop := + forall le a x b y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + sem x y = Some z -> + exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef z v. + +Theorem eval_longconst: + forall le n, eval_expr ge sp e m le (longconst n) (Vlong n). +Proof. + unfold longconst; intros; destruct Archi.splitlong. + apply SplitLongproof.eval_longconst. + EvalOp. +Qed. + +Lemma is_longconst_sound: + forall v a n le, + is_longconst a = Some n -> eval_expr ge sp e m le a v -> v = Vlong n. +Proof with (try discriminate). + intros. unfold is_longconst in *. destruct Archi.splitlong. + eapply SplitLongproof.is_longconst_sound; eauto. + assert (a = Eop (Olongconst n) Enil). + { destruct a... destruct o... destruct e0... congruence. } + subst a. InvEval. auto. +Qed. + +Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword. +Proof. + unfold intoflong; destruct Archi.splitlong. apply SplitLongproof.eval_intoflong. + red; intros. destruct (is_longconst a) as [n|] eqn:C. +- TrivialExists. simpl. erewrite (is_longconst_sound x) by eauto. auto. +- TrivialExists. +Qed. + +Theorem eval_longofintu: unary_constructor_sound longofintu Val.longofintu. +Proof. + unfold longofintu; destruct Archi.splitlong. apply SplitLongproof.eval_longofintu. + red; intros. destruct (is_intconst a) as [n|] eqn:C. +- econstructor; split. apply eval_longconst. + exploit is_intconst_sound; eauto. intros; subst x. auto. +- TrivialExists. +Qed. + +Theorem eval_longofint: unary_constructor_sound longofint Val.longofint. +Proof. + unfold longofint; destruct Archi.splitlong. apply SplitLongproof.eval_longofint. + red; intros. destruct (is_intconst a) as [n|] eqn:C. +- econstructor; split. apply eval_longconst. + exploit is_intconst_sound; eauto. intros; subst x. auto. +- TrivialExists. +Qed. + +Theorem eval_negl: unary_constructor_sound negl Val.negl. +Proof. + unfold negl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_negl; auto. + red; intros. destruct (is_longconst a) as [n|] eqn:C. +- exploit is_longconst_sound; eauto. intros EQ; subst x. + econstructor; split. apply eval_longconst. auto. +- TrivialExists. +Qed. + + +Theorem eval_addlimm_shllimm: + forall sh k2, unary_constructor_sound (addlimm_shllimm sh k2) (fun x => ExtValues.addxl sh x (Vlong k2)). +Proof. + red; unfold addlimm_shllimm; intros. + destruct (Compopts.optim_addx tt). + { + destruct (shift1_4_of_z (Int.unsigned sh)) as [s14 |] eqn:SHIFT. + - TrivialExists. simpl. + f_equal. + unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *. + destruct (Z.eq_dec _ _) as [e1|]. + { replace s14 with SHIFT1 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e1. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e1. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e2|]. + { replace s14 with SHIFT2 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e2. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e2. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e3|]. + { replace s14 with SHIFT3 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e3. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e3. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e4|]. + { replace s14 with SHIFT4 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e4. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e4. + rewrite Int.repr_unsigned. + reflexivity. + } + discriminate. + - unfold addxl. rewrite Val.addl_commut. + TrivialExists. + repeat (try eassumption; try econstructor). + simpl. + reflexivity. + } + { unfold addxl. rewrite Val.addl_commut. + TrivialExists. + repeat (try eassumption; try econstructor). + simpl. + reflexivity. + } +Qed. + +Theorem eval_addlimm: forall n, unary_constructor_sound (addlimm n) (fun v => Val.addl v (Vlong n)). +Proof. + unfold addlimm; intros; red; intros. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + subst. exists x; split; auto. + destruct x; simpl; rewrite ?Int64.add_zero, ?Ptrofs.add_zero; auto. + destruct (addlimm_match a); InvEval. +- econstructor; split. apply eval_longconst. rewrite Int64.add_commut; auto. +- destruct (Compopts.optim_globaladdroffset _). + + econstructor; split. EvalOp. simpl; eauto. + unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. + destruct Archi.ptr64; auto. rewrite Ptrofs.add_commut; auto. + + TrivialExists. repeat econstructor. simpl. trivial. +- econstructor; split. EvalOp. simpl; eauto. + destruct sp; simpl; auto. destruct Archi.ptr64; auto. + rewrite Ptrofs.add_assoc, (Ptrofs.add_commut m0). auto. +- subst x. rewrite Val.addl_assoc. rewrite Int64.add_commut. TrivialExists. +- TrivialExists; simpl. subst x. + destruct v1; simpl; trivial. + destruct (Int.ltu _ _); simpl; trivial. + rewrite Int64.add_assoc. rewrite Int64.add_commut. + reflexivity. +- pose proof eval_addlimm_shllimm as ADDXL. + unfold unary_constructor_sound in ADDXL. + unfold addxl in ADDXL. + rewrite Val.addl_commut. + subst x. + apply ADDXL; assumption. +- TrivialExists. +Qed. + +Lemma eval_addxl: forall n, binary_constructor_sound (addl_shllimm n) (ExtValues.addxl n). +Proof. + red. + intros. + unfold addl_shllimm. + destruct (Compopts.optim_addx tt). + { + destruct (shift1_4_of_z (Int.unsigned n)) as [s14 |] eqn:SHIFT. + - TrivialExists. + simpl. + f_equal. f_equal. + unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *. + destruct (Z.eq_dec _ _) as [e1|]. + { replace s14 with SHIFT1 by congruence. + rewrite <- e1. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e2|]. + { replace s14 with SHIFT2 by congruence. + rewrite <- e2. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e3|]. + { replace s14 with SHIFT3 by congruence. + rewrite <- e3. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e4|]. + { replace s14 with SHIFT4 by congruence. + rewrite <- e4. + apply Int.repr_unsigned. } + discriminate. + (* Oaddxl *) + - TrivialExists; + repeat econstructor; eassumption. + } + { TrivialExists; + repeat econstructor; eassumption. + } +Qed. + +Theorem eval_addl: binary_constructor_sound addl Val.addl. +Proof. + unfold addl. destruct Archi.splitlong eqn:SL. + apply SplitLongproof.eval_addl. apply Archi.splitlong_ptr32; auto. +(* + assert (SF: Archi.ptr64 = true). + { Local Transparent Archi.splitlong. unfold Archi.splitlong in SL. + destruct Archi.ptr64; simpl in *; congruence. } +*) +(* + assert (B: forall id ofs n, + Genv.symbol_address ge id (Ptrofs.add ofs (Ptrofs.repr n)) = + Val.addl (Genv.symbol_address ge id ofs) (Vlong (Int64.repr n))). + { intros. replace (Ptrofs.repr n) with (Ptrofs.of_int64 (Int64.repr n)) by auto with ptrofs. + apply Genv.shift_symbol_address_64; auto. } + +*) + red; intros until y. + case (addl_match a b); intros; InvEval. + - rewrite Val.addl_commut. apply eval_addlimm; auto. + - apply eval_addlimm; auto. + - subst. + replace (Val.addl (Val.addl v1 (Vlong n1)) (Val.addl v0 (Vlong n2))) + with (Val.addl (Val.addl v1 v0) (Val.addl (Vlong n1) (Vlong n2))). + apply eval_addlimm. EvalOp. + repeat rewrite Val.addl_assoc. decEq. apply Val.addl_permut. + - subst. econstructor; split. + EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto. + rewrite Val.addl_commut. destruct sp; simpl; auto. + destruct v1; simpl; auto. + destruct Archi.ptr64 eqn:SF; auto. + apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. + rewrite (Ptrofs.add_commut (Ptrofs.of_int64 n1)), Ptrofs.add_assoc. f_equal. auto with ptrofs. + - subst. econstructor; split. + EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto. + destruct sp; simpl; auto. + destruct v1; simpl; auto. + destruct Archi.ptr64 eqn:SF; auto. + apply Val.lessdef_same. f_equal. rewrite ! Ptrofs.add_assoc. f_equal. f_equal. + rewrite Ptrofs.add_commut. auto with ptrofs. + - subst. + replace (Val.addl (Val.addl v1 (Vlong n1)) y) + with (Val.addl (Val.addl v1 y) (Vlong n1)). + apply eval_addlimm. EvalOp. + repeat rewrite Val.addl_assoc. decEq. apply Val.addl_commut. + - subst. + replace (Val.addl x (Val.addl v1 (Vlong n2))) + with (Val.addl (Val.addl x v1) (Vlong n2)). + apply eval_addlimm. EvalOp. + repeat rewrite Val.addl_assoc. reflexivity. + - subst. TrivialExists. + - subst. rewrite Val.addl_commut. TrivialExists. + - subst. TrivialExists. + - subst. rewrite Val.addl_commut. TrivialExists. + - subst. pose proof eval_addxl as ADDXL. + unfold binary_constructor_sound in ADDXL. + rewrite Val.addl_commut. + apply ADDXL; assumption. + (* Oaddxl *) + - subst. pose proof eval_addxl as ADDXL. + unfold binary_constructor_sound in ADDXL. + apply ADDXL; assumption. + - TrivialExists. +Qed. + +Theorem eval_subl: binary_constructor_sound subl Val.subl. +Proof. + unfold subl. destruct Archi.splitlong eqn:SL. + apply SplitLongproof.eval_subl. apply Archi.splitlong_ptr32; auto. + red; intros; destruct (subl_match a b); InvEval. +- rewrite Val.subl_addl_opp. apply eval_addlimm; auto. +- subst. rewrite Val.subl_addl_l. rewrite Val.subl_addl_r. + rewrite Val.addl_assoc. simpl. rewrite Int64.add_commut. rewrite <- Int64.sub_add_opp. + apply eval_addlimm; EvalOp. +- subst. rewrite Val.subl_addl_l. apply eval_addlimm; EvalOp. +- subst. rewrite Val.subl_addl_r. + apply eval_addlimm; EvalOp. +- TrivialExists. simpl. subst. reflexivity. +- TrivialExists. simpl. subst. + destruct v1; destruct x; simpl; trivial. + + f_equal. f_equal. + rewrite <- Int64.neg_mul_distr_r. + rewrite Int64.sub_add_opp. + reflexivity. + + destruct (Archi.ptr64) eqn:ARCHI64; simpl; trivial. + f_equal. f_equal. + rewrite <- Int64.neg_mul_distr_r. + rewrite Ptrofs.sub_add_opp. + unfold Ptrofs.add. + f_equal. f_equal. + rewrite (Ptrofs.agree64_neg ARCHI64 (Ptrofs.of_int64 (Int64.mul i n)) (Int64.mul i n)). + rewrite (Ptrofs.agree64_of_int ARCHI64 (Int64.neg (Int64.mul i n))). + reflexivity. + apply (Ptrofs.agree64_of_int ARCHI64). +- TrivialExists. +Qed. + +Theorem eval_shllimm: forall n, unary_constructor_sound (fun e => shllimm e n) (fun v => Val.shll v (Vint n)). +Proof. + intros; unfold shllimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shllimm; auto. + red; intros. + predSpec Int.eq Int.eq_spec n Int.zero. + exists x; split; auto. subst n; destruct x; simpl; auto. + destruct (Int.ltu Int.zero Int64.iwordsize'); auto. + change (Int64.shl' i Int.zero) with (Int64.shl i Int64.zero). rewrite Int64.shl_zero; auto. + destruct (Int.ltu n Int64.iwordsize') eqn:LT; simpl. + assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshllimm n) (a:::Enil)) v + /\ Val.lessdef (Val.shll x (Vint n)) v) by TrivialExists. + destruct (shllimm_match a); InvEval. +- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto. +- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto. + subst. econstructor; split. EvalOp. simpl; eauto. + destruct v1; simpl; auto. rewrite LT'. + destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto. + simpl; rewrite LT. rewrite Int.add_commut, Int64.shl'_shl'; auto. rewrite Int.add_commut; auto. +- apply DEFAULT. +- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto. +Qed. + +Theorem eval_shrluimm: forall n, unary_constructor_sound (fun e => shrluimm e n) (fun v => Val.shrlu v (Vint n)). +Proof. + intros; unfold shrluimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrluimm; auto. + red; intros. + predSpec Int.eq Int.eq_spec n Int.zero. + exists x; split; auto. subst n; destruct x; simpl; auto. + destruct (Int.ltu Int.zero Int64.iwordsize'); auto. + change (Int64.shru' i Int.zero) with (Int64.shru i Int64.zero). rewrite Int64.shru_zero; auto. + destruct (Int.ltu n Int64.iwordsize') eqn:LT. + assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrluimm n) (a:::Enil)) v + /\ Val.lessdef (Val.shrlu x (Vint n)) v) by TrivialExists. + destruct (shrluimm_match a); InvEval. +- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto. +- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto. + subst. econstructor; split. EvalOp. simpl; eauto. + destruct v1; simpl; auto. rewrite LT'. + destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto. + simpl; rewrite LT. rewrite Int.add_commut, Int64.shru'_shru'; auto. rewrite Int.add_commut; auto. +- subst x. + simpl negb. + cbn iota. + destruct (is_bitfieldl _ _) eqn:BOUNDS. + + exists (extfzl (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int64.zwordsize) v1). + split. + ++ EvalOp. + ++ unfold extfzl. + rewrite BOUNDS. + destruct v1; try (simpl; apply Val.lessdef_undef). + replace (Z.sub Int64.zwordsize + (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia. + replace (Z.sub Int64.zwordsize + (Z.sub + (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int64.zwordsize))) with (Int.unsigned n) by lia. + simpl. + destruct (Int.ltu n1 Int64.iwordsize') eqn:Hltu_n1; simpl; trivial. + destruct (Int.ltu n Int64.iwordsize') eqn:Hltu_n; simpl; trivial. + rewrite Int.repr_unsigned. + rewrite Int.repr_unsigned. + constructor. + + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity. +- apply DEFAULT. +- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto. +Qed. + +Theorem eval_shrlimm: forall n, unary_constructor_sound (fun e => shrlimm e n) (fun v => Val.shrl v (Vint n)). +Proof. + intros; unfold shrlimm. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlimm; auto. + red; intros. + predSpec Int.eq Int.eq_spec n Int.zero. + exists x; split; auto. subst n; destruct x; simpl; auto. + destruct (Int.ltu Int.zero Int64.iwordsize'); auto. + change (Int64.shr' i Int.zero) with (Int64.shr i Int64.zero). rewrite Int64.shr_zero; auto. + destruct (Int.ltu n Int64.iwordsize') eqn:LT. + assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop (Oshrlimm n) (a:::Enil)) v + /\ Val.lessdef (Val.shrl x (Vint n)) v) by TrivialExists. + destruct (shrlimm_match a); InvEval. +- econstructor; split. apply eval_longconst. simpl; rewrite LT; auto. +- destruct (Int.ltu (Int.add n n1) Int64.iwordsize') eqn:LT'; auto. + subst. econstructor; split. EvalOp. simpl; eauto. + destruct v1; simpl; auto. rewrite LT'. + destruct (Int.ltu n1 Int64.iwordsize') eqn:LT1; auto. + simpl; rewrite LT. rewrite Int.add_commut, Int64.shr'_shr'; auto. rewrite Int.add_commut; auto. +- subst x. + simpl negb. + cbn iota. + destruct (is_bitfieldl _ _) eqn:BOUNDS. + + exists (extfsl (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int64.zwordsize) v1). + split. + ++ EvalOp. + ++ unfold extfsl. + rewrite BOUNDS. + destruct v1; try (simpl; apply Val.lessdef_undef). + replace (Z.sub Int64.zwordsize + (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia. + replace (Z.sub Int64.zwordsize + (Z.sub + (Z.add (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int64.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int64.zwordsize))) with (Int.unsigned n) by lia. + simpl. + destruct (Int.ltu n1 Int64.iwordsize') eqn:Hltu_n1; simpl; trivial. + destruct (Int.ltu n Int64.iwordsize') eqn:Hltu_n; simpl; trivial. + rewrite Int.repr_unsigned. + rewrite Int.repr_unsigned. + constructor. + + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity. +- apply DEFAULT. +- TrivialExists. constructor; eauto. constructor. EvalOp. simpl; eauto. constructor. auto. +Qed. + +Theorem eval_shll: binary_constructor_sound shll Val.shll. +Proof. + unfold shll. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shll; auto. + red; intros. destruct (is_intconst b) as [n2|] eqn:C. +- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shllimm; auto. +- TrivialExists. +Qed. + +Theorem eval_shrlu: binary_constructor_sound shrlu Val.shrlu. +Proof. + unfold shrlu. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrlu; auto. + red; intros. destruct (is_intconst b) as [n2|] eqn:C. +- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrluimm; auto. +- TrivialExists. +Qed. + +Theorem eval_shrl: binary_constructor_sound shrl Val.shrl. +Proof. + unfold shrl. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_shrl; auto. + red; intros. destruct (is_intconst b) as [n2|] eqn:C. +- exploit is_intconst_sound; eauto. intros EQ; subst y. apply eval_shrlimm; auto. +- TrivialExists. +Qed. + +Theorem eval_mullimm_base: forall n, unary_constructor_sound (mullimm_base n) (fun v => Val.mull v (Vlong n)). +Proof. + intros; unfold mullimm_base. red; intros. + assert (DEFAULT: exists v, + eval_expr ge sp e m le (Eop Omull (a ::: longconst n ::: Enil)) v + /\ Val.lessdef (Val.mull x (Vlong n)) v). + { econstructor; split. EvalOp. constructor. eauto. constructor. apply eval_longconst. constructor. simpl; eauto. + auto. } + generalize (Int64.one_bits'_decomp n); intros D. + destruct (Int64.one_bits' n) as [ | i [ | j [ | ? ? ]]] eqn:B. +- TrivialExists. +- replace (Val.mull x (Vlong n)) with (Val.shll x (Vint i)). + apply eval_shllimm; auto. + simpl in D. rewrite D, Int64.add_zero. destruct x; simpl; auto. + rewrite (Int64.one_bits'_range n) by (rewrite B; auto with coqlib). + rewrite Int64.shl'_mul; auto. +- set (le' := x :: le). + assert (A0: eval_expr ge sp e m le' (Eletvar O) x) by (constructor; reflexivity). + exploit (eval_shllimm i). eexact A0. intros (v1 & A1 & B1). + exploit (eval_shllimm j). eexact A0. intros (v2 & A2 & B2). + exploit (eval_addl). eexact A1. eexact A2. intros (v3 & A3 & B3). + exists v3; split. econstructor; eauto. + rewrite D. simpl. rewrite Int64.add_zero. destruct x; auto. + simpl in *. + rewrite (Int64.one_bits'_range n) in B1 by (rewrite B; auto with coqlib). + rewrite (Int64.one_bits'_range n) in B2 by (rewrite B; auto with coqlib). + inv B1; inv B2. simpl in B3; inv B3. + rewrite Int64.mul_add_distr_r. rewrite <- ! Int64.shl'_mul. auto. +- TrivialExists. +Qed. + +Theorem eval_mullimm: forall n, unary_constructor_sound (mullimm n) (fun v => Val.mull v (Vlong n)). +Proof. + unfold mullimm. intros; red; intros. + destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_mullimm; eauto. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + exists (Vlong Int64.zero); split. apply eval_longconst. + destruct x; simpl; auto. subst n; rewrite Int64.mul_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.one. + exists x; split; auto. + destruct x; simpl; auto. subst n; rewrite Int64.mul_one; auto. + destruct (mullimm_match a); InvEval. +- econstructor; split. apply eval_longconst. rewrite Int64.mul_commut; auto. +- exploit (eval_mullimm_base n); eauto. intros (v2 & A2 & B2). + exploit (eval_addlimm (Int64.mul n n2)). eexact A2. intros (v3 & A3 & B3). + exists v3; split; auto. + subst x. destruct v1; simpl; auto. + simpl in B2; inv B2. simpl in B3; inv B3. rewrite Int64.mul_add_distr_l. + rewrite (Int64.mul_commut n). auto. +- apply eval_mullimm_base; auto. +Qed. + +Theorem eval_mull: binary_constructor_sound mull Val.mull. +Proof. + unfold mull. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mull; auto. + red; intros; destruct (mull_match a b); InvEval. +- rewrite Val.mull_commut. apply eval_mullimm; auto. +- apply eval_mullimm; auto. +- TrivialExists. +Qed. + +Theorem eval_mullhu: + forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)). +Proof. + unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto. + red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto. +Qed. + +Theorem eval_mullhs: + forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)). +Proof. + unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto. + red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto. +Qed. + +Theorem eval_andlimm: forall n, unary_constructor_sound (andlimm n) (fun v => Val.andl v (Vlong n)). +Proof. + unfold andlimm; intros; red; intros. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + exists (Vlong Int64.zero); split. apply eval_longconst. + subst. destruct x; simpl; auto. rewrite Int64.and_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.mone. + exists x; split. assumption. + subst. destruct x; simpl; auto. rewrite Int64.and_mone; auto. + destruct (andlimm_match a); InvEval; subst. +- econstructor; split. apply eval_longconst. simpl. rewrite Int64.and_commut; auto. +- TrivialExists. simpl. rewrite Val.andl_assoc. rewrite Int64.and_commut; auto. +- TrivialExists. +- TrivialExists. +Qed. + +Lemma int64_eq_commut: forall x y : int64, + (Int64.eq x y) = (Int64.eq y x). +Proof. + intros. + predSpec Int64.eq Int64.eq_spec x y; + predSpec Int64.eq Int64.eq_spec y x; + congruence. +Qed. + +Theorem eval_andl: binary_constructor_sound andl Val.andl. +Proof. + unfold andl; destruct Archi.splitlong. apply SplitLongproof.eval_andl. + red; intros. destruct (andl_match a b). +- InvEval. rewrite Val.andl_commut. apply eval_andlimm; auto. +- InvEval. apply eval_andlimm; auto. +- (*andn*) InvEval. TrivialExists. simpl. congruence. +- (*andn reverse*) InvEval. rewrite Val.andl_commut. TrivialExists; simpl. congruence. + (* +- (* selectl *) + InvEval. + predSpec Int64.eq Int64.eq_spec zero1 Int64.zero; simpl; TrivialExists. + + constructor. econstructor; constructor. + constructor; try constructor; try constructor; try eassumption. + + simpl in *. f_equal. inv H6. + unfold selectl. + simpl. + destruct v3; simpl; trivial. + rewrite int64_eq_commut. + destruct (Int64.eq i Int64.zero); simpl. + * replace (Int64.repr (Int.signed (Int.neg Int.zero))) with Int64.zero by Int64.bit_solve. + destruct y; simpl; trivial. + * replace (Int64.repr (Int.signed (Int.neg Int.one))) with Int64.mone by Int64.bit_solve. + destruct y; simpl; trivial. + rewrite Int64.and_commut. rewrite Int64.and_mone. reflexivity. + + constructor. econstructor. constructor. econstructor. constructor. econstructor. constructor. eassumption. constructor. simpl. f_equal. constructor. simpl. f_equal. constructor. simpl. f_equal. constructor. eassumption. constructor. + + simpl in *. congruence. *) +- TrivialExists. +Qed. + +Theorem eval_orlimm: forall n, unary_constructor_sound (orlimm n) (fun v => Val.orl v (Vlong n)). +Proof. + unfold orlimm; intros; red; intros. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.or_zero; auto. + predSpec Int64.eq Int64.eq_spec n Int64.mone. + econstructor; split. apply eval_longconst. subst. destruct x; simpl; auto. rewrite Int64.or_mone; auto. + destruct (orlimm_match a); InvEval; subst. +- econstructor; split. apply eval_longconst. simpl. rewrite Int64.or_commut; auto. +- TrivialExists. simpl. rewrite Val.orl_assoc. rewrite Int64.or_commut; auto. +- InvEval. TrivialExists. +- TrivialExists. +Qed. + + +Theorem eval_orl: binary_constructor_sound orl Val.orl. +Proof. + unfold orl; destruct Archi.splitlong. apply SplitLongproof.eval_orl. + red; intros. + destruct (orl_match a b). +- InvEval. rewrite Val.orl_commut. apply eval_orlimm; auto. +- InvEval. apply eval_orlimm; auto. +- (*orn*) InvEval. TrivialExists; simpl; congruence. +- (*orn reversed*) InvEval. rewrite Val.orl_commut. TrivialExists; simpl; congruence. + + - (*insfl first case*) + destruct (is_bitfieldl _ _) eqn:Risbitfield. + + destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + * rewrite Rnmask in *. + inv H. inv H0. inv H4. inv H3. inv H9. inv H8. + simpl in H6, H7. + inv H6. inv H7. + inv H4. inv H3. inv H7. + simpl in H6. + inv H6. + set (zstop := (int64_highest_bit mask)) in *. + set (zstart := (Int.unsigned start)) in *. + + TrivialExists. + simpl. f_equal. + + unfold insfl. + rewrite Risbitfield. + rewrite Rmask. + simpl. + unfold bitfield_maskl. + subst zstart. + rewrite Int.repr_unsigned. + reflexivity. + * TrivialExists. + + TrivialExists. + - destruct (is_bitfieldl _ _) eqn:Risbitfield. + + destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + * rewrite Rnmask in *. + inv H. inv H0. inv H4. inv H6. inv H8. inv H3. inv H8. + inv H0. simpl in H7. inv H7. + set (zstop := (int64_highest_bit mask)) in *. + set (zstart := 0) in *. + + TrivialExists. simpl. f_equal. + unfold insfl. + rewrite Risbitfield. + rewrite Rmask. + simpl. + subst zstart. + f_equal. + destruct v0; simpl; trivial. + unfold Int.ltu, Int64.iwordsize', Int64.zwordsize, Int64.wordsize. + rewrite Int.unsigned_repr. + ** rewrite Int.unsigned_repr. + *** simpl. + rewrite Int64.shl'_zero. + reflexivity. + *** simpl. unfold Int.max_unsigned. unfold Int.modulus. + simpl. lia. + ** unfold Int.max_unsigned. unfold Int.modulus. + simpl. lia. + * TrivialExists. + + TrivialExists. +- TrivialExists. +Qed. + +Theorem eval_xorlimm: forall n, unary_constructor_sound (xorlimm n) (fun v => Val.xorl v (Vlong n)). +Proof. + unfold xorlimm; intros; red; intros. + predSpec Int64.eq Int64.eq_spec n Int64.zero. + - exists x; split; auto. subst. destruct x; simpl; auto. rewrite Int64.xor_zero; auto. + - predSpec Int64.eq Int64.eq_spec n Int64.mone. + -- subst n. intros. rewrite <- Val.notl_xorl. TrivialExists. + -- destruct (xorlimm_match a); InvEval; subst. + + econstructor; split. apply eval_longconst. simpl. rewrite Int64.xor_commut; auto. + + rewrite Val.xorl_assoc. simpl. rewrite (Int64.xor_commut n2). + predSpec Int64.eq Int64.eq_spec (Int64.xor n n2) Int64.zero. + * rewrite H. exists v1; split; auto. destruct v1; simpl; auto. rewrite Int64.xor_zero; auto. + * TrivialExists. + + TrivialExists. +Qed. + +Theorem eval_xorl: binary_constructor_sound xorl Val.xorl. +Proof. + unfold xorl; destruct Archi.splitlong. apply SplitLongproof.eval_xorl. + red; intros. destruct (xorl_match a b). +- InvEval. rewrite Val.xorl_commut. apply eval_xorlimm; auto. +- InvEval. apply eval_xorlimm; auto. +- TrivialExists. +Qed. + +Theorem eval_notl: unary_constructor_sound notl Val.notl. +Proof. + assert (forall v, Val.lessdef (Val.notl (Val.notl v)) v). + destruct v; simpl; auto. rewrite Int64.not_involutive; auto. + unfold notl; red; intros until x; case (notl_match a); intros; InvEval. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - subst x. exists (Val.andl v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.andl v1 (Vlong n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + - subst x. exists (Val.orl v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.orl v1 (Vlong n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + - subst x. exists (Val.xorl v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.xorl v1 (Vlong n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + (* andn *) + - subst x. TrivialExists. simpl. + destruct v0; destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int64.not_and_or_not. + rewrite Int64.not_involutive. + apply Int64.or_commut. + - subst x. TrivialExists. simpl. + destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int64.not_and_or_not. + rewrite Int64.not_involutive. + reflexivity. + (* orn *) + - subst x. TrivialExists. simpl. + destruct v0; destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int64.not_or_and_not. + rewrite Int64.not_involutive. + apply Int64.and_commut. + - subst x. TrivialExists. simpl. + destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int64.not_or_and_not. + rewrite Int64.not_involutive. + reflexivity. + - subst x. exists v1; split; trivial. + - TrivialExists. + - TrivialExists. +Qed. + +Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. +Proof. + unfold divls_base; red; intros. + eapply SplitLongproof.eval_divls_base; eauto. +Qed. + +Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. +Proof. + unfold modls_base; red; intros. + eapply SplitLongproof.eval_modls_base; eauto. +Qed. + +Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. +Proof. + unfold divlu_base; red; intros. + eapply SplitLongproof.eval_divlu_base; eauto. +Qed. + +Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. +Proof. + unfold modlu_base; red; intros. + eapply SplitLongproof.eval_modlu_base; eauto. +Qed. + +Theorem eval_shrxlimm: + forall le a n x z, + eval_expr ge sp e m le a x -> + Val.shrxl x (Vint n) = Some z -> + exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v. +Proof. + unfold shrxlimm; intros. destruct Archi.splitlong eqn:SL. ++ eapply SplitLongproof.eval_shrxlimm; eauto using Archi.splitlong_ptr32. ++ predSpec Int.eq Int.eq_spec n Int.zero. +- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto. + change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto. +- TrivialExists. simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_cmplu: + forall c le a x b y v, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.cmplu (Mem.valid_pointer m) c x y = Some v -> + eval_expr ge sp e m le (cmplu c a b) v. +Proof. + unfold cmplu; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_cmplu; eauto using Archi.splitlong_ptr32. + unfold Val.cmplu in H1. + destruct (Val.cmplu_bool (Mem.valid_pointer m) c x y) as [vb|] eqn:C; simpl in H1; inv H1. + destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2; + try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto)); + try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto)); + subst. +- simpl in C; inv C. EvalOp. destruct (Int64.cmpu c n1 n2); reflexivity. +- EvalOp. simpl. rewrite Val.swap_cmplu_bool. rewrite C; auto. +- EvalOp. simpl; rewrite C; auto. +- EvalOp. simpl; rewrite C; auto. +Qed. + +Theorem eval_cmpl: + forall c le a x b y v, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.cmpl c x y = Some v -> + eval_expr ge sp e m le (cmpl c a b) v. +Proof. + unfold cmpl; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_cmpl; eauto. + unfold Val.cmpl in H1. + destruct (Val.cmpl_bool c x y) as [vb|] eqn:C; simpl in H1; inv H1. + destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2; + try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto)); + try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto)); + subst. +- simpl in C; inv C. EvalOp. destruct (Int64.cmp c n1 n2); reflexivity. +- EvalOp. simpl. rewrite Val.swap_cmpl_bool. rewrite C; auto. +- EvalOp. simpl; rewrite C; auto. +- EvalOp. simpl; rewrite C; auto. +Qed. + +Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat. +Proof. + unfold longoffloat; red; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_longoffloat; eauto. + TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat. +Proof. + unfold longuoffloat; red; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_longuoffloat; eauto. + TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong. +Proof. + unfold floatoflong; red; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_floatoflong; eauto. + TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu. +Proof. + unfold floatoflongu; red; intros. destruct Archi.splitlong eqn:SL. + eapply SplitLongproof.eval_floatoflongu; eauto. + TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle. +Proof. + unfold longofsingle; red; intros. + destruct x; simpl in H0; inv H0. destruct (Float32.to_long f) as [n|] eqn:EQ; simpl in H2; inv H2. + exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B. + apply Float32.to_long_double in EQ. + eapply eval_longoffloat; eauto. simpl. + change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto. +Qed. + +Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle. +Proof. + unfold longuofsingle; red; intros. (* destruct Archi.splitlong eqn:SL. *) + destruct x; simpl in H0; inv H0. destruct (Float32.to_longu f) as [n|] eqn:EQ; simpl in H2; inv H2. + exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B. + apply Float32.to_longu_double in EQ. + eapply eval_longuoffloat; eauto. simpl. + change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto. +Qed. + +Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong. +Proof. + unfold singleoflong; red; intros. (* destruct Archi.splitlong eqn:SL. *) + eapply SplitLongproof.eval_singleoflong; eauto. +(* TrivialExists. *) +Qed. + +Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu. +Proof. + unfold singleoflongu; red; intros. (* destruct Archi.splitlong eqn:SL. *) + eapply SplitLongproof.eval_singleoflongu; eauto. +(* TrivialExists. *) +Qed. + +End CMCONSTR. diff --git a/kvx/SelectOp.vp b/kvx/SelectOp.vp new file mode 100644 index 00000000..4e1087f9 --- /dev/null +++ b/kvx/SelectOp.vp @@ -0,0 +1,758 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + + +(** Instruction selection for operators *) + +(** The instruction selection pass recognizes opportunities for using + combined arithmetic and logical operations and addressing modes + offered by the target processor. For instance, the expression [x + 1] + can take advantage of the "immediate add" instruction of the processor, + and on the PowerPC, the expression [(x >> 6) & 0xFF] can be turned + into a "rotate and mask" instruction. + + This file defines functions for building CminorSel expressions and + statements, especially expressions consisting of operator + applications. These functions examine their arguments to choose + cheaper forms of operators whenever possible. + + For instance, [add e1 e2] will return a CminorSel expression semantically + equivalent to [Eop Oadd (e1 ::: e2 ::: Enil)], but will use a + [Oaddimm] operator if one of the arguments is an integer constant, + or suppress the addition altogether if one of the arguments is the + null integer. In passing, we perform operator reassociation + ([(e + c1) * c2] becomes [(e * c2) + (c1 * c2)]) and a small amount + of constant propagation. + + On top of the "smart constructor" functions defined below, + module [Selection] implements the actual instruction selection pass. +*) + +Require Archi. +Require Import Coqlib. +Require Import Compopts. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Op. +Require Import CminorSel. +Require Import OpHelpers. +Require Import ExtValues ExtFloats. +Require Import DecBoolOps. +Require Import Chunks. +Require Import Builtins. +Require Compopts. + +Local Open Scope cminorsel_scope. + +Local Open Scope string_scope. +Local Open Scope error_monad_scope. + +Section SELECT. + +Context {hf: helper_functions}. + +Inductive to_cond0 := +| Cond0_some : condition0 -> expr -> to_cond0 +| Cond0_none : to_cond0 +| Cond0_true : to_cond0 +| Cond0_false : to_cond0. + +Definition compu0 c e1 := + match c with + | Clt => Cond0_false + | Cge => Cond0_true + | _ => Cond0_some (Ccompu0 c) e1 + end. + +Definition complu0 c e1 := + match c with + | Clt => Cond0_false + | Cge => Cond0_true + | _ => Cond0_some (Ccomplu0 c) e1 + end. + +Nondetfunction cond_to_condition0 (cond : condition) (args : exprlist) := + match cond, args with + | (Ccompimm c x), (e1 ::: Enil) => + if Int.eq_dec x Int.zero + then Cond0_some (Ccomp0 c) e1 + else Cond0_none + + | (Ccompuimm c x), (e1 ::: Enil) => + if Int.eq_dec x Int.zero + then compu0 c e1 + else Cond0_none + + | (Ccomplimm c x), (e1 ::: Enil) => + if Int64.eq_dec x Int64.zero + then Cond0_some (Ccompl0 c) e1 + else Cond0_none + + | (Ccompluimm c x), (e1 ::: Enil) => + if Int64.eq_dec x Int64.zero + then complu0 c e1 + else Cond0_none + + | _, _ => Cond0_none + end. + +(** Ternary operator *) +Nondetfunction select0 (ty : typ) (cond0 : condition0) (e1 e2 e3: expr) := + match ty, cond0, e1, e2, e3 with + | Tint, cond0, e1, (Eop (Ointconst imm) Enil), e3 => + (Eop (Oselimm cond0 imm) (e1 ::: e3 ::: Enil)) + | Tint, cond0, (Eop (Ointconst imm) Enil), e2, e3 => + (Eop (Oselimm (negate_condition0 cond0) imm) (e2 ::: e3 ::: Enil)) + | Tlong, cond0, e1, (Eop (Olongconst imm) Enil), e3 => + (Eop (Osellimm cond0 imm) (e1 ::: e3 ::: Enil)) + | Tlong, cond0, (Eop (Olongconst imm) Enil), e2, e3 => + (Eop (Osellimm (negate_condition0 cond0) imm) (e2 ::: e3 ::: Enil)) + | _, _, _ => (Eop (Osel cond0 ty) (e1 ::: e2 ::: e3 ::: Enil)) + end. + +Definition same_expr_pure (e1 e2: expr) := + match e1, e2 with + | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false + | _, _ => false + end. + +Definition select (ty : typ) (cond : condition) (args : exprlist) (e1 e2: expr) : option expr := + Some (if same_expr_pure e1 e2 then e1 else + match cond_to_condition0 cond args with + | Cond0_none => select0 ty (Ccomp0 Cne) e1 e2 (Eop (Ocmp cond) args) + | Cond0_some cond0 ec => select0 ty cond0 e1 e2 ec + | Cond0_true => e1 + | Cond0_false => e2 + end). + + +(** ** Constants **) + +Definition addrsymbol (id: ident) (ofs: ptrofs) := + Eop (Oaddrsymbol id ofs) Enil. + +Definition addrstack (ofs: ptrofs) := + Eop (Oaddrstack ofs) Enil. + +(** ** Integer addition and pointer addition *) + +Definition addimm_shlimm sh k2 e1 := + if Compopts.optim_addx tt + then + match shift1_4_of_z (Int.unsigned sh) with + | Some s14 => Eop (Oaddximm s14 k2) (e1:::Enil) + | None => Eop (Oaddimm k2) ((Eop (Oshlimm sh) (e1:::Enil)):::Enil) + end + else Eop (Oaddimm k2) ((Eop (Oshlimm sh) (e1:::Enil)):::Enil). + +Nondetfunction addimm (n: int) (e: expr) := + if Int.eq n Int.zero then e else + match e with + | Eop (Ointconst m) Enil => Eop (Ointconst (Int.add n m)) Enil + | Eop (Oaddrsymbol s m) Enil => Eop (Oaddrsymbol s (Ptrofs.add (Ptrofs.of_int n) m)) Enil + | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n) m)) Enil + | Eop (Oaddimm m) (t ::: Enil) => Eop (Oaddimm(Int.add n m)) (t ::: Enil) + | Eop (Oaddximm sh m) (t ::: Enil) => Eop (Oaddximm sh (Int.add n m)) (t ::: Enil) + | Eop (Oshlimm sh) (t1:::Enil) => addimm_shlimm sh n t1 + | _ => Eop (Oaddimm n) (e ::: Enil) + end. + +Definition add_shlimm n e1 e2 := + if Compopts.optim_addx tt + then + match shift1_4_of_z (Int.unsigned n) with + | Some s14 => Eop (Oaddx s14) (e1:::e2:::Enil) + | None => Eop Oadd (e2:::(Eop (Oshlimm n) (e1:::Enil)):::Enil) + end + else Eop Oadd (e2:::(Eop (Oshlimm n) (e1:::Enil)):::Enil). + +Nondetfunction add (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => addimm n1 t2 + | t1, Eop (Ointconst n2) Enil => addimm n2 t1 + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) => + addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil)) + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddrstack n2) Enil => + Eop Oadd (Eop (Oaddrstack (Ptrofs.add (Ptrofs.of_int n1) n2)) Enil ::: t1 ::: Enil) + | Eop (Oaddrstack n1) Enil, Eop (Oaddimm n2) (t2:::Enil) => + Eop Oadd (Eop (Oaddrstack (Ptrofs.add n1 (Ptrofs.of_int n2))) Enil ::: t2 ::: Enil) + | Eop (Oaddimm n1) (t1:::Enil), t2 => + addimm n1 (Eop Oadd (t1:::t2:::Enil)) + | t1, Eop (Oaddimm n2) (t2:::Enil) => + addimm n2 (Eop Oadd (t1:::t2:::Enil)) + | t1, (Eop Omul (t2:::t3:::Enil)) => + if Compopts.optim_madd tt + then Eop Omadd (t1:::t2:::t3:::Enil) + else Eop Oadd (e1:::e2:::Enil) + | (Eop Omul (t2:::t3:::Enil)), t1 => + if Compopts.optim_madd tt + then Eop Omadd (t1:::t2:::t3:::Enil) + else Eop Oadd (e1:::e2:::Enil) + | t1, (Eop (Omulimm n) (t2:::Enil)) => + if Compopts.optim_madd tt + then Eop (Omaddimm n) (t1:::t2:::Enil) + else Eop Oadd (e1:::e2:::Enil) + | (Eop (Omulimm n) (t2:::Enil)), t1 => + if Compopts.optim_madd tt + then Eop (Omaddimm n) (t1:::t2:::Enil) + else Eop Oadd (e1:::e2:::Enil) + | (Eop (Oshlimm n) (t1:::Enil)), t2 => + add_shlimm n t1 t2 + | t2, (Eop (Oshlimm n) (t1:::Enil)) => + add_shlimm n t1 t2 + | _, _ => Eop Oadd (e1:::e2:::Enil) + end. + +(** ** Integer and pointer subtraction *) + +Nondetfunction sub (e1: expr) (e2: expr) := + match e1, e2 with + | t1, Eop (Ointconst n2) Enil => + addimm (Int.neg n2) t1 + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) => + addimm (Int.sub n1 n2) (Eop Osub (t1:::t2:::Enil)) + | Eop (Oaddimm n1) (t1:::Enil), t2 => + addimm n1 (Eop Osub (t1:::t2:::Enil)) + | t1, Eop (Oaddimm n2) (t2:::Enil) => + addimm (Int.neg n2) (Eop Osub (t1:::t2:::Enil)) + | t1, (Eop Omul (t2:::t3:::Enil)) => + Eop Omsub (t1:::t2:::t3:::Enil) + | t1, (Eop (Omulimm n) (t2:::Enil)) => + if Compopts.optim_madd tt + then Eop (Omaddimm (Int.neg n)) (t1:::t2:::Enil) + else Eop Osub (e1:::e2:::Enil) + | _, _ => Eop Osub (e1:::e2:::Enil) + end. + +Nondetfunction negint (e: expr) := + match e with + | Eop (Ointconst n) Enil => Eop (Ointconst (Int.neg n)) Enil + | _ => Eop Oneg (e ::: Enil) + end. + +(** ** Immediate shifts *) + +Nondetfunction shlimm (e1: expr) (n: int) := + if Int.eq n Int.zero then + e1 + else if negb (Int.ltu n Int.iwordsize) then + Eop Oshl (e1 ::: Eop (Ointconst n) Enil ::: Enil) + else match e1 with + | Eop (Ointconst n1) Enil => + Eop (Ointconst (Int.shl n1 n)) Enil + | Eop (Oshlimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int.iwordsize + then Eop (Oshlimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshlimm n) (e1:::Enil) + | _ => + Eop (Oshlimm n) (e1:::Enil) + end. + +Nondetfunction shruimm (e1: expr) (n: int) := + if Int.eq n Int.zero then + e1 + else if negb (Int.ltu n Int.iwordsize) then + Eop Oshru (e1 ::: Eop (Ointconst n) Enil ::: Enil) + else match e1 with + | Eop (Ointconst n1) Enil => + Eop (Ointconst (Int.shru n1 n)) Enil + | Eop (Oshruimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int.iwordsize + then Eop (Oshruimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshruimm n) (e1:::Enil) + | Eop (Oshlimm n1) (t1:::Enil) => + let stop := Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one) in + let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int.zwordsize in + if is_bitfield stop start + then Eop (Oextfz stop start) (t1:::Enil) + else Eop (Oshruimm n) (e1:::Enil) + | _ => + Eop (Oshruimm n) (e1:::Enil) + end. + +Nondetfunction shrimm (e1: expr) (n: int) := + if Int.eq n Int.zero then + e1 + else if negb (Int.ltu n Int.iwordsize) then + Eop Oshr (e1 ::: Eop (Ointconst n) Enil ::: Enil) + else match e1 with + | Eop (Ointconst n1) Enil => + Eop (Ointconst (Int.shr n1 n)) Enil + | Eop (Oshrimm n1) (t1:::Enil) => + if Int.ltu (Int.add n n1) Int.iwordsize + then Eop (Oshrimm (Int.add n n1)) (t1:::Enil) + else Eop (Oshrimm n) (e1:::Enil) + | Eop (Oshlimm n1) (t1:::Enil) => + let stop := Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one) in + let start := Z.sub (Z.add (Z.add (Int.unsigned n) stop) Z.one) Int.zwordsize in + if is_bitfield stop start + then Eop (Oextfs stop start) (t1:::Enil) + else Eop (Oshrimm n) (e1:::Enil) + | _ => + Eop (Oshrimm n) (e1:::Enil) + end. + +(** ** Integer multiply *) + +Definition mulimm_base (n1: int) (e2: expr) := + match Int.one_bits n1 with + | i :: nil => + shlimm e2 i + | i :: j :: nil => + Elet e2 (add (shlimm (Eletvar 0) i) (shlimm (Eletvar 0) j)) + | _ => + Eop (Omulimm n1) (e2 ::: Enil) + end. + +Nondetfunction mulimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil + else if Int.eq n1 Int.one then e2 + else match e2 with + | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.mul n1 n2)) Enil + | Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.mul n1 n2) (mulimm_base n1 t2) + | _ => mulimm_base n1 e2 + end. + +Nondetfunction mul (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => mulimm n1 t2 + | t1, Eop (Ointconst n2) Enil => mulimm n2 t1 + | _, _ => Eop Omul (e1:::e2:::Enil) + end. + +Definition mulhs (e1: expr) (e2: expr) := + if Archi.ptr64 then + Eop Olowlong + (Eop (Oshrlimm (Int.repr 32)) + (Eop Omull (Eop Ocast32signed (e1 ::: Enil) ::: + Eop Ocast32signed (e2 ::: Enil) ::: Enil) ::: Enil) + ::: Enil) + else + Eop Omulhs (e1 ::: e2 ::: Enil). + +Definition mulhu (e1: expr) (e2: expr) := + if Archi.ptr64 then + Eop Olowlong + (Eop (Oshrluimm (Int.repr 32)) + (Eop Omull (Eop Ocast32unsigned (e1 ::: Enil) ::: + Eop Ocast32unsigned (e2 ::: Enil) ::: Enil) ::: Enil) + ::: Enil) + else + Eop Omulhu (e1 ::: e2 ::: Enil). + +(** ** Bitwise and, or, xor *) + +Nondetfunction andimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil + else if Int.eq n1 Int.mone then e2 + else match e2 with + | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil + | Eop (Oandimm n2) (t2:::Enil) => Eop (Oandimm (Int.and n1 n2)) (t2:::Enil) + | Eop Onot (t2:::Enil) => Eop (Oandnimm n1) (t2:::Enil) + | _ => Eop (Oandimm n1) (e2:::Enil) + end. + +Nondetfunction and (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => andimm n1 t2 + | t1, Eop (Ointconst n2) Enil => andimm n2 t1 + | (Eop Onot (t1:::Enil)), t2 => Eop Oandn (t1:::t2:::Enil) + | t1, (Eop Onot (t2:::Enil)) => Eop Oandn (t2:::t1:::Enil) + | _, _ => Eop Oand (e1:::e2:::Enil) + end. + +Nondetfunction orimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then e2 + else if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil + else match e2 with + | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil + | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil) + | Eop Onot (t2:::Enil) => Eop (Oornimm n1) (t2:::Enil) + | _ => Eop (Oorimm n1) (e2:::Enil) + end. + +Nondetfunction or (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => orimm n1 t2 + | t1, Eop (Ointconst n2) Enil => orimm n2 t1 + | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) => + if Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2 + then Eop (Ororimm n2) (t1:::Enil) + else Eop Oor (e1:::e2:::Enil) + | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) => + if Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2 + then Eop (Ororimm n2) (t1:::Enil) + else Eop Oor (e1:::e2:::Enil) + | (Eop Onot (t1:::Enil)), t2 => Eop Oorn (t1:::t2:::Enil) + | t1, (Eop Onot (t2:::Enil)) => Eop Oorn (t2:::t1:::Enil) + | (Eop (Oandimm nmask) (prev:::Enil)), + (Eop (Oandimm mask) + ((Eop (Oshlimm start) (fld:::Enil)):::Enil)) => + let zstart := Int.unsigned start in + let zstop := int_highest_bit mask in + if is_bitfield zstop zstart + then + let mask' := Int.repr (zbitfield_mask zstop zstart) in + if and_dec (Int.eq_dec mask mask') + (Int.eq_dec nmask (Int.not mask')) + then Eop (Oinsf zstop zstart) (prev:::fld:::Enil) + else Eop Oor (e1:::e2:::Enil) + else Eop Oor (e1:::e2:::Enil) + | (Eop (Oandimm mask) + ((Eop (Oshlimm start) (fld:::Enil)):::Enil)), + (Eop (Oandimm nmask) (prev:::Enil)) => + let zstart := Int.unsigned start in + let zstop := int_highest_bit mask in + if is_bitfield zstop zstart + then + let mask' := Int.repr (zbitfield_mask zstop zstart) in + if and_dec (Int.eq_dec mask mask') + (Int.eq_dec nmask (Int.not mask')) + then Eop (Oinsf zstop zstart) (prev:::fld:::Enil) + else Eop Oor (e1:::e2:::Enil) + else Eop Oor (e1:::e2:::Enil) + | (Eop (Oandimm nmask) (prev:::Enil)), + (Eop (Oandimm mask) (fld:::Enil)) => + let zstart := 0 in + let zstop := int_highest_bit mask in + if is_bitfield zstop zstart + then + let mask' := Int.repr (zbitfield_mask zstop zstart) in + if and_dec (Int.eq_dec mask mask') + (Int.eq_dec nmask (Int.not mask')) + then Eop (Oinsf zstop zstart) (prev:::fld:::Enil) + else let zstart := 0 in + let zstop := int_highest_bit nmask in + if is_bitfield zstop zstart + then + let mask' := Int.repr (zbitfield_mask zstop zstart) in + if and_dec (Int.eq_dec nmask mask') + (Int.eq_dec mask (Int.not mask')) + then Eop (Oinsf zstop zstart) (fld:::prev:::Enil) + else Eop Oor (e1:::e2:::Enil) + else Eop Oor (e1:::e2:::Enil) + else Eop Oor (e1:::e2:::Enil) + | _, _ => Eop Oor (e1:::e2:::Enil) + end. + +Nondetfunction xorimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero + then e2 + else + if Int.eq n1 Int.mone + then Eop Onot (e2:::Enil) + else + match e2 with + | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil + | Eop (Oxorimm n2) (t2:::Enil) => + let n := Int.xor n1 n2 in + if Int.eq n Int.zero then t2 else Eop (Oxorimm n) (t2:::Enil) + | _ => Eop (Oxorimm n1) (e2:::Enil) + end. + +Nondetfunction xor (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => xorimm n1 t2 + | t1, Eop (Ointconst n2) Enil => xorimm n2 t1 + | _, _ => Eop Oxor (e1:::e2:::Enil) + end. + +(** ** Integer logical negation *) + +Nondetfunction notint (e: expr) := + match e with + | Eop Oand (e1:::e2:::Enil) => Eop Onand (e1:::e2:::Enil) + | Eop (Oandimm n) (e1:::Enil) => Eop (Onandimm n) (e1:::Enil) + | Eop Oor (e1:::e2:::Enil) => Eop Onor (e1:::e2:::Enil) + | Eop (Oorimm n) (e1:::Enil) => Eop (Onorimm n) (e1:::Enil) + | Eop Oxor (e1:::e2:::Enil) => Eop Onxor (e1:::e2:::Enil) + | Eop (Oxorimm n) (e1:::Enil) => Eop (Onxorimm n) (e1:::Enil) + | Eop Onand (e1:::e2:::Enil) => Eop Oand (e1:::e2:::Enil) + | Eop (Onandimm n) (e1:::Enil) => Eop (Oandimm n) (e1:::Enil) + | Eop Onor (e1:::e2:::Enil) => Eop Oor (e1:::e2:::Enil) + | Eop (Onorimm n) (e1:::Enil) => Eop (Oorimm n) (e1:::Enil) + | Eop Onxor (e1:::e2:::Enil) => Eop Oxor (e1:::e2:::Enil) + | Eop (Onxorimm n) (e1:::Enil) => Eop (Oxorimm n) (e1:::Enil) + | Eop Oandn (e1:::e2:::Enil) => Eop Oorn (e2:::e1:::Enil) + | Eop (Oandnimm n) (e1:::Enil) => Eop (Oorimm (Int.not n)) (e1:::Enil) + | Eop Oorn (e1:::e2:::Enil) => Eop Oandn (e2:::e1:::Enil) + | Eop (Oornimm n) (e1:::Enil) => Eop (Oandimm (Int.not n)) (e1:::Enil) + | Eop Onot (e1:::Enil) => e1 + | Eop (Ointconst k) Enil => Eop (Ointconst (Int.not k)) Enil + | _ => Eop Onot (e:::Enil) + end. + +(** ** Integer division and modulus *) + +Definition divs_base (e1: expr) (e2: expr) := + Eexternal i32_sdiv sig_ii_i (e1 ::: e2 ::: Enil). + +Definition mods_base (e1: expr) (e2: expr) := + Eexternal i32_smod sig_ii_i (e1 ::: e2 ::: Enil). + +Definition divu_base (e1: expr) (e2: expr) := + Eexternal i32_udiv sig_ii_i (e1 ::: e2 ::: Enil). + +Definition modu_base (e1: expr) (e2: expr) := + Eexternal i32_umod sig_ii_i (e1 ::: e2 ::: Enil). + +Definition shrximm (e1: expr) (n2: int) := + if Int.eq n2 Int.zero then e1 else Eop (Oshrximm n2) (e1:::Enil). + +(* Alternate definition, not convenient for strength reduction during constant propagation *) +(* +(* n2 will be less than 31. *) + +Definition shrximm_inner (e1: expr) (n2: int) := + Eop (Oshruimm (Int.sub Int.iwordsize n2)) + ((Eop (Oshrimm (Int.repr (Int.zwordsize - 1))) + (e1 ::: Enil)) + ::: Enil). + +Definition shrximm (e1: expr) (n2: int) := + if Int.eq n2 Int.zero then e1 + else Eop (Oshrimm n2) + ((Eop Oadd (e1 ::: shrximm_inner e1 n2 ::: Enil)) + ::: Enil). +*) + +(** ** General shifts *) + +Nondetfunction shl (e1: expr) (e2: expr) := + match e2 with + | Eop (Ointconst n2) Enil => shlimm e1 n2 + | _ => Eop Oshl (e1:::e2:::Enil) + end. + +Nondetfunction shr (e1: expr) (e2: expr) := + match e2 with + | Eop (Ointconst n2) Enil => shrimm e1 n2 + | _ => Eop Oshr (e1:::e2:::Enil) + end. + +Nondetfunction shru (e1: expr) (e2: expr) := + match e2 with + | Eop (Ointconst n2) Enil => shruimm e1 n2 + | _ => Eop Oshru (e1:::e2:::Enil) + end. + +(** ** Floating-point arithmetic *) + +Definition negf (e: expr) := Eop Onegf (e ::: Enil). +Definition absf (e: expr) := Eop Oabsf (e ::: Enil). +Definition addf (e1 e2: expr) := Eop Oaddf (e1 ::: e2 ::: Enil). +Definition subf (e1 e2: expr) := Eop Osubf (e1 ::: e2 ::: Enil). +Definition mulf (e1 e2: expr) := Eop Omulf (e1 ::: e2 ::: Enil). + +Definition negfs (e: expr) := Eop Onegfs (e ::: Enil). +Definition absfs (e: expr) := Eop Oabsfs (e ::: Enil). +Definition addfs (e1 e2: expr) := Eop Oaddfs (e1 ::: e2 ::: Enil). +Definition subfs (e1 e2: expr) := Eop Osubfs (e1 ::: e2 ::: Enil). +Definition mulfs (e1 e2: expr) := Eop Omulfs (e1 ::: e2 ::: Enil). + +(** ** Comparisons *) + +Nondetfunction compimm (default: comparison -> int -> condition) + (sem: comparison -> int -> int -> bool) + (c: comparison) (e1: expr) (n2: int) := + match c, e1 with + | c, Eop (Ointconst n1) Enil => + Eop (Ointconst (if sem c n1 n2 then Int.one else Int.zero)) Enil + | Ceq, Eop (Ocmp c) el => + if Int.eq_dec n2 Int.zero then + Eop (Ocmp (negate_condition c)) el + else if Int.eq_dec n2 Int.one then + Eop (Ocmp c) el + else + Eop (Ointconst Int.zero) Enil + | Cne, Eop (Ocmp c) el => + if Int.eq_dec n2 Int.zero then + Eop (Ocmp c) el + else if Int.eq_dec n2 Int.one then + Eop (Ocmp (negate_condition c)) el + else + Eop (Ointconst Int.one) Enil + | _, _ => + Eop (Ocmp (default c n2)) (e1 ::: Enil) + end. + +Nondetfunction comp (c: comparison) (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => + compimm Ccompimm Int.cmp (swap_comparison c) t2 n1 + | t1, Eop (Ointconst n2) Enil => + compimm Ccompimm Int.cmp c t1 n2 + | _, _ => + Eop (Ocmp (Ccomp c)) (e1 ::: e2 ::: Enil) + end. + +Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => + compimm Ccompuimm Int.cmpu (swap_comparison c) t2 n1 + | t1, Eop (Ointconst n2) Enil => + compimm Ccompuimm Int.cmpu c t1 n2 + | _, _ => + Eop (Ocmp (Ccompu c)) (e1 ::: e2 ::: Enil) + end. + +Definition compf (c: comparison) (e1: expr) (e2: expr) := + Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil). + +Definition compfs (c: comparison) (e1: expr) (e2: expr) := + Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil). + +(** ** Integer conversions *) + +Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e. + +Nondetfunction cast8signed (e: expr) := + match e with + | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 8 n)) Enil + | _ => Eop Ocast8signed (e ::: Enil) + end. + +Definition cast16unsigned (e: expr) := andimm (Int.repr 65535) e. + +Nondetfunction cast16signed (e: expr) := + match e with + | Eop (Ointconst n) Enil => Eop (Ointconst (Int.sign_ext 16 n)) Enil + | _ => Eop Ocast16signed (e ::: Enil) + end. + +(** ** Floating-point conversions *) + +Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil). +Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil). + +Nondetfunction floatofintu (e: expr) := + match e with + | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil + | _ => Eop Ofloatoflongu ((Eop Ocast32unsigned (e ::: Enil)) ::: Enil) + end. + +Nondetfunction floatofint (e: expr) := + match e with + | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil + | _ => Eop Ofloatoflong ((Eop Ocast32signed (e ::: Enil)) ::: Enil) + end. + +Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil). +Definition singleofint (e: expr) := Eop Osingleofint (e ::: Enil). + +Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil). +Definition singleofintu (e: expr) := Eop Osingleofintu (e ::: Enil). + +Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil). +Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil). + +(** ** Recognition of addressing modes for load and store operations *) + +Nondetfunction addressing (chunk: memory_chunk) (e: expr) := + match e with + | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) + | Eop (Oaddrsymbol id ofs) Enil => + (if (orb (Archi.pic_code tt) (negb (Compopts.optim_globaladdrtmp tt))) + then (Aindexed Ptrofs.zero, e:::Enil) + else (Aglobal id ofs, Enil)) + | Eop (Oaddimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int n), e1:::Enil) + | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed (Ptrofs.of_int64 n), e1:::Enil) + | Eop Oaddl (e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil) => + (if Compopts.optim_xsaddr tt + then let zscale := Int.unsigned scale in + if Z.eq_dec zscale (zscale_of_chunk chunk) + then (Aindexed2XS zscale, e1:::e2:::Enil) + else (Aindexed2, e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil) + else (Aindexed2, e1:::(Eop (Oshllimm scale) (e2:::Enil)):::Enil)) + | Eop (Oaddxl sh) (e1:::e2:::Enil) => + let zscale := ExtValues.z_of_shift1_4 sh in + let scale := Int.repr zscale in + (if Compopts.optim_xsaddr tt + then if Z.eq_dec zscale (zscale_of_chunk chunk) + then (Aindexed2XS zscale, e2:::e1:::Enil) + else (Aindexed2, e2:::(Eop (Oshllimm scale) (e1:::Enil)):::Enil) + else (Aindexed2, e2:::(Eop (Oshllimm scale) (e1:::Enil)):::Enil)) + | Eop Oaddl (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil) + | _ => (Aindexed Ptrofs.zero, e:::Enil) + end. + +(** ** Arguments of builtins *) + +Nondetfunction builtin_arg (e: expr) := + match e with + | Eop (Ointconst n) Enil => BA_int n + | Eop (Oaddrsymbol id ofs) Enil => BA_addrglobal id ofs + | Eop (Oaddrstack ofs) Enil => BA_addrstack ofs + | Eop Omakelong (Eop (Ointconst h) Enil ::: Eop (Ointconst l) Enil ::: Enil) => + BA_long (Int64.ofwords h l) + | Eop Omakelong (h ::: l ::: Enil) => BA_splitlong (BA h) (BA l) + | Eload chunk (Ainstack ofs) Enil => BA_loadstack chunk ofs + | Eop (Oaddimm n) (e1:::Enil) => + if Archi.ptr64 then BA e else BA_addptr (BA e1) (BA_int n) + | Eop (Oaddlimm n) (e1:::Enil) => + if Archi.ptr64 then BA_addptr (BA e1) (BA_long n) else BA e + | _ => BA e + end. + +(* float division *) + +Definition divf_base (e1: expr) (e2: expr) := + (* Eop Odivf (e1 ::: e2 ::: Enil). *) + Eexternal f64_div sig_ff_f (e1 ::: e2 ::: Enil). + +Definition divfs_base1 (e2 : expr) := + Eop Oinvfs (e2 ::: Enil). +Definition divfs_baseX (e1 : expr) (e2 : expr) := + (* Eop Odivf (e1 ::: e2 ::: Enil). *) + Eexternal f32_div sig_ss_s (e1 ::: e2 ::: Enil). + +Nondetfunction divfs_base (e1: expr) := + match e1 with + | Eop (Osingleconst f) Enil => + (if Float32.eq_dec f ExtFloat32.one + then divfs_base1 + else divfs_baseX e1) + | _ => divfs_baseX e1 + end. + +Nondetfunction gen_fma args := + match args with + | (Eop Onegf (e1:::Enil)):::e2:::e3:::Enil => Some (Eop Ofmsubf (e3:::e1:::e2:::Enil)) + | e1:::e2:::e3:::Enil => Some (Eop Ofmaddf (e3:::e1:::e2:::Enil)) + | _ => None + end. + +Nondetfunction gen_fmaf args := + match args with + | (Eop Onegfs (e1:::Enil)):::e2:::e3:::Enil => Some (Eop Ofmsubfs (e3:::e1:::e2:::Enil)) + | e1:::e2:::e3:::Enil => Some (Eop Ofmaddfs (e3:::e1:::e2:::Enil)) + | _ => None + end. + +Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr := + match b with + | BI_fmin => Some (Eop Ominf args) + | BI_fmax => Some (Eop Omaxf args) + | BI_fminf => Some (Eop Ominfs args) + | BI_fmaxf => Some (Eop Omaxfs args) + | BI_fma => gen_fma args + | BI_fmaf => gen_fmaf args + end. +End SELECT. + +(* Local Variables: *) +(* mode: coq *) +(* End: *) diff --git a/kvx/SelectOpproof.v b/kvx/SelectOpproof.v new file mode 100644 index 00000000..0ede1e2d --- /dev/null +++ b/kvx/SelectOpproof.v @@ -0,0 +1,1901 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Correctness of instruction selection for operators *) + +Require Import Builtins. +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import ExtValues. +Require Import Memory. +Require Import Globalenvs. +Require Import Cminor. +Require Import Op. +Require Import CminorSel. +Require Import Builtins1. +Require Import SelectOp. +Require Import Events. +Require Import OpHelpers. +Require Import OpHelpersproof. +Require Import DecBoolOps. +Require Import Lia. + +Local Open Scope cminorsel_scope. +Local Open Scope string_scope. + + +(** * Useful lemmas and tactics *) + +(** The following are trivial lemmas and custom tactics that help + perform backward (inversion) and forward reasoning over the evaluation + of operator applications. *) + +Ltac EvalOp := eapply eval_Eop; eauto with evalexpr. + +Ltac InvEval1 := + match goal with + | [ H: (eval_expr _ _ _ _ _ (Eop _ Enil) _) |- _ ] => + inv H; InvEval1 + | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: Enil)) _) |- _ ] => + inv H; InvEval1 + | [ H: (eval_expr _ _ _ _ _ (Eop _ (_ ::: _ ::: Enil)) _) |- _ ] => + inv H; InvEval1 + | [ H: (eval_exprlist _ _ _ _ _ Enil _) |- _ ] => + inv H; InvEval1 + | [ H: (eval_exprlist _ _ _ _ _ (_ ::: _) _) |- _ ] => + inv H; InvEval1 + | _ => + idtac + end. + +Ltac InvEval2 := + match goal with + | [ H: (eval_operation _ _ _ nil _ = Some _) |- _ ] => + simpl in H; inv H + | [ H: (eval_operation _ _ _ (_ :: nil) _ = Some _) |- _ ] => + simpl in H; FuncInv + | [ H: (eval_operation _ _ _ (_ :: _ :: nil) _ = Some _) |- _ ] => + simpl in H; FuncInv + | [ H: (eval_operation _ _ _ (_ :: _ :: _ :: nil) _ = Some _) |- _ ] => + simpl in H; FuncInv + | _ => + idtac + end. + +Ltac InvEval := InvEval1; InvEval2; InvEval2. + +Ltac TrivialExists := + match goal with + | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto] + end. + +(** * Correctness of the smart constructors *) + +Section CMCONSTR. +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. +Variable sp: val. +Variable e: env. +Variable m: mem. + +(* Helper lemmas - from SplitLongproof.v *) + +Ltac UseHelper := decompose [Logic.and] arith_helpers_correct; eauto. +Ltac DeclHelper := red in HELPERS; decompose [Logic.and] HELPERS; eauto. + +Lemma eval_helper: + forall le id name sg args vargs vres, + eval_exprlist ge sp e m le args vargs -> + helper_declared prog id name sg -> + external_implements name sg vargs vres -> + eval_expr ge sp e m le (Eexternal id sg args) vres. +Proof. + intros. + red in H0. apply Genv.find_def_symbol in H0. destruct H0 as (b & P & Q). + rewrite <- Genv.find_funct_ptr_iff in Q. + econstructor; eauto. +Qed. + +Corollary eval_helper_1: + forall le id name sg arg1 varg1 vres, + eval_expr ge sp e m le arg1 varg1 -> + helper_declared prog id name sg -> + external_implements name sg (varg1::nil) vres -> + eval_expr ge sp e m le (Eexternal id sg (arg1 ::: Enil)) vres. +Proof. + intros. eapply eval_helper; eauto. constructor; auto. constructor. +Qed. + +Corollary eval_helper_2: + forall le id name sg arg1 arg2 varg1 varg2 vres, + eval_expr ge sp e m le arg1 varg1 -> + eval_expr ge sp e m le arg2 varg2 -> + helper_declared prog id name sg -> + external_implements name sg (varg1::varg2::nil) vres -> + eval_expr ge sp e m le (Eexternal id sg (arg1 ::: arg2 ::: Enil)) vres. +Proof. + intros. eapply eval_helper; eauto. constructor; auto. constructor; auto. constructor. +Qed. + +(** We now show that the code generated by "smart constructor" functions + such as [Selection.notint] behaves as expected. Continuing the + [notint] example, we show that if the expression [e] + evaluates to some integer value [Vint n], then [Selection.notint e] + evaluates to a value [Vint (Int.not n)] which is indeed the integer + negation of the value of [e]. + + All proofs follow a common pattern: +- Reasoning by case over the result of the classification functions + (such as [add_match] for integer addition), gathering additional + information on the shape of the argument expressions in the non-default + cases. +- Inversion of the evaluations of the arguments, exploiting the additional + information thus gathered. +- Equational reasoning over the arithmetic operations performed, + using the lemmas from the [Int] and [Float] modules. +- Construction of an evaluation derivation for the expression returned + by the smart constructor. +*) + +Definition unary_constructor_sound (cstr: expr -> expr) (sem: val -> val) : Prop := + forall le a x, + eval_expr ge sp e m le a x -> + exists v, eval_expr ge sp e m le (cstr a) v /\ Val.lessdef (sem x) v. + +Definition binary_constructor_sound (cstr: expr -> expr -> expr) (sem: val -> val -> val) : Prop := + forall le a x b y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (cstr a b) v /\ Val.lessdef (sem x y) v. + +Theorem eval_addrsymbol: + forall le id ofs, + exists v, eval_expr ge sp e m le (addrsymbol id ofs) v /\ Val.lessdef (Genv.symbol_address ge id ofs) v. +Proof. + intros. unfold addrsymbol. econstructor; split. + EvalOp. simpl; eauto. + auto. +Qed. + +Theorem eval_addrstack: + forall le ofs, + exists v, eval_expr ge sp e m le (addrstack ofs) v /\ Val.lessdef (Val.offset_ptr sp ofs) v. +Proof. + intros. unfold addrstack. econstructor; split. + EvalOp. simpl; eauto. + auto. +Qed. + +Theorem eval_addimm_shlimm: + forall sh k2, unary_constructor_sound (addimm_shlimm sh k2) (fun x => ExtValues.addx sh x (Vint k2)). +Proof. + red; unfold addimm_shlimm; intros. + destruct (Compopts.optim_addx tt). + { + destruct (shift1_4_of_z (Int.unsigned sh)) as [s14 |] eqn:SHIFT. + - TrivialExists. simpl. + f_equal. + unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *. + destruct (Z.eq_dec _ _) as [e1|]. + { replace s14 with SHIFT1 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e1. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e1. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e2|]. + { replace s14 with SHIFT2 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e2. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e2. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e3|]. + { replace s14 with SHIFT3 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e3. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e3. + rewrite Int.repr_unsigned. + reflexivity. + } + destruct (Z.eq_dec _ _) as [e4|]. + { replace s14 with SHIFT4 by congruence. + destruct x; simpl; trivial. + replace (Int.ltu _ _) with true by reflexivity. + unfold Int.ltu. + rewrite e4. + replace (if zlt _ _ then true else false) with true by reflexivity. + rewrite <- e4. + rewrite Int.repr_unsigned. + reflexivity. + } + discriminate. + - unfold addx. rewrite Val.add_commut. + TrivialExists. + repeat (try eassumption; try econstructor). + simpl. + reflexivity. + } + { unfold addx. rewrite Val.add_commut. + TrivialExists. + repeat (try eassumption; try econstructor). + simpl. + reflexivity. + } +Qed. + +Theorem eval_addimm: + forall n, unary_constructor_sound (addimm n) (fun x => Val.add x (Vint n)). +Proof. + red; unfold addimm; intros until x. + predSpec Int.eq Int.eq_spec n Int.zero. + - subst n. intros. exists x; split; auto. + destruct x; simpl; auto. + rewrite Int.add_zero; auto. + - case (addimm_match a); intros; InvEval; simpl. + + TrivialExists; simpl. rewrite Int.add_commut. auto. + + econstructor; split. EvalOp. simpl; eauto. + unfold Genv.symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. + + econstructor; split. EvalOp. simpl; eauto. + destruct sp; simpl; auto. + + TrivialExists; simpl. subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto. + + TrivialExists; simpl. subst x. + destruct v1; simpl; trivial. + destruct (Int.ltu _ _); simpl; trivial. + rewrite Int.add_assoc. rewrite Int.add_commut. + reflexivity. + + pose proof eval_addimm_shlimm as ADDX. + unfold unary_constructor_sound in ADDX. + unfold addx in ADDX. + rewrite Val.add_commut. + subst x. + apply ADDX; assumption. + + TrivialExists. +Qed. + +Lemma eval_addx: forall n, binary_constructor_sound (add_shlimm n) (ExtValues.addx n). +Proof. + red. + intros. + unfold add_shlimm. + destruct (Compopts.optim_addx tt). + { + destruct (shift1_4_of_z (Int.unsigned n)) as [s14 |] eqn:SHIFT. + - TrivialExists. + simpl. + f_equal. f_equal. + unfold shift1_4_of_z, int_of_shift1_4, z_of_shift1_4 in *. + destruct (Z.eq_dec _ _) as [e1|]. + { replace s14 with SHIFT1 by congruence. + rewrite <- e1. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e2|]. + { replace s14 with SHIFT2 by congruence. + rewrite <- e2. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e3|]. + { replace s14 with SHIFT3 by congruence. + rewrite <- e3. + apply Int.repr_unsigned. } + destruct (Z.eq_dec _ _) as [e4|]. + { replace s14 with SHIFT4 by congruence. + rewrite <- e4. + apply Int.repr_unsigned. } + discriminate. + - TrivialExists; + repeat econstructor; eassumption. + } + { TrivialExists; + repeat econstructor; eassumption. + } +Qed. + +Theorem eval_add: binary_constructor_sound add Val.add. +Proof. + red; intros until y. + unfold add; case (add_match a b); intros; InvEval. + - rewrite Val.add_commut. apply eval_addimm; auto. + - apply eval_addimm; auto. + - subst. + replace (Val.add (Val.add v1 (Vint n1)) (Val.add v0 (Vint n2))) + with (Val.add (Val.add v1 v0) (Val.add (Vint n1) (Vint n2))). + apply eval_addimm. EvalOp. + repeat rewrite Val.add_assoc. decEq. apply Val.add_permut. + - subst. econstructor; split. + EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto. + rewrite Val.add_commut. destruct sp; simpl; auto. + destruct v1; simpl; auto. + - subst. econstructor; split. + EvalOp. constructor. EvalOp. simpl; eauto. constructor. eauto. constructor. simpl; eauto. + destruct sp; simpl; auto. + destruct v1; simpl; auto. + - subst. + replace (Val.add (Val.add v1 (Vint n1)) y) + with (Val.add (Val.add v1 y) (Vint n1)). + apply eval_addimm. EvalOp. + repeat rewrite Val.add_assoc. decEq. apply Val.add_commut. + - subst. + replace (Val.add x (Val.add v1 (Vint n2))) + with (Val.add (Val.add x v1) (Vint n2)). + apply eval_addimm. EvalOp. + repeat rewrite Val.add_assoc. reflexivity. + - (* Omadd *) + subst. destruct (Compopts.optim_madd tt); TrivialExists; + repeat (eauto; econstructor; simpl). + - (* Omadd rev *) + subst. destruct (Compopts.optim_madd tt); TrivialExists; + repeat (eauto; econstructor; simpl). + simpl. rewrite Val.add_commut. reflexivity. + - (* Omaddimm *) + subst. destruct (Compopts.optim_madd tt); TrivialExists; + repeat (eauto; econstructor; simpl). + - (* Omaddimm rev *) + subst. destruct (Compopts.optim_madd tt); TrivialExists; + repeat (eauto; econstructor; simpl). + simpl. rewrite Val.add_commut. reflexivity. + (* Oaddx *) + - subst. pose proof eval_addx as ADDX. + unfold binary_constructor_sound in ADDX. + rewrite Val.add_commut. + apply ADDX; assumption. + (* Oaddx *) + - subst. pose proof eval_addx as ADDX. + unfold binary_constructor_sound in ADDX. + apply ADDX; assumption. + - TrivialExists. +Qed. + +Theorem eval_sub: binary_constructor_sound sub Val.sub. +Proof. + red; intros until y. + unfold sub; case (sub_match a b); intros; InvEval. + - rewrite Val.sub_add_opp. apply eval_addimm; auto. + - subst. rewrite Val.sub_add_l. rewrite Val.sub_add_r. + rewrite Val.add_assoc. simpl. rewrite Int.add_commut. rewrite <- Int.sub_add_opp. + apply eval_addimm; EvalOp. + - subst. rewrite Val.sub_add_l. apply eval_addimm; EvalOp. + - subst. rewrite Val.sub_add_r. apply eval_addimm; EvalOp. + - TrivialExists. simpl. subst. reflexivity. + - destruct (Compopts.optim_madd tt). + + TrivialExists. simpl. subst. + rewrite sub_add_neg. + rewrite neg_mul_distr_r. + unfold Val.neg. + reflexivity. + + TrivialExists. repeat (eauto; econstructor). + simpl. subst. reflexivity. + - TrivialExists. +Qed. + +Theorem eval_negint: unary_constructor_sound negint (fun v => Val.sub Vzero v). +Proof. + red; intros until x. unfold negint. case (negint_match a); intros; InvEval. + TrivialExists. + TrivialExists. +Qed. + +Theorem eval_shlimm: + forall n, unary_constructor_sound (fun a => shlimm a n) + (fun x => Val.shl x (Vint n)). +Proof. + red; intros until x. unfold shlimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shl_zero; auto. + + destruct (Int.ltu n Int.iwordsize) eqn:LT; simpl. + destruct (shlimm_match a); intros; InvEval. + - exists (Vint (Int.shl n1 n)); split. EvalOp. + simpl. rewrite LT. auto. + - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?. + + exists (Val.shl v1 (Vint (Int.add n n1))); split. EvalOp. + subst. destruct v1; simpl; auto. + rewrite Heqb. + destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto. + destruct (Int.ltu n Int.iwordsize) eqn:?; simpl; auto. + rewrite Int.add_commut. rewrite Int.shl_shl; auto. rewrite Int.add_commut; auto. + + subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor. + simpl. auto. + - TrivialExists. + - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. + auto. +Qed. + +Theorem eval_shruimm: + forall n, unary_constructor_sound (fun a => shruimm a n) + (fun x => Val.shru x (Vint n)). +Proof. + red; intros until x. unfold shruimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shru_zero; auto. + + destruct (Int.ltu n Int.iwordsize) eqn:LT. + destruct (shruimm_match a); intros; InvEval. + - exists (Vint (Int.shru n1 n)); split. EvalOp. + simpl. rewrite LT; auto. + - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?. + exists (Val.shru v1 (Vint (Int.add n n1))); split. EvalOp. + subst. destruct v1; simpl; auto. + rewrite Heqb. + destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto. + rewrite LT. rewrite Int.add_commut. rewrite Int.shru_shru; auto. rewrite Int.add_commut; auto. + subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor. + simpl. auto. + - subst x. + simpl negb. + cbn iota. + destruct (is_bitfield _ _) eqn:BOUNDS. + + exists (extfz (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int.zwordsize) v1). + split. + ++ EvalOp. + ++ unfold extfz. + rewrite BOUNDS. + destruct v1; try (simpl; apply Val.lessdef_undef). + replace (Z.sub Int.zwordsize + (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia. + replace (Z.sub Int.zwordsize + (Z.sub + (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int.zwordsize))) with (Int.unsigned n) by lia. + rewrite Int.repr_unsigned. + rewrite Int.repr_unsigned. + simpl. + destruct (Int.ltu n1 Int.iwordsize) eqn:Hltu_n1; simpl; trivial. + simpl. + destruct (Int.ltu n Int.iwordsize) eqn:Hltu_n; simpl; trivial. + + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity. + - TrivialExists. + - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. + auto. +Qed. + +Theorem eval_shrimm: + forall n, unary_constructor_sound (fun a => shrimm a n) + (fun x => Val.shr x (Vint n)). +Proof. + red; intros until x. unfold shrimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros; subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.shr_zero; auto. + + destruct (Int.ltu n Int.iwordsize) eqn:LT. + destruct (shrimm_match a); intros; InvEval. + - exists (Vint (Int.shr n1 n)); split. EvalOp. + simpl. rewrite LT; auto. + - destruct (Int.ltu (Int.add n n1) Int.iwordsize) eqn:?. + exists (Val.shr v1 (Vint (Int.add n n1))); split. EvalOp. + subst. destruct v1; simpl; auto. + rewrite Heqb. + destruct (Int.ltu n1 Int.iwordsize) eqn:?; simpl; auto. + rewrite LT. + rewrite Int.add_commut. rewrite Int.shr_shr; auto. rewrite Int.add_commut; auto. + subst. TrivialExists. econstructor. EvalOp. simpl; eauto. constructor. + simpl. auto. + - subst x. + simpl negb. + cbn iota. + destruct (is_bitfield _ _) eqn:BOUNDS. + + exists (extfs (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int.zwordsize) v1). + split. + ++ EvalOp. + ++ unfold extfs. + rewrite BOUNDS. + destruct v1; try (simpl; apply Val.lessdef_undef). + replace (Z.sub Int.zwordsize + (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one)) with (Int.unsigned n1) by lia. + replace (Z.sub Int.zwordsize + (Z.sub + (Z.add (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one)) Z.one) + (Z.sub + (Z.add + (Z.add (Int.unsigned n) (Z.sub Int.zwordsize (Z.add (Int.unsigned n1) Z.one))) + Z.one) Int.zwordsize))) with (Int.unsigned n) by lia. + rewrite Int.repr_unsigned. + rewrite Int.repr_unsigned. + simpl. + destruct (Int.ltu n1 Int.iwordsize) eqn:Hltu_n1; simpl; trivial. + simpl. + destruct (Int.ltu n Int.iwordsize) eqn:Hltu_n; simpl; trivial. + + TrivialExists. constructor. econstructor. constructor. eassumption. constructor. simpl. reflexivity. constructor. simpl. reflexivity. + - TrivialExists. + - intros; TrivialExists. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. + auto. +Qed. + +Lemma eval_mulimm_base: + forall n, unary_constructor_sound (mulimm_base n) (fun x => Val.mul x (Vint n)). +Proof. + intros; red; intros; unfold mulimm_base. + + assert (DFL: exists v, eval_expr ge sp e m le (Eop Omul (Eop (Ointconst n) Enil ::: a ::: Enil)) v /\ Val.lessdef (Val.mul x (Vint n)) v). + TrivialExists. econstructor. EvalOp. simpl; eauto. econstructor. eauto. constructor. + rewrite Val.mul_commut. auto. + + generalize (Int.one_bits_decomp n). + generalize (Int.one_bits_range n). + destruct (Int.one_bits n). + - intros. TrivialExists. + - destruct l. + + intros. rewrite H1. simpl. + rewrite Int.add_zero. + replace (Vint (Int.shl Int.one i)) with (Val.shl Vone (Vint i)). rewrite Val.shl_mul. + apply eval_shlimm. auto. simpl. rewrite H0; auto with coqlib. + + destruct l. + intros. rewrite H1. simpl. + exploit (eval_shlimm i (x :: le) (Eletvar 0) x). constructor; auto. intros [v1 [A1 B1]]. + exploit (eval_shlimm i0 (x :: le) (Eletvar 0) x). constructor; auto. intros [v2 [A2 B2]]. + exploit (eval_add (x :: le)). eexact A1. eexact A2. intros [v [A B]]. + exists v; split. econstructor; eauto. + rewrite Int.add_zero. + replace (Vint (Int.add (Int.shl Int.one i) (Int.shl Int.one i0))) + with (Val.add (Val.shl Vone (Vint i)) (Val.shl Vone (Vint i0))). + rewrite Val.mul_add_distr_r. + repeat rewrite Val.shl_mul. eapply Val.lessdef_trans. 2: eauto. apply Val.add_lessdef; auto. + simpl. repeat rewrite H0; auto with coqlib. + intros. TrivialExists. +Qed. + +Theorem eval_mulimm: + forall n, unary_constructor_sound (mulimm n) (fun x => Val.mul x (Vint n)). +Proof. + intros; red; intros until x; unfold mulimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros. exists (Vint Int.zero); split. EvalOp. + destruct x; simpl; auto. subst n. rewrite Int.mul_zero. auto. + + predSpec Int.eq Int.eq_spec n Int.one. + intros. exists x; split; auto. + destruct x; simpl; auto. subst n. rewrite Int.mul_one. auto. + + case (mulimm_match a); intros; InvEval. + - TrivialExists. simpl. rewrite Int.mul_commut; auto. + - subst. rewrite Val.mul_add_distr_l. + exploit eval_mulimm_base; eauto. instantiate (1 := n). intros [v' [A1 B1]]. + exploit (eval_addimm (Int.mul n n2) le (mulimm_base n t2) v'). auto. intros [v'' [A2 B2]]. + exists v''; split; auto. eapply Val.lessdef_trans. eapply Val.add_lessdef; eauto. + rewrite Val.mul_commut; auto. + - apply eval_mulimm_base; auto. +Qed. + +Theorem eval_mul: binary_constructor_sound mul Val.mul. +Proof. + red; intros until y. + unfold mul; case (mul_match a b); intros; InvEval. + rewrite Val.mul_commut. apply eval_mulimm. auto. + apply eval_mulimm. auto. + TrivialExists. +Qed. + +Theorem eval_mulhs: binary_constructor_sound mulhs Val.mulhs. +Proof. + red; intros. unfold mulhs; destruct Archi.ptr64 eqn:SF. +- econstructor; split. + EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto. + constructor. EvalOp. simpl; eauto. constructor. + simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto. + destruct x; simpl; auto. destruct y; simpl; auto. + change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl. + apply Val.lessdef_same. f_equal. + transitivity (Int.repr (Z.shiftr (Int.signed i * Int.signed i0) 32)). + unfold Int.mulhs; f_equal. rewrite Zbits.Zshiftr_div_two_p by lia. reflexivity. + apply Int.same_bits_eq; intros n N. + change Int.zwordsize with 32 in *. + assert (N1: 0 <= n < 64) by lia. + rewrite Int64.bits_loword by auto. + rewrite Int64.bits_shr' by auto. + change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64. + rewrite zlt_true by lia. + rewrite Int.testbit_repr by auto. + unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia). + transitivity (Z.testbit (Int.signed i * Int.signed i0) (n + 32)). + rewrite Z.shiftr_spec by lia. auto. + apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. + change Int64.zwordsize with 64; lia. +- TrivialExists. +Qed. + +Theorem eval_mulhu: binary_constructor_sound mulhu Val.mulhu. +Proof. + red; intros. unfold mulhu; destruct Archi.ptr64 eqn:SF. +- econstructor; split. + EvalOp. constructor. EvalOp. constructor. EvalOp. constructor. EvalOp. simpl; eauto. + constructor. EvalOp. simpl; eauto. constructor. + simpl; eauto. constructor. simpl; eauto. constructor. simpl; eauto. + destruct x; simpl; auto. destruct y; simpl; auto. + change (Int.ltu (Int.repr 32) Int64.iwordsize') with true; simpl. + apply Val.lessdef_same. f_equal. + transitivity (Int.repr (Z.shiftr (Int.unsigned i * Int.unsigned i0) 32)). + unfold Int.mulhu; f_equal. rewrite Zbits.Zshiftr_div_two_p by lia. reflexivity. + apply Int.same_bits_eq; intros n N. + change Int.zwordsize with 32 in *. + assert (N1: 0 <= n < 64) by lia. + rewrite Int64.bits_loword by auto. + rewrite Int64.bits_shru' by auto. + change (Int.unsigned (Int.repr 32)) with 32. change Int64.zwordsize with 64. + rewrite zlt_true by lia. + rewrite Int.testbit_repr by auto. + unfold Int64.mul. rewrite Int64.testbit_repr by (change Int64.zwordsize with 64; lia). + transitivity (Z.testbit (Int.unsigned i * Int.unsigned i0) (n + 32)). + rewrite Z.shiftr_spec by lia. auto. + apply Int64.same_bits_eqm. apply Int64.eqm_mult; apply Int64.eqm_unsigned_repr. + change Int64.zwordsize with 64; lia. +- TrivialExists. +Qed. + +Theorem eval_andimm: + forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)). +Proof. + intros; red; intros until x. unfold andimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros. exists (Vint Int.zero); split. EvalOp. + destruct x; simpl; auto. subst n. rewrite Int.and_zero. auto. + + predSpec Int.eq Int.eq_spec n Int.mone. + intros. exists x; split; auto. + subst. destruct x; simpl; auto. rewrite Int.and_mone; auto. + + case (andimm_match a); intros. + - InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto. + - InvEval. subst. rewrite Val.and_assoc. simpl. rewrite Int.and_commut. TrivialExists. + - InvEval. TrivialExists. simpl; congruence. + - TrivialExists. +Qed. + +Theorem eval_and: binary_constructor_sound and Val.and. +Proof. + red; intros until y; unfold and; case (and_match a b); intros; InvEval. + - rewrite Val.and_commut. apply eval_andimm; auto. + - apply eval_andimm; auto. + - (*andn*) TrivialExists; simpl; congruence. + - (*andn reverse*) rewrite Val.and_commut. TrivialExists; simpl; congruence. + - TrivialExists. +Qed. + +Theorem eval_orimm: + forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)). +Proof. + intros; red; intros until x. unfold orimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists x; split; auto. + destruct x; simpl; auto. rewrite Int.or_zero; auto. + + predSpec Int.eq Int.eq_spec n Int.mone. + intros. exists (Vint Int.mone); split. EvalOp. + destruct x; simpl; auto. subst n. rewrite Int.or_mone. auto. + + destruct (orimm_match a); intros; InvEval. + - TrivialExists. simpl. rewrite Int.or_commut; auto. + - subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. + - InvEval. TrivialExists. simpl; congruence. + - TrivialExists. +Qed. + + +Remark eval_same_expr: + forall a1 a2 le v1 v2, + same_expr_pure a1 a2 = true -> + eval_expr ge sp e m le a1 v1 -> + eval_expr ge sp e m le a2 v2 -> + a1 = a2 /\ v1 = v2. +Proof. + intros until v2. + destruct a1; simpl; try (intros; discriminate). + destruct a2; simpl; try (intros; discriminate). + case (ident_eq i i0); intros. + subst i0. inversion H0. inversion H1. split. auto. congruence. + discriminate. +Qed. + +Lemma int_eq_commut: forall x y : int, + (Int.eq x y) = (Int.eq y x). +Proof. + intros. + predSpec Int.eq Int.eq_spec x y; + predSpec Int.eq Int.eq_spec y x; + congruence. +Qed. + +Theorem eval_or: binary_constructor_sound or Val.or. +Proof. + unfold or; red; intros. + assert (DEFAULT: exists v, eval_expr ge sp e m le (Eop Oor (a:::b:::Enil)) v /\ Val.lessdef (Val.or x y) v) by TrivialExists. + assert (ROR: forall v n1 n2, + Int.add n1 n2 = Int.iwordsize -> + Val.lessdef (Val.or (Val.shl v (Vint n1)) (Val.shru v (Vint n2))) + (Val.ror v (Vint n2))). + { intros. destruct v; simpl; auto. + destruct (Int.ltu n1 Int.iwordsize) eqn:N1; auto. + destruct (Int.ltu n2 Int.iwordsize) eqn:N2; auto. + simpl. rewrite <- Int.or_ror; auto. } + + destruct (or_match a b); InvEval. + + - rewrite Val.or_commut. apply eval_orimm; auto. + - apply eval_orimm; auto. + - predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize; auto. + destruct (same_expr_pure t1 t2) eqn:?; auto. + InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. + exists (Val.ror v0 (Vint n2)); split. EvalOp. apply ROR; auto. + - predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize; auto. + destruct (same_expr_pure t1 t2) eqn:?; auto. + InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. + exists (Val.ror v1 (Vint n2)); split. EvalOp. rewrite Val.or_commut. apply ROR; auto. + - (*orn*) TrivialExists; simpl; congruence. + - (*orn reversed*) rewrite Val.or_commut. TrivialExists; simpl; congruence. + - set (zstop := (int_highest_bit mask)). + set (zstart := (Int.unsigned start)). + destruct (is_bitfield _ _) eqn:Risbitfield. + + destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + * simpl in H6. + injection H6. + clear H6. + intro. subst y. subst x. + TrivialExists. simpl. f_equal. + unfold insf. + rewrite Risbitfield. + rewrite Rmask. + rewrite Rnmask. + simpl. + unfold bitfield_mask. + subst v0. + subst zstart. + rewrite Int.repr_unsigned. + reflexivity. + * apply DEFAULT. + + apply DEFAULT. + - set (zstop := (int_highest_bit mask)). + set (zstart := (Int.unsigned start)). + destruct (is_bitfield _ _) eqn:Risbitfield. + + destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + * simpl in H6. + injection H6. + clear H6. + intro. subst y. subst x. + TrivialExists. + rewrite Val.or_commut. + simpl. f_equal. + unfold insf. + rewrite Risbitfield. + rewrite Rmask. + rewrite Rnmask. + simpl. + unfold bitfield_mask. + subst v1. + subst zstart. + rewrite Int.repr_unsigned. + reflexivity. + * apply DEFAULT. + + apply DEFAULT. + - set (zstop := (int_highest_bit mask)). + set (zstart := 0). + destruct (is_bitfield _ _) eqn:Risbitfield. + + destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + * subst y. subst x. + TrivialExists. simpl. f_equal. + unfold insf. + rewrite Risbitfield. + rewrite Rmask. + rewrite Rnmask. + simpl. + unfold bitfield_mask. + subst zstart. + rewrite (Val.or_commut (Val.and v1 _)). + rewrite (Val.or_commut (Val.and v1 _)). + destruct v0; simpl; trivial. + unfold Int.ltu, Int.iwordsize, Int.zwordsize. + rewrite Int.unsigned_repr. + ** rewrite Int.unsigned_repr. + *** simpl. + rewrite Int.shl_zero. + reflexivity. + *** simpl. + unfold Int.max_unsigned, Int.modulus. + simpl. + lia. + ** unfold Int.max_unsigned, Int.modulus. + simpl. + lia. + * clear Risbitfield. clear o. + clear zstop. + set (zstop := (int_highest_bit nmask)). + destruct (is_bitfield _ _) eqn:Risbitfield. + ++ destruct (and_dec _ _) as [[Rmask Rnmask] | ]. + ** subst y. subst x. + TrivialExists. simpl. f_equal. + rewrite Val.or_commut. + unfold insf. + rewrite Risbitfield. + rewrite Rmask. + rewrite Rnmask. + simpl. + unfold bitfield_mask. + subst zstart. + rewrite (Val.or_commut (Val.and v0 _)). + rewrite (Val.or_commut (Val.and v0 _)). + destruct v1; simpl; trivial. + unfold Int.ltu, Int.iwordsize, Int.zwordsize. + rewrite Int.unsigned_repr. + *** rewrite Int.unsigned_repr. + **** simpl. + rewrite Int.shl_zero. + reflexivity. + **** simpl. + unfold Int.max_unsigned, Int.modulus. + simpl. + lia. + *** unfold Int.max_unsigned, Int.modulus. + simpl. + lia. + ** apply DEFAULT. + ++ apply DEFAULT. + + apply DEFAULT. + - apply DEFAULT. +Qed. + +Theorem eval_xorimm: + forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)). +Proof. + intros; red; intros until x. unfold xorimm. + + predSpec Int.eq Int.eq_spec n Int.zero. + - intros. exists x; split. auto. + destruct x; simpl; auto. subst n. rewrite Int.xor_zero. auto. + - predSpec Int.eq Int.eq_spec n Int.mone. + -- subst n. intros. rewrite <- Val.not_xor. TrivialExists. + -- intros. destruct (xorimm_match a); intros; InvEval. + + TrivialExists. simpl. rewrite Int.xor_commut; auto. + + subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. + predSpec Int.eq Int.eq_spec (Int.xor n2 n) Int.zero. + * exists v1; split; auto. destruct v1; simpl; auto. rewrite H1, Int.xor_zero; auto. + * TrivialExists. + + TrivialExists. +Qed. + +Theorem eval_xor: binary_constructor_sound xor Val.xor. +Proof. + red; intros until y; unfold xor; case (xor_match a b); intros; InvEval. + - rewrite Val.xor_commut. apply eval_xorimm; auto. + - apply eval_xorimm; auto. + - TrivialExists. +Qed. + +Theorem eval_notint: unary_constructor_sound notint Val.notint. +Proof. + assert (forall v, Val.lessdef (Val.notint (Val.notint v)) v). + destruct v; simpl; auto. rewrite Int.not_involutive; auto. + unfold notint; red; intros until x; case (notint_match a); intros; InvEval. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - TrivialExists; simpl; congruence. + - subst x. exists (Val.and v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.and v1 (Vint n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + - subst x. exists (Val.or v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.or v1 (Vint n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + - subst x. exists (Val.xor v1 v0); split; trivial. + econstructor. constructor. eassumption. constructor. + eassumption. constructor. simpl. reflexivity. + - subst x. exists (Val.xor v1 (Vint n)); split; trivial. + econstructor. constructor. eassumption. constructor. + simpl. reflexivity. + (* andn *) + - subst x. TrivialExists. simpl. + destruct v0; destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int.not_and_or_not. + rewrite Int.not_involutive. + apply Int.or_commut. + - subst x. TrivialExists. simpl. + destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int.not_and_or_not. + rewrite Int.not_involutive. + reflexivity. + (* orn *) + - subst x. TrivialExists. simpl. + destruct v0; destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int.not_or_and_not. + rewrite Int.not_involutive. + apply Int.and_commut. + - subst x. TrivialExists. simpl. + destruct v1; simpl; trivial. + f_equal. f_equal. + rewrite Int.not_or_and_not. + rewrite Int.not_involutive. + reflexivity. + - subst x. exists v1; split; trivial. + - TrivialExists. + - TrivialExists. +Qed. + +Theorem eval_divs_base: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.divs x y = Some z -> + exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. +Proof. + intros; unfold divs_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +Theorem eval_mods_base: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.mods x y = Some z -> + exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v. +Proof. + intros; unfold mods_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +Theorem eval_divu_base: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.divu x y = Some z -> + exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. +Proof. + intros; unfold divu_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +(* For using 64-bit unsigned division for 32-bit + + intros until z. + intros Hax Hby Hdiv. unfold divu_base. + pose proof (divu_is_divlu x y) as DIVU. + destruct (Val.divlu (Val.longofintu x) (Val.longofintu y)) + as [ ql | ] eqn:Ediv. + { TrivialExists. + { econstructor. eapply eval_helper_2; eauto. + { econstructor. econstructor. eassumption. + constructor. simpl. reflexivity. } + { econstructor. econstructor. eassumption. + constructor. simpl. reflexivity. } + { DeclHelper. } + { UseHelper. } + constructor. } + simpl. + congruence. + } + congruence. +Qed. + *) + +Theorem eval_modu_base: + forall le a b x y z, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + Val.modu x y = Some z -> + exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v. +Proof. + intros; unfold modu_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +(* for using 64-bit unsigned modulo for 32-bit + + intros until z. + intros Hax Hby Hmod. unfold modu_base. + pose proof (modu_is_modlu x y) as MODU. + destruct (Val.modlu (Val.longofintu x) (Val.longofintu y)) + as [ ql | ] eqn:Emod. + { TrivialExists. + { econstructor. eapply eval_helper_2; eauto. + { econstructor. econstructor. eassumption. + constructor. simpl. reflexivity. } + { econstructor. econstructor. eassumption. + constructor. simpl. reflexivity. } + { DeclHelper. } + { UseHelper. } + constructor. } + simpl. + congruence. + } + congruence. +Qed. + *) + +Theorem eval_shrximm: + forall le a n x z, + eval_expr ge sp e m le a x -> + Val.shrx x (Vint n) = Some z -> + exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v. +Proof. + intros. unfold shrximm. + predSpec Int.eq Int.eq_spec n Int.zero. + subst n. exists x; split; auto. + destruct x; simpl in H0; try discriminate. + destruct (Int.ltu Int.zero (Int.repr 31)); inv H0. + replace (Int.shrx i Int.zero) with i. auto. + unfold Int.shrx, Int.divs. rewrite Int.shl_zero. + change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto. + econstructor; split. EvalOp. + simpl. rewrite H0. simpl. reflexivity. auto. +Qed. + +Theorem eval_shl: binary_constructor_sound shl Val.shl. +Proof. + red; intros until y; unfold shl; case (shl_match b); intros. + InvEval. apply eval_shlimm; auto. + TrivialExists. +Qed. + +Theorem eval_shr: binary_constructor_sound shr Val.shr. +Proof. + red; intros until y; unfold shr; case (shr_match b); intros. + InvEval. apply eval_shrimm; auto. + TrivialExists. +Qed. + +Theorem eval_shru: binary_constructor_sound shru Val.shru. +Proof. + red; intros until y; unfold shru; case (shru_match b); intros. + InvEval. apply eval_shruimm; auto. + TrivialExists. +Qed. + +Theorem eval_negf: unary_constructor_sound negf Val.negf. +Proof. + red; intros. TrivialExists. +Qed. + +Theorem eval_absf: unary_constructor_sound absf Val.absf. +Proof. + red; intros. TrivialExists. +Qed. + +Theorem eval_addf: binary_constructor_sound addf Val.addf. +Proof. + red; intros; TrivialExists. +Qed. + +Theorem eval_subf: binary_constructor_sound subf Val.subf. +Proof. + red; intros; TrivialExists. +Qed. + +Theorem eval_mulf: binary_constructor_sound mulf Val.mulf. +Proof. + red; intros; TrivialExists. +Qed. + +Theorem eval_negfs: unary_constructor_sound negfs Val.negfs. +Proof. + red; intros. TrivialExists. +Qed. + +Theorem eval_absfs: unary_constructor_sound absfs Val.absfs. +Proof. + red; intros. TrivialExists. +Qed. + +Theorem eval_addfs: binary_constructor_sound addfs Val.addfs. +Proof. + red; intros; TrivialExists. +Qed. + +Theorem eval_subfs: binary_constructor_sound subfs Val.subfs. +Proof. + red; intros; TrivialExists. +Qed. + +Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs. +Proof. + red; intros; TrivialExists. +Qed. + +Section COMP_IMM. + +Variable default: comparison -> int -> condition. +Variable intsem: comparison -> int -> int -> bool. +Variable sem: comparison -> val -> val -> val. + +Hypothesis sem_int: forall c x y, sem c (Vint x) (Vint y) = Val.of_bool (intsem c x y). +Hypothesis sem_undef: forall c v, sem c Vundef v = Vundef. +Hypothesis sem_eq: forall x y, sem Ceq (Vint x) (Vint y) = Val.of_bool (Int.eq x y). +Hypothesis sem_ne: forall x y, sem Cne (Vint x) (Vint y) = Val.of_bool (negb (Int.eq x y)). +Hypothesis sem_default: forall c v n, sem c v (Vint n) = Val.of_optbool (eval_condition (default c n) (v :: nil) m). + +Lemma eval_compimm: + forall le c a n2 x, + eval_expr ge sp e m le a x -> + exists v, eval_expr ge sp e m le (compimm default intsem c a n2) v + /\ Val.lessdef (sem c x (Vint n2)) v. +Proof. + intros until x. + unfold compimm; case (compimm_match c a); intros. +(* constant *) + - InvEval. rewrite sem_int. TrivialExists. simpl. destruct (intsem c0 n1 n2); auto. +(* eq cmp *) + - InvEval. inv H. simpl in H5. inv H5. + destruct (Int.eq_dec n2 Int.zero). + + subst n2. TrivialExists. + simpl. rewrite eval_negate_condition. + destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto. + rewrite sem_undef; auto. + + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists. + simpl. destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_eq; auto. + rewrite sem_undef; auto. + exists (Vint Int.zero); split. EvalOp. + destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; rewrite sem_eq; rewrite Int.eq_false; auto. + rewrite sem_undef; auto. +(* ne cmp *) + - InvEval. inv H. simpl in H5. inv H5. + destruct (Int.eq_dec n2 Int.zero). + + subst n2. TrivialExists. + simpl. destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto. + rewrite sem_undef; auto. + + destruct (Int.eq_dec n2 Int.one). subst n2. TrivialExists. + simpl. rewrite eval_negate_condition. destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; simpl; rewrite sem_ne; auto. + rewrite sem_undef; auto. + exists (Vint Int.one); split. EvalOp. + destruct (eval_condition c0 vl m); simpl. + unfold Vtrue, Vfalse. destruct b; rewrite sem_ne; rewrite Int.eq_false; auto. + rewrite sem_undef; auto. +(* default *) + - TrivialExists. simpl. rewrite sem_default. auto. +Qed. + +Hypothesis sem_swap: + forall c x y, sem (swap_comparison c) x y = sem c y x. + +Lemma eval_compimm_swap: + forall le c a n2 x, + eval_expr ge sp e m le a x -> + exists v, eval_expr ge sp e m le (compimm default intsem (swap_comparison c) a n2) v + /\ Val.lessdef (sem c (Vint n2) x) v. +Proof. + intros. rewrite <- sem_swap. eapply eval_compimm; eauto. +Qed. + +End COMP_IMM. + +Theorem eval_comp: + forall c, binary_constructor_sound (comp c) (Val.cmp c). +Proof. + intros; red; intros until y. unfold comp; case (comp_match a b); intros; InvEval. + eapply eval_compimm_swap; eauto. + intros. unfold Val.cmp. rewrite Val.swap_cmp_bool; auto. + eapply eval_compimm; eauto. + TrivialExists. +Qed. + +Theorem eval_compu: + forall c, binary_constructor_sound (compu c) (Val.cmpu (Mem.valid_pointer m) c). +Proof. + intros; red; intros until y. unfold compu; case (compu_match a b); intros; InvEval. + eapply eval_compimm_swap; eauto. + intros. unfold Val.cmpu. rewrite Val.swap_cmpu_bool; auto. + eapply eval_compimm; eauto. + TrivialExists. +Qed. + +Theorem eval_compf: + forall c, binary_constructor_sound (compf c) (Val.cmpf c). +Proof. + intros; red; intros. unfold compf. TrivialExists. +Qed. + +Theorem eval_compfs: + forall c, binary_constructor_sound (compfs c) (Val.cmpfs c). +Proof. + intros; red; intros. unfold compfs. TrivialExists. +Qed. + +Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8). +Proof. + red; intros until x. unfold cast8signed. case (cast8signed_match a); intros; InvEval. + TrivialExists. + TrivialExists. +Qed. + +Theorem eval_cast8unsigned: unary_constructor_sound cast8unsigned (Val.zero_ext 8). +Proof. + red; intros until x. unfold cast8unsigned. + rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate. +Qed. + +Theorem eval_cast16signed: unary_constructor_sound cast16signed (Val.sign_ext 16). +Proof. + red; intros until x. unfold cast16signed. case (cast16signed_match a); intros; InvEval. + TrivialExists. + TrivialExists. +Qed. + +Theorem eval_cast16unsigned: unary_constructor_sound cast16unsigned (Val.zero_ext 16). +Proof. + red; intros until x. unfold cast8unsigned. + rewrite Val.zero_ext_and. apply eval_andimm. compute; auto. discriminate. +Qed. + +Theorem eval_intoffloat: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.intoffloat x = Some y -> + exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v. +Proof. + intros; unfold intoffloat. TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_intuoffloat: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.intuoffloat x = Some y -> + exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v. +Proof. + intros; unfold intuoffloat. TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_floatofintu: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.floatofintu x = Some y -> + exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v. +Proof. + intros. + unfold Val.floatofintu in *. + unfold floatofintu. + destruct (floatofintu_match a). + - InvEval. + TrivialExists. + - InvEval. + TrivialExists. + constructor. econstructor. constructor. eassumption. constructor. + simpl. f_equal. constructor. + simpl. + destruct x; simpl; trivial; try discriminate. + f_equal. + inv H0. + f_equal. + rewrite Float.of_intu_of_longu. + reflexivity. +Qed. + +Theorem eval_floatofint: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.floatofint x = Some y -> + exists v, eval_expr ge sp e m le (floatofint a) v /\ Val.lessdef y v. +Proof. + intros. + unfold floatofint. + destruct (floatofint_match a). + - InvEval. + TrivialExists. + - InvEval. + TrivialExists. + constructor. econstructor. constructor. eassumption. constructor. + simpl. f_equal. constructor. + simpl. + destruct x; simpl; trivial; try discriminate. + f_equal. + inv H0. + f_equal. + rewrite Float.of_int_of_long. + reflexivity. +Qed. + +Theorem eval_intofsingle: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.intofsingle x = Some y -> + exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v. +Proof. + intros; unfold intofsingle. TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_singleofint: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.singleofint x = Some y -> + exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v. +Proof. + intros; unfold singleofint; TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_intuofsingle: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.intuofsingle x = Some y -> + exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v. +Proof. + intros; unfold intuofsingle. TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_singleofintu: + forall le a x y, + eval_expr ge sp e m le a x -> + Val.singleofintu x = Some y -> + exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v. +Proof. + intros; unfold intuofsingle. TrivialExists. + simpl. rewrite H0. reflexivity. +Qed. + +Theorem eval_singleoffloat: unary_constructor_sound singleoffloat Val.singleoffloat. +Proof. + red; intros. unfold singleoffloat. TrivialExists. +Qed. + +Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle. +Proof. + red; intros. unfold floatofsingle. TrivialExists. +Qed. + +Theorem eval_addressing: + forall le chunk a v b ofs, + eval_expr ge sp e m le a v -> + v = Vptr b ofs -> + match addressing chunk a with (mode, args) => + exists vl, + eval_exprlist ge sp e m le args vl /\ + eval_addressing ge sp mode vl = Some v + end. +Proof. + intros until v. unfold addressing; case (addressing_match a); intros; InvEval. + - exists (@nil val); split. eauto with evalexpr. simpl. auto. + - destruct (orb _ _). + + exists (Vptr b ofs0 :: nil); split. + constructor. EvalOp. simpl. congruence. constructor. simpl. rewrite Ptrofs.add_zero. congruence. + + exists (@nil val); split. constructor. simpl; auto. + - exists (v1 :: nil); split. eauto with evalexpr. simpl. + destruct v1; simpl in H; try discriminate. + - exists (v1 :: nil); split. eauto with evalexpr. simpl. + destruct v1; simpl in H; try discriminate. destruct Archi.ptr64 eqn:SF; inv H. + simpl. auto. + - destruct (Compopts.optim_xsaddr tt). + + destruct (Z.eq_dec _ _). + * exists (v1 :: v2 :: nil); split. + repeat (constructor; auto). simpl. rewrite Int.repr_unsigned. destruct v2; simpl in *; congruence. + * exists (v1 :: v0 :: nil); split. + repeat (constructor; auto). econstructor. + repeat (constructor; auto). eassumption. simpl. congruence. + simpl. congruence. + + exists (v1 :: v0 :: nil); split. + repeat (constructor; auto). econstructor. + repeat (constructor; auto). eassumption. simpl. congruence. + simpl. congruence. + - unfold addxl in *. + destruct (Compopts.optim_xsaddr tt). + + unfold int_of_shift1_4 in *. + destruct (Z.eq_dec _ _). + * exists (v0 :: v1 :: nil); split. + repeat (constructor; auto). simpl. + congruence. + * eexists; split. + repeat (constructor; auto). eassumption. + econstructor. + repeat (constructor; auto). eassumption. simpl. + reflexivity. + simpl. congruence. + + eexists; split. + repeat (constructor; auto). eassumption. + econstructor. + repeat (constructor; auto). eassumption. simpl. + reflexivity. + simpl. unfold int_of_shift1_4 in *. congruence. + - exists (v1 :: v0 :: nil); split. repeat (constructor; auto). simpl. congruence. + - exists (v :: nil); split. eauto with evalexpr. subst. simpl. rewrite Ptrofs.add_zero; auto. +Qed. + +Theorem eval_builtin_arg: + forall a v, + eval_expr ge sp e m nil a v -> + CminorSel.eval_builtin_arg ge sp e m (builtin_arg a) v. +Proof. + intros until v. unfold builtin_arg; case (builtin_arg_match a); intros. +- InvEval. constructor. +- InvEval. constructor. +- InvEval. constructor. +- InvEval. simpl in H5. inv H5. constructor. +- InvEval. subst v. constructor; auto. +- inv H. InvEval. simpl in H6; inv H6. constructor; auto. +- destruct Archi.ptr64 eqn:SF. ++ constructor; auto. ++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vint n) else Val.add v1 (Vint n)). + repeat constructor; auto. + rewrite SF; auto. +- destruct Archi.ptr64 eqn:SF. ++ InvEval. replace v with (if Archi.ptr64 then Val.addl v1 (Vlong n) else Val.add v1 (Vlong n)). + repeat constructor; auto. ++ constructor; auto. +- constructor; auto. +Qed. + +(* ternary *) +(* does not work due to possible nondeterminism +Lemma cond_to_condition0_correct : + forall cond : condition, + forall al : exprlist, + match (cond_to_condition0 cond al) with + | None => True + | Some(cond0, e1) => + forall le vl v1, + eval_expr ge sp e m le e1 v1 -> + eval_exprlist ge sp e m le al vl -> + (eval_condition0 cond0 v1 m) = (eval_condition cond vl m) + end. +Proof. + intros. + unfold cond_to_condition0. + case (cond_to_condition0_match cond al); trivial. + { + intros. + destruct (Int.eq_dec _ _); trivial. + intros until v1. + intros He1 Hel. + InvEval. + simpl. + f_equal. + eapply eval_expr_determ. eassumption. + } +Qed. +*) + +Lemma eval_neg_condition0: + forall cond0: condition0, + forall v1: val, + forall m: mem, + (eval_condition0 (negate_condition0 cond0) v1 m) = + option_map negb (eval_condition0 cond0 v1 m). +Proof. + intros. + destruct cond0; simpl; + try rewrite Val.negate_cmp_bool; + try rewrite Val.negate_cmpu_bool; + try rewrite Val.negate_cmpl_bool; + try rewrite Val.negate_cmplu_bool; + reflexivity. +Qed. + +Lemma select_neg: + forall a b c, + Val.select (option_map negb a) b c = + Val.select a c b. +Proof. + destruct a; simpl; trivial. + destruct b; simpl; trivial. +Qed. + +Lemma eval_select0: + forall le ty cond0 ac vc a1 v1 a2 v2, + eval_expr ge sp e m le ac vc -> + eval_expr ge sp e m le a1 v1 -> + eval_expr ge sp e m le a2 v2 -> + exists v, + eval_expr ge sp e m le (select0 ty cond0 a1 a2 ac) v + /\ Val.lessdef (Val.select (eval_condition0 cond0 vc m) v1 v2 ty) v. +Proof. + intros. + unfold select0. + destruct (select0_match ty cond0 a1 a2 ac). + all: InvEval; econstructor; split; + try repeat (try econstructor; try eassumption). + all: rewrite eval_neg_condition0; rewrite select_neg; constructor. +Qed. + +Lemma bool_cond0_ne: + forall ob : option bool, + forall m, + (eval_condition0 (Ccomp0 Cne) (Val.of_optbool ob) m) = ob. +Proof. + destruct ob; simpl; trivial. + intro. + destruct b; reflexivity. +Qed. + +Lemma eval_condition_ccomp_swap : + forall c x y m, + eval_condition (Ccomp (swap_comparison c)) (x :: y :: nil) m= + eval_condition (Ccomp c) (y :: x :: nil) m. +Proof. + intros; unfold eval_condition; + apply Val.swap_cmp_bool. +Qed. + +Lemma eval_condition_ccompu_swap : + forall c x y m, + eval_condition (Ccompu (swap_comparison c)) (x :: y :: nil) m= + eval_condition (Ccompu c) (y :: x :: nil) m. +Proof. + intros; unfold eval_condition; + apply Val.swap_cmpu_bool. +Qed. + +Lemma eval_condition_ccompl_swap : + forall c x y m, + eval_condition (Ccompl (swap_comparison c)) (x :: y :: nil) m= + eval_condition (Ccompl c) (y :: x :: nil) m. +Proof. + intros; unfold eval_condition; + apply Val.swap_cmpl_bool. +Qed. + +Lemma eval_condition_ccomplu_swap : + forall c x y m, + eval_condition (Ccomplu (swap_comparison c)) (x :: y :: nil) m= + eval_condition (Ccomplu c) (y :: x :: nil) m. +Proof. + intros; unfold eval_condition; + apply Val.swap_cmplu_bool. +Qed. + +Lemma int_ltu_zero : forall i, Int.ltu i Int.zero = false. +Proof. + intro. + unfold Int.ltu. + apply zlt_false. + rewrite Int.unsigned_zero. + pose proof (Int.unsigned_range i). + lia. +Qed. + +Lemma cmpu_bool_Clt : forall pred v0 b, + Val.cmpu_bool pred Clt v0 (Vint Int.zero) = Some b -> b = false. +Proof. + intros until b. intro CMP. + destruct v0; cbn in CMP; try discriminate. + inv CMP. + apply int_ltu_zero. +Qed. + +Lemma cmpu_bool_Cge : forall pred v0 b, + Val.cmpu_bool pred Cge v0 (Vint Int.zero) = Some b -> b = true. +Proof. + intros until b. intro CMP. + destruct v0; cbn in CMP; try discriminate. + inv CMP. + rewrite int_ltu_zero. + reflexivity. +Qed. + +Lemma int64_ltu_zero : forall i, Int64.ltu i Int64.zero = false. +Proof. + intro. + unfold Int64.ltu. + apply zlt_false. + rewrite Int64.unsigned_zero. + pose proof (Int64.unsigned_range i). + lia. +Qed. + +Lemma cmplu_bool_Clt : forall pred v0 b, + Val.cmplu_bool pred Clt v0 (Vlong Int64.zero) = Some b -> b = false. +Proof. + intros until b. intro CMP. + destruct v0; cbn in CMP; try discriminate. + { inv CMP. + apply int64_ltu_zero. + } + repeat rewrite if_same in CMP. + discriminate. +Qed. + +Lemma cmplu_bool_Cge : forall pred v0 b, + Val.cmplu_bool pred Cge v0 (Vlong Int64.zero) = Some b -> b = true. +Proof. + intros until b. intro CMP. + destruct v0; cbn in CMP; try discriminate. + { inv CMP. + rewrite int64_ltu_zero. + reflexivity. + } + repeat rewrite if_same in CMP. + discriminate. +Qed. + +Theorem eval_select: + forall le ty cond al vl a1 v1 a2 v2 a b, + select ty cond al a1 a2 = Some a -> + eval_exprlist ge sp e m le al vl -> + eval_expr ge sp e m le a1 v1 -> + eval_expr ge sp e m le a2 v2 -> + eval_condition cond vl m = Some b -> + exists v, + eval_expr ge sp e m le a v + /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v. +Proof. + unfold select. + intros until b. + intro Hop; injection Hop; clear Hop; intro; subst a. + intros HeL He1 He2 HeC. + destruct same_expr_pure eqn:SAME. + { + destruct (eval_same_expr a1 a2 le v1 v2 SAME He1 He2) as [EQ1 EQ2]. + subst a2. subst v2. + exists v1; split; trivial. + cbn. + rewrite if_same. + apply Val.lessdef_normalize. + } + unfold cond_to_condition0. + destruct (cond_to_condition0_match cond al). + { + InvEval. + rewrite <- HeC. + destruct (Int.eq_dec x Int.zero). + { subst x. + simpl. + change (Val.cmp_bool c v0 (Vint Int.zero)) + with (eval_condition0 (Ccomp0 c) v0 m). + eapply eval_select0; eassumption. + } + simpl. + erewrite <- (bool_cond0_ne (Val.cmp_bool c v0 (Vint x))). + eapply eval_select0; repeat (try econstructor; try eassumption). + } + { + InvEval. + rewrite <- HeC. + destruct (Int.eq_dec x Int.zero). + { subst x. + simpl. + change (Val.cmpu_bool (Mem.valid_pointer m) c v0 (Vint Int.zero)) + with (eval_condition0 (Ccompu0 c) v0 m). + destruct c. + all: try (eapply eval_select0; eassumption). + all: cbn. + all: cbn in HeC. + { + destruct (Val.cmpu_bool (Mem.valid_pointer m) Clt v0 (Vint Int.zero)) eqn:CMP; try discriminate. + inv HeC. + rewrite (cmpu_bool_Clt (Mem.valid_pointer m) v0 b CMP). + cbn. + exists v2. + split; auto using Val.lessdef_normalize. + } + { + destruct (Val.cmpu_bool (Mem.valid_pointer m) Cge v0 (Vint Int.zero)) eqn:CMP; try discriminate. + inv HeC. + rewrite (cmpu_bool_Cge (Mem.valid_pointer m) v0 b CMP). + cbn. + exists v1. + split; auto using Val.lessdef_normalize. + } + } + simpl. + erewrite <- (bool_cond0_ne (Val.cmpu_bool (Mem.valid_pointer m) c v0 (Vint x))). + eapply eval_select0; repeat (try econstructor; try eassumption). + } + { + InvEval. + rewrite <- HeC. + destruct (Int64.eq_dec x Int64.zero). + { subst x. + simpl. + change (Val.cmpl_bool c v0 (Vlong Int64.zero)) + with (eval_condition0 (Ccompl0 c) v0 m). + eapply eval_select0; eassumption. + } + simpl. + erewrite <- (bool_cond0_ne (Val.cmpl_bool c v0 (Vlong x))). + eapply eval_select0; repeat (try econstructor; try eassumption). + } + { + InvEval. + rewrite <- HeC. + destruct (Int64.eq_dec x Int64.zero). + { subst x. + simpl. + change (Val.cmplu_bool (Mem.valid_pointer m) c v0 (Vlong Int64.zero)) + with (eval_condition0 (Ccomplu0 c) v0 m). + destruct c. + all: try (eapply eval_select0; eassumption). + all: cbn. + all: cbn in HeC. + { + destruct (Val.cmplu_bool (Mem.valid_pointer m) Clt v0 (Vlong Int64.zero)) eqn:CMP; try discriminate. + inv HeC. + rewrite (cmplu_bool_Clt (Mem.valid_pointer m) v0 b CMP). + cbn. + exists v2. + split; auto using Val.lessdef_normalize. + } + { + destruct (Val.cmplu_bool (Mem.valid_pointer m) Cge v0 (Vlong Int64.zero)) eqn:CMP; try discriminate. + inv HeC. + rewrite (cmplu_bool_Cge (Mem.valid_pointer m) v0 b CMP). + cbn. + exists v1. + split; auto using Val.lessdef_normalize. + } + } + simpl. + erewrite <- (bool_cond0_ne (Val.cmplu_bool (Mem.valid_pointer m) c v0 (Vlong x))). + eapply eval_select0; repeat (try econstructor; try eassumption). + } + erewrite <- (bool_cond0_ne (Some b)). + eapply eval_select0; repeat (try econstructor; try eassumption). + rewrite <- HeC. + simpl. + reflexivity. +Qed. + +(* floating-point division *) +Theorem eval_divf_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v. +Proof. + intros; unfold divf_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + + +Lemma eval_divfs_base1: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divfs_base1 b) v /\ Val.lessdef (ExtValues.invfs y) v. +Proof. + intros; unfold divfs_base1. + econstructor; split. + repeat (try econstructor; try eassumption). + trivial. +Qed. + +Lemma eval_divfs_baseX: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divfs_baseX a b) v /\ Val.lessdef (Val.divfs x y) v. +Proof. + intros; unfold divfs_base. + econstructor; split. eapply eval_helper_2; eauto. DeclHelper. UseHelper. auto. +Qed. + +Theorem eval_divfs_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v. +Proof. + intros; unfold divfs_base. + destruct (divfs_base_match _). + - destruct (Float32.eq_dec _ _). + + exists (Val.divfs x y). + split; trivial. repeat (try econstructor; try eassumption). + simpl. InvEval. reflexivity. + + apply eval_divfs_baseX; assumption. + - apply eval_divfs_baseX; assumption. +Qed. + +(** Platform-specific known builtins *) + +Lemma eval_fma: + forall al a vl v le, + gen_fma al = Some a -> + eval_exprlist ge sp e m le al vl -> + platform_builtin_sem BI_fma vl = Some v -> + exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. +Proof. + unfold gen_fma. + intros until le. + intro Heval. + destruct (gen_fma_match _) in *; try discriminate. + all: inversion Heval; subst a; clear Heval; intro; InvEval. + - subst v1. + TrivialExists. + destruct v0; simpl; trivial; + destruct v2; simpl; trivial; + destruct v3; simpl; trivial. + - intro Heval. + simpl in Heval. + inv Heval. + TrivialExists. + destruct v0; simpl; trivial; + destruct v1; simpl; trivial; + destruct v2; simpl; trivial. +Qed. + +Lemma eval_fmaf: + forall al a vl v le, + gen_fmaf al = Some a -> + eval_exprlist ge sp e m le al vl -> + platform_builtin_sem BI_fmaf vl = Some v -> + exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. +Proof. + unfold gen_fmaf. + intros until le. + intro Heval. + destruct (gen_fmaf_match _) in *; try discriminate. + all: inversion Heval; subst a; clear Heval; intro; InvEval. + - subst v1. + TrivialExists. + destruct v0; simpl; trivial; + destruct v2; simpl; trivial; + destruct v3; simpl; trivial. + - intro Heval. + simpl in Heval. + inv Heval. + TrivialExists. + destruct v0; simpl; trivial; + destruct v1; simpl; trivial; + destruct v2; simpl; trivial. +Qed. + +Theorem eval_platform_builtin: + forall bf al a vl v le, + platform_builtin bf al = Some a -> + eval_exprlist ge sp e m le al vl -> + platform_builtin_sem bf vl = Some v -> + exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. +Proof. + destruct bf; intros until le; intro Heval. + all: try (inversion Heval; subst a; clear Heval; + exists v; split; trivial; + repeat (try econstructor; try eassumption)). + - apply eval_fma; assumption. + - apply eval_fmaf; assumption. +Qed. + +End CMCONSTR. diff --git a/kvx/Stacklayout.v b/kvx/Stacklayout.v new file mode 100644 index 00000000..05cfa1d7 --- /dev/null +++ b/kvx/Stacklayout.v @@ -0,0 +1,151 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Machine- and ABI-dependent layout information for activation records. *) + +Require Import Coqlib. +Require Import AST Memory Separation. +Require Import Bounds. +Require Import Lia. + +Local Open Scope sep_scope. + +(** The general shape of activation records is as follows, + from bottom (lowest offsets) to top: +- Space for outgoing arguments to function calls. +- Back link to parent frame +- Return address +- Saved values of callee-save registers used by the function. +- Local stack slots. +- Space for the stack-allocated data declared in Cminor. + +The stack pointer is kept 16-aligned. +*) + +Definition fe_ofs_arg := 0. + +Definition make_env (b: bounds) : frame_env := + let w := if Archi.ptr64 then 8 else 4 in + let olink := align (4 * b.(bound_outgoing)) w in (* back link *) + let oretaddr := olink + w in (* return address *) + let ocs := oretaddr + w in (* callee-saves *) + let ol := align (size_callee_save_area b ocs) 8 in (* locals *) + let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *) + let sz := align (ostkdata + b.(bound_stack_data)) 16 in + {| fe_size := sz; + fe_ofs_link := olink; + fe_ofs_retaddr := oretaddr; + fe_ofs_local := ol; + fe_ofs_callee_save := ocs; + fe_stack_data := ostkdata; + fe_used_callee_save := b.(used_callee_save) |}. + +Lemma frame_env_separated: + forall b sp m P, + let fe := make_env b in + m |= range sp 0 (fe_stack_data fe) ** range sp (fe_stack_data fe + bound_stack_data b) (fe_size fe) ** P -> + m |= range sp (fe_ofs_local fe) (fe_ofs_local fe + 4 * bound_local b) + ** range sp fe_ofs_arg (fe_ofs_arg + 4 * bound_outgoing b) + ** range sp (fe_ofs_link fe) (fe_ofs_link fe + size_chunk Mptr) + ** range sp (fe_ofs_retaddr fe) (fe_ofs_retaddr fe + size_chunk Mptr) + ** range sp (fe_ofs_callee_save fe) (size_callee_save_area b (fe_ofs_callee_save fe)) + ** P. +Proof. +Local Opaque Z.add Z.mul sepconj range. + intros; cbn. + set (w := if Archi.ptr64 then 8 else 4). + set (olink := align (4 * b.(bound_outgoing)) w). + set (oretaddr := olink + w). + set (ocs := oretaddr + w). + set (ol := align (size_callee_save_area b ocs) 8). + set (ostkdata := align (ol + 4 * b.(bound_local)) 8). + replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto). + assert (0 < w) by (unfold w; destruct Archi.ptr64; lia). + generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. + assert (0 <= 4 * b.(bound_outgoing)) by lia. + assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia). + assert (olink + w <= oretaddr) by (unfold oretaddr; lia). + assert (oretaddr + w <= ocs) by (unfold ocs; lia). + assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). + assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia). + assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia). +(* Reorder as: + outgoing + back link + retaddr + callee-save + local *) + rewrite sep_swap12. + rewrite sep_swap23. + rewrite sep_swap34. + rewrite sep_swap45. +(* Apply range_split and range_split2 repeatedly *) + unfold fe_ofs_arg. + apply range_split_2. fold olink; lia. lia. + apply range_split. lia. + apply range_split. lia. + apply range_split_2. fold ol. lia. lia. + apply range_drop_right with ostkdata. lia. + eapply sep_drop2. eexact H. +Qed. + +Lemma frame_env_range: + forall b, + let fe := make_env b in + 0 <= fe_stack_data fe /\ fe_stack_data fe + bound_stack_data b <= fe_size fe. +Proof. + intros; cbn. + set (w := if Archi.ptr64 then 8 else 4). + set (olink := align (4 * b.(bound_outgoing)) w). + set (oretaddr := olink + w). + set (ocs := oretaddr + w). + set (ol := align (size_callee_save_area b ocs) 8). + set (ostkdata := align (ol + 4 * b.(bound_local)) 8). + assert (0 < w) by (unfold w; destruct Archi.ptr64; lia). + generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. + assert (0 <= 4 * b.(bound_outgoing)) by lia. + assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; lia). + assert (olink + w <= oretaddr) by (unfold oretaddr; lia). + assert (oretaddr + w <= ocs) by (unfold ocs; lia). + assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). + assert (size_callee_save_area b ocs <= ol) by (apply align_le; lia). + assert (ol + 4 * b.(bound_local) <= ostkdata) by (apply align_le; lia). + split. lia. apply align_le. lia. +Qed. + +Lemma frame_env_aligned: + forall b, + let fe := make_env b in + (8 | fe_ofs_arg) + /\ (8 | fe_ofs_local fe) + /\ (8 | fe_stack_data fe) + /\ (align_chunk Mptr | fe_ofs_link fe) + /\ (align_chunk Mptr | fe_ofs_retaddr fe). +Proof. + intros; cbn. + set (w := if Archi.ptr64 then 8 else 4). + set (olink := align (4 * b.(bound_outgoing)) w). + set (oretaddr := olink + w). + set (ocs := oretaddr + w). + set (ol := align (size_callee_save_area b ocs) 8). + set (ostkdata := align (ol + 4 * b.(bound_local)) 8). + assert (0 < w) by (unfold w; destruct Archi.ptr64; lia). + replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto). + split. apply Z.divide_0_r. + split. apply align_divides; lia. + split. apply align_divides; lia. + split. apply align_divides; lia. + apply Z.divide_add_r. apply align_divides; lia. apply Z.divide_refl. +Qed. diff --git a/kvx/TargetPrinter.ml b/kvx/TargetPrinter.ml new file mode 100644 index 00000000..9e2e3776 --- /dev/null +++ b/kvx/TargetPrinter.ml @@ -0,0 +1,892 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* Printing RISC-V assembly code in asm syntax *) + +open Printf +open Camlcoq +open Sections +open AST +open Asm +open PrintAsmaux +open Fileinfo + +(* Module containing the printing functions *) + +module Target (*: TARGET*) = + struct + +(* Basic printing functions *) + + let comment = "#" + + type idiv_function_kind = + | Idiv_system + | Idiv_stsud + | Idiv_fp;; + + let idiv_function_kind = function + "stsud" -> Idiv_stsud + | "system" -> Idiv_system + | "fp" -> Idiv_fp + | _ -> failwith "unknown integer division kind";; + + let idiv_function_kind_32bit () = idiv_function_kind !Clflags.option_div_i32;; + let idiv_function_kind_64bit () = idiv_function_kind !Clflags.option_div_i64;; + + let subst_symbol = function + "__compcert_i64_udiv" -> + (match idiv_function_kind_64bit () with + | Idiv_system | Idiv_fp -> "__udivdi3" + | Idiv_stsud -> "__compcert_i64_udiv_stsud") + | "__compcert_i64_sdiv" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__divdi3" + | Idiv_stsud -> "__compcert_i64_sdiv_stsud") + | "__compcert_i64_umod" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__umoddi3" + | Idiv_stsud -> "__compcert_i64_umod_stsud") + | "__compcert_i64_smod" -> + (match idiv_function_kind_64bit() with + | Idiv_system | Idiv_fp -> "__moddi3" + | Idiv_stsud -> "__compcert_i64_smod_stsud") + | "__compcert_i32_sdiv" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_sdiv_fp" + | Idiv_stsud -> "__compcert_i32_sdiv_stsud") + | "__compcert_i32_udiv" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_udiv_fp" + | Idiv_stsud -> "__compcert_i32_udiv_stsud") + | "__compcert_i32_smod" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_smod_fp" + | Idiv_stsud -> "__compcert_i32_smod_stsud") + | "__compcert_i32_umod" as s -> + (match idiv_function_kind_32bit() with + | Idiv_system -> s + | Idiv_fp -> "__compcert_i32_umod_fp" + | Idiv_stsud -> "__compcert_i32_umod_stsud") + | "__compcert_f64_div" -> "__divdf3" + | "__compcert_f32_div" -> "__divsf3" + | x -> x;; + + let symbol oc symb = + fprintf oc "%s" (subst_symbol (extern_atom symb)) + + let symbol_offset oc (symb, ofs) = + symbol oc symb; + let ofs = camlint64_of_ptrofs ofs in + if ofs <> 0L then fprintf oc " + %Ld" ofs + + let label = elf_label + + let print_label oc lbl = label oc (transl_label lbl) + + let int_reg_name = let open Asmvliw in function + + | GPR0 -> "$r0" | GPR1 -> "$r1" | GPR2 -> "$r2" | GPR3 -> "$r3" + | GPR4 -> "$r4" | GPR5 -> "$r5" | GPR6 -> "$r6" | GPR7 -> "$r7" + | GPR8 -> "$r8" | GPR9 -> "$r9" | GPR10 -> "$r10" | GPR11 -> "$r11" + | GPR12 -> "$r12" | GPR13 -> "$r13" | GPR14 -> "$r14" | GPR15 -> "$r15" + | GPR16 -> "$r16" | GPR17 -> "$r17" | GPR18 -> "$r18" | GPR19 -> "$r19" + | GPR20 -> "$r20" | GPR21 -> "$r21" | GPR22 -> "$r22" | GPR23 -> "$r23" + | GPR24 -> "$r24" | GPR25 -> "$r25" | GPR26 -> "$r26" | GPR27 -> "$r27" + | GPR28 -> "$r28" | GPR29 -> "$r29" | GPR30 -> "$r30" | GPR31 -> "$r31" + | GPR32 -> "$r32" | GPR33 -> "$r33" | GPR34 -> "$r34" | GPR35 -> "$r35" + | GPR36 -> "$r36" | GPR37 -> "$r37" | GPR38 -> "$r38" | GPR39 -> "$r39" + | GPR40 -> "$r40" | GPR41 -> "$r41" | GPR42 -> "$r42" | GPR43 -> "$r43" + | GPR44 -> "$r44" | GPR45 -> "$r45" | GPR46 -> "$r46" | GPR47 -> "$r47" + | GPR48 -> "$r48" | GPR49 -> "$r49" | GPR50 -> "$r50" | GPR51 -> "$r51" + | GPR52 -> "$r52" | GPR53 -> "$r53" | GPR54 -> "$r54" | GPR55 -> "$r55" + | GPR56 -> "$r56" | GPR57 -> "$r57" | GPR58 -> "$r58" | GPR59 -> "$r59" + | GPR60 -> "$r60" | GPR61 -> "$r61" | GPR62 -> "$r62" | GPR63 -> "$r63" + + let ireg oc r = output_string oc (int_reg_name r) + + let int_gpreg_q_name = + let open Asmvliw in + function + | R0R1 -> "$r0r1" + | R2R3 -> "$r2r3" + | R4R5 -> "$r4r5" + | R6R7 -> "$r6r7" + | R8R9 -> "$r8r9" + | R10R11 -> "$r10r11" + | R12R13 -> "$r12r13" + | R14R15 -> "$r14r15" + | R16R17 -> "$r16r17" + | R18R19 -> "$r18r19" + | R20R21 -> "$r20r21" + | R22R23 -> "$r22r23" + | R24R25 -> "$r24r25" + | R26R27 -> "$r26r27" + | R28R29 -> "$r28r29" + | R30R31 -> "$r30r31" + | R32R33 -> "$r32r33" + | R34R35 -> "$r34r35" + | R36R37 -> "$r36r37" + | R38R39 -> "$r38r39" + | R40R41 -> "$r40r41" + | R42R43 -> "$r42r43" + | R44R45 -> "$r44r45" + | R46R47 -> "$r46r47" + | R48R49 -> "$r48r49" + | R50R51 -> "$r50r51" + | R52R53 -> "$r52r53" + | R54R55 -> "$r54r55" + | R56R57 -> "$r56r57" + | R58R59 -> "$r58r59" + | R60R61 -> "$r60r61" + | R62R63 -> "$r62r63" + + let int_gpreg_o_name = + let open Asmvliw in + function + | R0R1R2R3 -> "$r0r1r2r3" + | R4R5R6R7 -> "$r4r5r6r7" + | R8R9R10R11 -> "$r8r9r10r11" + | R12R13R14R15 -> "$r12r13r14r15" + | R16R17R18R19 -> "$r16r17r18r19" + | R20R21R22R23 -> "$r20r21r22r23" + | R24R25R26R27 -> "$r24r25r26r27" + | R28R29R30R31 -> "$r28r29r30r31" + | R32R33R34R35 -> "$r32r33r34r35" + | R36R37R38R39 -> "$r36r37r38r39" + | R40R41R42R43 -> "$r40r41r42r43" + | R44R45R46R47 -> "$r44r45r46r47" + | R48R49R50R51 -> "$r48r49r50r51" + | R52R53R54R55 -> "$r52r53r54r55" + | R56R57R58R59 -> "$r56r57r58r59" + | R60R61R62R63 -> "$r60r61r62r63";; + + let gpreg_q oc r = output_string oc (int_gpreg_q_name r) + let gpreg_o oc r = output_string oc (int_gpreg_o_name r) + + let preg oc = let open Asmvliw in function + | IR r -> ireg oc r + | RA -> output_string oc "$ra" + | _ -> assert false + + let preg_asm oc ty = preg oc + + let preg_annot = let open Asmvliw in function + | IR r -> int_reg_name r + | RA -> "$ra" + | _ -> assert false + + let scale_of_shift1_4 = let open ExtValues in function + | SHIFT1 -> 2 + | SHIFT2 -> 4 + | SHIFT3 -> 8 + | SHIFT4 -> 16;; + +(* Names of sections *) + + let name_of_section = function + | Section_text -> ".text" + | Section_data(Init, true) -> + ".section .tdata,\"awT\",@progbits" + | Section_data(Uninit, true) -> + ".section .tbss,\"awT\",@nobits" + | Section_data(Init_reloc, true) -> + failwith "Sylvain does not how to fix this" + | Section_data(i, false) | Section_small_data(i) -> + variable_section ~sec:".data" ~bss:".bss" i + | Section_const i | Section_small_const i -> + variable_section ~sec:".section .rodata" i + | Section_string -> ".section .rodata" + | Section_literal -> ".section .rodata" + | Section_jumptable -> ".section .rodata" + | Section_debug_info _ -> ".section .debug_info,\"\",%progbits" + | Section_debug_loc -> ".section .debug_loc,\"\",%progbits" + | Section_debug_abbrev -> ".section .debug_abbrev,\"\",%progbits" + | Section_debug_line _ -> ".section .debug_line,\"\",%progbits" + | Section_debug_ranges -> ".section .debug_ranges,\"\",%progbits" + | Section_debug_str -> ".section .debug_str,\"MS\",%progbits,1" + | Section_user(s, wr, ex) -> + sprintf ".section \"%s\",\"a%s%s\",%%progbits" + s (if wr then "w" else "") (if ex then "x" else "") + | Section_ais_annotation -> sprintf ".section \"__compcert_ais_annotations\",\"\",@note" + + let section oc sec = + fprintf oc " %s\n" (name_of_section sec) + +(* Associate labels to floating-point constants and to symbols. *) + + let print_tbl oc (lbl, tbl) = + fprintf oc " .balign 8\n"; + fprintf oc "%a:\n" label lbl; + List.iter + (fun l -> fprintf oc " .8byte %a\n" + print_label l) + tbl + + let emit_constants oc lit = + if exists_constants () then begin + section oc lit; + if Hashtbl.length literal64_labels > 0 then + begin + fprintf oc " .align 3\n"; + Hashtbl.iter + (fun bf lbl -> fprintf oc "%a: .quad 0x%Lx\n" label lbl bf) + literal64_labels + end; + if Hashtbl.length literal32_labels > 0 then + begin + fprintf oc " .align 2\n"; + Hashtbl.iter + (fun bf lbl -> + fprintf oc "%a: .long 0x%lx\n" label lbl bf) + literal32_labels + end; + reset_literals () + end + +(* Generate code to load the address of id + ofs in register r *) + + let loadsymbol oc r id ofs = + if Archi.pic_code () then begin + assert (ofs = Integers.Ptrofs.zero); + if C2C.atom_is_thread_local id then begin + (* fprintf oc " addd %a = $r13, @tprel(%s)\n" ireg r (extern_atom id) *) + fprintf oc " addd %a = $r13, @tlsle(%s)\n" ireg r (extern_atom id) + end else begin + fprintf oc " make %a = %s\n" ireg r (extern_atom id) + end + end else + begin + if C2C.atom_is_thread_local id then begin + (* fprintf oc " addd %a = $r13, @tprel(%a)\n" ireg r symbol_offset (id, ofs) *) + fprintf oc " addd %a = $r13, @tlsle(%a)\n" ireg r symbol_offset (id, ofs) + end else begin + fprintf oc " make %a = %a\n" ireg r symbol_offset (id, ofs) + end + end + +(* Emit .file / .loc debugging directives *) + + let print_file_line oc file line = + print_file_line oc comment file line + +(* + let print_location oc loc = + if loc <> Cutil.no_loc then print_file_line oc (fst loc) (snd loc) +*) + +(* Add "w" suffix to 32-bit instructions if we are in 64-bit mode *) + + (*let w oc = + if Archi.ptr64 then output_string oc "w" + *) + + (* Profiling *) + + + let kvx_profiling_stub oc nr_items + profiling_id_table_name + profiling_counter_table_name = + fprintf oc " make $r0 = %d\n" nr_items; + fprintf oc " make $r1 = %s\n" profiling_id_table_name; + fprintf oc " make $r2 = %s\n" profiling_counter_table_name; + fprintf oc " goto %s\n" profiling_write_table_helper; + fprintf oc " ;;\n";; + + (* Offset part of a load or store *) + + let offset oc n = ptrofs oc n + + let addressing oc = function + | AOff ofs -> offset oc ofs + | AReg ro | ARegXS ro -> ireg oc ro + + let xscale oc = function + | ARegXS _ -> fprintf oc ".xs" + | _ -> () + + let lsvariant oc = function + | TRAP -> () + | NOTRAP -> output_string oc ".s" + + let icond_name = let open Asmvliw in function + | ITne | ITneu -> "ne" + | ITeq | ITequ -> "eq" + | ITlt -> "lt" + | ITge -> "ge" + | ITle -> "le" + | ITgt -> "gt" + | ITltu -> "ltu" + | ITgeu -> "geu" + | ITleu -> "leu" + | ITgtu -> "gtu" + + let icond oc c = fprintf oc "%s" (icond_name c) + + let fcond_name = let open Asmvliw in function + | FTone -> "one" + | FTueq -> "ueq" + | FToeq -> "oeq" + | FTune -> "une" + | FTolt -> "olt" + | FTuge -> "uge" + | FToge -> "oge" + | FTult -> "ult" + + let fcond oc c = fprintf oc "%s" (fcond_name c) + + let bcond_name = let open Asmvliw in function + | BTwnez -> "wnez" + | BTweqz -> "weqz" + | BTwltz -> "wltz" + | BTwgez -> "wgez" + | BTwlez -> "wlez" + | BTwgtz -> "wgtz" + | BTdnez -> "dnez" + | BTdeqz -> "deqz" + | BTdltz -> "dltz" + | BTdgez -> "dgez" + | BTdlez -> "dlez" + | BTdgtz -> "dgtz" + + let bcond oc c = fprintf oc "%s" (bcond_name c) + +(* Printing of instructions *) + exception ShouldBeExpanded + + let print_instruction oc = function + (* Pseudo-instructions expanded in Asmexpand *) + | Pallocframe(sz, ofs) -> assert false + | Pfreeframe(sz, ofs) -> assert false + + (* Pseudo-instructions that remain *) + | Plabel lbl -> + fprintf oc "%a:\n" print_label lbl + | Ploadsymbol(rd, id, ofs) -> + loadsymbol oc rd id ofs + | Pbuiltin(ef, args, res) -> + begin match ef with + | EF_annot(kind,txt, targs) -> + begin match (P.to_int kind) with + | 1 -> let annot = annot_text preg_annot "x2" (camlstring_of_coqstring txt) args in + fprintf oc "%s annotation: %S\n" comment annot + (*| 2 -> let lbl = new_label () in + fprintf oc "%a: " label lbl; + add_ais_annot lbl preg_annot "x2" (camlstring_of_coqstring txt) args + *)| _ -> assert false + end + | EF_debug(kind, txt, targs) -> + print_debug_info comment print_file_line preg_annot "sp" oc + (P.to_int kind) (extern_atom txt) args + | EF_inline_asm(txt, sg, clob) -> + fprintf oc "%s begin inline assembly\n\t" comment; + print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res; + fprintf oc "%s end inline assembly\n" comment + | EF_profiling(id, coq_kind) -> + let kind = Z.to_int coq_kind in + assert (kind >= 0); + assert (kind <= 1); + fprintf oc "%s profiling %a %d\n" comment + Profilingaux.pp_id id kind; + fprintf oc " make $r63 = %s\n" profiling_counter_table_name; + fprintf oc " make $r62 = 1\n"; + fprintf oc " ;;\n"; + fprintf oc " afaddd %d[$r63] = $r62\n" + (profiling_offset id kind); + fprintf oc " ;;\n" + | _ -> + assert false + end + | Pnop -> (* FIXME fprintf oc " nop\n" *) () + | Psemi -> fprintf oc ";;\n" + + | Pclzll (rd, rs) -> fprintf oc " clzd %a = %a\n" ireg rd ireg rs + | Pclzw (rd, rs) -> fprintf oc " clzw %a = %a\n" ireg rd ireg rs + | Pctzll (rd, rs) -> fprintf oc " ctzd %a = %a\n" ireg rd ireg rs + | Pctzw (rd, rs) -> fprintf oc " ctzw %a = %a\n" ireg rd ireg rs + | Pstsud (rd, rs1, rs2) -> fprintf oc " stsud %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + + + (* Control flow instructions *) + | Pget (rd, rs) -> + fprintf oc " get %a = %a\n" ireg rd preg rs + | Pset (rd, rs) -> + fprintf oc " set %a = %a\n" preg rd ireg rs + | Pret -> + fprintf oc " ret \n" + | Pcall(s) -> + fprintf oc " call %a\n" symbol s + | Picall(rs) -> + fprintf oc " icall %a\n" ireg rs + | Pgoto(s) -> + fprintf oc " goto %a\n" symbol s + | Pigoto(rs) -> + fprintf oc " igoto %a\n" ireg rs + | Pj_l(s) -> + fprintf oc " goto %a\n" print_label s + | Pcb (bt, r, lbl) | Pcbu (bt, r, lbl) -> + fprintf oc " cb.%a %a? %a\n" bcond bt ireg r print_label lbl + + (* For builtins *) + | Ploopdo (r, lbl) -> + fprintf oc " loopdo %a, %a\n" ireg r print_label lbl + | Pgetn(n, dst) -> + fprintf oc " get %a = $s%ld\n" ireg dst (camlint_of_coqint n) + | Psetn(n, dst) -> + fprintf oc " set $s%ld = %a\n" (camlint_of_coqint n) ireg dst + | Pwfxl(n, dst) -> + fprintf oc " wfxl $s%ld = %a\n" (camlint_of_coqint n) ireg dst + | Pwfxm(n, dst) -> + fprintf oc " wfxm $s%ld = %a\n" (camlint_of_coqint n) ireg dst + | Pldu(dst, addr) -> + fprintf oc " ld.u %a = 0[%a]\n" ireg dst ireg addr + | Plbzu(dst, addr) -> + fprintf oc " lbz.u %a = 0[%a]\n" ireg dst ireg addr + | Plhzu(dst, addr) -> + fprintf oc " lhz.u %a = 0[%a]\n" ireg dst ireg addr + | Plwzu(dst, addr) -> + fprintf oc " lwz.u %a = 0[%a]\n" ireg dst ireg addr + | Pawait -> + fprintf oc " await\n" + | Psleep -> + fprintf oc " sleep\n" + | Pstop -> + fprintf oc " stop\n" + | Pbarrier -> + fprintf oc " barrier\n" + | Pfence -> + fprintf oc " fence\n" + | Pdinval -> + fprintf oc " dinval\n" + | Pdinvall addr -> + fprintf oc " dinvall 0[%a]\n" ireg addr + | Pdtouchl addr -> + fprintf oc " dtouchl 0[%a]\n" ireg addr + | Piinval -> + fprintf oc " iinval\n" + | Piinvals addr -> + fprintf oc " iinvals 0[%a]\n" ireg addr + | Pitouchl addr -> + fprintf oc " itouchl 0[%a]\n" ireg addr + | Pdzerol addr -> + fprintf oc " dzerol 0[%a]\n" ireg addr +(* | Pafaddd(addr, incr_res) -> + fprintfoc " afaddd 0[%a] = %a\n" ireg addr ireg incr_res + | Pafaddw(addr, incr_res) -> + fprintfoc " afaddw 0[%a] = %a\n" ireg addr ireg incr_res *) (* see #157 *) + | Palclrd(res, addr) -> + fprintf oc " alclrd %a = 0[%a]\n" ireg res ireg addr + | Palclrw(res, addr) -> + fprintf oc " alclrw %a = 0[%a]\n" ireg res ireg addr + | Pjumptable (idx_reg, tbl) -> + let lbl = new_label() in + (* jumptables := (lbl, tbl) :: !jumptables; *) + let base_reg = if idx_reg=Asmvliw.GPR63 then Asmvliw.GPR62 else Asmvliw.GPR63 in + fprintf oc "%s jumptable [ " comment; + List.iter (fun l -> fprintf oc "%a " print_label l) tbl; + fprintf oc "]\n"; + fprintf oc " make %a = %a\n ;;\n" ireg base_reg label lbl; + fprintf oc " ld.xs %a = %a[%a]\n ;;\n" ireg base_reg ireg idx_reg ireg base_reg; + fprintf oc " igoto %a\n ;;\n" ireg base_reg; + section oc Section_jumptable; + print_tbl oc (lbl, tbl); + section oc Section_text + + (* Load/Store instructions *) + | Plb(trap, rd, ra, adr) -> + fprintf oc " lbs%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Plbu(trap, rd, ra, adr) -> + fprintf oc " lbz%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Plh(trap, rd, ra, adr) -> + fprintf oc " lhs%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Plhu(trap, rd, ra, adr) -> + fprintf oc " lhz%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Plw(trap, rd, ra, adr) | Plw_a(trap, rd, ra, adr) | Pfls(trap, rd, ra, adr) -> + fprintf oc " lws%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Pld(trap, rd, ra, adr) | Pfld(trap, rd, ra, adr) | Pld_a(trap, rd, ra, adr) -> assert Archi.ptr64; + fprintf oc " ld%a%a %a = %a[%a]\n" lsvariant trap xscale adr ireg rd addressing adr ireg ra + | Plq(rd, ra, adr) -> + fprintf oc " lq%a %a = %a[%a]\n" xscale adr gpreg_q rd addressing adr ireg ra + | Plo(rd, ra, adr) -> + fprintf oc " lo%a %a = %a[%a]\n" xscale adr gpreg_o rd addressing adr ireg ra + + | Psb(rd, ra, adr) -> + fprintf oc " sb%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd + | Psh(rd, ra, adr) -> + fprintf oc " sh%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd + | Psw(rd, ra, adr) | Psw_a(rd, ra, adr) | Pfss(rd, ra, adr) -> + fprintf oc " sw%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd + | Psd(rd, ra, adr) | Psd_a(rd, ra, adr) | Pfsd(rd, ra, adr) -> assert Archi.ptr64; + fprintf oc " sd%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd + | Psq(rd, ra, adr) -> + fprintf oc " sq%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_q rd + | Pso(rd, ra, adr) -> + fprintf oc " so%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_o rd + + (* Arith R instructions *) + + (* Arith RR instructions *) + | Pmv(rd, rs) -> + fprintf oc " addd %a = %a, 0\n" ireg rd ireg rs + | Pcvtl2w(rd, rs) -> assert false + | Pnegl(rd, rs) -> assert Archi.ptr64; + fprintf oc " negd %a = %a\n" ireg rd ireg rs + | Pnegw(rd, rs) -> + fprintf oc " negw %a = %a\n" ireg rd ireg rs + | Psxwd(rd, rs) -> + fprintf oc " sxwd %a = %a\n" ireg rd ireg rs + | Pzxwd(rd, rs) -> + fprintf oc " zxwd %a = %a\n" ireg rd ireg rs + | Pextfz(rd, rs, stop, start) | Pextfzl(rd, rs, stop, start) -> + fprintf oc " extfz %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start) + | Pextfs(rd, rs, stop, start) | Pextfsl(rd, rs, stop, start) -> + fprintf oc " extfs %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start) + | Pinsf(rd, rs, stop, start) | Pinsfl(rd, rs, stop, start) -> + fprintf oc " insf %a = %a, %ld, %ld\n" ireg rd ireg rs (camlint_of_coqint stop) (camlint_of_coqint start) + | Pfabsd(rd, rs) -> + fprintf oc " fabsd %a = %a\n" ireg rd ireg rs + | Pfabsw(rd, rs) -> + fprintf oc " fabsw %a = %a\n" ireg rd ireg rs + | Pfnegd(rd, rs) -> + fprintf oc " fnegd %a = %a\n" ireg rd ireg rs + | Pfnegw(rd, rs) -> + fprintf oc " fnegw %a = %a\n" ireg rd ireg rs + | Pfnarrowdw(rd, rs) -> + fprintf oc " fnarrowdw %a = %a\n" ireg rd ireg rs + | Pfwidenlwd(rd, rs) -> + fprintf oc " fwidenlwd %a = %a\n" ireg rd ireg rs + | Pfloatuwrnsz(rd, rs) -> + fprintf oc " floatuw.rn.s %a = %a, 0\n" ireg rd ireg rs + | Pfloatwrnsz(rd, rs) -> + fprintf oc " floatw.rn.s %a = %a, 0\n" ireg rd ireg rs + | Pfloatudrnsz(rd, rs) -> + fprintf oc " floatud.rn.s %a = %a, 0\n" ireg rd ireg rs + | Pfloatdrnsz(rd, rs) -> + fprintf oc " floatd.rn.s %a = %a, 0\n" ireg rd ireg rs + | Pfixedwrzz(rd, rs) -> + fprintf oc " fixedw.rz %a = %a, 0\n" ireg rd ireg rs + | Pfixeduwrzz(rd, rs) -> + fprintf oc " fixeduw.rz %a = %a, 0\n" ireg rd ireg rs + | Pfixeddrzz(rd, rs) | Pfixeddrzz_i32(rd, rs) -> + fprintf oc " fixedd.rz %a = %a, 0\n" ireg rd ireg rs + | Pfixedudrzz(rd, rs) | Pfixedudrzz_i32(rd, rs) -> + fprintf oc " fixedud.rz %a = %a, 0\n" ireg rd ireg rs + + (* Arith RI32 instructions *) + | Pmake (rd, imm) -> + fprintf oc " make %a, %a\n" ireg rd coqint imm + + (* Arith RI64 instructions *) + | Pmakel (rd, imm) -> + fprintf oc " make %a, %a\n" ireg rd coqint64 imm + + (* Arith RF32 instructions *) + | Pmakefs (rd, f) -> + let d = Floats.Float32.to_bits f in + fprintf oc " make %a, %a %s %.18g\n" + ireg rd coqint d comment (camlfloat_of_coqfloat32 f) + + (* Arith RF64 instructions *) + | Pmakef (rd, f) -> + let d = Floats.Float.to_bits f in + fprintf oc " make %a, %a %s %.18g\n" + ireg rd coqint64 d comment (camlfloat_of_coqfloat f) + + (* Arith RRR instructions *) + | Pcompw (it, rd, rs1, rs2) -> + fprintf oc " compw.%a %a = %a, %a\n" icond it ireg rd ireg rs1 ireg rs2 + | Pcompl (it, rd, rs1, rs2) -> + fprintf oc " compd.%a %a = %a, %a\n" icond it ireg rd ireg rs1 ireg rs2 + + | Pfcompw (ft, rd, rs1, rs2) -> + fprintf oc " fcompw.%a %a = %a, %a\n" fcond ft ireg rd ireg rs1 ireg rs2 + | Pfcompl (ft, rd, rs1, rs2) -> + fprintf oc " fcompd.%a %a = %a, %a\n" fcond ft ireg rd ireg rs1 ireg rs2 + + | Paddw (rd, rs1, rs2) -> + fprintf oc " addw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Paddxw (s14, rd, rs1, rs2) -> + fprintf oc " addx%dw %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs1 ireg rs2 + | Psubw (rd, rs1, rs2) -> + fprintf oc " sbfw %a = %a, %a\n" ireg rd ireg rs2 ireg rs1 + | Prevsubxw (s14, rd, rs1, rs2) -> + fprintf oc " sbfx%dw %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs1 ireg rs2 + | Pmulw (rd, rs1, rs2) -> + fprintf oc " mulw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pandw (rd, rs1, rs2) -> + fprintf oc " andw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnandw (rd, rs1, rs2) -> + fprintf oc " nandw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Porw (rd, rs1, rs2) -> + fprintf oc " orw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnorw (rd, rs1, rs2) -> + fprintf oc " norw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pxorw (rd, rs1, rs2) -> + fprintf oc " xorw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnxorw (rd, rs1, rs2) -> + fprintf oc " nxorw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pandnw (rd, rs1, rs2) -> + fprintf oc " andnw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pornw (rd, rs1, rs2) -> + fprintf oc " ornw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psraw (rd, rs1, rs2) -> + fprintf oc " sraw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psrxw (rd, rs1, rs2) -> + fprintf oc " srsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psrlw (rd, rs1, rs2) -> + fprintf oc " srlw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psllw (rd, rs1, rs2) -> + fprintf oc " sllw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pmaddw (rd, rs1, rs2) -> + fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pmsubw (rd, rs1, rs2) -> + fprintf oc " msbfw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaddfw (rd, rs1, rs2) -> + fprintf oc " ffmaw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmsubfw (rd, rs1, rs2) -> + fprintf oc " ffmsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + + | Paddl (rd, rs1, rs2) -> + fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Paddxl (s14, rd, rs1, rs2) -> + fprintf oc " addx%dd %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs1 ireg rs2 + | Psubl (rd, rs1, rs2) -> + fprintf oc " sbfd %a = %a, %a\n" ireg rd ireg rs2 ireg rs1 + | Prevsubxl (s14, rd, rs1, rs2) -> + fprintf oc " sbfx%dd %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs1 ireg rs2 + | Pandl (rd, rs1, rs2) -> + fprintf oc " andd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnandl (rd, rs1, rs2) -> + fprintf oc " nandd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Porl (rd, rs1, rs2) -> + fprintf oc " ord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnorl (rd, rs1, rs2) -> + fprintf oc " nord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pxorl (rd, rs1, rs2) -> + fprintf oc " xord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pnxorl (rd, rs1, rs2) -> + fprintf oc " nxord %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pandnl (rd, rs1, rs2) -> + fprintf oc " andnd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pornl (rd, rs1, rs2) -> + fprintf oc " ornd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pmull (rd, rs1, rs2) -> + fprintf oc " muld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pslll (rd, rs1, rs2) -> + fprintf oc " slld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psrll (rd, rs1, rs2) -> + fprintf oc " srld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psrxl (rd, rs1, rs2) -> + fprintf oc " srsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Psral (rd, rs1, rs2) -> + fprintf oc " srad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pmaddl (rd, rs1, rs2) -> + fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pmsubl (rd, rs1, rs2) -> + fprintf oc " msbfd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaddfl (rd, rs1, rs2) -> + fprintf oc " ffmad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmsubfl (rd, rs1, rs2) -> + fprintf oc " ffmsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + + | Pfaddd (rd, rs1, rs2) -> + fprintf oc " faddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfaddw (rd, rs1, rs2) -> + fprintf oc " faddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfsbfd (rd, rs1, rs2) -> + fprintf oc " fsbfd %a = %a, %a\n" ireg rd ireg rs2 ireg rs1 + | Pfsbfw (rd, rs1, rs2) -> + fprintf oc " fsbfw %a = %a, %a\n" ireg rd ireg rs2 ireg rs1 + | Pfmuld (rd, rs1, rs2) -> + fprintf oc " fmuld %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmulw (rd, rs1, rs2) -> + fprintf oc " fmulw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmind (rd, rs1, rs2) -> + fprintf oc " fmind %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfminw (rd, rs1, rs2) -> + fprintf oc " fminw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaxd (rd, rs1, rs2) -> + fprintf oc " fmaxd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaxw (rd, rs1, rs2) -> + fprintf oc " fmaxw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfinvw (rd, rs1) -> + fprintf oc " finvw %a = %a\n" ireg rd ireg rs1 + + (* Arith RRI32 instructions *) + | Pcompiw (it, rd, rs, imm) -> + fprintf oc " compw.%a %a = %a, %a\n" icond it ireg rd ireg rs coqint imm + | Paddiw (rd, rs, imm) -> + fprintf oc " addw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Paddxiw (s14, rd, rs, imm) -> + fprintf oc " addx%dw %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs coqint imm + | Prevsubiw (rd, rs, imm) -> + fprintf oc " sbfw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Prevsubxiw (s14, rd, rs, imm) -> + fprintf oc " sbfx%dw %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs coqint imm + | Pmuliw (rd, rs, imm) -> + fprintf oc " mulw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pandiw (rd, rs, imm) -> + fprintf oc " andw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pnandiw (rd, rs, imm) -> + fprintf oc " nandw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Poriw (rd, rs, imm) -> + fprintf oc " orw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pnoriw (rd, rs, imm) -> + fprintf oc " norw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pxoriw (rd, rs, imm) -> + fprintf oc " xorw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pnxoriw (rd, rs, imm) -> + fprintf oc " nxorw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pandniw (rd, rs, imm) -> + fprintf oc " andnw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Porniw (rd, rs, imm) -> + fprintf oc " ornw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Psraiw (rd, rs, imm) -> + fprintf oc " sraw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Psrxiw (rd, rs, imm) -> + fprintf oc " srsw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Psrliw (rd, rs, imm) -> + fprintf oc " srlw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pslliw (rd, rs, imm) -> + fprintf oc " sllw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Proriw (rd, rs, imm) -> + fprintf oc " rorw %a = %a, %a\n" ireg rd ireg rs coqint imm + | Pmaddiw (rd, rs, imm) -> + fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs coqint imm + + | Psllil (rd, rs, imm) -> + fprintf oc " slld %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Psrlil (rd, rs, imm) -> + fprintf oc " srld %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Psrail (rd, rs, imm) -> + fprintf oc " srad %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Psrxil (rd, rs, imm) -> + fprintf oc " srsd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + + (* Arith RRI64 instructions *) + | Pcompil (it, rd, rs, imm) -> + fprintf oc " compd.%a %a = %a, %a\n" icond it ireg rd ireg rs coqint64 imm + | Paddil (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Paddxil (s14, rd, rs, imm) -> + fprintf oc " addx%dd %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs coqint imm + | Prevsubil (rd, rs, imm) -> + fprintf oc " sbfd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Prevsubxil (s14, rd, rs, imm) -> + fprintf oc " sbfx%dd %a = %a, %a\n" (scale_of_shift1_4 s14) + ireg rd ireg rs coqint64 imm + | Pmulil (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " muld %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pandil (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " andd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pnandil (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " nandd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Poril (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " ord %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pnoril (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " nord %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pxoril (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " xord %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pnxoril (rd, rs, imm) -> assert Archi.ptr64; + fprintf oc " nxord %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pandnil (rd, rs, imm) -> + fprintf oc " andnd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pornil (rd, rs, imm) -> + fprintf oc " ornd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + | Pmaddil (rd, rs, imm) -> + fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs coqint64 imm + + | Pcmove (bt, rd, rcond, rs) | Pcmoveu (bt, rd, rcond, rs) -> + fprintf oc " cmoved.%a %a? %a = %a\n" + bcond bt ireg rcond ireg rd ireg rs + | Pcmoveiw (bt, rd, rcond, imm) | Pcmoveuiw (bt, rd, rcond, imm) -> + fprintf oc " cmoved.%a %a? %a = %a\n" + bcond bt ireg rcond ireg rd coqint imm + | Pcmoveil (bt, rd, rcond, imm) | Pcmoveuil (bt, rd, rcond, imm) -> + fprintf oc " cmoved.%a %a? %a = %a\n" + bcond bt ireg rcond ireg rd coqint64 imm + + let get_section_names name = + let (text, lit) = + match C2C.atom_sections name with + | t :: l :: _ -> (t, l) + | _ -> (Section_text, Section_literal) in + text,lit,Section_jumptable + + let print_align oc alignment = + fprintf oc " .balign %d\n" alignment + + let print_jumptable oc jmptbl = () + (* if !jumptables <> [] then + begin + section oc jmptbl; + List.iter (print_tbl oc) !jumptables; + jumptables := [] + end *) + + let print_fun_info = elf_print_fun_info + + let print_optional_fun_info _ = () + + let print_var_info = elf_print_var_info + + let print_comm_symb oc sz name align = + if C2C.atom_is_static name then + fprintf oc " .local %a\n" symbol name; + fprintf oc " .comm %a, %s, %d\n" + symbol name + (Z.to_string sz) + align + + let print_instructions oc fn = + current_function_sig := fn.fn_sig; + List.iter (print_instruction oc) fn.fn_code + +(* Data *) + + let address = if Archi.ptr64 then ".quad" else ".long" + + let print_prologue oc = + (* fprintf oc " .option %s\n" (if Archi.pic_code() then "pic" else "nopic"); *) + if !Clflags.option_g then begin + section oc Section_text; + end + + let print_epilogue oc = + print_profiling_epilogue elf_text_print_fun_info Dtors kvx_profiling_stub oc; + if !Clflags.option_g then begin + Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f)); + section oc Section_text; + end + + let default_falignment = 2 + + let cfi_startproc oc = () + let cfi_endproc oc = () + + end + +let sel_target () = + (module Target:TARGET) diff --git a/kvx/ValueAOp.v b/kvx/ValueAOp.v new file mode 100644 index 00000000..87554258 --- /dev/null +++ b/kvx/ValueAOp.v @@ -0,0 +1,599 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Compopts. +Require Import AST Integers Floats Values Memory Globalenvs. +Require Import Op ExtValues ExtFloats RTL ValueDomain. +Definition minf := binop_float ExtFloat.min. +Definition maxf := binop_float ExtFloat.max. +Definition minfs := binop_single ExtFloat32.min. +Definition maxfs := binop_single ExtFloat32.max. + +Definition ntop3 (x y z: aval) : aval := Ifptr (plub (provenance x) (plub (provenance y) (provenance z))). + +Definition triple_op_float (sem: float -> float -> float -> float) (x y z: aval) := + match x, y, z with + | F a, F b, F c => F (sem a b c) + | _, _, _ => ntop3 x y z + end. + +Definition triple_op_single (sem: float32 -> float32 -> float32 -> float32) (x y z: aval) := + match x, y, z with + | FS a, FS b, FS c => FS (sem a b c) + | _, _, _ => ntop3 x y z + end. + +Definition fmaddf := triple_op_float (fun x y z => Float.fma y z x). +Definition fmsubf := triple_op_float (fun x y z => Float.fma (Float.neg y) z x). +Definition fmaddfs := triple_op_single (fun x y z => Float32.fma y z x). +Definition fmsubfs := triple_op_single (fun x y z => Float32.fma (Float32.neg y) z x). + +Definition invfs (y : aval) := + match y with + | FS f => FS (ExtFloat32.inv f) + | _ => ntop1 y + end. + +(** Value analysis for RISC V operators *) + +Definition eval_static_condition (cond: condition) (vl: list aval): abool := + match cond, vl with + | Ccomp c, v1 :: v2 :: nil => cmp_bool c v1 v2 + | Ccompu c, v1 :: v2 :: nil => cmpu_bool c v1 v2 + | Ccompimm c n, v1 :: nil => cmp_bool c v1 (I n) + | Ccompuimm c n, v1 :: nil => cmpu_bool c v1 (I n) + | Ccompl c, v1 :: v2 :: nil => cmpl_bool c v1 v2 + | Ccomplu c, v1 :: v2 :: nil => cmplu_bool c v1 v2 + | Ccomplimm c n, v1 :: nil => cmpl_bool c v1 (L n) + | Ccompluimm c n, v1 :: nil => cmplu_bool c v1 (L n) + | Ccompf c, v1 :: v2 :: nil => cmpf_bool c v1 v2 + | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2) + | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2 + | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2) + | _, _ => Bnone + end. + +Definition eval_static_addressing (addr: addressing) (vl: list aval): aval := + match addr, vl with + | Aindexed n, v1::nil => offset_ptr v1 n + | Aindexed2, v1::v2::nil => addl v1 v2 + | Aindexed2XS scale, v1::v2::nil => addl v1 (shll v2 (I (Int.repr scale))) + | Aglobal s ofs, nil => Ptr (Gl s ofs) + | Ainstack ofs, nil => Ptr (Stk ofs) + | _, _ => Vbot + end. + +Definition eval_static_condition0 (cond : condition0) (v : aval) : abool := + match cond with + | Ccomp0 c => cmp_bool c v (I Int.zero) + | Ccompu0 c => cmpu_bool c v (I Int.zero) + | Ccompl0 c => cmpl_bool c v (L Int64.zero) + | Ccomplu0 c => cmplu_bool c v (L Int64.zero) + end. + + +Definition eval_static_extfs (stop : Z) (start : Z) (v : aval) := + if is_bitfield stop start + then + let stop' := Z.add stop Z.one in + match v with + | I w => + I (Int.shr (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start)))) + | _ => Vtop + end + else Vtop. + +Definition eval_static_extfz (stop : Z) (start : Z) (v : aval) := + if is_bitfield stop start + then + let stop' := Z.add stop Z.one in + match v with + | I w => + I (Int.shru (Int.shl w (Int.repr (Z.sub Int.zwordsize stop'))) (Int.repr (Z.sub Int.zwordsize (Z.sub stop' start)))) + | _ => Vtop + end + else Vtop. + +Definition eval_static_extfsl (stop : Z) (start : Z) (v : aval) := + if is_bitfieldl stop start + then + let stop' := Z.add stop Z.one in + match v with + | L w => + L (Int64.shr' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start)))) + | _ => Vtop + end + else Vtop. + +Definition eval_static_extfzl (stop : Z) (start : Z) (v : aval) := + if is_bitfieldl stop start + then + let stop' := Z.add stop Z.one in + match v with + | L w => + L (Int64.shru' (Int64.shl' w (Int.repr (Z.sub Int64.zwordsize stop'))) (Int.repr (Z.sub Int64.zwordsize (Z.sub stop' start)))) + | _ => Vtop + end + else Vtop. + +Definition eval_static_insf stop start prev fld := + let mask := Int.repr (zbitfield_mask stop start) in + if is_bitfield stop start + then + match prev, fld with + | (I prevI), (I fldI) => + if Int.ltu (Int.repr start) Int.iwordsize + then I (Int.or (Int.and prevI (Int.not mask)) + (Int.and (Int.shl fldI (Int.repr start)) mask)) + else Vtop + | _, _ => Vtop + end + else Vtop. + +Definition eval_static_insfl stop start prev fld := + let mask := Int64.repr (zbitfield_mask stop start) in + if is_bitfieldl stop start + then + match prev, fld with + | (L prevL), (L fldL) => + if Int.ltu (Int.repr start) Int64.iwordsize' + then L (Int64.or (Int64.and prevL (Int64.not mask)) + (Int64.and (Int64.shl' fldL (Int.repr start)) mask)) + else Vtop + | _,_ => Vtop + end + else Vtop. + +Definition eval_static_operation (op: operation) (vl: list aval): aval := + match op, vl with + | Omove, v1::nil => v1 + | Ointconst n, nil => I n + | Olongconst n, nil => L n + | Ofloatconst n, nil => if propagate_float_constants tt then F n else ntop + | Osingleconst n, nil => if propagate_float_constants tt then FS n else ntop + | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs) + | Oaddrstack ofs, nil => Ptr (Stk ofs) + | Ocast8signed, v1 :: nil => sign_ext 8 v1 + | Ocast16signed, v1 :: nil => sign_ext 16 v1 + | Oadd, v1::v2::nil => add v1 v2 + | Oaddimm n, v1::nil => add v1 (I n) + | Oaddx shift, v1::v2::nil => add v2 (shl v1 (I (int_of_shift1_4 shift))) + | Oaddximm shift n, v1::nil => add (I n) (shl v1 (I (int_of_shift1_4 shift))) + | Oneg, v1::nil => neg v1 + | Osub, v1::v2::nil => sub v1 v2 + | Orevsubx shift, v1::v2::nil => sub v2 (shl v1 (I (int_of_shift1_4 shift))) + | Orevsubimm n, v1::nil => sub (I n) v1 + | Orevsubximm shift n, v1::nil => sub (I n) (shl v1 (I (int_of_shift1_4 shift))) + | Omul, v1::v2::nil => mul v1 v2 + | Omulimm n, v1::nil => mul v1 (I n) + | Omulhs, v1::v2::nil => mulhs v1 v2 + | Omulhu, v1::v2::nil => mulhu v1 v2 + | Odiv, v1::v2::nil => divs v1 v2 + | Odivu, v1::v2::nil => divu v1 v2 + | Omod, v1::v2::nil => mods v1 v2 + | Omodu, v1::v2::nil => modu v1 v2 + | Oand, v1::v2::nil => and v1 v2 + | Oandimm n, v1::nil => and v1 (I n) + | Onand, v1::v2::nil => notint (and v1 v2) + | Onandimm n, v1::nil => notint (and v1 (I n)) + | Oor, v1::v2::nil => or v1 v2 + | Oorimm n, v1::nil => or v1 (I n) + | Onor, v1::v2::nil => notint (or v1 v2) + | Onorimm n, v1::nil => notint (or v1 (I n)) + | Oxor, v1::v2::nil => xor v1 v2 + | Oxorimm n, v1::nil => xor v1 (I n) + | Onxor, v1::v2::nil => notint (xor v1 v2) + | Onxorimm n, v1::nil => notint (xor v1 (I n)) + | Onot, v1::nil => notint v1 + | Oandn, v1::v2::nil => and (notint v1) v2 + | Oandnimm n, v1::nil => and (notint v1) (I n) + | Oorn, v1::v2::nil => or (notint v1) v2 + | Oornimm n, v1::nil => or (notint v1) (I n) + | Oshl, v1::v2::nil => shl v1 v2 + | Oshlimm n, v1::nil => shl v1 (I n) + | Oshr, v1::v2::nil => shr v1 v2 + | Oshrimm n, v1::nil => shr v1 (I n) + | Ororimm n, v1::nil => ror v1 (I n) + | Oshru, v1::v2::nil => shru v1 v2 + | Oshruimm n, v1::nil => shru v1 (I n) + | Oshrximm n, v1::nil => shrx v1 (I n) + | Omadd, v1::v2::v3::nil => add v1 (mul v2 v3) + | Omaddimm n, v1::v2::nil => add v1 (mul v2 (I n)) + | Omsub, v1::v2::v3::nil => sub v1 (mul v2 v3) + | Omakelong, v1::v2::nil => longofwords v1 v2 + | Olowlong, v1::nil => loword v1 + | Ohighlong, v1::nil => hiword v1 + | Ocast32signed, v1::nil => longofint v1 + | Ocast32unsigned, v1::nil => longofintu v1 + | Oaddl, v1::v2::nil => addl v1 v2 + | Oaddlimm n, v1::nil => addl v1 (L n) + | Oaddxl shift, v1::v2::nil => addl v2 (shll v1 (I (int_of_shift1_4 shift))) + | Oaddxlimm shift n, v1::nil => addl (L n) (shll v1 (I (int_of_shift1_4 shift))) + | Onegl, v1::nil => negl v1 + | Osubl, v1::v2::nil => subl v1 v2 + | Orevsubxl shift, v1::v2::nil => subl v2 (shll v1 (I (int_of_shift1_4 shift))) + | Orevsublimm n, v1::nil => subl (L n) v1 + | Orevsubxlimm shift n, v1::nil => subl (L n) (shll v1 (I (int_of_shift1_4 shift))) + | Omull, v1::v2::nil => mull v1 v2 + | Omullimm n, v1::nil => mull v1 (L n) + | Omullhs, v1::v2::nil => mullhs v1 v2 + | Omullhu, v1::v2::nil => mullhu v1 v2 + | Odivl, v1::v2::nil => divls v1 v2 + | Odivlu, v1::v2::nil => divlu v1 v2 + | Omodl, v1::v2::nil => modls v1 v2 + | Omodlu, v1::v2::nil => modlu v1 v2 + | Oandl, v1::v2::nil => andl v1 v2 + | Oandlimm n, v1::nil => andl v1 (L n) + | Onandl, v1::v2::nil => notl (andl v1 v2) + | Onandlimm n, v1::nil => notl (andl v1 (L n)) + | Oorl, v1::v2::nil => orl v1 v2 + | Oorlimm n, v1::nil => orl v1 (L n) + | Onorl, v1::v2::nil => notl (orl v1 v2) + | Onorlimm n, v1::nil => notl (orl v1 (L n)) + | Oxorl, v1::v2::nil => xorl v1 v2 + | Oxorlimm n, v1::nil => xorl v1 (L n) + | Onxorl, v1::v2::nil => notl (xorl v1 v2) + | Onxorlimm n, v1::nil => notl (xorl v1 (L n)) + | Onotl, v1::nil => notl v1 + | Oandnl, v1::v2::nil => andl (notl v1) v2 + | Oandnlimm n, v1::nil => andl (notl v1) (L n) + | Oornl, v1::v2::nil => orl (notl v1) v2 + | Oornlimm n, v1::nil => orl (notl v1) (L n) + | Oshll, v1::v2::nil => shll v1 v2 + | Oshllimm n, v1::nil => shll v1 (I n) + | Oshrl, v1::v2::nil => shrl v1 v2 + | Oshrlimm n, v1::nil => shrl v1 (I n) + | Oshrlu, v1::v2::nil => shrlu v1 v2 + | Oshrluimm n, v1::nil => shrlu v1 (I n) + | Oshrxlimm n, v1::nil => shrxl v1 (I n) + | Omaddl, v1::v2::v3::nil => addl v1 (mull v2 v3) + | Omaddlimm n, v1::v2::nil => addl v1 (mull v2 (L n)) + | Omsubl, v1::v2::v3::nil => subl v1 (mull v2 v3) + | Onegf, v1::nil => negf v1 + | Oabsf, v1::nil => absf v1 + | Oaddf, v1::v2::nil => addf v1 v2 + | Osubf, v1::v2::nil => subf v1 v2 + | Omulf, v1::v2::nil => mulf v1 v2 + | Odivf, v1::v2::nil => divf v1 v2 + | Ominf, v1::v2::nil => minf v1 v2 + | Omaxf, v1::v2::nil => maxf v1 v2 + | Ofmaddf, v1::v2::v3::nil => fmaddf v1 v2 v3 + | Ofmsubf, v1::v2::v3::nil => fmsubf v1 v2 v3 + | Onegfs, v1::nil => negfs v1 + | Oabsfs, v1::nil => absfs v1 + | Oaddfs, v1::v2::nil => addfs v1 v2 + | Osubfs, v1::v2::nil => subfs v1 v2 + | Omulfs, v1::v2::nil => mulfs v1 v2 + | Odivfs, v1::v2::nil => divfs v1 v2 + | Ominfs, v1::v2::nil => minfs v1 v2 + | Omaxfs, v1::v2::nil => maxfs v1 v2 + | Oinvfs, v1::nil => invfs v1 + | Ofmaddfs, v1::v2::v3::nil => fmaddfs v1 v2 v3 + | Ofmsubfs, v1::v2::v3::nil => fmsubfs v1 v2 v3 + | Osingleoffloat, v1::nil => singleoffloat v1 + | Ofloatofsingle, v1::nil => floatofsingle v1 + | Ointoffloat, v1::nil => intoffloat_total v1 + | Ointuoffloat, v1::nil => intuoffloat_total v1 + | Ointofsingle, v1::nil => intofsingle_total v1 + | Ointuofsingle, v1::nil => intuofsingle_total v1 + | Osingleofint, v1::nil => singleofint v1 + | Osingleofintu, v1::nil => singleofintu v1 + | Olongoffloat, v1::nil => longoffloat_total v1 + | Olonguoffloat, v1::nil => longuoffloat_total v1 + | Ofloatoflong, v1::nil => floatoflong v1 + | Ofloatoflongu, v1::nil => floatoflongu v1 + | Olongofsingle, v1::nil => longofsingle_total v1 + | Olonguofsingle, v1::nil => longuofsingle_total v1 + | Osingleoflong, v1::nil => singleoflong v1 + | Osingleoflongu, v1::nil => singleoflongu v1 + | Ocmp c, _ => of_optbool (eval_static_condition c vl) + | (Oextfz stop start), v0::nil => eval_static_extfz stop start v0 + | (Oextfs stop start), v0::nil => eval_static_extfs stop start v0 + | (Oextfzl stop start), v0::nil => eval_static_extfzl stop start v0 + | (Oextfsl stop start), v0::nil => eval_static_extfsl stop start v0 + | (Oinsf stop start), v0::v1::nil => eval_static_insf stop start v0 v1 + | (Oinsfl stop start), v0::v1::nil => eval_static_insfl stop start v0 v1 + | Osel c ty, v1::v2::vc::nil => select (eval_static_condition0 c vc) v1 v2 + | Oselimm c imm, v1::vc::nil => select (eval_static_condition0 c vc) v1 (I imm) + | Osellimm c imm, v1::vc::nil => select (eval_static_condition0 c vc) v1 (L imm) + | _, _ => Vbot + end. + +Section SOUNDNESS. + +Variable bc: block_classification. +Variable ge: genv. +Hypothesis GENV: genv_match bc ge. +Variable sp: block. +Hypothesis STACK: bc sp = BCstack. + +Lemma minf_sound: + forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minf v w) (minf x y). +Proof. + apply (binop_float_sound bc ExtFloat.min); assumption. +Qed. + +Lemma maxf_sound: + forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.maxf v w) (maxf x y). +Proof. + apply (binop_float_sound bc ExtFloat.max); assumption. +Qed. + +Lemma minfs_sound: + forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.minfs v w) (minfs x y). +Proof. + apply (binop_single_sound bc ExtFloat32.min); assumption. +Qed. + +Lemma maxfs_sound: + forall v x w y, vmatch bc v x -> vmatch bc w y -> vmatch bc (ExtValues.maxfs v w) (maxfs x y). +Proof. + apply (binop_single_sound bc ExtFloat32.max); assumption. +Qed. + +Lemma invfs_sound: + forall v x, vmatch bc v x -> vmatch bc (ExtValues.invfs v) (invfs x). +Proof. + intros v x; + intro MATCH; + inversion MATCH; + cbn; + constructor. +Qed. + +Lemma triple_op_float_sound: + forall f a x b y c z, + vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.triple_op_float f a b c) + (triple_op_float f x y z). +Proof. + intros until z. + intros Hax Hby Hcz. + inv Hax; cbn; try constructor; + inv Hby; cbn; try constructor; + inv Hcz; cbn; try constructor. +Qed. + +Lemma triple_op_single_sound: + forall f a x b y c z, + vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.triple_op_single f a b c) + (triple_op_single f x y z). +Proof. + intros until z. + intros Hax Hby Hcz. + inv Hax; cbn; try constructor; + inv Hby; cbn; try constructor; + inv Hcz; cbn; try constructor. +Qed. + +Lemma fmaddf_sound : + forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.fmaddf a b c) (fmaddf x y z). +Proof. + intros. unfold ExtValues.fmaddf, fmaddf. + apply triple_op_float_sound; assumption. +Qed. + +Lemma fmaddfs_sound : + forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.fmaddfs a b c) (fmaddfs x y z). +Proof. + intros. unfold ExtValues.fmaddfs, fmaddfs. + apply triple_op_single_sound; assumption. +Qed. + +Lemma fmsubf_sound : + forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.fmsubf a b c) (fmsubf x y z). +Proof. + intros. unfold ExtValues.fmsubf, fmsubf. + apply triple_op_float_sound; assumption. +Qed. + +Lemma fmsubfs_sound : + forall a x b y c z, vmatch bc a x -> vmatch bc b y -> vmatch bc c z -> + vmatch bc (ExtValues.fmsubfs a b c) (fmsubfs x y z). +Proof. + intros. unfold ExtValues.fmsubfs, fmsubfs. + apply triple_op_single_sound; assumption. +Qed. +Hint Resolve minf_sound maxf_sound minfs_sound maxfs_sound invfs_sound fmaddf_sound fmaddfs_sound fmsubf_sound fmsubfs_sound : va. + +Theorem eval_static_condition_sound: + forall cond vargs m aargs, + list_forall2 (vmatch bc) vargs aargs -> + cmatch (eval_condition cond vargs m) (eval_static_condition cond aargs). +Proof. + intros until aargs; intros VM. inv VM. + destruct cond; auto with va. + inv H0. + destruct cond; cbn; eauto with va. + inv H2. + destruct cond; cbn; eauto with va. + destruct cond; auto with va. +Qed. + +Theorem eval_static_condition0_sound: + forall cond varg m aarg, + vmatch bc varg aarg -> + cmatch (eval_condition0 cond varg m) (eval_static_condition0 cond aarg). +Proof. + intros until aarg; intro VM. + destruct cond; cbn; eauto with va. +Qed. + +Lemma symbol_address_sound: + forall id ofs, + vmatch bc (Genv.symbol_address ge id ofs) (Ptr (Gl id ofs)). +Proof. + intros; apply symbol_address_sound; apply GENV. +Qed. + +Lemma symbol_address_sound_2: + forall id ofs, + vmatch bc (Genv.symbol_address ge id ofs) (Ifptr (Gl id ofs)). +Proof. + intros. unfold Genv.symbol_address. destruct (Genv.find_symbol ge id) as [b|] eqn:F. + constructor. constructor. apply GENV; auto. + constructor. +Qed. + +Hint Resolve symbol_address_sound symbol_address_sound_2: va. + +Ltac InvHyps := + match goal with + | [H: None = Some _ |- _ ] => discriminate + | [H: Some _ = Some _ |- _] => inv H + | [H1: match ?vl with nil => _ | _ :: _ => _ end = Some _ , + H2: list_forall2 _ ?vl _ |- _ ] => inv H2; InvHyps + | [H: (if Archi.ptr64 then _ else _) = Some _ |- _] => destruct Archi.ptr64 eqn:?; InvHyps + | _ => idtac + end. + +Theorem eval_static_addressing_sound: + forall addr vargs vres aargs, + eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = Some vres -> + list_forall2 (vmatch bc) vargs aargs -> + vmatch bc vres (eval_static_addressing addr aargs). +Proof. + unfold eval_addressing, eval_static_addressing; intros; + destruct addr; InvHyps; eauto with va. + rewrite Ptrofs.add_zero_l; eauto with va. +Qed. + +Theorem eval_static_addressing_sound_none: + forall addr vargs aargs, + eval_addressing ge (Vptr sp Ptrofs.zero) addr vargs = None -> + list_forall2 (vmatch bc) vargs aargs -> + (eval_static_addressing addr aargs) = Vbot. +Proof. + unfold eval_addressing, eval_static_addressing. + intros until aargs. intros Heval_none Hlist. + inv Hlist. + destruct addr; trivial; discriminate. + inv H0. + destruct addr; trivial; discriminate. + inv H2. + destruct addr; trivial; discriminate. + inv H3; + destruct addr; trivial; discriminate. +Qed. + +Lemma vmatch_vint_ntop1: + forall x y, vmatch bc (Vint x) (ntop1 y). +Proof. + intro. unfold ntop1, provenance. + destruct y; + destruct (va_strict tt); + constructor. +Qed. + +Lemma vmatch_vlong_ntop1: + forall x y, vmatch bc (Vlong x) (ntop1 y). +Proof. + intro. unfold ntop1, provenance. + destruct y; + destruct (va_strict tt); + constructor. +Qed. + +Hint Resolve vmatch_vint_ntop1 vmatch_vlong_ntop1: va. + +Theorem eval_static_operation_sound: + forall op vargs m vres aargs, + eval_operation ge (Vptr sp Ptrofs.zero) op vargs m = Some vres -> + list_forall2 (vmatch bc) vargs aargs -> + vmatch bc vres (eval_static_operation op aargs). +Proof. + unfold eval_operation, eval_static_operation, addx, revsubx, addxl, revsubxl; intros. + destruct op; InvHyps; eauto with va. + - destruct (propagate_float_constants tt); constructor. + - destruct (propagate_float_constants tt); constructor. + - rewrite Ptrofs.add_zero_l; eauto with va. + - replace(match Val.shl a1 (Vint (int_of_shift1_4 shift)) with + | Vint n2 => Vint (Int.add n n2) + | Vptr b2 ofs2 => + if Archi.ptr64 + then Vundef + else Vptr b2 (Ptrofs.add ofs2 (Ptrofs.of_int n)) + | _ => Vundef + end) with (Val.add (Vint n) (Val.shl a1 (Vint (int_of_shift1_4 shift)))). + + eauto with va. + + destruct a1; destruct shift; reflexivity. + - (*revsubimm*) inv H1; constructor. + - replace (match Val.shl a1 (Vint (int_of_shift1_4 shift)) with + | Vint n2 => Vint (Int.sub n n2) + | _ => Vundef + end) with (Val.sub (Vint n) (Val.shl a1 (Vint (int_of_shift1_4 shift)))). + + eauto with va. + + destruct n; destruct shift; reflexivity. + - (* shrx *) + inv H1; cbn; try constructor. + all: destruct Int.ltu; [cbn | constructor; fail]. + all: auto with va. + - inv H1; constructor. + - replace (match Val.shll a1 (Vint (int_of_shift1_4 shift)) with + | Vlong n2 => Vlong (Int64.sub n n2) + | _ => Vundef + end) with (Val.subl (Vlong n) (Val.shll a1 (Vint (int_of_shift1_4 shift)))). + + eauto with va. + + destruct a1; destruct shift; reflexivity. + - apply of_optbool_sound. eapply eval_static_condition_sound; eauto. + + (* extfz *) + - unfold extfz, eval_static_extfz. + destruct (is_bitfield _ _). + + inv H1; constructor. + + constructor. + + (* extfs *) + - unfold extfs, eval_static_extfs. + destruct (is_bitfield _ _). + + inv H1; constructor. + + constructor. + + (* extfzl *) + - unfold extfzl, eval_static_extfzl. + destruct (is_bitfieldl _ _). + + inv H1; constructor. + + constructor. + + (* extfsl *) + - unfold extfsl, eval_static_extfsl. + destruct (is_bitfieldl _ _). + + inv H1; constructor. + + constructor. + + (* insf *) + - unfold insf, eval_static_insf. + destruct (is_bitfield _ _). + + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor. + + constructor. + (* insfl *) + - unfold insfl, eval_static_insfl. + destruct (is_bitfieldl _ _). + + inv H1; inv H0; cbn; try constructor; destruct (Int.ltu _ _); cbn; constructor. + + constructor. + (* select *) + - apply select_sound; auto. eapply eval_static_condition0_sound; eauto. + (* select imm *) + - apply select_sound; auto with va. eapply eval_static_condition0_sound; eauto. + (* select long imm *) + - apply select_sound; auto with va. eapply eval_static_condition0_sound; eauto. +Qed. + +End SOUNDNESS. + diff --git a/kvx/bitmasks.py b/kvx/bitmasks.py new file mode 100755 index 00000000..9f6987d6 --- /dev/null +++ b/kvx/bitmasks.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +def bitmask(to, fr): + bit_to = 1<<to + return (bit_to | (bit_to - 1)) & ~((1<<fr)-1) + +def bitmask2(to, fr): + bit_to = 1<<to + return bit_to + (bit_to - (1 << fr)) + +for stop in range(32): + for start in range(stop+1): + assert(bitmask(stop, start) == bitmask2(stop, start)) diff --git a/kvx/extractionMachdep.v b/kvx/extractionMachdep.v new file mode 100644 index 00000000..2e409931 --- /dev/null +++ b/kvx/extractionMachdep.v @@ -0,0 +1,32 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* Additional extraction directives specific to the RISC-V port *) + +Require Archi Asm. + +(* Archi *) + +Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ". +Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *) + +Extract Constant Peephole.print_found_store => +"fun offset x -> Printf.printf ""found offset = %ld\n"" (Camlcoq.camlint_of_coqint offset); x". + +(* Asm *) +(* +Extract Constant Asm.low_half => "fun _ _ _ -> assert false". +Extract Constant Asm.high_half => "fun _ _ _ -> assert false". +*) diff --git a/kvx/unittest/Makefile b/kvx/unittest/Makefile new file mode 100644 index 00000000..fcbede2d --- /dev/null +++ b/kvx/unittest/Makefile @@ -0,0 +1,13 @@ +# Needs to be called from CompCert root directory +# $ make -f kvx/unittest/Makefile postpass_test + +include Makefile.extr + +TEST_CMX=kvx/unittest/postpass_test.cmx + +UNITTEST_OBJS:=$(shell $(MODORDER) $(TEST_CMX)) + +postpass_test: $(UNITTEST_OBJS) + @echo "Linking $@ $(UNITTEST_OBJS)" + @$(OCAMLOPT) -o $@ $(LIBS) $(LINK_OPT) $+ + diff --git a/kvx/unittest/postpass_test.ml b/kvx/unittest/postpass_test.ml new file mode 100644 index 00000000..434bfaf7 --- /dev/null +++ b/kvx/unittest/postpass_test.ml @@ -0,0 +1,12 @@ +open Printf +open Asmblock +open Integers +open PostpassSchedulingOracle +open BinNums + +let test_schedule_sd = + let sd_inst = PStore (PStoreRRO (Psd, GPR12, GPR16, (Ofsimm (Ptrofs.of_int @@ Int.intval Z0)))) + in let bb = { header = []; body = [sd_inst]; exit = None } + in List.iter print_bb (smart_schedule bb) + +let _ = test_schedule_sd; printf "Done\n" |