diff options
Diffstat (limited to 'aarch64')
36 files changed, 16001 insertions, 2451 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v index 4911db73..378ca0d1 100644 --- a/aarch64/Archi.v +++ b/aarch64/Archi.v @@ -88,6 +88,10 @@ Global Opaque ptr64 big_endian splitlong (** Which ABI to implement *) +Parameter pic_code: unit -> bool. + +Definition has_notrap_loads := false. + Inductive abi_kind: Type := | AAPCS64 (**r ARM's standard as used in Linux and other ELF platforms *) | Apple. (**r the variant used in macOS and iOS *) diff --git a/aarch64/Asm.v b/aarch64/Asm.v index b5f4c838..e5111220 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -10,13 +10,20 @@ (* *) (* *********************************************************************) -(** Abstract syntax and semantics for AArch64 assembly language *) +(** Abstract syntax and semantics for AArch64 assembly language + +W.r.t Asmblock, this is a "flat" syntax and semantics of "aarch64" assembly: + - without the basic block structure + - without the hierarchy of instructions. + +*) Require Import Coqlib Zbits Maps. Require Import AST Integers Floats. Require Import Values Memory Events Globalenvs Smallstep. Require Import Locations Conventions. Require Stacklayout. +Require Import OptionMonad. (** * Abstract syntax *) @@ -43,6 +50,9 @@ Coercion RR1: ireg >-> iregsp. Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}. Proof. decide equality. Defined. +Lemma iregsp_eq: forall (x y: iregsp), {x=y} + {x<>y}. +Proof. decide equality; apply ireg_eq. Defined. + (** In assembly files, [Dn] denotes the low 64-bit of a vector register, and [Sn] the low 32 bits. *) @@ -66,21 +76,29 @@ Inductive crbit: Type := Lemma crbit_eq: forall (x y: crbit), {x=y} + {x<>y}. Proof. decide equality. Defined. +Inductive dreg : Type := + | IR: iregsp -> dreg (**r 64- or 32-bit integer registers *) + | FR: freg -> dreg. (**r double- or single-precision float registers *) + (** We model the following registers of the ARM architecture. *) Inductive preg: Type := - | IR: ireg -> preg (**r 64- or 32-bit integer registers *) - | FR: freg -> preg (**r double- or single-precision float registers *) - | CR: crbit -> preg (**r bits in the condition register *) - | SP: preg (**r register X31 used as stack pointer *) - | PC: preg. (**r program counter *) - -Coercion IR: ireg >-> preg. -Coercion FR: freg >-> preg. + | DR: dreg -> preg (** see dreg **) + | CR: crbit -> preg (**r bits in the condition register *) + | PC: preg. (**r program counter *) + +(* XXX: ireg no longer coerces to preg *) +Coercion IR: iregsp >-> dreg. +Coercion FR: freg >-> dreg. +Coercion DR: dreg >-> preg. +Definition SP:preg := XSP. Coercion CR: crbit >-> preg. +Lemma dreg_eq: forall (x y: dreg), {x=y} + {x<>y}. +Proof. decide equality. apply iregsp_eq. apply freg_eq. Defined. + Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}. -Proof. decide equality. apply ireg_eq. apply freg_eq. apply crbit_eq. Defined. +Proof. decide equality. apply dreg_eq. apply crbit_eq. Defined. Module PregEq. Definition t := preg. @@ -92,8 +110,6 @@ Module Pregmap := EMap(PregEq). Definition preg_of_iregsp (r: iregsp) : preg := match r with RR1 r => IR r | XSP => SP end. -Coercion preg_of_iregsp: iregsp >-> preg. - (** Conventional name for return address ([RA]) *) Notation "'RA'" := X30 (only parsing) : asm. @@ -177,14 +193,16 @@ Inductive instruction: Type := | Pldrsh (sz: isize) (rd: ireg) (a: addressing) (**r load int16, sign-extend *) | Pldrzw (rd: ireg) (a: addressing) (**r load int32, zero-extend to int64 *) | Pldrsw (rd: ireg) (a: addressing) (**r load int32, sign-extend to int64 *) - | Pldp (rd1 rd2: ireg) (a: addressing) (**r load two int64 *) + | Pldpw (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int32 *) + | Pldpx (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *) | Pstrw (rs: ireg) (a: addressing) (**r store int32 *) | Pstrw_a (rs: ireg) (a: addressing) (**r store int32 as any32 *) | Pstrx (rs: ireg) (a: addressing) (**r store int64 *) | Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *) | Pstrb (rs: ireg) (a: addressing) (**r store int8 *) | Pstrh (rs: ireg) (a: addressing) (**r store int16 *) - | Pstp (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int32 *) + | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *) (** Integer arithmetic, immediate *) | Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *) | Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *) @@ -255,9 +273,13 @@ Inductive instruction: Type := | Pldrs (rd: freg) (a: addressing) (**r load float32 (single precision) *) | Pldrd (rd: freg) (a: addressing) (**r load float64 (double precision) *) | Pldrd_a (rd: freg) (a: addressing) (**r load float64 as any64 *) + | Pldps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float32 *) + | Pldpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float64 *) | Pstrs (rs: freg) (a: addressing) (**r store float32 *) | Pstrd (rs: freg) (a: addressing) (**r store float64 *) | Pstrd_a (rs: freg) (a: addressing) (**r store float64 as any64 *) + | Pstps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float32 *) + | Pstpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float64 *) (** Floating-point move *) | Pfmov (rd r1: freg) | Pfmovimms (rd: freg) (f: float32) (**r load float32 constant *) @@ -310,9 +332,30 @@ Definition code := list instruction. Record function : Type := mkfunction { fn_sig: signature; fn_code: code }. Definition fundef := AST.fundef function. Definition program := AST.program fundef unit. +Definition genv := Genv.t fundef unit. + +(** The two functions below axiomatize how the linker processes + symbolic references [symbol + offset]. It computes the + difference between the address and the PC, and splits it into: + - 12 low bits usable as an offset in an addressing mode; + - 21 high bits usable as argument to the ADRP instruction. + + In CompCert's model, we cannot really describe PC-relative addressing, + but we can claim that the address of [symbol + offset] decomposes + as the sum of + - a low part, usable as an offset in an addressing mode; + - a high part, usable as argument to the ADRP instruction. *) + +Parameter symbol_low: genv -> ident -> ptrofs -> val. +Parameter symbol_high: genv -> ident -> ptrofs -> val. + +Axiom symbol_high_low: + forall (ge: genv) (id: ident) (ofs: ptrofs), + Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs. (** * Operational semantics *) + (** The semantics operates over a single mapping from registers (type [preg]) to values. We maintain (but do not enforce) the convention that integer registers are mapped to values of @@ -320,22 +363,19 @@ Definition program := AST.program fundef unit. and condition bits to either [Vzero] or [Vone]. *) Definition regset := Pregmap.t val. -Definition genv := Genv.t fundef unit. (** The value of an [ireg0] is either the value of the integer register, or 0. *) -Definition ir0w (rs: regset) (r: ireg0) : val := - match r with RR0 r => rs (IR r) | XZR => Vint Int.zero end. -Definition ir0x (rs: regset) (r: ireg0) : val := - match r with RR0 r => rs (IR r) | XZR => Vlong Int64.zero end. +Definition ir0 (is:isize) (rs: regset) (r: ireg0) : val := + match r with RR0 r => rs (IR r) | XZR => if is (* is W *) then Vint Int.zero else Vlong Int64.zero end. (** Concise notations for accessing and updating the values of registers. *) Notation "a # b" := (a b) (at level 1, only parsing) : asm. Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm. -Notation "a ## b" := (ir0w a b) (at level 1, only parsing) : asm. -Notation "a ### b" := (ir0x a b) (at level 1, only parsing) : asm. +Notation "a ## b" := (ir0 W a b) (at level 1, only parsing) : asm. +Notation "a ### b" := (ir0 X a b) (at level 1, only parsing) : asm. Open Scope asm. @@ -369,84 +409,16 @@ Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset := | BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs) end. -(** The two functions below axiomatize how the linker processes - symbolic references [symbol + offset]. It computes the - difference between the address and the PC, and splits it into: - - 12 low bits usable as an offset in an addressing mode; - - 21 high bits usable as argument to the ADRP instruction. - - In CompCert's model, we cannot really describe PC-relative addressing, - but we can claim that the address of [symbol + offset] decomposes - as the sum of - - a low part, usable as an offset in an addressing mode; - - a high part, usable as argument to the ADRP instruction. *) - -Parameter symbol_low: genv -> ident -> ptrofs -> val. -Parameter symbol_high: genv -> ident -> ptrofs -> val. - -Axiom symbol_high_low: - forall (ge: genv) (id: ident) (ofs: ptrofs), - Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs. - -Section RELSEM. - -Variable ge: genv. - -(** Looking up instructions in a code sequence by position. *) - -Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction := - match c with - | nil => None - | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il - end. - -(** Position corresponding to a label *) - -Definition is_label (lbl: label) (instr: instruction) : bool := - match instr with - | Plabel lbl' => if peq lbl lbl' then true else false - | _ => false - end. - -Lemma is_label_correct: - forall lbl instr, - if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl. -Proof. - intros. destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence. -Qed. - -Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z := - match c with - | nil => None - | instr :: c' => - if is_label lbl instr then Some (pos + 1) else label_pos lbl (pos + 1) c' - end. - (** The semantics is purely small-step and defined as a function from the current state (a register set + a memory state) to either [Next rs' m'] where [rs'] and [m'] are the updated register set and memory state after execution of the instruction at [rs#PC], or [Stuck] if the processor is stuck. *) -Inductive outcome: Type := - | Next: regset -> mem -> outcome - | Stuck: outcome. - -(** Manipulations over the [PC] register: continuing with the next - instruction ([nextinstr]) or branching to a label ([goto_label]). *) - -Definition nextinstr (rs: regset) := - rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one). - -Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) := - match label_pos lbl 0 (fn_code f) with - | None => Stuck - | Some pos => - match rs#PC with - | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m - | _ => Stuck - end - end. +Record state: Type := State { _rs: regset; _m: mem }. +Definition outcome := option state. +Definition Next rs m: outcome := Some (State rs m). +Definition Stuck: outcome := None. (** Testing a condition *) @@ -516,10 +488,10 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool := (** Integer "is zero?" test *) -Definition eval_testzero (sz: isize) (v: val) (m: mem): option bool := +Definition eval_testzero (sz: isize) (v: val): option bool := match sz with - | W => Val.cmpu_bool (Mem.valid_pointer m) Ceq v (Vint Int.zero) - | X => Val.cmplu_bool (Mem.valid_pointer m) Ceq v (Vlong Int64.zero) + | W => Val.mxcmpu_bool Ceq v (Vint Int.zero) + | X => Val.mxcmplu_bool Ceq v (Vlong Int64.zero) end. (** Integer "bit is set?" test *) @@ -530,48 +502,19 @@ Definition eval_testbit (sz: isize) (v: val) (n: int): option bool := | X => Val.cmpl_bool Cne (Val.andl v (Vlong (Int64.shl' Int64.one n))) (Vlong Int64.zero) end. -(** Evaluating an addressing mode *) - -Definition eval_addressing (a: addressing) (rs: regset): val := - match a with - | ADimm base n => Val.addl rs#base (Vlong n) - | ADreg base r => Val.addl rs#base rs#r - | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n)) - | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n)) - | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n)) - | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs) - | ADpostincr base n => Vundef (* not modeled yet *) - end. - -(** Auxiliaries for memory accesses *) - -Definition exec_load (chunk: memory_chunk) (transf: val -> val) - (a: addressing) (r: preg) (rs: regset) (m: mem) := - match Mem.loadv chunk m (eval_addressing a rs) with - | None => Stuck - | Some v => Next (nextinstr (rs#r <- (transf v))) m - end. - -Definition exec_store (chunk: memory_chunk) - (a: addressing) (v: val) - (rs: regset) (m: mem) := - match Mem.storev chunk m (eval_addressing a rs) v with - | None => Stuck - | Some m' => Next (nextinstr rs) m' - end. (** Comparisons *) -Definition compare_int (rs: regset) (v1 v2: val) (m: mem) := +Definition compare_int (rs: regset) (v1 v2: val) := rs#CN <- (Val.negative (Val.sub v1 v2)) - #CZ <- (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2) - #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2) + #CZ <- (Val.mxcmpu Ceq v1 v2) + #CC <- (Val.mxcmpu Cge v1 v2) #CV <- (Val.sub_overflow v1 v2). -Definition compare_long (rs: regset) (v1 v2: val) (m: mem) := +Definition compare_long (rs: regset) (v1 v2: val) := rs#CN <- (Val.negativel (Val.subl v1 v2)) - #CZ <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2)) - #CC <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2)) + #CZ <- (Val.mxcmplu Ceq v1 v2) + #CC <- (Val.mxcmplu Cge v1 v2) #CV <- (Val.subl_overflow v1 v2). (** Semantics of [fcmp] instructions: @@ -672,19 +615,230 @@ Definition float64_of_bits (v: val): val := | _ => Vundef end. -(** Execution of a single instruction [i] in initial state - [rs] and [m]. Return updated state. For instructions - that correspond to actual AArch64 instructions, the cases are - straightforward transliterations of the informal descriptions - given in the ARMv8 reference manuals. For pseudo-instructions, - refer to the informal descriptions given above. - - Note that we set to [Vundef] the registers used as temporaries by - the expansions of the pseudo-instructions, so that the code we - generate cannot use those registers to hold values that must - survive the execution of the pseudo-instruction. +(* Recognition of FP numbers that are supported by the fmov #imm instructions: + "a normalized binary floating point encoding with 1 sign bit, + 4 bits of fraction and a 3-bit exponent" *) +Definition is_immediate_float64 (f: float): bool := + let bits := Float.to_bits f in + let exp := + Int64.sub + (Int64.and (Int64.shr' bits (Int.repr 52)) + (Int64.repr 2047)) (Int64.repr 1023) in + let mant := + Int64.and bits (Int64.repr 4503599627370495) in + andb (Int64.cmp Cge exp (Int64.repr (-3))) + (andb (Int64.cmp Cle exp (Int64.repr 4)) + (Int64.eq + (Int64.and mant + (Int64.repr 4222124650659840)) mant)). + +Definition is_immediate_float32 (f: float32): bool := + let bits := Float32.to_bits f in + let exp := + Int.sub + (Int.and (Int.shr bits (Int.repr 23)) + (Int.repr 255)) (Int.repr 127) in + let mant := + Int.and bits (Int.repr 8388607) in + andb (Int.cmp Cge exp (Int.repr (-3))) + (andb (Int.cmp Cle exp (Int.repr 4)) + (Int.eq + (Int.and mant + (Int.repr 7864320)) mant)). + +(** Translation of the LTL/Linear/Mach view of machine registers + to the AArch64 view. Note that no LTL register maps to [X16], + [X18], nor [X30]. + [X18] is reserved as the platform register and never used by the + code generated by CompCert. + [X30] is used for the return address, and can also be used as temporary. + [X16] can be used as temporary. *) + +Definition dreg_of (r: mreg) : dreg := + match r with + | R0 => X0 | R1 => X1 | R2 => X2 | R3 => X3 + | R4 => X4 | R5 => X5 | R6 => X6 | R7 => X7 + | R8 => X8 | R9 => X9 | R10 => X10 | R11 => X11 + | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15 + | R17 => X17 | R19 => X19 + | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23 + | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27 + | R28 => X28 | R29 => X29 + | F0 => D0 | F1 => D1 | F2 => D2 | F3 => D3 + | F4 => D4 | F5 => D5 | F6 => D6 | F7 => D7 + | F8 => D8 | F9 => D9 | F10 => D10 | F11 => D11 + | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15 + | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19 + | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23 + | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27 + | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31 + end. + +Definition preg_of (r: mreg) : preg := + dreg_of r. + +(** Undefine all registers except SP and callee-save registers *) + +Definition undef_caller_save_regs (rs: regset) : regset := + fun r => + if preg_eq r SP + || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs)) + then rs r + else Vundef. + +(** Extract the values of the arguments of an external call. + We exploit the calling conventions from module [Conventions], except that + we use AArch64 registers instead of locations. *) + +Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := + | extcall_arg_reg: forall r, + extcall_arg rs m (R r) (rs (preg_of r)) + | extcall_arg_stack: forall ofs ty bofs v, + bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> + Mem.loadv (chunk_of_type ty) m + (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v -> + extcall_arg rs m (Locations.S Outgoing ofs ty) v. + +Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop := + | extcall_arg_one: forall l v, + extcall_arg rs m l v -> + extcall_arg_pair rs m (One l) v + | extcall_arg_twolong: forall hi lo vhi vlo, + extcall_arg rs m hi vhi -> + extcall_arg rs m lo vlo -> + extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo). + +Definition extcall_arguments + (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := + list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args. + +Definition loc_external_result (sg: signature) : rpair preg := + map_rpair preg_of (loc_result sg). + +(** Looking up instructions in a code sequence by position. *) + +Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction := + match c with + | nil => None + | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il + end. + +(** Position corresponding to a label *) + +Definition is_label (lbl: label) (instr: instruction) : bool := + match instr with + | Plabel lbl' => if peq lbl lbl' then true else false + | _ => false + end. + +Lemma is_label_correct: + forall lbl instr, + if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl. +Proof. + intros. destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence. +Qed. + +Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z := + match c with + | nil => None + | instr :: c' => + if is_label lbl instr then Some pos else label_pos lbl (pos + 1) c' + end. + +Definition nextinstr (rs: regset) := + rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one). + +Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) := + match label_pos lbl 0 (fn_code f) with + | None => Stuck + | Some pos => + match rs#PC with + | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m + | _ => Stuck + end + end. + +Section RELSEM. + +Variable ge: genv. + +(** Evaluating an addressing mode *) + +Definition eval_addressing (a: addressing) (rs: regset): val := + match a with + | ADimm base n => Val.addl rs#base (Vlong n) + | ADreg base r => Val.addl rs#base rs#r + | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n)) + | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n)) + | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n)) + | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs) + | ADpostincr base n => Vundef + end. + +Definition is_pair_addressing_mode_correct (a: addressing): bool := + match a with + | ADimm _ _ => true + | _ => false + end. + +Definition get_offset_addr (a: addressing) (ofs: Z) : addressing := + match a with + | ADimm base n => (ADimm base (Int64.add n (Int64.repr ofs))) + | _ => a + end. + +(** Auxiliaries for memory accesses *) + +Definition exec_load (chunk: memory_chunk) (transf: val -> val) + (a: addressing) (r: preg) (rs: regset) (m: mem) := + match Mem.loadv chunk m (eval_addressing a rs) with + | None => Stuck + | Some v => Next (nextinstr (rs#r <- (transf v))) m + end. + +Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val) + (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.loadv chk1 m addr with + | None => Stuck + | Some v1 => + match Mem.loadv chk2 m addr' with + | None => Stuck + | Some v2 => + Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m + end + end + else Stuck. + +Definition exec_store (chunk: memory_chunk) + (a: addressing) (v: val) + (rs: regset) (m: mem) := + match Mem.storev chunk m (eval_addressing a rs) v with + | None => Stuck + | Some m' => Next (nextinstr rs) m' + end. + +Definition exec_store_double (chk1 chk2: memory_chunk) + (a: addressing) (v1 v2: val) + (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.storev chk1 m addr v1 with + | None => Stuck + | Some m' => match Mem.storev chk2 m' addr' v2 with + | None => Stuck + | Some m'' => Next (nextinstr rs) m'' + end + end + else Stuck. + Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome := match i with (** Branches *) @@ -707,13 +861,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pret r => Next (rs#PC <- (rs#r)) m | Pcbnz sz r lbl => - match eval_testzero sz rs#r m with + match eval_testzero sz rs#r with | Some true => Next (nextinstr rs) m | Some false => goto_label f lbl rs m | None => Stuck end | Pcbz sz r lbl => - match eval_testzero sz rs#r m with + match eval_testzero sz rs#r with | Some true => goto_label f lbl rs m | Some false => Next (nextinstr rs) m | None => Stuck @@ -781,13 +935,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Psubimm X rd r1 n => Next (nextinstr (rs#rd <- (Val.subl rs#r1 (Vlong (Int64.repr n))))) m | Pcmpimm W r1 n => - Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)) m)) m + Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)))) m | Pcmpimm X r1 n => - Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)) m)) m + Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)))) m | Pcmnimm W r1 n => - Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))) m)) m + Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))))) m | Pcmnimm X r1 n => - Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))) m)) m + Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))))) m (** Move integer register *) | Pmov rd r1 => Next (nextinstr (rs#rd <- (rs#r1))) m @@ -805,9 +959,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Porrimm X rd r1 n => Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Vlong (Int64.repr n))))) m | Ptstimm W r1 n => - Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero) m)) m + Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero))) m | Ptstimm X r1 n => - Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero) m)) m + Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero))) m (** Move wide immediate *) | Pmovz W rd n pos => Next (nextinstr (rs#rd <- (Vint (Int.repr (Z.shiftl n pos))))) m @@ -853,22 +1007,22 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Psub X rd r1 r2 s => Next (nextinstr (rs#rd <- (Val.subl rs###r1 (eval_shift_op_long rs#r2 s)))) m | Pcmp W r1 r2 s => - Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s) m)) m + Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s))) m | Pcmp X r1 r2 s => - Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s) m)) m + Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s))) m | Pcmn W r1 r2 s => - Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)) m)) m + Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)))) m | Pcmn X r1 r2 s => - Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)) m)) m + Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)))) m (** Integer arithmetic, extending register *) | Paddext rd r1 r2 x => Next (nextinstr (rs#rd <- (Val.addl rs#r1 (eval_extend rs#r2 x)))) m | Psubext rd r1 r2 x => Next (nextinstr (rs#rd <- (Val.subl rs#r1 (eval_extend rs#r2 x)))) m | Pcmpext r1 r2 x => - Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x) m)) m + Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x))) m | Pcmnext r1 r2 x => - Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)) m)) m + Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)))) m (** Logical, shifted register *) | Pand W rd r1 r2 s => Next (nextinstr (rs#rd <- (Val.and rs##r1 (eval_shift_op_int rs#r2 s)))) m @@ -895,9 +1049,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Porn X rd r1 r2 s => Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m | Ptst W r1 r2 s => - Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero) m)) m + Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero))) m | Ptst X r1 r2 s => - Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero) m)) m + Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero))) m (** Variable shifts *) | Pasrv W rd r1 r2 => Next (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2))) m @@ -969,10 +1123,16 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out (** Floating-point move *) | Pfmov rd r1 => Next (nextinstr (rs#rd <- (rs#r1))) m - | Pfmovimms rd f => - Next (nextinstr (rs#X16 <- Vundef #rd <- (Vsingle f))) m + | Pfmovimms rd f => + if is_immediate_float32 f then + Next (nextinstr (rs#rd <- (Vsingle f))) m + else + Next (nextinstr ((rs#rd <- (Vsingle f))#X16 <- Vundef)) m | Pfmovimmd rd f => - Next (nextinstr (rs#X16 <- Vundef #rd <- (Vfloat f))) m + if is_immediate_float64 f then + Next (nextinstr (rs#rd <- (Vfloat f))) m + else + Next (nextinstr ((rs#rd <- (Vfloat f))#X16 <- Vundef)) m | Pfmovi S rd r1 => Next (nextinstr (rs#rd <- (float32_of_bits rs##r1))) m | Pfmovi D rd r1 => @@ -1102,10 +1262,27 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | _ => Stuck end | Pbuiltin ef args res => Stuck (**r treated specially below *) + (** loads and stores pairs int/int64 *) + | Pldpw rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + | Pldpx rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + | Pstpw rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + | Pstpx rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + (** loads and stores pairs floating-point *) + | Pldps rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + | Pldpd rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + | Pstps rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + | Pstpd rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + | Pnop => Next (nextinstr rs) m (** The following instructions and directives are not generated directly by Asmgen, so we do not model them. *) - | Pldp _ _ _ - | Pstp _ _ _ | Pcls _ _ _ | Pclz _ _ _ | Prev _ _ _ @@ -1118,86 +1295,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pfnmsub _ _ _ _ _ | Pfmax _ _ _ _ | Pfmin _ _ _ _ - | Pnop | Pcfi_adjust _ | Pcfi_rel_offset _ => Stuck end. -(** Translation of the LTL/Linear/Mach view of machine registers - to the AArch64 view. Note that no LTL register maps to [X16], - [X18], nor [X30]. - [X18] is reserved as the platform register and never used by the - code generated by CompCert. - [X30] is used for the return address, and can also be used as temporary. - [X16] can be used as temporary. *) - -Definition preg_of (r: mreg) : preg := - match r with - | R0 => X0 | R1 => X1 | R2 => X2 | R3 => X3 - | R4 => X4 | R5 => X5 | R6 => X6 | R7 => X7 - | R8 => X8 | R9 => X9 | R10 => X10 | R11 => X11 - | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15 - | R17 => X17 | R19 => X19 - | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23 - | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27 - | R28 => X28 | R29 => X29 - | F0 => D0 | F1 => D1 | F2 => D2 | F3 => D3 - | F4 => D4 | F5 => D5 | F6 => D6 | F7 => D7 - | F8 => D8 | F9 => D9 | F10 => D10 | F11 => D11 - | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15 - | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19 - | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23 - | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27 - | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31 - end. - -(** Undefine all registers except SP and callee-save registers *) - -Definition undef_caller_save_regs (rs: regset) : regset := - fun r => - if preg_eq r SP - || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs)) - then rs r - else Vundef. - -(** Extract the values of the arguments of an external call. - We exploit the calling conventions from module [Conventions], except that - we use AArch64 registers instead of locations. *) - -Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := - | extcall_arg_reg: forall r, - extcall_arg rs m (R r) (rs (preg_of r)) - | extcall_arg_stack: forall ofs ty bofs v, - bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv (chunk_of_type ty) m - (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v -> - extcall_arg rs m (Locations.S Outgoing ofs ty) v. - -Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop := - | extcall_arg_one: forall l v, - extcall_arg rs m l v -> - extcall_arg_pair rs m (One l) v - | extcall_arg_twolong: forall hi lo vhi vlo, - extcall_arg rs m hi vhi -> - extcall_arg rs m lo vlo -> - extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo). - -Definition extcall_arguments - (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := - list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args. - -Definition loc_external_result (sg: signature) : rpair preg := - map_rpair preg_of (loc_result sg). - -(** Execution of the instruction at [rs#PC]. *) - -Inductive state: Type := - | State: regset -> mem -> state. - Inductive step: state -> trace -> state -> Prop := | exec_step_internal: - forall b ofs f i rs m rs' m', + forall b ofs (f:function) i rs m rs' m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal f) -> find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some i -> @@ -1212,7 +1317,7 @@ Inductive step: state -> trace -> state -> Prop := external_call ef ge vargs m t vres m' -> rs' = nextinstr (set_res res vres - (undef_regs (IR X16 :: IR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) -> + (undef_regs (DR X16 :: DR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) -> step (State rs m) t (State rs' m') | exec_step_external: forall b ef args res rs m t rs' m', @@ -1308,11 +1413,11 @@ Qed. Definition data_preg (r: preg) : bool := match r with - | IR X16 => false - | IR X30 => false - | IR _ => true - | FR _ => true + | DR (IR X16) => false + | DR (IR X30) => false + | DR (IR _) => true + | DR (FR _) => true | CR _ => false - | SP => true + (* | SP => true; subsumed by IR (iregsp) *) | PC => false end. diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v new file mode 100644 index 00000000..073f1f4c --- /dev/null +++ b/aarch64/Asmblock.v @@ -0,0 +1,1049 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Justus Fasse UGA, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + + +(* Asmblock language for aarch64 + +WORK IN PROGRESS: we want to define an Asmblock syntax, with an Asmblock semantics +(e.g. we don't need the parallel semantics of Asmvliw) + + +NOTE: this file is inspired from + - aarch64/Asm.v + - kvx/Asmvliw.v (only the Asmblock syntax) + - kvx/Asmblock.v +*) + + +(** Abstract syntax and semantics for AArch64 assembly language *) + +Require Import Coqlib Zbits Maps. +Require Import AST Integers Floats. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Locations Conventions. +Require Stacklayout. +Require Import OptionMonad Asm. +Require Import Lia. +Require Export Asm. + +Local Open Scope option_monad_scope. + +Notation regset := Asm.regset. + +(** * Abstract syntax *) + +(* First task: splitting the big [instruction] type below into CFI and basic instructions. + Actually a finer splitting in order to regroup "similar" instructions could be much better for automation of the scheduler proof! + e.g. "similar" means identical "interface" w.r.t. pseudo-registers when translated to AbstractBB, + or with a "similar" semantics. + + see example of loads below. +*) + +(** Control Flow instructions + +*) +Inductive cf_instruction : Type := + | Pb (lbl: label) (**r branch *) + | Pbc (c: testcond) (lbl: label) (**r conditional branch *) + | Pbl (id: ident) (sg: signature) (**r jump to function and link *) + | Pbs (id: ident) (sg: signature) (**r jump to function *) + | Pblr (r: ireg) (sg: signature) (**r indirect jump and link *) + | Pbr (r: ireg) (sg: signature) (**r indirect jump *) + | Pret (r: ireg) (**r return *) + | Pcbnz (sz: isize) (r: ireg) (lbl: label) (**r branch if not zero *) + | Pcbz (sz: isize) (r: ireg) (lbl: label) (**r branch if zero *) + | Ptbnz (sz: isize) (r: ireg) (n: int) (lbl: label) (**r branch if bit n is not zero *) + | Ptbz (sz: isize) (r: ireg) (n: int) (lbl: label) (**r branch if bit n is zero *) + (** Pseudo-instructions *) + | Pbtbl (r1: ireg) (tbl: list label) (**r N-way branch through a jump table *) + . + +(* +A builtin is considered as a control-flow instruction, because it could emit a trace (cf. Machblock semantics). +Here, we do not need to have builtins alone in basic-blocks (on the contrary to KVX bundles). +*) + +Inductive control: Type := + | PCtlFlow (i: cf_instruction) + (** Pseudo-instructions *) + | Pbuiltin (ef: external_function) + (args: list (builtin_arg dreg)) (res: builtin_res dreg) (**r built-in function (pseudo) *) + . + +(** Basic instructions *) + +(* Loads waiting for (rd: dreg) (a: addressing) + * XXX Use dreg because exec_load is defined in terms of it, thus allowing us to + * treat integer and floating point loads the same. *) +Inductive load_rd_a: Type := + (* Integer load *) + | Pldrw (**r load int32 *) + | Pldrw_a (**r load int32 as any32 *) + | Pldrx (**r load int64 *) + | Pldrx_a (**r load int64 as any64 *) + | Pldrb (sz: isize) (**r load int8, zero-extend *) + | Pldrsb (sz: isize) (**r load int8, sign-extend *) + | Pldrh (sz: isize) (**r load int16, zero-extend *) + | Pldrsh (sz: isize) (**r load int16, sign-extend *) + | Pldrzw (**r load int32, zero-extend to int64 *) + | Pldrsw (**r load int32, sign-extend to int64 *) + (* Floating-point load *) + | Pldrs (**r load float32 (single precision) *) + | Pldrd (**r load float64 (double precision) *) + | Pldrd_a (**r load float64 as any64 *) + . + +Inductive load_rd1_rd2_a: Type := + | Pldpw + | Pldpx + | Pldps + | Pldpd + . + +Inductive ld_instruction: Type := + | PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing) + | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *) + . + +Inductive store_rs_a : Type := + (* Integer store *) + | Pstrw (**r store int32 *) + | Pstrw_a (**r store int32 as any32 *) + | Pstrx (**r store int64 *) + | Pstrx_a (**r store int64 as any64 *) + | Pstrb (**r store int8 *) + | Pstrh (**r store int16 *) + (* Floating-point store *) + | Pstrs (**r store float32 *) + | Pstrd (**r store float64 *) + | Pstrd_a (**r store float64 as any64 *) + . + +Inductive store_rs1_rs2_a : Type := + | Pstpw + | Pstpx + | Pstps + | Pstpd + . + +Inductive st_instruction : Type := + | PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing) + | Pstp (st: store_rs1_rs2_a) (rs1 rs2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *) + . + +Inductive arith_p : Type := + (** PC-relative addressing *) + | Padrp (id: ident) (ofs: ptrofs) (**r set [rd] to high address of [id + ofs] *) + (** Move wide immediate *) + | Pmovz (sz: isize) (n: Z) (pos: Z) (**r move [n << pos] to [rd] *) + | Pmovn (sz: isize) (n: Z) (pos: Z) (**r move [NOT(n << pos)] to [rd] *) + (** Floating-point move *) + | Pfmovimms (f: float32) (**r load float32 constant *) + | Pfmovimmd (f: float) (**r load float64 constant *) +. + +Inductive arith_comparison_p : Type := + (** Floating-point comparison *) + | Pfcmp0 (sz: fsize) (**r compare [r1] and [+0.0] *) + (** Integer arithmetic, immediate *) + | Pcmpimm (sz: isize) (n: Z) (**r compare *) + | Pcmnimm (sz: isize) (n: Z) (**r compare negative *) + (** Logical, immediate *) + | Ptstimm (sz: isize) (n: Z) (**r and, then set flags *) +. + +Inductive arith_pp : Type := + (** Move integer register *) + | Pmov + (** Move wide immediate *) + (* XXX: has to have the same register supplied both times *) + | Pmovk (sz: isize) (n: Z) (pos: Z) (**r insert 16 bits of [n] at [pos] in rd *) + (** PC-relative addressing *) + | Paddadr (id: ident) (ofs: ptrofs) (**r add the low address of [id + ofs] *) + (** Bit-field operations *) + | Psbfiz (sz: isize) (r: int) (s: Z) (**r sign extend and shift left *) + | Psbfx (sz: isize) (r: int) (s: Z) (**r shift right and sign extend *) + | Pubfiz (sz: isize) (r: int) (s: Z) (**r zero extend and shift left *) + | Pubfx (sz: isize) (r: int) (s: Z) (**r shift right and zero extend *) +(* Bit operations are not used in the aarch64/asm semantics + (** Bit operations *) + | Pcls (sz: isize) (**r count leading sign bits *) + | Pclz (sz: isize) (**r count leading zero bits *) + | Prev (sz: isize) (**r reverse bytes *) + | Prev16 (sz: isize) (**r reverse bytes in each 16-bit word *) +*) + (** Floating-point move *) + | Pfmov + (** Floating-point conversions *) + | Pfcvtds (**r convert float32 to float64 *) + | Pfcvtsd (**r convert float64 to float32 *) + (** Floating-point arithmetic *) + | Pfabs (sz: fsize) (**r absolute value *) + | Pfneg (sz: fsize) (**r negation *) + (* Pfsqrt is not used in the semantics of aarch64/asm + | Pfsqrt (sz: fsize) (**r square root *) *) + (** Floating-point conversions *) + | Pscvtf (fsz: fsize) (isz: isize) (**r convert signed int to float *) + | Pucvtf (fsz: fsize) (isz: isize) (**r convert unsigned int to float *) + | Pfcvtzs (isz: isize) (fsz: fsize) (**r convert float to signed int *) + | Pfcvtzu (isz: isize) (fsz: fsize) (**r convert float to unsigned int *) + (** Integer arithmetic, immediate *) + | Paddimm (sz: isize) (n: Z) (**r addition *) + | Psubimm (sz: isize) (n: Z) (**r subtraction *) +. + +Inductive arith_comparison_r0r : Type := + (** Integer arithmetic, shifted register *) + | Pcmp (is:isize) (s: shift_op) (**r compare *) + | Pcmn (is:isize) (s: shift_op) (**r compare negative *) + (** Logical, shifted register *) + | Ptst (is:isize) (s: shift_op) (**r and, then set flags *) +. + +Inductive arith_comparison_pp : Type := + (** Integer arithmetic, extending register *) + | Pcmpext (x: extend_op) (**r int64-int32 cmp *) + | Pcmnext (x: extend_op) (**r int64-int32 cmn *) + (** Floating-point comparison *) + | Pfcmp (sz: fsize) (**r compare [r1] and [r2] *) +. + +Inductive arith_ppp : Type := + (** Variable shifts *) + | Pasrv (sz: isize) (**r arithmetic right shift *) + | Plslv (sz: isize) (**r left shift *) + | Plsrv (sz: isize) (**r logical right shift *) + | Prorv (sz: isize) (**r rotate right *) + (** Integer multiply/divide *) + | Psmulh (**r signed multiply high *) + | Pumulh (**r unsigned multiply high *) + | Psdiv (sz: isize) (**r signed division *) + | Pudiv (sz: isize) (**r unsigned division *) + (** Integer arithmetic, extending register *) + | Paddext (x: extend_op) (**r int64-int32 add *) + | Psubext (x: extend_op) (**r int64-int32 sub *) + (** Floating-point arithmetic *) + | Pfadd (sz: fsize) (**r addition *) + | Pfdiv (sz: fsize) (**r division *) + | Pfmul (sz: fsize) (**r multiplication *) + | Pfsub (sz: fsize) (**r subtraction *) +. + +Inductive arith_rr0r : Type := + (** Integer arithmetic, shifted register *) + | Padd (sz:isize) (s: shift_op) (**r addition *) + | Psub (sz:isize) (s: shift_op) (**r subtraction *) + (** Logical, shifted register *) + | Pand (sz:isize) (s: shift_op) (**r and *) + | Pbic (sz:isize) (s: shift_op) (**r and-not *) + | Peon (sz:isize) (s: shift_op) (**r xor-not *) + | Peor (sz:isize) (s: shift_op) (**r xor *) + | Porr (sz:isize) (s: shift_op) (**r or *) + | Porn (sz:isize) (s: shift_op) (**r or-not *) +. + + +Inductive arith_rr0 : Type := + (** Logical, immediate *) + | Pandimm (sz: isize) (n: Z) (**r and *) + | Peorimm (sz: isize) (n: Z) (**r xor *) + | Porrimm (sz: isize) (n: Z) (**r or *) +. + +Inductive arith_arrrr0 : Type := + (** Integer multiply/divide *) + | Pmadd (sz: isize) (**r multiply-add *) + | Pmsub (sz: isize) (**r multiply-sub *) +. + +(* Currently not used by the semantics of aarch64/Asm + * Inductive arith_apppp : Type := + * (** Floating-point arithmetic *) + * | Pfmadd (sz: fsize) (**r [rd = r3 + r1 * r2] *) + * | Pfmsub (sz: fsize) (**r [rd = r3 - r1 * r2] *) + * . + + * Inductive arith_aapppp : Type := + * (** Floating-point arithmetic *) + * | Pfnmadd (sz: fsize) (**r [rd = - r3 - r1 * r2] *) + * | Pfnmsub (sz: fsize) (**r [rd = - r3 + r1 * r2] *) + * . *) + +(* Notes on the naming scheme used here: + * R: ireg + * R0: ireg0 + * Rsp: iregsp + * F: freg + * W/X: Occur in conjunction with R0, says whether an ireg0 should be evaluated + * as W regsiter (32 bit) or X register (64 bit) + * S/D: Used for completeness sake. Only used for copying an integer register + * to a floating point register. Could be removed. + * A: These instructions perform an additional arithmetic operation + XXX Does this interpretation match the use in kvx/Asmvliw? + * Comparison: For these instructions the first register is not the target. + * Instead, the condition register is mutated. + *) +Inductive ar_instruction : Type := + | PArithP (i : arith_p) (rd : dreg) + | PArithPP (i : arith_pp) (rd rs : dreg) + | PArithPPP (i : arith_ppp) (rd r1 r2 : dreg) + | PArithRR0R (i : arith_rr0r) (rd : ireg) (r1 : ireg0) (r2 : ireg) + | PArithRR0 (i : arith_rr0) (rd : ireg) (r1 : ireg0) + | PArithARRRR0 (i : arith_arrrr0) (rd r1 r2 : ireg) (r3 : ireg0) + (* Pfmadd and Pfmsub are currently not used by the semantics of aarch64/Asm + | PArithAPPPP (i : arith_apppp) (rd r1 r2 r3 : preg) *) + (* Pfnmadd and Pfnmsub are currently not used by the semantics of aarch64/Asm + | PArithAAPPPP (i : arith_aapppp) (rd r1 r2 r3 : preg) *) + | PArithComparisonPP (i : arith_comparison_pp) (r1 r2 : dreg) + | PArithComparisonR0R (i : arith_comparison_r0r) (r1 : ireg0) (r2 : ireg) + | PArithComparisonP (i : arith_comparison_p) (r1 : dreg) + (* Instructions without indirection sine they would be the only ones *) + (* PArithCP: Pcsetm is commented out by aarch64/Asm, so Pcset is alone *) + | Pcset (rd : ireg) (c : testcond) (**r set to 1/0 if cond is true/false *) + (* PArithFR0 *) + | Pfmovi (fsz : fsize) (rd : freg) (r1 : ireg0) (**r copy int reg to FP reg *) + (* PArithCPPP *) + | Pcsel (rd r1 r2 : dreg) (c : testcond) (**r int/float conditional move *) + (* PArithAFFF *) + | Pfnmul (fsz : fsize) (rd r1 r2 : freg) (**r multiply-negate *) +. + +Inductive basic : Type := + | PArith (i: ar_instruction) + | PLoad (ld: ld_instruction) + | PStore (st: st_instruction) + | Pallocframe (sz: Z) (linkofs: ptrofs) (**r allocate new stack frame *) + | Pfreeframe (sz: Z) (linkofs: ptrofs) (**r deallocate stack frame and restore previous frame *) + | Ploadsymbol (rd: ireg) (id: ident) (**r load the address of [id] *) + | Pcvtsw2x (rd: ireg) (r1: ireg) (**r sign-extend 32-bit int to 64-bit *) + | Pcvtuw2x (rd: ireg) (r1: ireg) (**r zero-extend 32-bit int to 64-bit *) + | Pcvtx2w (rd: ireg) (**r retype a 64-bit int as a 32-bit int *) + | Pnop (**r no operation *) +(* NOT USED IN THE SEMANTICS ! + | Pcfi_adjust (ofs: int) (**r .cfi_adjust debug directive *) + | Pcfi_rel_offset (ofs: int) (**r .cfi_rel_offset debug directive *) +*) +. + +Coercion PCtlFlow: cf_instruction >-> control. +Coercion PLoad: ld_instruction >-> basic. +Coercion PStore : st_instruction >-> basic. +Coercion PArithP: arith_p >-> Funclass. +Coercion PArithPP: arith_pp >-> Funclass. +Coercion PArithPPP: arith_ppp >-> Funclass. +Coercion PArithRR0: arith_rr0 >-> Funclass. +Coercion PArithRR0R: arith_rr0r >-> Funclass. +Coercion PArithARRRR0: arith_arrrr0 >-> Funclass. +Coercion PArithComparisonP: arith_comparison_p >-> Funclass. +Coercion PArithComparisonPP: arith_comparison_pp >-> Funclass. +Coercion PArithComparisonR0R: arith_comparison_r0r >-> Funclass. +Coercion PArith: ar_instruction >-> basic. + + +(* Not used in Coq, declared in ocaml directly in PostpassSchedulingOracle.ml +Inductive instruction : Type := + | PBasic (i: basic) + | PControl (i: control). + +Coercion PBasic: basic >-> instruction. +Coercion PControl: control >-> instruction. *) + +(** * Definition of a bblock + +A bblock must contain at least one instruction. + +This choice simplifies the definition of [find_bblock] below: +indeed, each address of a code block identifies at most one bblock. +*) + +Definition non_empty_body (body: list basic): bool := + match body with + | nil => false + | _ => true + end. + +Definition non_empty_exit (exit: option control): bool := + match exit with + | None => false + | _ => true + end. + +Definition non_empty_bblockb (body: list basic) (exit: option control): bool := non_empty_body body || non_empty_exit exit. + +(** A bblock is well-formed if he contains at least one instruction. *) + +Record bblock := mk_bblock { + header: list label; + body: list basic; + exit: option control; + correct: Is_true (non_empty_bblockb body exit) +}. + +(* FIXME? redundant with definition in Machblock *) +Definition length_opt {A} (o: option A) : nat := + match o with + | Some o => 1 + | None => 0 + end. + +Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}. +Next Obligation. + destruct bb; cbn. assumption. +Defined. + +Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}. +Next Obligation. + destruct bb; cbn. assumption. +Defined. + +(* This notion of size induces the notion of "valid" code address given by [find_bblock] + + The result is in Z to be compatible with operations on PC. +*) +Definition size (b:bblock): Z := Z.of_nat (length (header b) + length (body b) + length_opt (exit b)). + +Definition bblocks := list bblock. + +Fixpoint size_blocks (l: bblocks): Z := + match l with + | nil => 0 + | b :: l => + (size b) + (size_blocks l) + end + . + +Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0. +Proof. + intros. destruct z; auto. + - contradict H. cbn. apply gt_irrefl. + - apply Zgt_pos_0. + - contradict H. cbn. apply gt_irrefl. +Qed. + +Lemma size_positive (b:bblock): size b > 0. +Proof. + unfold size. destruct b as [hd bdy ex cor]. cbn. + destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; lia); + unfold non_empty_bblockb in cor; simpl in cor. + inversion cor. +Qed. + +Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks }. +Definition fundef := AST.fundef function. +Definition program := AST.program fundef unit. + +(** * Operational semantics *) + +(** See "Parameters" of the same names in Asm.v *) +Record aarch64_linker: Type := { + symbol_low: ident -> ptrofs -> val; + symbol_high: ident -> ptrofs -> val +}. + +Definition genv := Genv.t fundef unit. + +Section RELSEM. + +Variable lk: aarch64_linker. +Variable ge: genv. + +(** Evaluating an addressing mode *) + +Definition eval_addressing (a: addressing) (rs: regset): val := + match a with + | ADimm base n => Val.addl rs#base (Vlong n) + | ADreg base r => Val.addl rs#base rs#r + | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n)) + | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n)) + | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n)) + | ADadr base id ofs => Val.addl rs#base (symbol_low lk id ofs) + | ADpostincr base n => Vundef + end. + +(** Auxiliaries for memory accesses *) + +Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val) + (a: addressing) (r: dreg) (rs: regset) (m: mem) := + SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN + Next (rs#r <- (transf v)) m. + +Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val) + (a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.loadv chk1 m addr with + | None => Stuck + | Some v1 => + match Mem.loadv chk2 m addr' with + | None => Stuck + | Some v2 => + Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m + end + end + else Stuck. + +Definition exec_store_rs_a (chunk: memory_chunk) + (a: addressing) (v: val) + (rs: regset) (m: mem) := + SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN + Next rs m'. + +Definition exec_store_double (chk1 chk2: memory_chunk) + (a: addressing) (v1 v2: val) + (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.storev chk1 m addr v1 with + | None => Stuck + | Some m' => match Mem.storev chk2 m' addr' v2 with + | None => Stuck + | Some m'' => Next rs m'' + end + end + else Stuck. + +(** execution of loads +*) + +Definition chunk_load (ld: load_rd_a): memory_chunk := + match ld with + | Pldrw => Mint32 + | Pldrw_a => Many32 + | Pldrx => Mint64 + | Pldrx_a => Many64 + | Pldrb _ => Mint8unsigned + | Pldrsb _ => Mint8signed + | Pldrh _ => Mint16unsigned + | Pldrsh _ => Mint16signed + | Pldrzw => Mint32 + | Pldrsw => Mint32 + | Pldrs => Mfloat32 + | Pldrd => Mfloat64 + | Pldrd_a => Many64 + end. + +Definition chunk_store (st: store_rs_a) : memory_chunk := + match st with + | Pstrw => Mint32 + | Pstrw_a => Many32 + | Pstrx => Mint64 + | Pstrx_a => Many64 + | Pstrb => Mint8unsigned + | Pstrh => Mint16unsigned + | Pstrs => Mfloat32 + | Pstrd => Mfloat64 + | Pstrd_a => Many64 + end. + +Definition interp_load (ld: load_rd_a): val -> val := + match ld with + | Pldrb X => Val.longofintu + | Pldrsb X => Val.longofint + | Pldrh X => Val.longofintu + | Pldrsh X => Val.longofint + | Pldrzw => Val.longofintu + | Pldrsw => Val.longofint + (* Changed to exhaustive list because I tend to forgot all the places I need + * to check when changing things. *) + | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W + | Pldrw | Pldrw_a | Pldrx + | Pldrx_a | Pldrs | Pldrd + | Pldrd_a => fun v => v + end. + +Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) := + match ldi with + | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ld) (interp_load ld) a rd rs m + | Pldp ld rd1 rd2 chk1 chk2 a => exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + end. + +Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) := + match sti with + | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store st) a rs#rsr rs m + | Pstp st rs1 rs2 chk1 chk2 a => exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + end. + +(** TODO: redundant w.r.t Machblock ?? *) +Lemma in_dec (lbl: label) (l: list label): { List.In lbl l } + { ~(List.In lbl l) }. +Proof. + apply List.in_dec. + apply Pos.eq_dec. +Qed. + +(** Note: copy-paste from Machblock *) +Definition is_label (lbl: label) (bb: bblock) : bool := + if in_dec lbl (header bb) then true else false. + +Lemma is_label_correct_true lbl bb: + List.In lbl (header bb) <-> is_label lbl bb = true. +Proof. + unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition. +Qed. + +Lemma is_label_correct_false lbl bb: + ~(List.In lbl (header bb)) <-> is_label lbl bb = false. +Proof. + unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition. +Qed. + +(** convert a label into a position in the code *) +Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z := + match lb with + | nil => None + | b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb' + end. + +Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) := + SOME pos <- label_pos lbl 0 (fn_blocks f) IN + match rs#PC with + | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m + | _ => Stuck + end. + +(** Evaluating a branch + +Warning: PC is assumed to be already pointing on the next instruction ! + +*) + +Definition eval_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) := + SOME res <- ores IN + if res then goto_label f lbl rs m else Next rs m. + +Definition eval_neg_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) := + SOME res <- ores IN + if res then Next rs m else goto_label f lbl rs m. + +Definition exec_cfi (f: function) (cfi: cf_instruction) (rs: regset) (m: mem) : outcome := + match cfi with + (** Branches *) + | Pb lbl => + goto_label f lbl rs m + | Pbc cond lbl => + eval_branch f lbl rs m (eval_testcond cond rs) + | Pbl id sg => + Next (rs#RA <- (rs#PC) #PC <- (Genv.symbol_address ge id Ptrofs.zero)) m + | Pbs id sg => + Next (rs#PC <- (Genv.symbol_address ge id Ptrofs.zero)) m + | Pblr r sg => + Next (rs#RA <- (rs#PC) #PC <- (rs#r)) m + | Pbr r sg => + Next (rs#PC <- (rs#r)) m + | Pret r => + Next (rs#PC <- (rs#r)) m + | Pcbnz sz r lbl => + eval_neg_branch f lbl rs m (eval_testzero sz rs#r) + | Pcbz sz r lbl => + eval_branch f lbl rs m (eval_testzero sz rs#r) + | Ptbnz sz r n lbl => + eval_branch f lbl rs m (eval_testbit sz rs#r n) + | Ptbz sz r n lbl => + eval_neg_branch f lbl rs m (eval_testbit sz rs#r n) + (** Pseudo-instructions *) + | Pbtbl r tbl => + match (rs#X16 <- Vundef)#r with + | Vint n => + SOME lbl <- list_nth_z tbl (Int.unsigned n) IN + goto_label f lbl (rs#X16 <- Vundef) m + | _ => Stuck + end + end. + +Definition arith_eval_p (i : arith_p) : val := + match i with + | Padrp id ofs => symbol_high lk id ofs + (** Move wide immediate *) + | Pmovz W n pos => Vint (Int.repr (Z.shiftl n pos)) + | Pmovz X n pos => Vlong (Int64.repr (Z.shiftl n pos)) + | Pmovn W n pos => Vint (Int.repr (Z.lnot (Z.shiftl n pos))) + | Pmovn X n pos => Vlong (Int64.repr (Z.lnot (Z.shiftl n pos))) + (** Floating-point move *) + | Pfmovimms f => Vsingle f + | Pfmovimmd f => Vfloat f + end. + +Definition destroy_X16 (i : arith_p) : bool := + match i with + | Pfmovimms d => negb (is_immediate_float32 d) + | Pfmovimmd d => negb (is_immediate_float64 d) + | _ => false + end. + +Definition if_opt_bool_val (c: option bool) v1 v2: val := + match c with + | Some true => v1 + | Some false => v2 + | None => Vundef + end. + +Definition arith_eval_pp i v := + match i with + | Pmov => v + | Pmovk W n pos => insert_in_int v n pos 16 + | Pmovk X n pos => insert_in_long v n pos 16 + | Paddadr id ofs => Val.addl v (symbol_low lk id ofs) + | Psbfiz W r s => Val.shl (Val.sign_ext s v) (Vint r) + | Psbfiz X r s => Val.shll (Val.sign_ext_l s v) (Vint r) + | Psbfx W r s => Val.sign_ext s (Val.shr v (Vint r)) + | Psbfx X r s => Val.sign_ext_l s (Val.shrl v (Vint r)) + | Pubfiz W r s => Val.shl (Val.zero_ext s v) (Vint r) + | Pubfiz X r s => Val.shll (Val.zero_ext_l s v) (Vint r) + | Pubfx W r s => Val.zero_ext s (Val.shru v (Vint r)) + | Pubfx X r s => Val.zero_ext_l s (Val.shrlu v (Vint r)) + | Pfmov => v + | Pfcvtds => Val.floatofsingle v + | Pfcvtsd => Val.singleoffloat v + | Pfabs S => Val.absfs v + | Pfabs D => Val.absf v + | Pfneg S => Val.negfs v + | Pfneg D => Val.negf v + | Pfcvtzs W S => Val.maketotal (Val.intofsingle v) + | Pfcvtzs W D => Val.maketotal (Val.intoffloat v) + | Pfcvtzs X S => Val.maketotal (Val.longofsingle v) + | Pfcvtzs X D => Val.maketotal (Val.longoffloat v) + | Pfcvtzu W S => Val.maketotal (Val.intuofsingle v) + | Pfcvtzu W D => Val.maketotal (Val.intuoffloat v) + | Pfcvtzu X S => Val.maketotal (Val.longuofsingle v) + | Pfcvtzu X D => Val.maketotal (Val.longuoffloat v) + | Paddimm W n => Val.add v (Vint (Int.repr n)) + | Paddimm X n => Val.addl v (Vlong (Int64.repr n)) + | Psubimm W n => Val.sub v (Vint (Int.repr n)) + | Psubimm X n => Val.subl v (Vlong (Int64.repr n)) + | Pscvtf S W => Val.maketotal (Val.singleofint v) + | Pscvtf D W => Val.maketotal (Val.floatofint v) + | Pscvtf S X => Val.maketotal (Val.singleoflong v) + | Pscvtf D X => Val.maketotal (Val.floatoflong v) + | Pucvtf S W => Val.maketotal (Val.singleofintu v) + | Pucvtf D W => Val.maketotal (Val.floatofintu v) + | Pucvtf S X => Val.maketotal (Val.singleoflongu v) + | Pucvtf D X => Val.maketotal (Val.floatoflongu v) + end. + +Definition arith_eval_ppp i v1 v2 := + match i with + | Pasrv W => Val.shr v1 v2 + | Pasrv X => Val.shrl v1 v2 + | Plslv W => Val.shl v1 v2 + | Plslv X => Val.shll v1 v2 + | Plsrv W => Val.shru v1 v2 + | Plsrv X => Val.shrlu v1 v2 + | Prorv W => Val.ror v1 v2 + | Prorv X => Val.rorl v1 v2 + | Psmulh => Val.mullhs v1 v2 + | Pumulh => Val.mullhu v1 v2 + | Psdiv W => Val.maketotal (Val.divs v1 v2) + | Psdiv X => Val.maketotal (Val.divls v1 v2) + | Pudiv W => Val.maketotal (Val.divu v1 v2) + | Pudiv X => Val.maketotal (Val.divlu v1 v2) + | Paddext x => Val.addl v1 (eval_extend v2 x) + | Psubext x => Val.subl v1 (eval_extend v2 x) + | Pfadd S => Val.addfs v1 v2 + | Pfadd D => Val.addf v1 v2 + | Pfdiv S => Val.divfs v1 v2 + | Pfdiv D => Val.divf v1 v2 + | Pfmul S => Val.mulfs v1 v2 + | Pfmul D => Val.mulf v1 v2 + | Pfsub S => Val.subfs v1 v2 + | Pfsub D => Val.subf v1 v2 + end. + +Definition arith_rr0r_isize (i: arith_rr0r) := + match i with + | Padd is _ => is + | Psub is _ => is + | Pand is _ => is + | Pbic is _ => is + | Peon is _ => is + | Peor is _ => is + | Porr is _ => is + | Porn is _ => is + end. + +(* obtain v1 by [ir0 (arith_rr0r_isize i) rs s1] *) +Definition arith_eval_rr0r i v1 v2 := + match i with + | Padd W s => Val.add v1 (eval_shift_op_int v2 s) + | Padd X s => Val.addl v1 (eval_shift_op_long v2 s) + | Psub W s => Val.sub v1 (eval_shift_op_int v2 s) + | Psub X s => Val.subl v1 (eval_shift_op_long v2 s) + | Pand W s => Val.and v1 (eval_shift_op_int v2 s) + | Pand X s => Val.andl v1 (eval_shift_op_long v2 s) + | Pbic W s => Val.and v1 (Val.notint (eval_shift_op_int v2 s)) + | Pbic X s => Val.andl v1 (Val.notl (eval_shift_op_long v2 s)) + | Peon W s => Val.xor v1 (Val.notint (eval_shift_op_int v2 s)) + | Peon X s => Val.xorl v1 (Val.notl (eval_shift_op_long v2 s)) + | Peor W s => Val.xor v1 (eval_shift_op_int v2 s) + | Peor X s => Val.xorl v1 (eval_shift_op_long v2 s) + | Porr W s => Val.or v1 (eval_shift_op_int v2 s) + | Porr X s => Val.orl v1 (eval_shift_op_long v2 s) + | Porn W s => Val.or v1 (Val.notint (eval_shift_op_int v2 s)) + | Porn X s => Val.orl v1 (Val.notl (eval_shift_op_long v2 s)) + end. + +Definition arith_rr0_isize (i : arith_rr0) := + match i with + | Pandimm is _ | Peorimm is _ | Porrimm is _ => is + end. + +(* obtain v by [ir0 (arith_rr0_isize i) rs s] *) +Definition arith_eval_rr0 i v := + match i with + | Pandimm W n => Val.and v (Vint (Int.repr n)) + | Pandimm X n => Val.andl v (Vlong (Int64.repr n)) + | Peorimm W n => Val.xor v (Vint (Int.repr n)) + | Peorimm X n => Val.xorl v (Vlong (Int64.repr n)) + | Porrimm W n => Val.or v (Vint (Int.repr n)) + | Porrimm X n => Val.orl v (Vlong (Int64.repr n)) + end. + +Definition arith_arrrr0_isize (i : arith_arrrr0) := + match i with + | Pmadd is | Pmsub is => is + end. + +(* obtain v3 by [ir0 (arith_arrrr0_isize i) rs s3] *) +Definition arith_eval_arrrr0 i v1 v2 v3 := + match i with + | Pmadd W => Val.add v3 (Val.mul v1 v2) + | Pmadd X => Val.addl v3 (Val.mull v1 v2) + | Pmsub W => Val.sub v3 (Val.mul v1 v2) + | Pmsub X => Val.subl v3 (Val.mull v1 v2) + end. + +Definition arith_prepare_comparison_pp i (v1 v2 : val) := + match i with + | Pcmpext x => (v1, (eval_extend v2 x)) + | Pcmnext x => (v1, (Val.negl (eval_extend v2 x))) + | Pfcmp _ => (v1, v2) + end. + +Definition arith_comparison_r0r_isize i := + match i with + | Pcmp is _ => is + | Pcmn is _ => is + | Ptst is _ => is + end. + +Definition arith_prepare_comparison_r0r i v1 v2 := + match i with + | Pcmp W s => (v1, (eval_shift_op_int v2 s)) + | Pcmp X s => (v1, (eval_shift_op_long v2 s)) + | Pcmn W s => (v1, (Val.neg (eval_shift_op_int v2 s))) + | Pcmn X s => (v1, (Val.negl (eval_shift_op_long v2 s))) + | Ptst W s => ((Val.and v1 (eval_shift_op_int v2 s)), (Vint Int.zero)) + | Ptst X s => ((Val.andl v1 (eval_shift_op_long v2 s)), (Vlong Int64.zero)) + end. + +Definition arith_prepare_comparison_p i v := + match i with + | Pcmpimm W n => (v, (Vint (Int.repr n))) + | Pcmpimm X n => (v, (Vlong (Int64.repr n))) + | Pcmnimm W n => (v, (Vint (Int.neg (Int.repr n)))) + | Pcmnimm X n => (v, (Vlong (Int64.neg (Int64.repr n)))) + | Ptstimm W n => ((Val.and v (Vint (Int.repr n))), (Vint Int.zero)) + | Ptstimm X n => ((Val.andl v (Vlong (Int64.repr n))), (Vlong Int64.zero)) + | Pfcmp0 S => (v, (Vsingle Float32.zero)) + | Pfcmp0 D => (v, (Vfloat Float.zero)) + end. + +Definition arith_comparison_pp_compare i := + match i with + | Pcmpext _ | Pcmnext _ => compare_long + | Pfcmp S => compare_single + | Pfcmp D => compare_float + end. + +Definition arith_comparison_p_compare i := + match i with + | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => compare_int + | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => compare_long + | Pfcmp0 S => compare_single + | Pfcmp0 D => compare_float + end. + +Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset := + match ai with + | PArithP i d => + let rs' := rs#d <- (arith_eval_p i) in + if destroy_X16 i then rs'#X16 <- Vundef else rs' + | PArithPP i d s => rs#d <- (arith_eval_pp i rs#s) + | PArithPPP i d s1 s2 => rs#d <- (arith_eval_ppp i rs#s1 rs#s2) + + | PArithRR0R i d s1 s2 => rs#d <- (arith_eval_rr0r i (ir0 (arith_rr0r_isize i) rs s1) rs#s2) + + | PArithRR0 i d s => rs#d <- (arith_eval_rr0 i (ir0 (arith_rr0_isize i) rs s)) + + | PArithARRRR0 i d s1 s2 s3 => + rs#d <- (arith_eval_arrrr0 i rs#s1 rs#s2 (ir0 (arith_arrrr0_isize i) rs s3)) + + | PArithComparisonPP i s1 s2 => + let (v1, v2) := arith_prepare_comparison_pp i rs#s1 rs#s2 in + arith_comparison_pp_compare i rs v1 v2 + | PArithComparisonR0R i s1 s2 => + let is := arith_comparison_r0r_isize i in + let (v1, v2) := arith_prepare_comparison_r0r i (ir0 is rs s1) rs#s2 in + (if is (* is W *) then compare_int else compare_long) rs v1 v2 + | PArithComparisonP i s => + let (v1, v2) := arith_prepare_comparison_p i rs#s in + arith_comparison_p_compare i rs v1 v2 + | Pcset d c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (Vint Int.one) (Vint Int.zero)) + | Pfmovi S d s => rs#d <- (float32_of_bits rs##s) + | Pfmovi D d s => rs#d <- (float64_of_bits rs###s) + | Pcsel d s1 s2 c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (rs#s1) (rs#s2)) + | Pfnmul S d s1 s2 => rs#d <- (Val.negfs (Val.mulfs rs#s1 rs#s2)) + | Pfnmul D d s1 s2 => rs#d <- (Val.negf (Val.mulf rs#s1 rs#s2)) + end. + +(* basic exec *) +Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome := + match b with + | PArith ai => Next (exec_arith_instr ai rs) m + | PLoad ldi => exec_load ldi rs m + | PStore sti => exec_store sti rs m + | Pallocframe sz pos => + let (m1, stk) := Mem.alloc m 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + SOME m2 <- Mem.storev Mint64 m1 (Val.offset_ptr sp pos) rs#SP IN + Next (rs #X29 <- (rs#SP) #SP <- sp #X16 <- Vundef) m2 + | Pfreeframe sz pos => + SOME v <- Mem.loadv Mint64 m (Val.offset_ptr rs#SP pos) IN + match rs#SP with + | Vptr stk ofs => + SOME m' <- Mem.free m stk 0 sz IN + Next (rs#SP <- v #X16 <- Vundef) m' + | _ => Stuck + end + | Ploadsymbol rd id => + Next (rs#rd <- (Genv.symbol_address ge id Ptrofs.zero)) m + | Pcvtsw2x rd r1 => + Next (rs#rd <- (Val.longofint rs#r1)) m + | Pcvtuw2x rd r1 => + Next (rs#rd <- (Val.longofintu rs#r1)) m + | Pcvtx2w rd => + Next (rs#rd <- (Val.loword rs#rd)) m + | Pnop => Next rs m + end. + +(** execution of the body of a bblock *) +Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome := + match body with + | nil => Next rs m + | bi::body' => + SOME o <- exec_basic bi rs m IN + exec_body body' (_rs o) (_m o) + end. + +Definition incrPC size_b (rs: regset) := + rs#PC <- (Val.offset_ptr rs#PC size_b). + +Definition estep (f: function) oc size_b (rs: regset) (m: mem) := + match oc with + | Some (PCtlFlow cfi) => exec_cfi f cfi (incrPC size_b rs) m + | Some (Pbuiltin ef args res) => Next (incrPC size_b rs) m + | None => Next (incrPC size_b rs) m + end. + +(** execution of the exit instruction of a bblock *) +Inductive exec_exit (f: function) size_b (rs: regset) (m: mem): (option control) -> trace -> regset -> mem -> Prop := + | none_step: + exec_exit f size_b rs m None E0 (incrPC size_b rs) m + | cfi_step (cfi: cf_instruction) rs' m': + exec_cfi f cfi (incrPC size_b rs) m = Next rs' m' -> + exec_exit f size_b rs m (Some (PCtlFlow cfi)) E0 rs' m' + | builtin_step ef args res vargs t vres rs' m': + eval_builtin_args ge (fun (r: dreg) => rs r) rs#SP m args vargs -> + external_call ef ge vargs m t vres m' -> + rs' = incrPC size_b + (set_res (map_builtin_res DR res) vres + (undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of (destroyed_by_builtin ef)) rs)) -> + exec_exit f size_b rs m (Some (Pbuiltin ef args res)) t rs' m' + . + +(*Definition bbstep f cfi size_b bdy rs m :=*) + (*match exec_body bdy rs m with*) + (*| Some (State rs' m') => estep f cfi size_b rs' m'*) + (*| Stuck => Stuck*) + (*end.*) + +Definition bbstep f bb rs m := + match exec_body (body bb) rs m with + | Some (State rs' m') => estep f (exit bb) (Ptrofs.repr (size bb)) rs' m' + | Stuck => Stuck + end. + +Definition exec_bblock (f: function) (b: bblock) (rs: regset) (m: mem) (t:trace) (rs':regset) (m':mem): Prop + := exists rs1 m1, exec_body (body b) rs m = Next rs1 m1 /\ exec_exit f (Ptrofs.repr (size b)) rs1 m1 (exit b) t rs' m'. + +Fixpoint find_bblock (pos: Z) (lb: bblocks) {struct lb} : option bblock := + match lb with + | nil => None + | b :: il => + if zlt pos 0 then None (* NOTE: It is impossible to branch inside a block *) + else if zeq pos 0 then Some b + else find_bblock (pos - (size b)) il + end. + +(** Execution of the instruction at [rs PC]. *) + +Inductive step: state -> trace -> state -> Prop := + | exec_step_internal: + forall b ofs f bi rs m t rs' m', + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi -> + exec_bblock f bi rs m t rs' m' -> + step (State rs m) t (State rs' m') + | exec_step_external: + forall b ef args res rs m t rs' m', + rs PC = Vptr b Ptrofs.zero -> + Genv.find_funct_ptr ge b = Some (External ef) -> + external_call ef ge args m t res m' -> + extcall_arguments rs m (ef_sig ef) args -> + rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) -> + step (State rs m) t (State rs' m') + . + + +End RELSEM. + + +(** Execution of whole programs. *) + +Inductive initial_state (p: program): state -> Prop := + | initial_state_intro: forall m0, + Genv.init_mem p = Some m0 -> + let ge := Genv.globalenv p in + let rs0 := + (Pregmap.init Vundef) + # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero) + # RA <- Vnullptr + # SP <- Vnullptr in + initial_state p (State rs0 m0). + +Inductive final_state: state -> int -> Prop := + | final_state_intro: forall rs m r, + rs#PC = Vnullptr -> + rs#X0 = Vint r -> + final_state (State rs m) r. + +Definition semantics (lk: aarch64_linker) (p: program) := + Semantics (step lk) (initial_state p) final_state (Genv.globalenv p). diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v new file mode 100644 index 00000000..5cd049c5 --- /dev/null +++ b/aarch64/Asmblockdeps.v @@ -0,0 +1,2688 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Translation from [Asmblock] to [AbstractBB] *) + +(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmblock] into [L]. + [AbstractBB] will then define a sequential semantics for [L]. + We prove a bisimulation between the sequential semantics of [L] and [Asmblock]. + Then, the checker on [Asmblock] is deduced from those of [L]. + *) + +Require Import AST. +Require Import Asm Asmblock. +Require Import Asmblockprops. +Require Import Values. +Require Import Globalenvs. +Require Import Memory. +Require Import Errors. +Require Import Integers. +Require Import Floats. +Require Import ZArith. +Require Import Coqlib. +Require Import ImpSimuTest. +Require Import Axioms. +Require Import Permutation. +Require Import Events. + +Require Import Lia. + +Import ListNotations. +Local Open Scope list_scope. + +Open Scope impure. +(** auxiliary treatments of builtins *) + +Definition is_builtin(ex: option control): bool := + match ex with + | Some (Pbuiltin _ _ _) => true + | _ => false + end. + +Definition has_builtin(bb: bblock): bool := + is_builtin (exit bb). + +Remark builtin_arg_eq_dreg: forall (a b: builtin_arg dreg), {a=b} + {a<>b}. +Proof. + intros. + apply (builtin_arg_eq dreg_eq). +Qed. + +Remark builtin_res_eq_dreg: forall (a b: builtin_res dreg), {a=b} + {a<>b}. +Proof. + intros. + apply (builtin_res_eq dreg_eq). +Qed. + +Definition assert_same_builtin (bb1 bb2: bblock): ?? unit := + match exit bb1 with + | Some (Pbuiltin ef1 lbar1 brr1) => + match exit bb2 with + | Some (Pbuiltin ef2 lbar2 brr2) => + if (external_function_eq ef1 ef2) then + if (list_eq_dec builtin_arg_eq_dreg lbar1 lbar2) then + if (builtin_res_eq_dreg brr1 brr2) then RET tt + else FAILWITH "Different brr in Pbuiltin" + else FAILWITH "Different lbar in Pbuiltin" + else FAILWITH "Different ef in Pbuiltin" + | _ => FAILWITH "Expected a builtin: found something else" (* XXX: on peut raffiner le message d'erreur si nécessaire *) + end + | _ => match exit bb2 with + | Some (Pbuiltin ef2 lbar2 brr2) => FAILWITH "Expected a something else: found a builtin" + | _ => RET tt (* ok *) + end + end. + +Lemma assert_same_builtin_correct (bb1 bb2: bblock): + WHEN assert_same_builtin bb1 bb2 ~> _ THEN + has_builtin bb1 = true \/ has_builtin bb2 = true -> exit bb1 = exit bb2. +Proof. + unfold assert_same_builtin, has_builtin. + destruct (exit bb1) as [[]|]; simpl; + destruct (exit bb2) as [[]|]; wlp_simplify; try congruence. +Qed. +Global Opaque assert_same_builtin. +Local Hint Resolve assert_same_builtin_correct: wlp. + +(** Definition of [L] *) + +Module P<: ImpParam. +Module R := Pos. + +Section IMPPARAM. + +Definition env := Genv.t fundef unit. + +Record genv_wrap := { _genv: env; _fn: function; _lk: aarch64_linker }. +Definition genv := genv_wrap. + +Variable Ge: genv. + +Inductive value_wrap := + | Val (v: val) + | Memstate (m: mem) + | Bool (b: bool) +. + +Definition value := value_wrap. + +Record CRflags := { _CN: val; _CZ:val; _CC: val; _CV: val }. + +Inductive control_op := + | Ob (l: label) + | Obc (c: testcond) (l: label) + | Obl (id: ident) + | Obs (id: ident) + | Ocbnz (sz: isize) (l: label) + | Ocbz (sz: isize) (l: label) + | Otbnz (sz: isize) (n: int) (l: label) + | Otbz (sz: isize) (n: int) (l: label) + | Obtbl (l: list label) + | OError + | OIncremPC (sz: Z) +. + +Inductive arith_op := + | OArithP (n: arith_p) + | OArithPP (n: arith_pp) + | OArithPPP (n: arith_ppp) + | OArithRR0R (n: arith_rr0r) + | OArithRR0R_XZR (n: arith_rr0r) (vz: val) + | OArithRR0 (n: arith_rr0) + | OArithRR0_XZR (n: arith_rr0) (vz: val) + | OArithARRRR0 (n: arith_arrrr0) + | OArithARRRR0_XZR (n: arith_arrrr0) (vz: val) + | OArithComparisonPP_CN (n: arith_comparison_pp) + | OArithComparisonPP_CZ (n: arith_comparison_pp) + | OArithComparisonPP_CC (n: arith_comparison_pp) + | OArithComparisonPP_CV (n: arith_comparison_pp) + | OArithComparisonR0R_CN (n: arith_comparison_r0r) (is: isize) + | OArithComparisonR0R_CZ (n: arith_comparison_r0r) (is: isize) + | OArithComparisonR0R_CC (n: arith_comparison_r0r) (is: isize) + | OArithComparisonR0R_CV (n: arith_comparison_r0r) (is: isize) + | OArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) (is: isize) (vz: val) + | OArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) (is: isize) (vz: val) + | OArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) (is: isize) (vz: val) + | OArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) (is: isize) (vz: val) + | OArithComparisonP_CN (n: arith_comparison_p) + | OArithComparisonP_CZ (n: arith_comparison_p) + | OArithComparisonP_CC (n: arith_comparison_p) + | OArithComparisonP_CV (n: arith_comparison_p) + | Ocset (c: testcond) + | Ofmovi (fsz: fsize) + | Ofmovi_XZR (fsz: fsize) + | Ocsel (c: testcond) + | Ofnmul (fsz: fsize) +. + +Inductive store_op := + | Ostore1 (st: store_rs_a) (chunk: memory_chunk) (a: addressing) + | Ostore2 (st: store_rs_a) (chunk: memory_chunk) (a: addressing) + | OstoreU (st: store_rs_a) (chunk: memory_chunk) (a: addressing) +. + +Inductive load_op := + | Oload1 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) + | Oload2 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) + | OloadU (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) +. + +Inductive allocf_op := + | OAllocf_SP (sz: Z) (linkofs: ptrofs) + | OAllocf_Mem (sz: Z) (linkofs: ptrofs) +. + +Inductive freef_op := + | OFreef_SP (sz: Z) (linkofs: ptrofs) + | OFreef_Mem (sz: Z) (linkofs: ptrofs) +. + +Inductive op_wrap := + (* arithmetic operation *) + | Arith (op: arith_op) + | Load (ld: load_op) + | Store (st: store_op) + | Allocframe (al: allocf_op) + | Freeframe (fr: freef_op) + | Loadsymbol (id: ident) + | Cvtsw2x + | Cvtuw2x + | Cvtx2w + | Control (co: control_op) + | Constant (v: val) +. + +Definition op:=op_wrap. + +Coercion Arith: arith_op >-> op_wrap. +Coercion Control: control_op >-> op_wrap. + +Definition v_compare_int (v1 v2: val) : CRflags := + {| _CN := (Val.negative (Val.sub v1 v2)); + _CZ := (Val.mxcmpu Ceq v1 v2); + _CC := (Val.mxcmpu Cge v1 v2); + _CV := (Val.sub_overflow v1 v2) |}. + +Definition v_compare_long (v1 v2: val) : CRflags := + {| _CN := (Val.negativel (Val.subl v1 v2)); + _CZ := (Val.mxcmplu Ceq v1 v2); + _CC := (Val.mxcmplu Cge v1 v2); + _CV := (Val.subl_overflow v1 v2) |}. + +Definition v_compare_float (v1 v2: val) : CRflags := + match v1, v2 with + | Vfloat f1, Vfloat f2 => + {| _CN := (Val.of_bool (Float.cmp Clt f1 f2)); + _CZ := (Val.of_bool (Float.cmp Ceq f1 f2)); + _CC := (Val.of_bool (negb (Float.cmp Clt f1 f2))); + _CV := (Val.of_bool (negb (Float.ordered f1 f2))) |} + | _, _ => + {| _CN := Vundef; + _CZ := Vundef; + _CC := Vundef; + _CV := Vundef |} + end. + +Definition v_compare_single (v1 v2: val) : CRflags := + match v1, v2 with + | Vsingle f1, Vsingle f2 => + {| _CN := (Val.of_bool (Float32.cmp Clt f1 f2)); + _CZ := (Val.of_bool (Float32.cmp Ceq f1 f2)); + _CC := (Val.of_bool (negb (Float32.cmp Clt f1 f2))); + _CV := (Val.of_bool (negb (Float32.ordered f1 f2))) |} + | _, _ => + {| _CN := Vundef; + _CZ := Vundef; + _CC := Vundef; + _CV := Vundef |} + end. + +Definition arith_eval_comparison_pp (n: arith_comparison_pp) (v1 v2: val) := + let (v1',v2') := arith_prepare_comparison_pp n v1 v2 in + match n with + | Pcmpext _ | Pcmnext _ => v_compare_long v1' v2' + | Pfcmp S => v_compare_single v1' v2' + | Pfcmp D => v_compare_float v1' v2' + end. + +Definition arith_eval_comparison_p (n: arith_comparison_p) (v: val) := + let (v1',v2') := arith_prepare_comparison_p n v in + match n with + | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => v_compare_int v1' v2' + | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => v_compare_long v1' v2' + | Pfcmp0 S => v_compare_single v1' v2' + | Pfcmp0 D => v_compare_float v1' v2' + end. + +Definition arith_eval_comparison_r0r (n: arith_comparison_r0r) (v1 v2: val) (is: isize) := + let (v1',v2') := arith_prepare_comparison_r0r n v1 v2 in + if is then v_compare_int v1' v2' else v_compare_long v1' v2'. + +Definition flags_testcond_value (c: testcond) (vCN vCZ vCC vCV: val) := + match c with + | TCeq => (**r equal *) + match vCZ with + | Vint n => Some (Int.eq n Int.one) + | _ => None + end + | TCne => (**r not equal *) + match vCZ with + | Vint n => Some (Int.eq n Int.zero) + | _ => None + end + | TClo => (**r unsigned less than *) + match vCC with + | Vint n => Some (Int.eq n Int.zero) + | _ => None + end + | TCls => (**r unsigned less or equal *) + match vCC, vCZ with + | Vint c, Vint z => Some (Int.eq c Int.zero || Int.eq z Int.one) + | _, _ => None + end + | TChs => (**r unsigned greater or equal *) + match vCC with + | Vint n => Some (Int.eq n Int.one) + | _ => None + end + | TChi => (**r unsigned greater *) + match vCC, vCZ with + | Vint c, Vint z => Some (Int.eq c Int.one && Int.eq z Int.zero) + | _, _ => None + end + | TClt => (**r signed less than *) + match vCV, vCN with + | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.one) + | _, _ => None + end + | TCle => (**r signed less or equal *) + match vCV, vCN, vCZ with + | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.one || Int.eq z Int.one) + | _, _, _ => None + end + | TCge => (**r signed greater or equal *) + match vCV, vCN with + | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.zero) + | _, _ => None + end + | TCgt => (**r signed greater *) + match vCV, vCN, vCZ with + | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.zero && Int.eq z Int.zero) + | _, _, _ => None + end + | TCpl => (**r positive *) + match vCN with + | Vint n => Some (Int.eq n Int.zero) + | _ => None + end + | TCmi => (**r negative *) + match vCN with + | Vint n => Some (Int.eq n Int.one) + | _ => None + end + end. + +(* The is argument is used to identify the source inst and avoid rewriting some code + 0 -> Ocset + 1 -> Ocsel + 2 -> Obc *) +Definition cond_eval_is (c: testcond) (v1 v2 vCN vCZ vCC vCV: val) (is: Z) := + let res := flags_testcond_value c vCN vCZ vCC vCV in + match is, res with + | 0, res => Some (Val (if_opt_bool_val res (Vint Int.one) (Vint Int.zero))) + | 1, res => Some (Val (if_opt_bool_val res v1 v2)) + | 2, Some b => Some (Bool (b)) + | _, _ => None + end. + +Definition fmovi_eval (fsz: fsize) (v: val) := + match fsz with + | S => float32_of_bits v + | D => float64_of_bits v + end. + +Definition fmovi_eval_xzr (fsz: fsize) := + match fsz with + | S => float32_of_bits (Vint Int.zero) + | D => float64_of_bits (Vlong Int64.zero) + end. + +Definition fnmul_eval (fsz: fsize) (v1 v2: val) := + match fsz with + | S => Val.negfs (Val.mulfs v1 v2) + | D => Val.negf (Val.mulf v1 v2) + end. + +Definition cflags_eval (c: testcond) (l: list value) (v1 v2: val) (is: Z) := + match c, l with + | TCeq, [Val vCZ] => cond_eval_is TCeq v1 v2 Vundef vCZ Vundef Vundef is + | TCne, [Val vCZ] => cond_eval_is TCne v1 v2 Vundef vCZ Vundef Vundef is + | TChs, [Val vCC] => cond_eval_is TChs v1 v2 Vundef Vundef vCC Vundef is + | TClo, [Val vCC] => cond_eval_is TClo v1 v2 Vundef Vundef vCC Vundef is + | TCmi, [Val vCN] => cond_eval_is TCmi v1 v2 vCN Vundef Vundef Vundef is + | TCpl, [Val vCN] => cond_eval_is TCpl v1 v2 vCN Vundef Vundef Vundef is + | TChi, [Val vCZ; Val vCC] => cond_eval_is TChi v1 v2 Vundef vCZ vCC Vundef is + | TCls, [Val vCZ; Val vCC] => cond_eval_is TCls v1 v2 Vundef vCZ vCC Vundef is + | TCge, [Val vCN; Val vCV] => cond_eval_is TCge v1 v2 vCN Vundef Vundef vCV is + | TClt, [Val vCN; Val vCV] => cond_eval_is TClt v1 v2 vCN Vundef Vundef vCV is + | TCgt, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCgt v1 v2 vCN vCZ Vundef vCV is + | TCle, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCle v1 v2 vCN vCZ Vundef vCV is + | _, _ => None + end. + +Definition arith_op_eval (op: arith_op) (l: list value) := + match op, l with + | OArithP n, [] => Some (Val (arith_eval_p Ge.(_lk) n)) + | OArithPP n, [Val v] => Some (Val (arith_eval_pp Ge.(_lk) n v)) + | OArithPPP n, [Val v1; Val v2] => Some (Val (arith_eval_ppp n v1 v2)) + | OArithRR0R n, [Val v1; Val v2] => Some (Val (arith_eval_rr0r n v1 v2)) + | OArithRR0R_XZR n vz, [Val v] => Some (Val (arith_eval_rr0r n vz v)) + | OArithRR0 n, [Val v] => Some (Val (arith_eval_rr0 n v)) + | OArithRR0_XZR n vz, [] => Some (Val (arith_eval_rr0 n vz)) + | OArithARRRR0 n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrrr0 n v1 v2 v3)) + | OArithARRRR0_XZR n vz, [Val v1; Val v2] => Some (Val (arith_eval_arrrr0 n v1 v2 vz)) + | OArithComparisonPP_CN n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CN))) + | OArithComparisonPP_CZ n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CZ))) + | OArithComparisonPP_CC n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CC))) + | OArithComparisonPP_CV n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CV))) + | OArithComparisonR0R_CN n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CN))) + | OArithComparisonR0R_CZ n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CZ))) + | OArithComparisonR0R_CC n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CC))) + | OArithComparisonR0R_CV n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CV))) + | OArithComparisonR0R_CN_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CN))) + | OArithComparisonR0R_CZ_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CZ))) + | OArithComparisonR0R_CC_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CC))) + | OArithComparisonR0R_CV_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CV))) + | OArithComparisonP_CN n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CN))) + | OArithComparisonP_CZ n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CZ))) + | OArithComparisonP_CC n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CC))) + | OArithComparisonP_CV n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CV))) + | Ocset c, l => cflags_eval c l Vundef Vundef 0 + | Ofmovi fsz, [Val v] => Some (Val (fmovi_eval fsz v)) + | Ofmovi_XZR fsz, [] => Some (Val (fmovi_eval_xzr fsz)) + | Ocsel c, Val v1 :: Val v2 :: l' => cflags_eval c l' v1 v2 1 + | Ofnmul fsz, [Val v1; Val v2] => Some (Val (fnmul_eval fsz v1 v2)) + | _, _ => None + end. + +Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) := + match v with + | Some va => match Mem.storev c m va vs with + | Some m' => Some (Memstate m') + | None => None + end + | None => None (* should never occurs *) + end. + +Definition exec_store1 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs: val) := + let v := + match a with + | ADimm _ n => Some (Val.addl vs (Vlong n)) + | ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs)) + | _ => None + end in + call_ll_storev chunk m v vr. + +Definition exec_store2 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs1 vs2: val) := + let v := + match a with + | ADreg _ _ => Some (Val.addl vs1 vs2) + | ADlsl _ _ n => Some (Val.addl vs1 (Val.shll vs2 (Vint n))) + | ADsxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofint vs2) (Vint n))) + | ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n))) + | _ => None + end in + call_ll_storev chunk m v vr. + +Definition exec_storeU (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr: val) := + call_ll_storev chunk m None vr. + +Definition goto_label_deps (f: function) (lbl: label) (vpc: val) := + match label_pos lbl 0 (fn_blocks f) with + | None => None + | Some pos => + match vpc with + | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos))) + | _ => None + end + end. + +Definition control_eval (o: control_op) (l: list value) := + let (ge, fn, lk) := Ge in + match o, l with + | Ob lbl, [Val vpc] => goto_label_deps fn lbl vpc + | Obc c lbl, Val vpc :: l' => match cflags_eval c l' Vundef Vundef 2 with + | Some (Bool true) => goto_label_deps fn lbl vpc + | Some (Bool false) => Some (Val vpc) + | _ => None + end + | Obl id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero)) + | Obs id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero)) + | Ocbnz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with + | Some (true) => Some (Val vpc) + | Some (false) => goto_label_deps fn lbl vpc + | None => None + end + | Ocbz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with + | Some (true) => goto_label_deps fn lbl vpc + | Some (false) => Some (Val vpc) + | None => None + end + | Otbnz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with + | Some (true) => goto_label_deps fn lbl vpc + | Some (false) => Some (Val vpc) + | None => None + end + | Otbz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with + | Some (true) => Some (Val vpc) + | Some (false) => goto_label_deps fn lbl vpc + | None => None + end + | Obtbl tbl, [Val index; Val vpc] => match index with + | Vint n => + match list_nth_z tbl (Int.unsigned n) with + | None => None + | Some lbl => goto_label_deps fn lbl vpc + end + | _ => None + end + | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz))) + | OError, _ => None + | _, _ => None + end. + +Definition store_eval (o: store_op) (l: list value) := + match o, l with + | Ostore1 st chunk a, [Val vr; Val vs; Memstate m] => exec_store1 st m chunk a vr vs + | Ostore2 st chunk a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m chunk a vr vs1 vs2 + | OstoreU st chunk a, [Val vr; Memstate m] => exec_storeU st m chunk a vr + | _, _ => None + end. + +Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: option val) := + match v with + | Some va => match Mem.loadv c m va with + | Some v' => Some (Val (transf v')) + | None => None + end + | None => None (* should never occurs *) + end. + +Definition exec_load1 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl: val) := + let v := + match a with + | ADimm _ n => Some (Val.addl vl (Vlong n)) + | ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs)) + | _ => None + end in + call_ll_loadv chunk (interp_load ld) m v. + +Definition exec_load2 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl1 vl2: val) := + let v := + match a with + | ADreg _ _ => Some (Val.addl vl1 vl2) + | ADlsl _ _ n => Some (Val.addl vl1 (Val.shll vl2 (Vint n))) + | ADsxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofint vl2) (Vint n))) + | ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n))) + | _ => None + end in + call_ll_loadv chunk (interp_load ld) m v. + +Definition exec_loadU (n: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) := + call_ll_loadv chunk (interp_load n) m None. + +Definition load_eval (o: load_op) (l: list value) := + match o, l with + | Oload1 ld chunk a, [Val vs; Memstate m] => exec_load1 ld m chunk a vs + | Oload2 ld chunk a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m chunk a vs1 vs2 + | OloadU st chunk a, [Memstate m] => exec_loadU st m chunk a + | _, _ => None + end. + +Definition eval_allocf (o: allocf_op) (l: list value) := + match o, l with + | OAllocf_Mem sz linkofs, [Val spv; Memstate m] => + let (m1, stk) := Mem.alloc m 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv + | OAllocf_SP sz linkofs, [Val spv; Memstate m] => + let (m1, stk) := Mem.alloc m 0 sz in + let sp := (Vptr stk Ptrofs.zero) in + match call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv with + | None => None + | Some ms => Some (Val sp) + end + | _, _ => None + end. + +Definition eval_freef (o: freef_op) (l: list value) := + match o, l with + | OFreef_Mem sz linkofs, [Val spv; Memstate m] => + match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with + | None => None + | Some v => + match spv with + | Vptr stk ofs => + match Mem.free m stk 0 sz with + | None => None + | Some m' => Some (Memstate m') + end + | _ => None + end + end + | OFreef_SP sz linkofs, [Val spv; Memstate m] => + match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with + | None => None + | Some v => + match spv with + | Vptr stk ofs => + match Mem.free m stk 0 sz with + | None => None + | Some m' => Some (v) + end + | _ => None + end + end + | _, _ => None + end. + +Definition op_eval (op: op) (l:list value) := + match op, l with + | Arith op, l => arith_op_eval op l + | Load o, l => load_eval o l + | Store o, l => store_eval o l + | Allocframe o, l => eval_allocf o l + | Freeframe o, l => eval_freef o l + | Loadsymbol id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero)) + | Cvtsw2x, [Val v] => Some (Val (Val.longofint v)) + | Cvtuw2x, [Val v] => Some (Val (Val.longofintu v)) + | Cvtx2w, [Val v] => Some (Val (Val.loword v)) + | Control o, l => control_eval o l + | Constant v, [] => Some (Val v) + | _, _ => None + end. + +Definition vz_eq (vz1 vz2: val) : ?? bool := + RET (match vz1 with + | Vint i1 => match vz2 with + | Vint i2 => Int.eq i1 i2 + | _ => false + end + | Vlong l1 => match vz2 with + | Vlong l2 => Int64.eq l1 l2 + | _ => false + end + | _ => false + end). + +Lemma vz_eq_correct vz1 vz2: + WHEN vz_eq vz1 vz2 ~> b THEN b = true -> vz1 = vz2. +Proof. + wlp_simplify. + destruct vz1; destruct vz2; trivial; try discriminate. + - eapply f_equal; apply Int.same_if_eq; auto. + - eapply f_equal. apply Int64.same_if_eq; auto. +Qed. +Hint Resolve vz_eq_correct: wlp. + +Definition is_eq (is1 is2: isize) : ?? bool := + RET (match is1 with + | W => match is2 with + | W => true + | _ => false + end + | X => match is2 with + | X => true + | _ => false + end + end). + +Lemma is_eq_correct is1 is2: + WHEN is_eq is1 is2 ~> b THEN b = true -> is1 = is2. +Proof. + wlp_simplify; destruct is1; destruct is2; trivial; try discriminate. +Qed. +Hint Resolve is_eq_correct: wlp. + +Definition arith_op_eq (o1 o2: arith_op): ?? bool := + match o1 with + | OArithP n1 => + match o2 with OArithP n2 => phys_eq n1 n2 | _ => RET false end + | OArithPP n1 => + match o2 with OArithPP n2 => phys_eq n1 n2 | _ => RET false end + | OArithPPP n1 => + match o2 with OArithPPP n2 => phys_eq n1 n2 | _ => RET false end + | OArithRR0R n1 => + match o2 with OArithRR0R n2 => phys_eq n1 n2 | _ => RET false end + | OArithRR0R_XZR n1 vz1 => + match o2 with OArithRR0R_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end + | OArithRR0 n1 => + match o2 with OArithRR0 n2 => phys_eq n1 n2 | _ => RET false end + | OArithRR0_XZR n1 vz1 => + match o2 with OArithRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end + | OArithARRRR0 n1 => + match o2 with OArithARRRR0 n2 => phys_eq n1 n2 | _ => RET false end + | OArithARRRR0_XZR n1 vz1 => + match o2 with OArithARRRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end + | OArithComparisonPP_CN n1 => + match o2 with OArithComparisonPP_CN n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonPP_CZ n1 => + match o2 with OArithComparisonPP_CZ n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonPP_CC n1 => + match o2 with OArithComparisonPP_CC n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonPP_CV n1 => + match o2 with OArithComparisonPP_CV n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonR0R_CN n1 is1 => + match o2 with OArithComparisonR0R_CN n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end + | OArithComparisonR0R_CZ n1 is1 => + match o2 with OArithComparisonR0R_CZ n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end + | OArithComparisonR0R_CC n1 is1 => + match o2 with OArithComparisonR0R_CC n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end + | OArithComparisonR0R_CV n1 is1 => + match o2 with OArithComparisonR0R_CV n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end + | OArithComparisonR0R_CN_XZR n1 is1 vz1 => + match o2 with OArithComparisonR0R_CN_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end + | OArithComparisonR0R_CZ_XZR n1 is1 vz1 => + match o2 with OArithComparisonR0R_CZ_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end + | OArithComparisonR0R_CC_XZR n1 is1 vz1 => + match o2 with OArithComparisonR0R_CC_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end + | OArithComparisonR0R_CV_XZR n1 is1 vz1 => + match o2 with OArithComparisonR0R_CV_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end + | OArithComparisonP_CN n1 => + match o2 with OArithComparisonP_CN n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonP_CZ n1 => + match o2 with OArithComparisonP_CZ n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonP_CC n1 => + match o2 with OArithComparisonP_CC n2 => phys_eq n1 n2 | _ => RET false end + | OArithComparisonP_CV n1 => + match o2 with OArithComparisonP_CV n2 => phys_eq n1 n2 | _ => RET false end + | Ocset c1 => + match o2 with Ocset c2 => struct_eq c1 c2 | _ => RET false end + | Ofmovi fsz1 => + match o2 with Ofmovi fsz2 => phys_eq fsz1 fsz2 | _ => RET false end + | Ofmovi_XZR fsz1 => + match o2 with Ofmovi_XZR fsz2 => phys_eq fsz1 fsz2 | _ => RET false end + | Ocsel c1 => + match o2 with Ocsel c2 => struct_eq c1 c2 | _ => RET false end + | Ofnmul fsz1 => + match o2 with Ofnmul fsz2 => phys_eq fsz1 fsz2 | _ => RET false end + end. + +Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp). + +Lemma arith_op_eq_correct o1 o2: + WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. +Proof. + destruct o1, o2; my_wlp_simplify; try congruence; + try (destruct vz; destruct vz0); try (destruct is; destruct is0); + repeat apply f_equal; try congruence; + try apply Int.same_if_eq; try apply Int64.same_if_eq; try auto. +Qed. +Hint Resolve arith_op_eq_correct: wlp. +Opaque arith_op_eq_correct. + +Definition control_op_eq (c1 c2: control_op): ?? bool := + match c1 with + | Ob lbl1 => + match c2 with Ob lbl2 => phys_eq lbl1 lbl2 | _ => RET false end + | Obc c1 lbl1 => + match c2 with Obc c2 lbl2 => iandb (struct_eq c1 c2) (phys_eq lbl1 lbl2) | _ => RET false end + | Obl id1 => + match c2 with Obl id2 => phys_eq id1 id2 | _ => RET false end + | Obs id1 => + match c2 with Obs id2 => phys_eq id1 id2 | _ => RET false end + | Ocbnz sz1 lbl1 => + match c2 with Ocbnz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end + | Ocbz sz1 lbl1 => + match c2 with Ocbz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end + | Otbnz sz1 n1 lbl1 => + match c2 with Otbnz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end + | Otbz sz1 n1 lbl1 => + match c2 with Otbz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end + | Obtbl tbl1 => + match c2 with Obtbl tbl2 => (phys_eq tbl1 tbl2) | _ => RET false end + | OIncremPC sz1 => + match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end + | OError => + match c2 with OError => RET true | _ => RET false end + end. + +Lemma control_op_eq_correct c1 c2: + WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2. +Proof. + destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence; + try apply Int.same_if_eq in H; try congruence. +Qed. +Hint Resolve control_op_eq_correct: wlp. +Opaque control_op_eq_correct. + +Definition store_op_eq (s1 s2: store_op): ?? bool := + match s1 with + | Ostore1 st1 chk1 a1 => + match s2 with Ostore1 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end + | Ostore2 st1 chk1 a1 => + match s2 with Ostore2 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end + | OstoreU st1 chk1 a1 => + match s2 with OstoreU st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end + end. + +Lemma store_op_eq_correct s1 s2: + WHEN store_op_eq s1 s2 ~> b THEN b = true -> s1 = s2. +Proof. + destruct s1, s2; wlp_simplify; try congruence. + all: rewrite H1 in H0; rewrite H0, H; reflexivity. +Qed. +Hint Resolve store_op_eq_correct: wlp. +Opaque store_op_eq_correct. + +Definition load_op_eq (l1 l2: load_op): ?? bool := + match l1 with + | Oload1 ld1 chk1 a1 => + match l2 with Oload1 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end + | Oload2 ld1 chk1 a1 => + match l2 with Oload2 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end + | OloadU ld1 chk1 a1 => + match l2 with OloadU ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end + end. + +Lemma load_op_eq_correct l1 l2: + WHEN load_op_eq l1 l2 ~> b THEN b = true -> l1 = l2. +Proof. + destruct l1, l2; wlp_simplify; try congruence. + all: rewrite H1 in H0; rewrite H, H0; reflexivity. +Qed. +Hint Resolve load_op_eq_correct: wlp. +Opaque load_op_eq_correct. + +Definition allocf_op_eq (al1 al2: allocf_op): ?? bool := + match al1 with + | OAllocf_SP sz1 linkofs1 => + match al2 with OAllocf_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end + | OAllocf_Mem sz1 linkofs1 => + match al2 with OAllocf_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end + end. + +Lemma allocf_op_eq_correct al1 al2: + WHEN allocf_op_eq al1 al2 ~> b THEN b = true -> al1 = al2. +Proof. + destruct al1, al2; wlp_simplify; try congruence. + all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity. +Qed. +Hint Resolve allocf_op_eq_correct: wlp. +Opaque allocf_op_eq_correct. + +Definition freef_op_eq (fr1 fr2: freef_op): ?? bool := + match fr1 with + | OFreef_SP sz1 linkofs1 => + match fr2 with OFreef_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end + | OFreef_Mem sz1 linkofs1 => + match fr2 with OFreef_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end + end. + +Lemma freef_op_eq_correct fr1 fr2: + WHEN freef_op_eq fr1 fr2 ~> b THEN b = true -> fr1 = fr2. +Proof. + destruct fr1, fr2; wlp_simplify; try congruence. + all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity. +Qed. +Hint Resolve freef_op_eq_correct: wlp. +Opaque freef_op_eq_correct. + +Definition op_eq (o1 o2: op): ?? bool := + match o1 with + | Arith i1 => + match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end + | Control i1 => + match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end + | Load i1 => + match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end + | Store i1 => + match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end + | Allocframe i1 => + match o2 with Allocframe i2 => allocf_op_eq i1 i2 | _ => RET false end + | Freeframe i1 => + match o2 with Freeframe i2 => freef_op_eq i1 i2 | _ => RET false end + | Loadsymbol id1 => + match o2 with Loadsymbol id2 => phys_eq id1 id2 | _ => RET false end + | Cvtsw2x => + match o2 with Cvtsw2x => RET true | _ => RET false end + | Cvtuw2x => + match o2 with Cvtuw2x => RET true | _ => RET false end + | Cvtx2w => + match o2 with Cvtx2w => RET true | _ => RET false end + | Constant c1 => + match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end + end. + +Lemma op_eq_correct o1 o2: + WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2. +Proof. + destruct o1, o2; wlp_simplify; congruence. +Qed. + +End IMPPARAM. + +End P. + +Module L <: ISeqLanguage with Module LP:=P. + +Module LP:=P. + +Include MkSeqLanguage P. + +End L. + +Module IST := ImpSimu L ImpPosDict. + +Import L. +Import P. + +(** Compilation from [Asmblock] to [L] *) + +Local Open Scope positive_scope. + +Definition pmem : R.t := 1. + +Definition ireg_to_pos (ir: ireg) : R.t := + match ir with + | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11 | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15 + | X8 => 16 | X9 => 17 | X10 => 18 | X11 => 19 | X12 => 20 | X13 => 21 | X14 => 22 | X15 => 23 + | X16 => 24 | X17 => 25 | X18 => 26 | X19 => 27 | X20 => 28 | X21 => 29 | X22 => 30 | X23 => 31 + | X24 => 32 | X25 => 33 | X26 => 34 | X27 => 35 | X28 => 36 | X29 => 37 | X30 => 38 + end +. + +Definition freg_to_pos (fr: freg) : R.t := + match fr with + | D0 => 39 | D1 => 40 | D2 => 41 | D3 => 42 | D4 => 43 | D5 => 44 | D6 => 45 | D7 => 46 + | D8 => 47 | D9 => 48 | D10 => 49 | D11 => 50 | D12 => 51 | D13 => 52 | D14 => 53 | D15 => 54 + | D16 => 55 | D17 => 56 | D18 => 57 | D19 => 58 | D20 => 59 | D21 => 60 | D22 => 61 | D23 => 62 + | D24 => 63 | D25 => 64 | D26 => 65 | D27 => 66 | D28 => 67 | D29 => 68 | D30 => 69 | D31 => 70 + end +. + +Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'. +Proof. + destruct r; destruct r'; try contradiction; discriminate. +Qed. + +Lemma freg_to_pos_discr: forall r r', r <> r' -> freg_to_pos r <> freg_to_pos r'. +Proof. + destruct r; destruct r'; try contradiction; discriminate. +Qed. + +Definition ppos (r: preg) : R.t := + match r with + | CR c => match c with + | CN => 2 + | CZ => 3 + | CC => 4 + | CV => 5 + end + | PC => 6 + | DR d => match d with + | IR i => match i with + | XSP => 7 + | RR1 ir => ireg_to_pos ir + end + | FR fr => freg_to_pos fr + end + end +. + +Notation "# r" := (ppos r) (at level 100, right associativity). + +Lemma not_eq_add: + forall k n n', n <> n' -> k + n <> k + n'. +Proof. + intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto. +Qed. + +Lemma ppos_equal: forall r r', r = r' <-> ppos r = ppos r'. +Proof. + destruct r as [dr|cr|]; destruct r' as [dr'|cr'|]; + try destruct dr as [ir|fr]; try destruct dr' as [ir'|fr']; + try destruct ir as [irr|]; try destruct ir' as [irr'|]. + all: split; intros; try rewrite H; try discriminate; try contradiction; simpl; eauto; + try destruct irr; try destruct irr'; + try destruct fr; try destruct fr'; + try destruct cr; try destruct cr'; + simpl; try discriminate; try reflexivity. +Qed. + +Lemma ppos_discr: forall r r', r <> r' <-> ppos r <> ppos r'. +Proof. + split; unfold not; try intros; try apply ppos_equal in H0; try discriminate; try contradiction. +Qed. + +Lemma ppos_pmem_discr: forall r, pmem <> ppos r. +Proof. + intros. destruct r as [dr|cr|]. + - destruct dr as [ir|fr]; try destruct ir as [irr|]; try destruct irr; try destruct fr; + unfold ppos; unfold pmem; discriminate. + - unfold ppos; unfold pmem; destruct cr; discriminate. + - unfold ppos; unfold pmem; discriminate. +Qed. + +(** Inversion functions, used for debug traces *) + +Definition pos_to_ireg (p: R.t) : option ireg := + match p with + | 8 => Some (X0) | 9 => Some (X1) | 10 => Some (X2) | 11 => Some (X3) | 12 => Some (X4) | 13 => Some (X5) | 14 => Some (X6) | 15 => Some (X7) + | 16 => Some (X8) | 17 => Some (X9) | 18 => Some (X10) | 19 => Some (X11) | 20 => Some (X12) | 21 => Some (X13) | 22 => Some (X14) | 23 => Some (X15) + | 24 => Some (X16) | 25 => Some (X17) | 26 => Some (X18) | 27 => Some (X19) | 28 => Some (X20) | 29 => Some (X21) | 30 => Some (X22) | 31 => Some (X23) + | 32 => Some (X24) | 33 => Some (X25) | 34 => Some (X26) | 35 => Some (X27) | 36 => Some (X28) | 37 => Some (X29) | 38 => Some (X30) | _ => None + end. + +Definition pos_to_freg (p: R.t) : option freg := + match p with + | 39 => Some(D0) | 40 => Some(D1) | 41 => Some(D2) | 42 => Some(D3) | 43 => Some(D4) | 44 => Some(D5) | 45 => Some(D6) | 46 => Some(D7) + | 47 => Some(D8) | 48 => Some(D9) | 49 => Some(D10) | 50 => Some(D11) | 51 => Some(D12) | 52 => Some(D13) | 53 => Some(D14) | 54 => Some(D15) + | 55 => Some(D16) | 56 => Some(D17) | 57 => Some(D18) | 58 => Some(D19) | 59 => Some(D20) | 60 => Some(D21) | 61 => Some(D22) | 62 => Some(D23) + | 63 => Some(D24) | 64 => Some(D25) | 65 => Some(D26) | 66 => Some(D27) | 67 => Some(D28) | 68 => Some(D29) | 69 => Some(D30) | 70 => Some(D31) | _ => None + end. + +Definition inv_ppos (p: R.t) : option preg := + match p with + | 1 => None + | 2 => Some (CR CN) + | 3 => Some (CR CZ) + | 4 => Some (CR CC) + | 5 => Some (CR CV) + | 6 => Some (PC) + | 7 => Some (DR (IR XSP)) + | n => match pos_to_ireg n with + | None => match pos_to_freg n with + | None => None + | Some fr => Some (DR (FR fr)) + end + | Some ir => Some (DR (IR ir)) + end + end. + +Notation "a @ b" := (Econs a b) (at level 102, right associativity). + +(** Translations of instructions *) + +Definition get_testcond_rlocs (c: testcond) := + match c with + | TCeq => (PReg(#CZ) @ Enil) + | TCne => (PReg(#CZ) @ Enil) + | TChs => (PReg(#CC) @ Enil) + | TClo => (PReg(#CC) @ Enil) + | TCmi => (PReg(#CN) @ Enil) + | TCpl => (PReg(#CN) @ Enil) + | TChi => (PReg(#CZ) @ PReg(#CC) @ Enil) + | TCls => (PReg(#CZ) @ PReg(#CC) @ Enil) + | TCge => (PReg(#CN) @ PReg(#CV) @ Enil) + | TClt => (PReg(#CN) @ PReg(#CV) @ Enil) + | TCgt => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil) + | TCle => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil) + end. + +Definition trans_control (ctl: control) : inst := + match ctl with + | Pb lbl => [(#PC, Op (Control (Ob lbl)) (PReg(#PC) @ Enil))] + | Pbc c lbl => + let lr := get_testcond_rlocs c in + [(#PC, Op (Control (Obc c lbl)) (PReg(#PC) @ lr))] + | Pbl id sg => [(#RA, PReg(#PC)); + (#PC, Op (Control (Obl id)) Enil)] + | Pbs id sg => [(#PC, Op (Control (Obs id)) Enil)] + | Pblr r sg => [(#RA, PReg(#PC)); + (#PC, Old (PReg(#r)))] + | Pbr r sg => [(#PC, PReg(#r))] + | Pret r => [(#PC, PReg(#r))] + | Pcbnz sz r lbl => [(#PC, Op (Control (Ocbnz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Pcbz sz r lbl => [(#PC, Op (Control (Ocbz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Ptbnz sz r n lbl => [(#PC, Op (Control (Otbnz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Ptbz sz r n lbl => [(#PC, Op (Control (Otbz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))] + | Pbtbl r tbl => [(#X16, Op (Constant Vundef) Enil); + (#PC, Op (Control (Obtbl tbl)) (PReg(#r) @ PReg(#PC) @ Enil)); + (#X16, Op (Constant Vundef) Enil)] + | Pbuiltin ef args res => [] + end. + +Definition trans_exit (ex: option control) : L.inst := + match ex with + | None => [] + | Some ctl => trans_control ctl + end +. + +Definition trans_arith (ai: ar_instruction) : inst := + match ai with + | PArithP n rd => + if destroy_X16 n then [(#rd, Op(Arith (OArithP n)) Enil); (#X16, Op (Constant Vundef) Enil)] + else [(#rd, Op(Arith (OArithP n)) Enil)] + | PArithPP n rd r1 => [(#rd, Op(Arith (OArithPP n)) (PReg(#r1) @ Enil))] + | PArithPPP n rd r1 r2 => [(#rd, Op(Arith (OArithPPP n)) (PReg(#r1) @ PReg(#r2) @ Enil))] + | PArithRR0R n rd r1 r2 => + let lr := match r1 with + | RR0 r1' => Op(Arith (OArithRR0R n)) (PReg(#r1') @ PReg(#r2) @ Enil) + | XZR => let vz := if arith_rr0r_isize n then Vint Int.zero else Vlong Int64.zero in + Op(Arith (OArithRR0R_XZR n vz)) (PReg(#r2) @ Enil) + end in + [(#rd, lr)] + | PArithRR0 n rd r1 => + let lr := match r1 with + | RR0 r1' => Op(Arith (OArithRR0 n)) (PReg(#r1') @ Enil) + | XZR => let vz := if arith_rr0_isize n then Vint Int.zero else Vlong Int64.zero in + Op(Arith (OArithRR0_XZR n vz)) (Enil) + end in + [(#rd, lr)] + | PArithARRRR0 n rd r1 r2 r3 => + let lr := match r3 with + | RR0 r3' => Op(Arith (OArithARRRR0 n)) (PReg(#r1) @ PReg (#r2) @ PReg(#r3') @ Enil) + | XZR => let vz := if arith_arrrr0_isize n then Vint Int.zero else Vlong Int64.zero in + Op(Arith (OArithARRRR0_XZR n vz)) (PReg(#r1) @ PReg(#r2) @ Enil) + end in + [(#rd, lr)] + | PArithComparisonPP n r1 r2 => + [(#CN, Op(Arith (OArithComparisonPP_CN n)) (PReg(#r1) @ PReg(#r2) @ Enil)); + (#CZ, Op(Arith (OArithComparisonPP_CZ n)) (PReg(#r1) @ PReg(#r2) @ Enil)); + (#CC, Op(Arith (OArithComparisonPP_CC n)) (PReg(#r1) @ PReg(#r2) @ Enil)); + (#CV, Op(Arith (OArithComparisonPP_CV n)) (PReg(#r1) @ PReg(#r2) @ Enil))] + | PArithComparisonR0R n r1 r2 => + let is := arith_comparison_r0r_isize n in + match r1 with + | RR0 r1' => [(#CN, Op(Arith (OArithComparisonR0R_CN n is)) (PReg(#r1') @ PReg(#r2) @ Enil)); + (#CZ, Op(Arith (OArithComparisonR0R_CZ n is)) (PReg(#r1') @ PReg(#r2) @ Enil)); + (#CC, Op(Arith (OArithComparisonR0R_CC n is)) (PReg(#r1') @ PReg(#r2) @ Enil)); + (#CV, Op(Arith (OArithComparisonR0R_CV n is)) (PReg(#r1') @ PReg(#r2) @ Enil))] + | XZR => let vz := if is then Vint Int.zero else Vlong Int64.zero in + [(#CN, Op(Arith (OArithComparisonR0R_CN_XZR n is vz)) (PReg(#r2) @ Enil)); + (#CZ, Op(Arith (OArithComparisonR0R_CZ_XZR n is vz)) (PReg(#r2) @ Enil)); + (#CC, Op(Arith (OArithComparisonR0R_CC_XZR n is vz)) (PReg(#r2) @ Enil)); + (#CV, Op(Arith (OArithComparisonR0R_CV_XZR n is vz)) (PReg(#r2) @ Enil))] + end + | PArithComparisonP n r1 => + [(#CN, Op(Arith (OArithComparisonP_CN n)) (PReg(#r1) @ Enil)); + (#CZ, Op(Arith (OArithComparisonP_CZ n)) (PReg(#r1) @ Enil)); + (#CC, Op(Arith (OArithComparisonP_CC n)) (PReg(#r1) @ Enil)); + (#CV, Op(Arith (OArithComparisonP_CV n)) (PReg(#r1) @ Enil))] + | Pcset rd c => + let lr := get_testcond_rlocs c in + [(#rd, Op(Arith (Ocset c)) lr)] + | Pfmovi fsz rd r1 => + let lr := match r1 with + | RR0 r1' => Op(Arith (Ofmovi fsz)) (PReg(#r1') @ Enil) + | XZR => Op(Arith (Ofmovi_XZR fsz)) Enil + end in + [(#rd, lr)] + | Pcsel rd r1 r2 c => + let lr := get_testcond_rlocs c in + [(#rd, Op(Arith (Ocsel c)) (PReg(#r1) @ PReg (#r2) @ lr))] + | Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))] + end. + +Definition eval_addressing_rlocs_st (st: store_rs_a) (chunk: memory_chunk) (rs: dreg) (a: addressing) := + match a with + | ADimm base n => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) + | ADreg base r => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADlsl base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADsxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADuxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADadr base id ofs => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) + | ADpostincr base n => Op (Store (OstoreU st chunk a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *) + end. + +Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) := + match a with + | ADimm base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + | ADreg base r => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADlsl base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADsxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADuxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADadr base id ofs => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + | ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + end. + +Definition trans_ldp_chunk (chunk: memory_chunk) (r: dreg): load_rd_a := + match chunk with + | Mint32 => Pldrw + | Mint64 => Pldrx + | Mfloat32 => Pldrs + | Mfloat64 => Pldrd + | Many32 => Pldrw_a + | _ => (* This case should always correspond to Many64 *) + match r with + | IR _ => Pldrx_a + | FR _ => Pldrd_a + end + end. + +Definition trans_stp_chunk (chunk: memory_chunk) (r: dreg): store_rs_a := + match chunk with + | Mint32 => Pstrw + | Mint64 => Pstrx + | Mfloat32 => Pstrs + | Mfloat64 => Pstrd + | Many32 => Pstrw_a + | _ => (* This case should always correspond to Many64 *) + match r with + | IR _ => Pstrx_a + | FR _ => Pstrd_a + end + end. + +Definition trans_load (ldi: ld_instruction) := + match ldi with + | PLd_rd_a ld r a => + let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)] + | Pldp ld r1 r2 chk1 chk2 a => + let ldi1 := trans_ldp_chunk chk1 r1 in + let ldi2 := trans_ldp_chunk chk2 r1 in + let lr := eval_addressing_rlocs_ld ldi1 chk1 a in + let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4%Z | _ => 8%Z end in + match a with + | ADimm base n => + let a' := (get_offset_addr a ofs) in + [(#r1, lr); + (#r2, Op (Load (Oload1 ldi2 chk2 a')) + (Old(PReg (#base)) @ PReg (pmem) @ Enil))] + | _ => [(#PC, (Op (OError)) Enil)] + end + end. + +Definition trans_store (sti: st_instruction) := + match sti with + | PSt_rs_a st r a => + let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)] + | Pstp st r1 r2 chk1 chk2 a => + let sti1 := trans_stp_chunk chk1 r1 in + let sti2 := trans_stp_chunk chk2 r1 in + let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in + let ofs := match chk1 with | Mint32 | Mfloat32| Many32 => 4%Z | _ => 8%Z end in + match a with + | ADimm base n => + let a' := (get_offset_addr a ofs) in + [(pmem, lr); + (pmem, Op (Store (Ostore1 sti2 chk2 a')) + (PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))] + | _ => [(#PC, (Op (OError)) Enil)] + end + end. + +Definition trans_basic (b: basic) : inst := + match b with + | PArith ai => trans_arith ai + | PLoad ld => trans_load ld + | PStore st => trans_store st + | Pallocframe sz linkofs => + [(#X29, PReg(#SP)); + (#SP, Op (Allocframe (OAllocf_SP sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil)); + (#X16, Op (Constant Vundef) Enil); + (pmem, Op (Allocframe (OAllocf_Mem sz linkofs)) (Old(PReg(#SP)) @ PReg pmem @ Enil))] + | Pfreeframe sz linkofs => + [(pmem, Op (Freeframe (OFreef_Mem sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil)); + (#SP, Op (Freeframe (OFreef_SP sz linkofs)) (PReg (#SP) @ Old (PReg pmem) @ Enil)); + (#X16, Op (Constant Vundef) Enil)] + | Ploadsymbol rd id => [(#rd, Op (Loadsymbol id) Enil)] + | Pcvtsw2x rd r1 => [(#rd, Op (Cvtsw2x) (PReg (#r1) @ Enil))] + | Pcvtuw2x rd r1 => [(#rd, Op (Cvtuw2x) (PReg (#r1) @ Enil))] + | Pcvtx2w rd => [(#rd, Op (Cvtx2w) (PReg (#rd) @ Enil))] + | Pnop => [] + end. + +Fixpoint trans_body (b: list basic) : list L.inst := + match b with + | nil => nil + | b :: lb => (trans_basic b) :: (trans_body lb) + end. + +Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k. + +Definition trans_block (b: Asmblock.bblock) : L.bblock := + trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil). + +(*Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.*) +(*Proof.*) + (*intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*) +(*Qed.*) + +(*Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb.*) +(*Proof.*) + (*intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*) +(*Qed.*) + +(** Lemmas on the translation *) + +Definition state := L.mem. +Definition exec := L.run. + +Definition match_states (s: Asm.state) (s': state) := + let (rs, m) := s in + s' pmem = Memstate m + /\ forall r, s' (#r) = Val (rs r). + +Definition match_outcome (o:outcome) (s: option state) := + match o with + | Some n => exists s', s=Some s' /\ match_states n s' + | None => s=None + end. + +Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity). + +Definition trans_state (s: Asm.state) : state := + let (rs, m) := s in + fun x => if (Pos.eq_dec x pmem) then Memstate m + else match (inv_ppos x) with + | Some r => Val (rs r) + | None => Val Vundef + end. + +Lemma not_eq_IR: + forall r r', r <> r' -> IR r <> IR r'. +Proof. + intros. congruence. +Qed. + +Lemma ireg_pos_ppos: forall r, + ireg_to_pos r = # r. +Proof. + intros. simpl. reflexivity. +Qed. + +Lemma freg_pos_ppos: forall r, + freg_to_pos r = # r. +Proof. + intros. simpl. reflexivity. +Qed. + +Lemma ireg_not_pc: forall r, + (#PC) <> ireg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma ireg_not_pmem: forall r, + ireg_to_pos r <> pmem. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma ireg_not_CN: forall r, + 2 <> ireg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma ireg_not_CZ: forall r, + 3 <> ireg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma ireg_not_CC: forall r, + 4 <> ireg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma ireg_not_CV: forall r, + 5 <> ireg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma freg_not_pmem: forall r, + freg_to_pos r <> pmem. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma freg_not_CN: forall r, + 2 <> freg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma freg_not_CZ: forall r, + 3 <> freg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma freg_not_CC: forall r, + 4 <> freg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma freg_not_CV: forall r, + 5 <> freg_to_pos r. +Proof. + intros; destruct r; discriminate. +Qed. + +Lemma dreg_not_pmem: forall (r: dreg), + (# r) <> pmem. +Proof. + intros; destruct r as [i|f]. + - destruct i. apply ireg_not_pmem. discriminate. + - apply freg_not_pmem. +Qed. + +Ltac DPRM pr := + destruct pr as [drDPRF|crDPRF|]; + [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF |] + | destruct crDPRF|]. + +Ltac DPRF pr := + destruct pr as [drDPRF|crDPRF|]; + [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF as [irrDPRF|]; [destruct irrDPRF|] + | destruct frDPRF] + | destruct crDPRF|]. + +Lemma preg_not_pmem: forall r, + pmem <> # r. +Proof. + intros. DPRF r; simpl; discriminate. +Qed. + +Ltac DIRN1 ir := destruct ir as [irrDIRN1|]; subst; try destruct irrDIRN1; simpl. + +Lemma dreg_not_CN: forall (r: dreg), + 2 <> (#r). +Proof. + intros; DIRN1 r; [ apply ireg_not_CN | discriminate | apply freg_not_CN]. +Qed. + +Lemma dreg_not_CZ: forall (r: dreg), + 3 <> (#r). +Proof. + intros; DIRN1 r; [ apply ireg_not_CZ | discriminate | apply freg_not_CZ]. +Qed. + +Lemma dreg_not_CC: forall (r: dreg), + 4 <> (#r). +Proof. + intros; DIRN1 r; [ apply ireg_not_CC | discriminate | apply freg_not_CC]. +Qed. + +Lemma dreg_not_CV: forall (r: dreg), + 5 <> (#r). +Proof. + intros; DIRN1 r; [ apply ireg_not_CV | discriminate | apply freg_not_CV]. +Qed. + +Lemma sr_update_both: forall sr rsr r1 rr v + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + (sr <[ (#r1) <- Val (v) ]>) (#rr) = + Val (rsr # r1 <- v rr). +Proof. + intros. unfold assign. + destruct (R.eq_dec (#r1) (#rr)); subst. + - apply ppos_equal in e; subst; rewrite Pregmap.gss; reflexivity. + - rewrite Pregmap.gso; eapply ppos_discr in n; auto. +Qed. + +Lemma sr_gss: forall sr pos v, + (sr <[ pos <- v ]>) pos = v. +Proof. + intros. unfold assign. + destruct (R.eq_dec pos pos) eqn:REQ; try reflexivity; try congruence. +Qed. + +Lemma sr_update_overwrite: forall sr pos v1 v2, + (sr <[ pos <- v1 ]>) <[ pos <- v2 ]> = (sr <[ pos <- v2 ]>). +Proof. + intros. + unfold assign. apply functional_extensionality; intros x. + destruct (R.eq_dec pos x); reflexivity. +Qed. + +Ltac sr_val_rwrt := + repeat match goal with + | [H: forall r: preg, ?sr (# r) = Val (?rsr r) |- _ ] + => rewrite H + end. + +Ltac sr_memstate_rwrt := + repeat match goal with + | [H: ?sr pmem = Memstate ?mr |- _ ] + => rewrite <- H + end. + +Ltac replace_ppos := + try erewrite !ireg_pos_ppos; + try erewrite !freg_pos_ppos; + try replace (7) with (#XSP) by eauto; + try replace (24) with (#X16) by auto. + +Ltac DDRM dr := + destruct dr as [irsDDRF|frDDRF]; + [destruct irsDDRF + | idtac ]. + +(* Ltac DDRF dr := + destruct dr as [irsDDRF|frDDRF]; + [destruct irsDDRF as [irsDDRF|]; [destruct irsDDRF|] + | destruct frDDRF]. *) + +(* Ltac DPRI pr := + destruct pr as [drDPRI|crDPRI|]; + [destruct drDPRI as [irDPRI|frDPRI]; [destruct irDPRI as [irrDPRI|]; [destruct irrDPRI|]|] + | idtac + | idtac ]. *) + +Ltac discriminate_ppos := + try apply ireg_not_pmem; + try apply ireg_not_pc; + try apply freg_not_pmem; + try apply dreg_not_pmem; + try apply ireg_not_CN; + try apply ireg_not_CZ; + try apply ireg_not_CC; + try apply ireg_not_CV; + try apply freg_not_CN; + try apply freg_not_CZ; + try apply freg_not_CC; + try apply freg_not_CV; + try apply dreg_not_CN; + try apply dreg_not_CZ; + try apply dreg_not_CC; + try apply dreg_not_CV; + try(simpl; discriminate). + +Ltac replace_pc := try replace (6) with (#PC) by eauto. + +Ltac replace_regs_pos sr := + try replace (sr 7) with (sr (ppos XSP)) by eauto; + try replace (sr 6) with (sr (ppos PC)) by eauto; + try replace (sr 2) with (sr (ppos CN)) by eauto; + try replace (sr 3) with (sr (ppos CZ)) by eauto; + try replace (sr 4) with (sr (ppos CC)) by eauto; + try replace (sr 5) with (sr (ppos CV)) by eauto. + +Ltac Simpl_exists sr := + replace_ppos; + replace_regs_pos sr; + try sr_val_rwrt; + try (eexists; split; [| split]); eauto; + try (sr_memstate_rwrt; rewrite assign_diff; + try reflexivity; + discriminate_ppos + ). + +Ltac Simpl_rep sr := + replace_ppos; + replace_regs_pos sr; + try sr_val_rwrt; + try (sr_memstate_rwrt; rewrite assign_diff; + try reflexivity; + discriminate_ppos + ). + +Ltac Simpl_update := + try eapply sr_update_both; eauto. + +Ltac Simpl sr := Simpl_exists sr; try (intros rr(* ; try rewrite sr_update_overwrite; replace_regs_pos sr; DPRM rr *)); Simpl_update. + +Ltac destruct_res_flag rsr := try (rewrite Pregmap.gso; discriminate_ppos); destruct (rsr _); simpl; try reflexivity. + +(* Ltac discriminate_preg_flags := rewrite !assign_diff; try rewrite !Pregmap.gso; discriminate_ppos; sr_val_rwrt; reflexivity. *) + +(* Ltac destruct_reg_neq r1 r2 := + destruct (PregEq.eq r1 r2); subst; + [ rewrite sr_gss; rewrite Pregmap.gss; reflexivity | + rewrite assign_diff; try rewrite Pregmap.gso; fold (ppos r1); try apply ppos_discr; auto]. *) + +Lemma reg_update_overwrite: forall rsr sr r rd v1 v2 + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + ((sr <[ # rd <- Val (v1) ]>) <[ # rd <- Val (v2) ]>) (# r) = + Val ((rsr # rd <- v1) # rd <- v2 r). +Proof. + intros. + unfold Pregmap.set; destruct (PregEq.eq r rd). + - rewrite e; apply sr_gss; reflexivity. + - rewrite sr_update_overwrite. rewrite assign_diff; eauto. + unfold not; intros. apply ppos_equal in H. congruence. +Qed. + +Ltac replace_regs_cond_force := + try replace (5) with (#CV) in * by auto; + try replace (4) with (#CC) in * by auto; + try replace (3) with (#CZ) in * by auto; + try replace (2) with (#CN) in * by auto. + +Ltac validate_crbit_flags rr v1 v2 := + destruct (R.eq_dec 5 (#rr)) as [e0|n0]; + destruct (R.eq_dec 4 (#rr)) as [e1|n1]; + destruct (R.eq_dec 3 (#rr)) as [e2|n2]; + destruct (R.eq_dec 2 (#rr)) as [e3|n3]; + replace_regs_cond_force; + try apply ppos_equal in e0; + try apply ppos_equal in e1; + try apply ppos_equal in e2; + try apply ppos_equal in e3; + try apply ppos_discr in n0; + try apply ppos_discr in n1; + try apply ppos_discr in n2; + try apply ppos_discr in n3; + subst. + +Ltac Simpl_flags := + try (rewrite Pregmap.gss; reflexivity); + try (rewrite Pregmap.gso, Pregmap.gss; [reflexivity|]; try auto); + try (rewrite Pregmap.gso; try auto); + try (rewrite !Pregmap.gso; try auto). + +Lemma compare_single_res: forall sr mr rsr rr v1 v2 + (HMEM: sr pmem = Memstate mr) + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + ((((sr <[ 2 <- Val (_CN (v_compare_single v1 v2)) ]>) <[ 3 <- + Val (_CZ (v_compare_single v1 v2)) ]>) <[ 4 <- + Val (_CC (v_compare_single v1 v2)) ]>) <[ 5 <- + Val (_CV (v_compare_single v1 v2)) ]>) (# rr) = +Val + ((compare_single rsr v1 v2) rr). +Proof. + intros. unfold v_compare_single, compare_single, assign. + validate_crbit_flags rr v1 v2. + all: destruct v1; destruct v2; Simpl_flags. +Qed. + +Lemma compare_float_res: forall sr mr rsr rr v1 v2 + (HMEM: sr pmem = Memstate mr) + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + ((((sr <[ 2 <- Val (_CN (v_compare_float v1 v2)) ]>) <[ 3 <- + Val (_CZ (v_compare_float v1 v2)) ]>) <[ 4 <- + Val (_CC (v_compare_float v1 v2)) ]>) <[ 5 <- + Val (_CV (v_compare_float v1 v2)) ]>) (# rr) = +Val + ((compare_float rsr v1 v2) rr). +Proof. + intros. unfold v_compare_float, compare_float, assign. + validate_crbit_flags rr v1 v2. + all: destruct v1; destruct v2; Simpl_flags. +Qed. + +Lemma compare_long_res: forall sr mr rsr rr v1 v2 + (HMEM: sr pmem = Memstate mr) + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + ((((sr <[ 2 <- Val (_CN (v_compare_long v1 v2)) ]>) <[ 3 <- + Val (_CZ (v_compare_long v1 v2)) ]>) <[ 4 <- + Val (_CC (v_compare_long v1 v2)) ]>) <[ 5 <- + Val (_CV (v_compare_long v1 v2)) ]>) (# rr) = +Val + ((compare_long rsr v1 v2) rr). +Proof. + intros. unfold v_compare_long, compare_long, assign. + validate_crbit_flags rr v1 v2. + all: Simpl_flags. +Qed. + +Lemma compare_int_res: forall sr mr rsr rr v1 v2 + (HMEM: sr pmem = Memstate mr) + (HEQV: forall r : preg, sr (# r) = Val (rsr r)), + ((((sr <[ 2 <- Val (_CN (v_compare_int v1 v2)) ]>) <[ 3 <- + Val (_CZ (v_compare_int v1 v2)) ]>) <[ 4 <- + Val (_CC (v_compare_int v1 v2)) ]>) <[ 5 <- + Val (_CV (v_compare_int v1 v2)) ]>) (# rr) = +Val + ((compare_int rsr v1 v2) rr). +Proof. + intros. unfold v_compare_int, compare_int, assign. + validate_crbit_flags rr v1 v2. + all: Simpl_flags. +Qed. + +Section SECT_SEQ. + +Variable Ge: genv. + +Lemma trans_arith_correct rsr mr sr rsw' old i: + match_states (State rsr mr) sr -> + exec_arith_instr Ge.(_lk) i rsr = rsw' -> + exists sw, + inst_run Ge (trans_arith i) sr old = Some sw + /\ match_states (State rsw' mr) sw. +Proof. + induction i. + all: intros MS EARITH; subst; inv MS; unfold exec_arith_instr. + - (* PArithP *) + destruct i. + 1,2,3: DIRN1 rd; Simpl sr. + (* Special case for Pmovimms/Pmovimmd *) + all: simpl; + try(destruct (negb (is_immediate_float32 _))); + try(destruct (negb (is_immediate_float64 _))); + DIRN1 rd; Simpl sr; + try (rewrite assign_diff; discriminate_ppos; reflexivity); + try (intros rr'; Simpl_update). + - (* PArithPP *) + DIRN1 rs; DIRN1 rd; Simpl sr. + - (* PArithPPP *) + DIRN1 r1; DIRN1 r2; DIRN1 rd; Simpl sr. + - (* PArithRR0R *) + simpl. destruct r1. + + (* OArithRR0R *) simpl; Simpl sr. + + (* OArithRR0R_XZR *) simpl; destruct (arith_rr0r_isize _); Simpl sr. + - (* PArithRR0 *) + simpl. destruct r1. + + (* OArithRR0 *) simpl; Simpl sr. + + (* OArithRR0_XZR *) simpl; destruct (arith_rr0_isize _); Simpl sr. + - (* PArithARRRR0 *) + simpl. destruct r3. + + (* OArithARRRR0 *) simpl; Simpl sr. + + (* OArithARRRR0_XZR *) simpl; destruct (arith_arrrr0_isize _); Simpl sr. + - (* PArithComparisonPP *) + simpl; destruct i; + unfold arith_eval_comparison_pp, arith_prepare_comparison_pp; simpl; + fold (ppos r2); fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr; + Simpl sr; + try destruct sz; + try (eapply compare_single_res; eauto); + try (eapply compare_long_res; eauto); + try (eapply compare_float_res; eauto). + - (* PArithComparisonR0R *) + simpl; destruct r1; simpl; destruct i; + repeat (replace_regs_cond_force; try fold (ppos r); try fold(ppos r2); + try rewrite !assign_diff; discriminate_ppos; try rewrite !H0); + Simpl sr; unfold arith_eval_comparison_r0r, arith_comparison_r0r_isize; + destruct arith_prepare_comparison_r0r; destruct is; + try (eapply compare_long_res; eauto); + try (eapply compare_int_res; eauto). + - (* PArithComparisonP *) + simpl; destruct i; + unfold arith_eval_comparison_p, arith_prepare_comparison_p; simpl; + fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr; + Simpl sr; + try destruct sz; + try (eapply compare_int_res; eauto); + try (eapply compare_single_res; eauto); + try (eapply compare_long_res; eauto); + try (eapply compare_float_res; eauto). + - (* Pcset *) + simpl; unfold eval_testcond, get_testcond_rlocs, cflags_eval; + unfold cond_eval_is; unfold flags_testcond_value, list_exp_eval; destruct c; simpl; + repeat Simpl_rep sr; Simpl_exists sr; + destruct_res_flag rsr; + Simpl sr. + - (* Pfmovi *) + simpl; destruct r1; simpl; destruct fsz; Simpl sr. + - (* Pcsel *) + destruct c; simpl; DIRN1 rd; fold (ppos r2); fold (ppos r1); Simpl sr. + - (* Pfnmul *) + simpl; destruct fsz; Simpl sr. +Qed. + +Lemma sp_xsp: + SP = XSP. +Proof. + econstructor. +Qed. + +Lemma load_chunk_neutral: forall chk v r, + interp_load (trans_ldp_chunk chk r) v = v. +Proof. + intros; destruct chk; destruct r; simpl; reflexivity. +Qed. + +Theorem bisimu_basic rsr mr sr bi: + match_states (State rsr mr) sr -> + match_outcome (exec_basic Ge.(_lk) Ge.(_genv) bi rsr mr) (inst_run Ge (trans_basic bi) sr sr). +Proof. +(* a little tactic to automate reasoning on preg_eq *) +Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core. +Local Ltac preg_eq_discr r rd := + destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto); + rewrite (assign_diff _ (#rd) (#r) _); auto; + rewrite Pregmap.gso; auto. + + intros MS; inversion MS as (H & H0). + destruct bi; simpl. + (* Loadsymbol / Cvtsw2x / Cvtuw2x / Cvtx2w *) + 6,7,8,9: Simpl sr. + - (* Arith *) + exploit trans_arith_correct; eauto. + - (* Load *) + destruct ld. + + unfold exec_load, exec_load_rd_a, eval_addressing_rlocs_ld, exp_eval; + destruct ld; destruct a; simpl; + try fold (ppos base); try fold (ppos r); + erewrite !H0, H; simpl; + unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv; + try destruct (Mem.loadv _ _ _); simpl; auto. + all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update). + + + unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval; + destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto; + try fold (ppos base); + try erewrite !H0, H; simpl; + unfold exec_load1, exec_load2; unfold call_ll_loadv; + destruct (Mem.loadv _ _ _); simpl; auto; + fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H; + try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr; + rewrite !load_chunk_neutral; + try (rewrite !assign_diff; discriminate_ppos; reflexivity); + try (destruct rd1 as [ir1|fr1]; try destruct ir1; destruct rd2 as [ir2|fr2]; try destruct ir2; + destruct base; discriminate_ppos); + repeat (try fold (ppos r); try fold (ppos r0); + try fold (ppos fr1); try fold (ppos fr2); intros; Simpl_update). + - (* Store *) + destruct st. + + unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval; + destruct st; destruct a; simpl; + try fold (ppos base); try fold (ppos rs); try fold (ppos r); + erewrite !H0, H; simpl; + unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev; + try destruct (Mem.storev _ _ _ _); simpl; auto. + all: eexists; split; [| split]; eauto; + intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos. + + unfold exec_store, exec_store_double, eval_addressing_rlocs_st, exp_eval; + destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto; + try fold (ppos base); try fold (ppos rs1); + erewrite !H0, H; simpl; + unfold exec_store1, exec_store2; unfold call_ll_storev; + try destruct (Mem.storev _ _ _ _); simpl; auto; + fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl; + try destruct (Mem.storev _ _ _ _); simpl; auto. + all: eexists; split; [| split]; eauto; + repeat (try intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos). + - (* Alloc *) + destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS. + + eexists; repeat split. + * rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr; + rewrite H; destruct (Mem.alloc _ _ _) eqn:MEMAL2; + injection MEMAL; intros Hm Hb; try rewrite Hm, Hb; + rewrite sp_xsp in MEMS; rewrite MEMS. + rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr; rewrite H; + destruct (Mem.alloc _ _ _) eqn:MEMAL3; + injection MEMAL2; intros Hm2 Hb2; try rewrite Hm2, Hb2; + rewrite Hm, Hb; rewrite MEMS; reflexivity. + * eauto. + * intros rr. rewrite assign_diff; try apply preg_not_pmem; try rewrite sp_xsp. + replace 37 with (#X29) by auto; repeat (Simpl_update; intros). + + simpl; repeat Simpl_exists sr. erewrite H. destruct (Mem.alloc _ _ _) eqn:HMEMAL2. + injection MEMAL; intros Hm Hb. + try rewrite Hm, Hb; clear Hm Hb. + try rewrite sp_xsp in MEMS; rewrite MEMS. reflexivity. + - (* Free *) + destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto; + repeat Simpl_exists sr; rewrite H; simpl. + + destruct (rsr SP) eqn:EQSP; simpl; rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD; try reflexivity. + destruct (Mem.free _ _ _) eqn:EQFREE; try reflexivity. rewrite assign_diff; discriminate_ppos. + replace_regs_pos sr; sr_val_rwrt. rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD. rewrite EQFREE. + replace 24 with (#X16) by auto; rewrite sp_xsp; Simpl sr. + intros rr'; destruct (PregEq.eq XSP rr'). + * rewrite e; rewrite Pregmap.gss, sr_gss; auto. + * rewrite Pregmap.gso, !assign_diff; auto; apply ppos_discr in n; auto. + + rewrite <- sp_xsp; rewrite MLOAD; reflexivity. + - (* Nop *) + Simpl sr. +Qed. + +Theorem bisimu_body: + forall bdy rsr mr sr, + match_states (State rsr mr) sr -> + match_outcome (exec_body Ge.(_lk) Ge.(_genv) bdy rsr mr) (exec Ge (trans_body bdy) sr). +Proof. + induction bdy as [|i bdy]; simpl; eauto. + intros. + exploit (bisimu_basic rsr mr sr i); eauto. + destruct (exec_basic _ _ _ _ _); simpl. + - intros (s' & X1 & X2). + rewrite X1; simpl; eauto. eapply IHbdy; eauto; simpl. + unfold match_states in *. destruct s. unfold Asm._m. eauto. + - intros X; rewrite X; simpl; auto. +Qed. + +Theorem bisimu_control ex sz rsr mr sr: + match_states (State rsr mr) sr -> + match_outcome (exec_cfi Ge.(_genv) Ge.(_fn) ex (incrPC (Ptrofs.repr sz) rsr) mr) (inst_run Ge (trans_pcincr sz (trans_exit (Some (PCtlFlow ex)))) sr sr). +Proof. + intros MS. + simpl in *. inv MS. + destruct ex. + (* Obr / Oret *) + 6,7: unfold control_eval, incrPC; simpl; destruct Ge; + replace_pc; rewrite (H0 PC); + repeat Simpl_rep sr; Simpl_exists sr; + intros rr; destruct (preg_eq rr PC); [ + rewrite e; rewrite sr_gss; rewrite Pregmap.gss; + try rewrite Pregmap.gso; discriminate_ppos; fold (ppos r); auto | + repeat Simpl_rep sr; try rewrite !Pregmap.gso; auto; apply ppos_discr in n; auto ]. + (* Ocbnz / Ocbz *) + 6,7,8,9: unfold control_eval; destruct Ge; simpl; + replace_pc; rewrite (H0 PC); + unfold eval_branch, eval_neg_branch, eval_testzero, eval_testbit, + incrPC, goto_label_deps, goto_label; + destruct (PregEq.eq r PC); + [ rewrite e; destruct sz0; simpl; Simpl sr | + destruct sz0; simpl; replace_pc; rewrite Pregmap.gso; auto; repeat Simpl_rep sr; try rewrite H0; + try (destruct (Val.mxcmpu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b); + try (destruct (Val.mxcmplu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b); + try (destruct (Val.cmp_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b); + try (destruct (Val.cmpl_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b); + try (simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr); + destruct (label_pos _ _ _); try reflexivity; rewrite Pregmap.gss; + destruct Val.offset_ptr; try reflexivity; rewrite sr_update_overwrite; + simpl; Simpl sr; (destruct (PregEq.eq rr PC); subst; + [ rewrite sr_gss, Pregmap.gss; reflexivity | + rewrite !assign_diff, !Pregmap.gso; replace_pc; auto; apply ppos_discr; auto]) ]. + - (* Ob *) + replace_pc. rewrite (H0 PC). simpl. + unfold goto_label, control_eval. destruct Ge. + unfold goto_label_deps. destruct (label_pos _ _ _); auto. + + unfold incrPC. rewrite Pregmap.gss; eauto. destruct (Val.offset_ptr _ _); auto; + try (rewrite sr_gss; unfold Stuck; reflexivity). + simpl. eexists; split; split. + * rewrite sr_update_overwrite. unfold pmem, assign in *. simpl. rewrite H; reflexivity. + * intros. rewrite sr_update_overwrite. unfold Pregmap.set, assign. + destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct ir as [irr|]; + try destruct irr; try destruct fr; try destruct cr; simpl; try rewrite <- H0; eauto. + + rewrite sr_gss; reflexivity. + - (* Obc *) + replace_pc. rewrite (H0 PC). simpl. + unfold eval_branch, goto_label, control_eval. destruct Ge. + unfold goto_label_deps, cflags_eval, eval_testcond, list_exp_eval. + destruct c; simpl; unfold incrPC; + repeat (replace_ppos; replace_pc; replace_regs_pos sr; sr_val_rwrt; try rewrite !assign_diff; discriminate_ppos). + 1,2,3,4,5,6: destruct_res_flag rsr. + 7,8,9,10: do 2 (destruct_res_flag rsr). + 11,12 : do 3 (destruct_res_flag rsr). + 1,2,3,4,5,6,9,10: destruct (Int.eq _ _); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ]; + destruct (label_pos _ _ _); [| reflexivity]; replace_pc; + rewrite !Pregmap.gss; destruct Val.offset_ptr; + try (unfold Stuck; reflexivity); Simpl_exists sr; intros rr; + apply reg_update_overwrite; eauto. + 1,3: destruct (andb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ]; + destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss; + destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr; + intros rr; apply reg_update_overwrite; eauto. + 1,2: destruct (orb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ]; + destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss; + destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr; + intros rr; apply reg_update_overwrite; eauto. + - (* Obl *) + replace_pc. rewrite (H0 PC). simpl. + unfold control_eval. destruct Ge. + rewrite sr_gss. Simpl sr. + replace_pc; try replace (38) with (#X30) by eauto; unfold incrPC. Simpl_update. + repeat (intros; Simpl_update). + - (* Obs *) + unfold control_eval. destruct Ge. replace_pc. rewrite (H0 PC). simpl; unfold incrPC. + replace_pc; Simpl_exists sr; intros rr; apply reg_update_overwrite; eauto. + - (* Oblr *) + replace_pc. rewrite (H0 PC). + unfold control_eval. destruct Ge. simpl. unfold incrPC. + try (eexists; split; [ | split ]); eauto. + intros rr; destruct (PregEq.eq PC rr). + + replace_pc; rewrite e; rewrite !Pregmap.gss, !sr_gss; + rewrite Pregmap.gso; fold (ppos r); try rewrite H0; auto. + rewrite <- e; destruct r; discriminate. + + replace_pc; rewrite Pregmap.gso, assign_diff; auto; apply ppos_discr in n; auto; + rewrite Pregmap.gss, sr_gss. destruct (PregEq.eq X30 rr); replace 38 with (#X30) by auto. + * rewrite e; rewrite Pregmap.gss, sr_gss; auto. + * replace_pc; rewrite !Pregmap.gso, !assign_diff; auto; apply ppos_discr in n0; auto; + apply ppos_discr; eauto. + - (* Obtbl *) + replace_pc; rewrite (H0 PC); + unfold control_eval; destruct Ge; simpl; unfold incrPC. + destruct (PregEq.eq X16 r1). + + fold (ppos r1); rewrite <- e; rewrite Pregmap.gss, sr_gss; reflexivity. + + rewrite !Pregmap.gso; auto; rewrite ppos_discr in n; + fold (ppos r1); replace 24 with (#X16) by auto; try rewrite !assign_diff; auto; + discriminate_ppos; rewrite H0; destruct (rsr r1) eqn:EQR; + try (try rewrite EQR; reflexivity). + try destruct (list_nth_z _ _); try reflexivity; + unfold goto_label, goto_label_deps; destruct (label_pos _ _ _); + try rewrite 2Pregmap.gso, Pregmap.gss; destruct (Val.offset_ptr (rsr PC) (Ptrofs.repr sz)); + try reflexivity; discriminate_ppos. Simpl sr. + destruct (PregEq.eq X16 rr); [ subst; Simpl_update |]; + destruct (PregEq.eq PC rr); [ subst; Simpl_update |]. + rewrite !Pregmap.gso; auto; + apply ppos_discr in n0; apply ppos_discr in n1; + rewrite !assign_diff; auto. +Qed. + +Theorem bisimu_exit ex sz rsr mr sr: + match_states (State rsr mr) sr -> + (*is_builtin ex = false ->*) + match_outcome (estep Ge.(_genv) Ge.(_fn) ex (Ptrofs.repr sz) rsr mr) (inst_run Ge (trans_pcincr sz (trans_exit ex)) sr sr). +Proof. + intros MS; unfold estep. + destruct ex. + - destruct c. + + exploit (bisimu_control i sz rsr mr sr); eauto. + + simpl. inv MS. eexists; split; [| split]. + unfold control_eval; destruct Ge. + replace_pc; rewrite (H0 PC); eauto. + rewrite assign_diff; auto. + intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst. + * rewrite sr_gss, Pregmap.gss. reflexivity. + * rewrite assign_diff, Pregmap.gso; try rewrite H0; + auto; try rewrite ppos_discr in n; auto. + - simpl. inv MS. eexists; split; [| split]. + + unfold control_eval; destruct Ge. + replace_pc; rewrite (H0 PC); eauto. + + rewrite assign_diff; auto. + + intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst. + * rewrite sr_gss, Pregmap.gss. reflexivity. + * rewrite assign_diff, Pregmap.gso; try rewrite H0; auto; try rewrite ppos_discr in n; auto. +Qed. + +(* Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil). *) + +Theorem bisimu rsr mr sr bb: + match_states (State rsr mr) sr -> + match_outcome (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) bb rsr mr) (exec Ge (trans_block bb) sr). +Proof. + intros MS. unfold bbstep, trans_block. + exploit (bisimu_body (body bb) rsr mr sr); eauto. + destruct (exec_body _ _ _ _ _); simpl. + - unfold match_states in *. intros (s' & X1 & X2). destruct s. + erewrite run_app_Some; eauto. + exploit (bisimu_exit (exit bb) (size bb) _rs _m s'); eauto. + destruct Ge; simpl. destruct MS as (Y1 & Y2). destruct X2 as (X2 & X3). + replace_pc. erewrite !X3; simpl. + destruct (inst_run _ _ _ _); simpl; auto. + - intros X; erewrite run_app_None; eauto. +Qed. + +Theorem trans_state_match: forall S, match_states S (trans_state S). +Proof. + intros. destruct S as (rs & m). simpl. + split. reflexivity. + intro. destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct cr; + try destruct ir as [irr|]; try destruct irr; try destruct fr; try reflexivity. +Qed. + +Lemma state_eq_decomp: + forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2. +Proof. + intros. congruence. +Qed. + +Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2. +Proof. + unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1. + apply state_eq_decomp. + - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence. + - congruence. +Qed. + +End SECT_SEQ. + +Section SECT_BBLOCK_EQUIV. + +Variable Ge: genv. + +Local Hint Resolve trans_state_match: core. + +Lemma bblock_simu_reduce_aux: + forall p1 p2, + L.bblock_simu Ge (trans_block p1) (trans_block p2) -> + Asmblockprops.bblock_simu_aux Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2. +Proof. + intros p1 p2 H0 rs m EBB. + generalize (H0 (trans_state (State rs m))); clear H0. + intro H0. + exploit (bisimu Ge rs m (trans_state (State rs m)) p1); eauto. + exploit (bisimu Ge rs m (trans_state (State rs m)) p2); eauto. + destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 rs m); try (unfold Stuck in EBB; congruence). + intros H1 (s2' & exp2 & MS'). unfold exec in exp2, H1. rewrite exp2 in H0. + destruct H0 as (m2' & H0 & H2). discriminate. rewrite H0 in H1. + destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p2 rs m); simpl in H1. + * unfold match_states in H1, MS'. destruct s, s0. + destruct H1 as (s' & H1 & H3 & H4). inv H1. inv MS'. + replace (_rs0) with (_rs). + - replace (_m0) with (_m); auto. congruence. + - apply functional_extensionality. intros r. + generalize (H1 r). intros Hr. congruence. + * discriminate. +Qed. + +Lemma incrPC_set_res_commut res: forall d vres rs, + incrPC d (set_res (map_builtin_res DR res) vres rs) = + set_res (map_builtin_res DR res) vres (incrPC d rs). +Proof. + induction res; simpl; auto. + - intros; apply functional_extensionality. + unfold incrPC; intros x0. + destruct (PregEq.eq x0 PC). + + subst; rewrite! Pregmap.gss; auto. + + rewrite Pregmap.gso; auto. + destruct (PregEq.eq x x0). + * subst; rewrite! Pregmap.gss; auto. + * rewrite !Pregmap.gso; auto. + - intros; rewrite IHres2. f_equal. auto. +Qed. + +Lemma incrPC_undef_regs_commut l : forall d rs, + incrPC d (undef_regs l rs) = undef_regs l (incrPC d rs). +Proof. + induction l; simpl; auto. + intros. rewrite IHl. unfold incrPC. + destruct (PregEq.eq a PC). + - rewrite e. rewrite Pregmap.gss. + simpl. apply f_equal. unfold Pregmap.set. + apply functional_extensionality. intros x. + destruct (PregEq.eq x PC); auto. + - rewrite Pregmap.gso; auto. + apply f_equal. unfold Pregmap.set. + apply functional_extensionality. intros x. + destruct (PregEq.eq x PC). + + subst. destruct a; simpl; auto. congruence. + + auto. +Qed. + +Lemma bblock_simu_reduce: + forall p1 p2, + L.bblock_simu Ge (trans_block p1) (trans_block p2) -> + (has_builtin p1 = true \/ has_builtin p2 = true -> exit p1 = exit p2) -> + Asmblockprops.bblock_simu Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2. +Proof. + unfold bblock_simu. intros p1 p2 H0 BLT rs m EBB. + unfold exec_bblock. + generalize (bblock_simu_reduce_aux p1 p2 H0). + unfold bblock_simu_aux. clear H0. + unfold exec_bblock, bbstep. intros H0 m' t0 (rs1 & m1 & H1 & H2). + assert ((has_builtin p1 = false /\ has_builtin p2 = false) \/ (has_builtin p1 = true \/ has_builtin p2 = true)). { repeat destruct (has_builtin _); simpl; intuition. } + destruct H as [[X1 X2]|H]. + - (* Not a builtin *) + exploit (H0); eauto; erewrite H1; simpl; (*gen*) + unfold estep; unfold has_builtin in *. + { destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *. + inversion H2; subst. rewrite H3; discriminate. } + destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *. + { inversion H2; subst. rewrite H3. + destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate. + intros H4. eexists; eexists; split; try reflexivity. + destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto. + inversion H4. econstructor. } + { inversion H2; subst. + destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate. +intros H4. eexists; eexists; split; try reflexivity. + destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto. + inversion H4. rewrite H3. econstructor. } + - (* Builtin *) + exploit (BLT); eauto. + intros EXIT. + unfold has_builtin in H. + assert (is_builtin (exit p1) = true). { rewrite <- EXIT in H; intuition. } + clear H. + generalize (H0 rs m); eauto; erewrite H1; simpl; + unfold estep. destruct (exit p1) as [[]|] eqn:EQEX1; try discriminate. + rewrite <- EXIT. + intros CONTRA. + exploit (CONTRA); try discriminate. + destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate. + intros H4. eexists; eexists; split; try reflexivity. + inversion H2; subst. inversion H4; subst. + econstructor; eauto. + + unfold incrPC in H5. + replace (rs2 SP) with (rs1 SP). + replace (fun r : dreg => rs2 r) with (fun r : dreg => rs1 r); auto. + * apply functional_extensionality. + intros r; destruct (PregEq.eq r PC); try discriminate. + replace (rs1 r) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) r) by auto. + rewrite H5; rewrite Pregmap.gso; auto. + * replace (rs1 SP) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) SP) by auto. + rewrite H5; rewrite Pregmap.gso; auto; try discriminate. + + rewrite !incrPC_set_res_commut. + rewrite !incrPC_undef_regs_commut. + rewrite H5. + reflexivity. +Qed. + +(** Used for debug traces *) + +Definition ireg_name (ir: ireg) : pstring := + match ir with + | X0 => Str ("X0") | X1 => Str ("X1") | X2 => Str ("X2") | X3 => Str ("X3") | X4 => Str ("X4") | X5 => Str ("X5") | X6 => Str ("X6") | X7 => Str ("X7") + | X8 => Str ("X8") | X9 => Str ("X9") | X10 => Str ("X10") | X11 => Str ("X11") | X12 => Str ("X12") | X13 => Str ("X13") | X14 => Str ("X14") | X15 => Str ("X15") + | X16 => Str ("X16") | X17 => Str ("X17") | X18 => Str ("X18") | X19 => Str ("X19") | X20 => Str ("X20") | X21 => Str ("X21") | X22 => Str ("X22") | X23 => Str ("X23") + | X24 => Str ("X24") | X25 => Str ("X25") | X26 => Str ("X26") | X27 => Str ("X27") | X28 => Str ("X28") | X29 => Str ("X29") | X30 => Str ("X30") + end +. + +Definition freg_name (fr: freg) : pstring := + match fr with + | D0 => Str ("D0") | D1 => Str ("D1") | D2 => Str ("D2") | D3 => Str ("D3") | D4 => Str ("D4") | D5 => Str ("D5") | D6 => Str ("D6") | D7 => Str ("D7") + | D8 => Str ("D8") | D9 => Str ("D9") | D10 => Str ("D10") | D11 => Str ("D11") | D12 => Str ("D12") | D13 => Str ("D13") | D14 => Str ("D14") | D15 => Str ("D15") + | D16 => Str ("D16") | D17 => Str ("D17") | D18 => Str ("D18") | D19 => Str ("D19") | D20 => Str ("D20") | D21 => Str ("D21") | D22 => Str ("D22") | D23 => Str ("D23") + | D24 => Str ("D24") | D25 => Str ("D25") | D26 => Str ("D26") | D27 => Str ("D27") | D28 => Str ("D28") | D29 => Str ("D29") | D30 => Str ("D30") | D31 => Str ("D31") + end +. + +Definition iregsp_name (irsp: iregsp) : pstring := + match irsp with + | RR1 ir => ireg_name ir + | XSP => Str ("XSP") + end. + +Definition dreg_name (dr: dreg) : ?? pstring := + match dr with + | IR ir => match ir with + | XSP => RET (Str ("XSP")) + | RR1 irr => RET (ireg_name irr) + end + | FR fr => RET (freg_name fr) + end. + +Definition string_of_name (x: P.R.t): ?? pstring := + if (Pos.eqb x pmem) then + RET (Str "MEM") + else + match inv_ppos x with + | Some (CR cr) => match cr with + | CN => RET (Str ("CN")) + | CZ => RET (Str ("CZ")) + | CC => RET (Str ("CC")) + | CV => RET (Str ("CV")) + end + | Some (PC) => RET (Str ("PC")) + | Some (DR dr) => dreg_name dr + | _ => RET (Str ("UNDEFINED")) + end. + +Definition string_of_name_ArithP (n: arith_p) : pstring := + match n with + | Padrp _ _ => "Padrp" + | Pmovz _ _ _ => "Pmov" + | Pmovn _ _ _ => "Pmov" + | Pfmovimms _ => "Pfmovimm" + | Pfmovimmd _ => "Pfmovimm" + end. + +Definition string_of_name_ArithPP (n: arith_pp) : pstring := + match n with + | Pmov => "Pmov" + | Pmovk _ _ _ => "Pmovk" + | Paddadr _ _ => "Paddadr" + | Psbfiz _ _ _ => "Psbfiz" + | Psbfx _ _ _ => "Psbfx" + | Pubfiz _ _ _ => "Pubfiz" + | Pubfx _ _ _ => "Pubfx" + | Pfmov => "Pfmov" + | Pfcvtds => "Pfcvtds" + | Pfcvtsd => "Pfcvtsd" + | Pfabs _ => "Pfabs" + | Pfneg _ => "Pfneg" + | Pscvtf _ _ => "Pscvtf" + | Pucvtf _ _ => "Pucvtf" + | Pfcvtzs _ _ => "Pfcvtzs" + | Pfcvtzu _ _ => "Pfcvtzu" + | Paddimm _ _ => "Paddimm" + | Psubimm _ _ => "Psubimm" + end. + +Definition string_of_name_ArithPPP (n: arith_ppp) : pstring := + match n with + | Pasrv _ => "Pasrv" + | Plslv _ => "Plslv" + | Plsrv _ => "Plsrv" + | Prorv _ => "Prorv" + | Psmulh => "Psmulh" + | Pumulh => "Pumulh" + | Psdiv _ => "Psdiv" + | Pudiv _ => "Pudiv" + | Paddext _ => "Paddext" + | Psubext _ => "Psubext" + | Pfadd _ => "Pfadd" + | Pfdiv _ => "Pfdiv" + | Pfmul _ => "Pfmul" + | Pfsub _ => "Pfsub" + end. + +Definition string_of_name_ArithRR0R (n: arith_rr0r) : pstring := + match n with + | Padd _ _ => "ArithRR0R=>Padd" + | Psub _ _ => "ArithRR0R=>Psub" + | Pand _ _ => "ArithRR0R=>Pand" + | Pbic _ _ => "ArithRR0R=>Pbic" + | Peon _ _ => "ArithRR0R=>Peon" + | Peor _ _ => "ArithRR0R=>Peor" + | Porr _ _ => "ArithRR0R=>Porr" + | Porn _ _ => "ArithRR0R=>Porn" + end. + +Definition string_of_name_ArithRR0R_XZR (n: arith_rr0r) : pstring := + match n with + | Padd _ _ => "ArithRR0R_XZR=>Padd" + | Psub _ _ => "ArithRR0R_XZR=>Psub" + | Pand _ _ => "ArithRR0R_XZR=>Pand" + | Pbic _ _ => "ArithRR0R_XZR=>Pbic" + | Peon _ _ => "ArithRR0R_XZR=>Peon" + | Peor _ _ => "ArithRR0R_XZR=>Peor" + | Porr _ _ => "ArithRR0R_XZR=>Porr" + | Porn _ _ => "ArithRR0R_XZR=>Porn" + end. + +Definition string_of_name_ArithRR0 (n: arith_rr0) : pstring := + match n with + | Pandimm _ _ => "ArithRR0=>Pandimm" + | Peorimm _ _ => "ArithRR0=>Peorimm" + | Porrimm _ _ => "ArithRR0=>Porrimm" + end. + +Definition string_of_name_ArithRR0_XZR (n: arith_rr0) : pstring := +match n with + | Pandimm _ _ => "ArithRR0_XZR=>Pandimm" + | Peorimm _ _ => "ArithRR0_XZR=>Peorimm" + | Porrimm _ _ => "ArithRR0_XZR=>Porrimm" + end. + +Definition string_of_name_ArithARRRR0 (n: arith_arrrr0) : pstring := + match n with + | Pmadd _ => "ArithARRRR0=>Pmadd" + | Pmsub _ => "ArithARRRR0=>Pmsub" + end. + +Definition string_of_name_ArithARRRR0_XZR (n: arith_arrrr0) : pstring := + match n with + | Pmadd _ => "ArithARRRR0_XZR=>Pmadd" + | Pmsub _ => "ArithARRRR0_XZR=>Pmsub" + end. + +Definition string_of_name_ArithComparisonPP_CN (n: arith_comparison_pp) : pstring := + match n with + | Pcmpext _ => "ArithComparisonPP_CN=>Pcmpext" + | Pcmnext _ => "ArithComparisonPP_CN=>Pcmnext" + | Pfcmp _ => "ArithComparisonPP_CN=>Pfcmp" + end. + +Definition string_of_name_ArithComparisonPP_CZ (n: arith_comparison_pp) : pstring := + match n with + | Pcmpext _ => "ArithComparisonPP_CZ=>Pcmpext" + | Pcmnext _ => "ArithComparisonPP_CZ=>Pcmnext" + | Pfcmp _ => "ArithComparisonPP_CZ=>Pfcmp" + end. + +Definition string_of_name_ArithComparisonPP_CC (n: arith_comparison_pp) : pstring := +match n with + | Pcmpext _ => "ArithComparisonPP_CC=>Pcmpext" + | Pcmnext _ => "ArithComparisonPP_CC=>Pcmnext" + | Pfcmp _ => "ArithComparisonPP_CC=>Pfcmp" + end. + +Definition string_of_name_ArithComparisonPP_CV (n: arith_comparison_pp) : pstring := +match n with + | Pcmpext _ => "ArithComparisonPP_CV=>Pcmpext" + | Pcmnext _ => "ArithComparisonPP_CV=>Pcmnext" + | Pfcmp _ => "ArithComparisonPP_CV=>Pfcmp" + end. + +Definition string_of_name_ArithComparisonR0R_CN (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CN=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CN=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CN=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CZ (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CZ=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CZ=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CZ=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CC (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CC=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CC=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CC=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CV (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CV=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CV=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CV=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CN_XZR=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CN_XZR=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CN_XZR=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) : pstring := + match n with + | Pcmp _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CZ_XZR=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) : pstring := +match n with + | Pcmp _ _ => "ArithComparisonR0R_CC_XZR=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CC_XZR=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CC_XZR=>Ptst" + end. + +Definition string_of_name_ArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) : pstring := +match n with + | Pcmp _ _ => "ArithComparisonR0R_CV_XZR=>Pcmp" + | Pcmn _ _ => "ArithComparisonR0R_CV_XZR=>Pcmn" + | Ptst _ _ => "ArithComparisonR0R_CV_XZR=>Ptst" + end. + +Definition string_of_name_ArithComparisonP_CN (n: arith_comparison_p) : pstring := + match n with + | Pfcmp0 _ => "ArithComparisonP_CN=>Pfcmp0" + | Pcmpimm _ _ => "ArithComparisonP_CN=>Pcmpimm" + | Pcmnimm _ _ => "ArithComparisonP_CN=>Pcmnimm" + | Ptstimm _ _ => "ArithComparisonP_CN=>Ptstimm" + end. + +Definition string_of_name_ArithComparisonP_CZ (n: arith_comparison_p) : pstring := + match n with + | Pfcmp0 _ => "ArithComparisonP_CZ=>Pfcmp0" + | Pcmpimm _ _ => "ArithComparisonP_CZ=>Pcmpimm" + | Pcmnimm _ _ => "ArithComparisonP_CZ=>Pcmnimm" + | Ptstimm _ _ => "ArithComparisonP_CZ=>Ptstimm" + end. + +Definition string_of_name_ArithComparisonP_CC (n: arith_comparison_p) : pstring := + match n with + | Pfcmp0 _ => "ArithComparisonP_CC=>Pfcmp0" + | Pcmpimm _ _ => "ArithComparisonP_CC=>Pcmpimm" + | Pcmnimm _ _ => "ArithComparisonP_CC=>Pcmnimm" + | Ptstimm _ _ => "ArithComparisonP_CC=>Ptstimm" + end. + +Definition string_of_name_ArithComparisonP_CV (n: arith_comparison_p) : pstring := + match n with + | Pfcmp0 _ => "ArithComparisonP_CV=>Pfcmp0" + | Pcmpimm _ _ => "ArithComparisonP_CV=>Pcmpimm" + | Pcmnimm _ _ => "ArithComparisonP_CV=>Pcmnimm" + | Ptstimm _ _ => "ArithComparisonP_CV=>Ptstimm" + end. + +Definition string_of_name_cset (c: testcond) : pstring := + match c with + | TCeq => "Cset=>TCeq" + | TCne => "Cset=>TCne" + | TChs => "Cset=>TChs" + | TClo => "Cset=>TClo" + | TCmi => "Cset=>TCmi" + | TCpl => "Cset=>TCpl" + | TChi => "Cset=>TChi" + | TCls => "Cset=>TCls" + | TCge => "Cset=>TCge" + | TClt => "Cset=>TClt" + | TCgt => "Cset=>TCgt" + | TCle => "Cset=>TCle" + end. + +Definition string_of_arith (op: arith_op): pstring := + match op with + | OArithP n => string_of_name_ArithP n + | OArithPP n => string_of_name_ArithPP n + | OArithPPP n => string_of_name_ArithPPP n + | OArithRR0R n => string_of_name_ArithRR0R n + | OArithRR0R_XZR n _ => string_of_name_ArithRR0R_XZR n + | OArithRR0 n => string_of_name_ArithRR0 n + | OArithRR0_XZR n _ => string_of_name_ArithRR0_XZR n + | OArithARRRR0 n => string_of_name_ArithARRRR0 n + | OArithARRRR0_XZR n _ => string_of_name_ArithARRRR0_XZR n + | OArithComparisonPP_CN n => string_of_name_ArithComparisonPP_CN n + | OArithComparisonPP_CZ n => string_of_name_ArithComparisonPP_CZ n + | OArithComparisonPP_CC n => string_of_name_ArithComparisonPP_CC n + | OArithComparisonPP_CV n => string_of_name_ArithComparisonPP_CV n + | OArithComparisonR0R_CN n _ => string_of_name_ArithComparisonR0R_CN n + | OArithComparisonR0R_CZ n _ => string_of_name_ArithComparisonR0R_CZ n + | OArithComparisonR0R_CC n _ => string_of_name_ArithComparisonR0R_CC n + | OArithComparisonR0R_CV n _ => string_of_name_ArithComparisonR0R_CV n + | OArithComparisonR0R_CN_XZR n _ _ => string_of_name_ArithComparisonR0R_CN_XZR n + | OArithComparisonR0R_CZ_XZR n _ _ => string_of_name_ArithComparisonR0R_CZ_XZR n + | OArithComparisonR0R_CC_XZR n _ _ => string_of_name_ArithComparisonR0R_CC_XZR n + | OArithComparisonR0R_CV_XZR n _ _ => string_of_name_ArithComparisonR0R_CV_XZR n + | OArithComparisonP_CN n => string_of_name_ArithComparisonP_CN n + | OArithComparisonP_CZ n => string_of_name_ArithComparisonP_CZ n + | OArithComparisonP_CC n => string_of_name_ArithComparisonP_CC n + | OArithComparisonP_CV n => string_of_name_ArithComparisonP_CV n + | Ocset c => string_of_name_cset c + | Ofmovi _ => "Ofmovi" + | Ofmovi_XZR _ => "Ofmovi_XZR" + | Ocsel _ => "Ocsel" + | Ofnmul _ => "Ofnmul" + end. + +Definition string_of_ofs (ofs: ptrofs) : ?? pstring := + (string_of_Z (Ptrofs.signed ofs)). + +Definition string_of_int (n: int) : ?? pstring := + (string_of_Z (Int.signed n)). + +Definition string_of_int64 (n: int64) : ?? pstring := + (string_of_Z (Int64.signed n)). + +Notation "x +; y" := (Concat x y). + +Definition string_of_addressing (a: addressing) : ?? pstring := + match a with + | ADimm base n => + DO n' <~ string_of_int64 n;; + RET ((Str "[ADimm ") +; (iregsp_name base) +; " " +; n' +; "]") + | ADreg base r => + RET ((Str "[ADreg ") +; (iregsp_name base) +; " " +; (ireg_name r) +; "]") + | ADlsl base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADlsl ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADsxt base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADsxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADuxt base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADuxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADadr base id ofs => + DO id' <~ string_of_Z (Zpos id);; + DO ofs' <~ string_of_ofs ofs;; + RET ((Str "[ADadr ") +; (iregsp_name base) +; " " +; id' +; " " +; ofs' +; "]") + | ADpostincr base n => + DO n' <~ string_of_int64 n;; + RET ((Str "[ADpostincr ") +; (iregsp_name base) +; " " +; n' +; "]") + end. + +Definition string_of_ld_rd_a (ld: load_rd_a) : pstring := + match ld with + | Pldrw => Str "Pldrw" + | Pldrw_a => Str "Pldrw_a" + | Pldrx => Str "Pldrx" + | Pldrx_a => Str "Pldrx_a" + | Pldrb _ => Str "Pldrb" + | Pldrsb _ => Str "Pldrsb" + | Pldrh _ => Str "Pldrh" + | Pldrsh _ => Str "Pldrsh" + | Pldrzw => Str "Pldrzw" + | Pldrsw => Str "Pldrsw" + | Pldrs => Str "Pldrs" + | Pldrd => Str "Pldrd" + | Pldrd_a => Str "Pldrd_a" + end. + +Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:= + match ldi with + | PLd_rd_a ld rd a => + DO a' <~ string_of_addressing a;; + DO rd' <~ dreg_name rd;; + RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")") + | Pldp _ _ _ _ _ a => + DO a' <~ string_of_addressing a;; + RET (Str "Pldp (" +; a' +; ")") + end. + +Definition string_of_load (op: load_op) : ?? pstring := + match op with + | Oload1 ld _ a => + DO a' <~ string_of_addressing a;; + (*DO ld' <~ string_of_ldi ld;;*) + RET((Str "Oload1 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ") + | Oload2 ld _ a => + DO a' <~ string_of_addressing a;; + (*DO ld' <~ string_of_ldi ld;;*) + RET((Str "Oload2 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ") + | OloadU _ _ _ => RET (Str "OloadU") + end. + +Definition string_of_st_rs_a (st: store_rs_a) : pstring := + match st with + | Pstrw => Str "Pstrw" + | Pstrw_a => Str "Pstrw_a" + | Pstrx => Str "Pstrx" + | Pstrx_a => Str "Pstrx_a" + | Pstrb => Str "Pstrb" + | Pstrh => Str "Pstrh" + | Pstrs => Str "Pstrs" + | Pstrd => Str "Pstrd" + | Pstrd_a => Str "Pstrd_a" + end. + +Definition string_of_sti (sti: st_instruction) : ?? pstring:= + match sti with + | PSt_rs_a st rs a => + DO a' <~ string_of_addressing a;; + DO rs' <~ dreg_name rs;; + RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")") + | Pstp _ _ _ _ _ a => + DO a' <~ string_of_addressing a;; + RET (Str "Pstp (" +; a' +; ")") + end. + +Definition string_of_store (op: store_op) : ?? pstring := + match op with + | Ostore1 st _ a => + DO a' <~ string_of_addressing a;; + (*DO st' <~ string_of_sti st;;*) + RET((Str "Ostore1 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ") + | Ostore2 st _ a => + DO a' <~ string_of_addressing a;; + (*DO st' <~ string_of_sti st;;*) + RET((Str "Ostore2 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ") + | OstoreU _ _ _ => RET (Str "OstoreU") + end. + +Definition string_of_control (op: control_op) : pstring := + match op with + | Ob _ => "Ob" + | Obc _ _ => "Obc" + | Obl _ => "Obl" + | Obs _ => "Obs" + | Ocbnz _ _ => "Ocbnz" + | Ocbz _ _ => "Ocbz" + | Otbnz _ _ _ => "Otbnz" + | Otbz _ _ _ => "Otbz" + | Obtbl _ => "Obtbl" + | OIncremPC _ => "OIncremPC" + | OError => "OError" + end. + +Definition string_of_allocf (op: allocf_op) : pstring := + match op with + | OAllocf_SP _ _ => "OAllocf_SP" + | OAllocf_Mem _ _ => "OAllocf_Mem" + end. + +Definition string_of_freef (op: freef_op) : pstring := + match op with + | OFreef_SP _ _ => "OFreef_SP" + | OFreef_Mem _ _ => "OFreef_Mem" + end. + +Definition string_of_op (op: P.op): ?? pstring := + match op with + | Arith op => RET (string_of_arith op) + | Load op => string_of_load op + | Store op => string_of_store op + | Control op => RET (string_of_control op) + | Allocframe op => RET (string_of_allocf op) + | Freeframe op => RET (string_of_freef op) + | Loadsymbol _ => RET (Str "Loadsymbol") + | Constant _ => RET (Str "Constant") + | Cvtsw2x => RET (Str "Cvtsw2x") + | Cvtuw2x => RET (Str "Cvtuw2x") + | Cvtx2w => RET (Str "Cvtx2w") + (*| Fail => RET (Str "Fail")*) + end. +End SECT_BBLOCK_EQUIV. + +(** REWRITE RULES *) + +Definition is_constant (o: op): bool := + match o with + | OArithP _ | OArithRR0_XZR _ _ | Ofmovi_XZR _ | Loadsymbol _ | Constant _ | Obl _ | Obs _ => true + | _ => false + end. + +Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None. +Proof. + destruct o; simpl in * |- *; try congruence. + destruct op0; simpl in * |- *; try congruence. + destruct co; simpl in * |- *; try congruence; + unfold control_eval; destruct ge; simpl in * |- *; try congruence. +Qed. + +Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t). + +Local Hint Resolve is_constant_correct: wlp. + +Lemma main_reduce_correct t: + WHEN main_reduce t ~> pt THEN Terms.match_pt t pt. +Proof. + wlp_simplify. +Qed. + +Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}. + +Definition bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock) : ?? bool := + assert_same_builtin p1 p2;; + if verb then + IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2) + else + IST.bblock_simu_test reduce (trans_block p1) (trans_block p2). + +Local Hint Resolve IST.bblock_simu_test_correct IST.verb_bblock_simu_test_correct: wlp. + +(** Main simulation (Impure) theorem *) +Theorem bblock_simu_test_correct verb p1 p2 : + WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn lk, Asmblockprops.bblock_simu lk ge fn p1 p2. +Proof. + wlp_simplify; eapply bblock_simu_reduce with (Ge:={| _genv := ge; _fn := fn; _lk := lk |}); eauto; + intros; destruct H; auto. +Qed. + +Hint Resolve bblock_simu_test_correct: wlp. + +(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *) + +Import UnsafeImpure. + +Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock): bool := + match unsafe_coerce (bblock_simu_test verb p1 p2) with + | Some b => b + | None => false + end. + +Theorem pure_bblock_simu_test_correct verb p1 p2 lk ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2. +Proof. + unfold pure_bblock_simu_test. + destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate. + intros; subst. eapply bblock_simu_test_correct; eauto. + apply unsafe_coerce_not_really_correct; eauto. +Qed. + +Definition bblock_simub: Asmblock.bblock -> Asmblock.bblock -> bool := pure_bblock_simu_test true. + +Lemma bblock_simub_correct p1 p2 lk ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2. +Proof. + eapply (pure_bblock_simu_test_correct true). +Qed. diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v new file mode 100644 index 00000000..acb5a1e6 --- /dev/null +++ b/aarch64/Asmblockgen.v @@ -0,0 +1,1252 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Léo Gourdin UGA, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Translation from Machblock to AArch64 assembly block language (Asmblock) + Inspired from the Mach->Asm pass of original Leroy's backend, but adapted to the block structure like the KVX backend. *) + +Require Import Recdef Coqlib Zwf Zbits. +Require Import Errors AST Integers Floats Op. +Require Import Locations Machblock Asm Asmblock. + +Local Open Scope string_scope. +Local Open Scope list_scope. +Local Open Scope error_monad_scope. + +(** Extracting integer or float registers. *) + +Definition ireg_of (r: mreg) : res ireg := + match preg_of r with + | IR irs => match irs with + | RR1 mr => OK mr + | _ => Error(msg "Asmgenblock.ireg_of") + end + | _ => Error(msg "Asmgenblock.iregsp_of") + end. + +Definition freg_of (r: mreg) : res freg := + match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end. + +(** Recognition of immediate arguments for logical integer operations.*) + +(** Valid immediate arguments are repetitions of a bit pattern [B] + of length [e] = 2, 4, 8, 16, 32 or 64. + The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*] + but must not be all zeros or all ones. *) + +(** The following automaton recognizes [0*1*0*|1*0*1*]. +<< + 0 1 0 + / \ / \ / \ + \ / \ / \ / + -0--> [B] --1--> [D] --0--> [F] + / + [A] + \ + -1--> [C] --0--> [E] --1--> [G] + / \ / \ / \ + \ / \ / \ / + 1 0 1 +>> +*) + +Module Automaton. + +Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad. + +Definition start := SA. + +Definition next (s: state) (b: bool) := + match s, b with + | SA,false => SB | SA,true => SC + | SB,false => SB | SB,true => SD + | SC,false => SE | SC,true => SC + | SD,false => SF | SD,true => SD + | SE,false => SE | SE,true => SG + | SF,false => SF | SF,true => Sbad + | SG,false => Sbad | SG,true => SG + | Sbad,_ => Sbad + end. + +Definition accepting (s: state) := + match s with + | SA | SB | SC | SD | SE | SF | SG => true + | Sbad => false + end. + +Fixpoint run (len: nat) (s: state) (x: Z) : bool := + match len with + | Datatypes.O => accepting s + | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x) + end. + +End Automaton. + +(** The following function determines the candidate length [e], + ensuring that [x] is a repetition [BB...B] + of a bit pattern [B] of length [e]. *) + +Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat := + (** [test n] checks that the low [2n] bits of [x] are of the + form [BB], that is, two occurrences of the same [n] bits *) + let test (n: Z) : bool := + Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in + (** If [test n] fails, we know that the candidate length [e] is + at least [2n]. Hence we test with decreasing values of [n]: + 32, 16, 8, 4, 2. *) + if sixtyfour && negb (test 32) then 64%nat + else if negb (test 16) then 32%nat + else if negb (test 8) then 16%nat + else if negb (test 4) then 8%nat + else if negb (test 2) then 4%nat + else 2%nat. + +(** A valid logical immediate is +- neither [0] nor [-1]; +- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e] +- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*]. +*) + +Definition is_logical_imm32 (x: int) : bool := + negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) && + Automaton.run (logical_imm_length (Int.unsigned x) false) + Automaton.start (Int.unsigned x). + +Definition is_logical_imm64 (x: int64) : bool := + negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) && + Automaton.run (logical_imm_length (Int64.unsigned x) true) + Automaton.start (Int64.unsigned x). + +(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *) + +Definition is_arith_imm32 (x: int) : bool := + Int.eq x (Int.zero_ext 12 x) + || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)). + +Definition is_arith_imm64 (x: int64) : bool := + Int64.eq x (Int64.zero_ext 12 x) + || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)). + +Definition bcode := list basic. + +Program Definition single_basic (bi: basic): bblock := + {| header := nil; body:= bi::nil; exit := None |}. + +(* insert [bi] at the head of [k] *) +Program Definition insert_basic (bi: basic) (k:bblocks): bblocks := + match k with + | bb::k' => + match bb.(header) with + | nil => {| header := nil; body := bi :: (body bb); exit := exit bb |}::k' + | _ => (single_basic bi)::k + end + | _ => (single_basic bi)::k + end. + +Notation "bi ::b k" := (insert_basic bi k) (at level 49, right associativity). + +(* NB: this notation helps the Coq typechecker to infer coercion [PArith] in [bcode] expressions *) +(** Alignment check for symbols *) +Notation "i ::bi k" := (cons (A:=basic) i k) (at level 49, right associativity). +Notation "a @@ b" := (app a b) (at level 49, right associativity). + +(* The pop_bc and push_bc functions are used to adapt the output of some definitions + in bblocks format and avoid some redefinitions. *) + +(* pop_bc takes the body of the first bblock in the list if it does not have a header. *) +Definition pop_bc (k: bblocks): bcode := + match k with + | bb :: k' => match bb.(header) with + | nil => (body bb) + | _ => nil + end + | _ => nil + end. + +(* push_bc tries to overwrite code in the first bblock if it does not have a header, + otherwise, a new bblock is created and appended to the list. *) +Program Definition push_bc (bc: bcode) (k:bblocks): bblocks := + match bc with + | bi :: bc' => match k with + | bb :: k' => match bb.(header) with + | nil => {| header := nil; body := bc; exit := exit bb |} :: k' + | _ => {| header := nil; body := bc; exit := None |} :: k + end + | _ => {| header := nil; body := bc; exit := None |} :: nil + end + | nil => k + end. +Next Obligation. + simpl; auto. +Qed. +Next Obligation. + simpl; auto. +Qed. +Next Obligation. + simpl; auto. +Qed. + +Parameter symbol_is_aligned : ident -> Z -> bool. +(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *) + +(***************************************************************************************) + +(** Decompose integer literals into 16-bit fragments *) + +Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) := + match N with + | Datatypes.O => nil + | Datatypes.S N => + let frag := Zzero_ext 16 (Z.shiftr n p) in + if Z.eqb frag 0 then + decompose_int N n (p + 16) + else + (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16) + end. + +Definition negate_decomposition (l: list (Z * Z)) := + List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l. + +Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode := + List.fold_right (fun np k => Pmovk sz (fst np) (snd np) rd rd ::bi k) k l. + +Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode := + match l with + | nil => Pmovz sz 0 0 rd ::bi k + | (n1, p1) :: l => (Pmovz sz n1 p1 rd) ::bi loadimm_k sz rd l k + end. + +Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode := + match l with + | nil => Pmovn sz 0 0 rd ::bi k + | (n1, p1) :: l => Pmovn sz n1 p1 rd ::bi loadimm_k sz rd (negate_decomposition l) k + end. + +Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: bcode) : bcode := + let N := match sz with W => 2%nat | X => 4%nat end in + let dz := decompose_int N n 0 in + let dn := decompose_int N (Z.lnot n) 0 in + if Nat.leb (List.length dz) (List.length dn) + then loadimm_z sz rd dz k + else loadimm_n sz rd dn k. + +Definition loadimm32 (rd: ireg) (n: int) (k: bcode) : bcode := + if is_logical_imm32 n + then Porrimm W (Int.unsigned n) rd XZR ::bi k + else loadimm W rd (Int.unsigned n) k. + +Definition loadimm64 (rd: ireg) (n: int64) (k: bcode) : bcode := + if is_logical_imm64 n + then Porrimm X (Int64.unsigned n) rd XZR ::bi k + else loadimm X rd (Int64.unsigned n) k. + +Definition offset_representable (sz: Z) (ofs: int64) : bool := + let isz := Int64.repr sz in + (** either unscaled 9-bit signed *) + Int64.eq ofs (Int64.sign_ext 9 ofs) || + (** or scaled 12-bit unsigned *) + (Int64.eq (Int64.modu ofs isz) Int64.zero + && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))). + +Definition indexed_memory_access_bc (insn: addressing -> basic) + (sz: Z) (base: iregsp) (ofs: ptrofs) (k: bcode) : bcode := + let ofs := Ptrofs.to_int64 ofs in + if offset_representable sz ofs + then insn (ADimm base ofs) :: k + else loadimm64 X16 ofs (insn (ADreg base X16) :: k). + +Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) := + match ty, preg_of dst with + | Tint, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw rd) 4 base ofs k) + | Tlong, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx rd) 8 base ofs k) + | Tsingle, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrs rd) 4 base ofs k) + | Tfloat, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd rd) 8 base ofs k) + | Tany32, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw_a rd) 4 base ofs k) + | Tany64, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx_a rd) 8 base ofs k) + | Tany64, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd_a rd) 8 base ofs k) + | _, _ => Error (msg "Asmgen.loadind") + end. + +Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode) := + match ty, preg_of src with + | Tint, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw rd) 4 base ofs k) + | Tlong, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx rd) 8 base ofs k) + | Tsingle, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrs rd) 4 base ofs k) + | Tfloat, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd rd) 8 base ofs k) + | Tany32, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw_a rd) 4 base ofs k) + | Tany64, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx_a rd) 8 base ofs k) + | Tany64, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd_a rd) 8 base ofs k) + | _, _ => Error (msg "Asmgen.storeind") + end. + +Definition loadptr_bc (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: bcode): bcode := + indexed_memory_access_bc (PLd_rd_a Pldrx dst) 8 base ofs k. + +Definition storeptr_bc (src: ireg) (base: iregsp) (ofs: ptrofs) (k: bcode): bcode := + indexed_memory_access_bc (PSt_rs_a Pstrx src) 8 base ofs k. + +(** Function epilogue *) + +Definition make_epilogue (f: Machblock.function) : bcode := + loadptr_bc XSP f.(fn_retaddr_ofs) RA + (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs)::nil). + +(** Add immediate *) + +Definition addimm_aux (insn: Z -> arith_pp) + (rd r1: iregsp) (n: Z) (k: bcode) := + let nlo := Zzero_ext 12 n in + let nhi := n - nlo in + if Z.eqb nhi 0 then + insn nlo rd r1 ::bi k + else if Z.eqb nlo 0 then + insn nhi rd r1 ::bi k + else + insn nhi rd r1 ::bi insn nlo rd rd ::bi k. + +Definition addimm32 (rd r1: ireg) (n: int) (k: bcode) : bcode := + let m := Int.neg n in + if Int.eq n (Int.zero_ext 24 n) then + addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k + else if Int.eq m (Int.zero_ext 24 m) then + addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k + else if Int.lt n Int.zero then + loadimm32 X16 m (Psub W SOnone rd r1 X16 ::bi k) + else + loadimm32 X16 n (Padd W SOnone rd r1 X16 ::bi k). + +Definition addimm64 (rd r1: iregsp) (n: int64) (k: bcode) : bcode := + let m := Int64.neg n in + if Int64.eq n (Int64.zero_ext 24 n) then + addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k + else if Int64.eq m (Int64.zero_ext 24 m) then + addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k + else if Int64.lt n Int64.zero then + loadimm64 X16 m (Psubext (EOuxtx Int.zero) rd r1 X16 ::bi k) + else + loadimm64 X16 n (Paddext (EOuxtx Int.zero) rd r1 X16 ::bi k). + +(** Logical immediate *) + +Definition logicalimm32 + (insn1: Z -> arith_rr0) + (insn2: shift_op -> arith_rr0r) + (rd r1: ireg) (n: int) (k: bcode) : bcode := + if is_logical_imm32 n + then insn1 (Int.unsigned n) rd r1 ::bi k + else loadimm32 X16 n (insn2 SOnone rd r1 X16 ::bi k). + +Definition logicalimm64 + (insn1: Z -> arith_rr0) + (insn2: shift_op -> arith_rr0r) + (rd r1: ireg) (n: int64) (k: bcode) : bcode := + if is_logical_imm64 n + then insn1 (Int64.unsigned n) rd r1 ::bi k + else loadimm64 X16 n (insn2 SOnone rd r1 X16 ::bi k). + +(** Sign- or zero-extended arithmetic *) + +Definition transl_extension (ex: extension) (a: int) : extend_op := + match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end. + +Definition move_extended_base + (rd: ireg) (r1: ireg) (ex: extension) (k: bcode) : bcode := + match ex with + | Xsgn32 => Pcvtsw2x rd r1 ::bi k + | Xuns32 => Pcvtuw2x rd r1 ::bi k + end. + +Definition move_extended + (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: bcode) : bcode := + if Int.eq a Int.zero then + move_extended_base rd r1 ex k + else + move_extended_base rd r1 ex (Padd X (SOlsl a) rd XZR rd ::bi k). + +Definition arith_extended + (insnX: extend_op -> arith_ppp) + (insnS: shift_op -> arith_rr0r) + (rd r1 r2: ireg) (ex: extension) (a: int) (k: bcode) : bcode := + if Int.ltu a (Int.repr 5) then + insnX (transl_extension ex a) rd r1 r2 ::bi k + else + move_extended_base X16 r2 ex (insnS (SOlsl a) rd r1 X16 ::bi k). + +(** Extended right shift *) + +Definition shrx32 (rd r1: ireg) (n: int) (k: bcode) : bcode := + if Int.eq n Int.zero then + Pmov rd r1 ::bi k + else + Porr W (SOasr (Int.repr 31)) X16 XZR r1 ::bi + Padd W (SOlsr (Int.sub Int.iwordsize n)) X16 r1 X16 ::bi + Porr W (SOasr n) rd XZR X16 ::bi k. + +Definition shrx64 (rd r1: ireg) (n: int) (k: bcode) : bcode := + if Int.eq n Int.zero then + Pmov rd r1 ::bi k + else + Porr X (SOasr (Int.repr 63)) X16 XZR r1 ::bi + Padd X (SOlsr (Int.sub Int64.iwordsize' n)) X16 r1 X16 ::bi + Porr X (SOasr n) rd XZR X16 ::bi k. + +(** Load the address [id + ofs] in [rd] *) + +Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: bcode) : bcode := + if Archi.pic_code tt then + if Ptrofs.eq ofs Ptrofs.zero then + Ploadsymbol rd id ::bi k + else + Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k + else + Padrp id ofs rd ::bi Paddadr id ofs rd rd ::bi k. + +(** Translate a shifted operand *) + +Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op := + match s with + | Slsl => SOlsl a + | Slsr => SOlsr a + | Sasr => SOasr a + | Sror => SOror a + end. + +(** Translation of a condition. Prepends to [k] the instructions + that evaluate the condition and leave its boolean result in one of + the bits of the condition register. The bit in question is + determined by the [crbit_for_cond] function. *) + +Definition transl_cond + (cond: condition) (args: list mreg) (k: bcode) := + match cond, args with + | (Ccomp c | Ccompu c), a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pcmp W SOnone r1 r2 ::bi k) + | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pcmp W (transl_shift s a) r1 r2 ::bi k) + | (Ccompimm c n | Ccompuimm c n), a1 :: nil => + do r1 <- ireg_of a1; + OK (if is_arith_imm32 n then + Pcmpimm W (Int.unsigned n) r1 ::bi k + else if is_arith_imm32 (Int.neg n) then + Pcmnimm W (Int.unsigned (Int.neg n)) r1 ::bi k + else + loadimm32 X16 n (Pcmp W SOnone r1 X16 ::bi k)) + | (Cmaskzero n | Cmasknotzero n), a1 :: nil => + do r1 <- ireg_of a1; + OK (if is_logical_imm32 n then + Ptstimm W (Int.unsigned n) r1 ::bi k + else + loadimm32 X16 n (Ptst W SOnone r1 X16 ::bi k)) + | (Ccompl c | Ccomplu c), a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pcmp X SOnone r1 r2 ::bi k) + | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pcmp X (transl_shift s a) r1 r2 ::bi k) + | (Ccomplimm c n | Ccompluimm c n), a1 :: nil => + do r1 <- ireg_of a1; + OK (if is_arith_imm64 n then + Pcmpimm X (Int64.unsigned n) r1 ::bi k + else if is_arith_imm64 (Int64.neg n) then + Pcmnimm X (Int64.unsigned (Int64.neg n)) r1 ::bi k + else + loadimm64 X16 n (Pcmp X SOnone r1 X16 ::bi k)) + | (Cmasklzero n | Cmasklnotzero n), a1 :: nil => + do r1 <- ireg_of a1; + OK (if is_logical_imm64 n then + Ptstimm X (Int64.unsigned n) r1 ::bi k + else + loadimm64 X16 n (Ptst X SOnone r1 X16 ::bi k)) + | Ccompf cmp, a1 :: a2 :: nil => + do r1 <- freg_of a1; do r2 <- freg_of a2; + OK (Pfcmp D r1 r2 ::bi k) + | Cnotcompf cmp, a1 :: a2 :: nil => + do r1 <- freg_of a1; do r2 <- freg_of a2; + OK (Pfcmp D r1 r2 ::bi k) + | Ccompfzero cmp, a1 :: nil => + do r1 <- freg_of a1; + OK (Pfcmp0 D r1 ::bi k) + | Cnotcompfzero cmp, a1 :: nil => + do r1 <- freg_of a1; + OK (Pfcmp0 D r1 ::bi k) + | Ccompfs cmp, a1 :: a2 :: nil => + do r1 <- freg_of a1; do r2 <- freg_of a2; + OK (Pfcmp S r1 r2 ::bi k) + | Cnotcompfs cmp, a1 :: a2 :: nil => + do r1 <- freg_of a1; do r2 <- freg_of a2; + OK (Pfcmp S r1 r2 ::bi k) + | Ccompfszero cmp, a1 :: nil => + do r1 <- freg_of a1; + OK (Pfcmp0 S r1 ::bi k) + | Cnotcompfszero cmp, a1 :: nil => + do r1 <- freg_of a1; + OK (Pfcmp0 S r1 ::bi k) + | _, _ => + Error(msg "Asmgenblock.transl_cond") + end. + +Definition cond_for_signed_cmp (cmp: comparison) := + match cmp with + | Ceq => TCeq + | Cne => TCne + | Clt => TClt + | Cle => TCle + | Cgt => TCgt + | Cge => TCge + end. + +Definition cond_for_unsigned_cmp (cmp: comparison) := + match cmp with + | Ceq => TCeq + | Cne => TCne + | Clt => TClo + | Cle => TCls + | Cgt => TChi + | Cge => TChs + end. + +Definition cond_for_float_cmp (cmp: comparison) := + match cmp with + | Ceq => TCeq + | Cne => TCne + | Clt => TCmi + | Cle => TCls + | Cgt => TCgt + | Cge => TCge + end. + +Definition cond_for_float_not_cmp (cmp: comparison) := + match cmp with + | Ceq => TCne + | Cne => TCeq + | Clt => TCpl + | Cle => TChi + | Cgt => TCle + | Cge => TClt + end. + +Definition cond_for_cond (cond: condition) := + match cond with + | Ccomp cmp => cond_for_signed_cmp cmp + | Ccompu cmp => cond_for_unsigned_cmp cmp + | Ccompshift cmp s a => cond_for_signed_cmp cmp + | Ccompushift cmp s a => cond_for_unsigned_cmp cmp + | Ccompimm cmp n => cond_for_signed_cmp cmp + | Ccompuimm cmp n => cond_for_unsigned_cmp cmp + | Cmaskzero n => TCeq + | Cmasknotzero n => TCne + | Ccompl cmp => cond_for_signed_cmp cmp + | Ccomplu cmp => cond_for_unsigned_cmp cmp + | Ccomplshift cmp s a => cond_for_signed_cmp cmp + | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp + | Ccomplimm cmp n => cond_for_signed_cmp cmp + | Ccompluimm cmp n => cond_for_unsigned_cmp cmp + | Cmasklzero n => TCeq + | Cmasklnotzero n => TCne + | Ccompf cmp => cond_for_float_cmp cmp + | Cnotcompf cmp => cond_for_float_not_cmp cmp + | Ccompfzero cmp => cond_for_float_cmp cmp + | Cnotcompfzero cmp => cond_for_float_not_cmp cmp + | Ccompfs cmp => cond_for_float_cmp cmp + | Cnotcompfs cmp => cond_for_float_not_cmp cmp + | Ccompfszero cmp => cond_for_float_cmp cmp + | Cnotcompfszero cmp => cond_for_float_not_cmp cmp + end. + +(** Translation of a conditional branch. Prepends to [k] the instructions + that evaluate the condition and ranch to [lbl] if it holds. + We recognize some conditional branches that can be implemented + without setting then testing condition flags. *) + +Definition transl_cond_branch_default + (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) := + do ccode <- transl_cond c args k; + OK(ccode, Pbc (cond_for_cond c) lbl). + +Definition transl_cond_branch + (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) := + match args, c with + | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) => + if Int.eq n Int.zero + then (do r1 <- ireg_of a1; OK (k, Pcbnz W r1 lbl)) + else transl_cond_branch_default c args lbl k + | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) => + if Int.eq n Int.zero + then (do r1 <- ireg_of a1; OK (k, Pcbz W r1 lbl)) + else transl_cond_branch_default c args lbl k + | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) => + if Int64.eq n Int64.zero + then (do r1 <- ireg_of a1; OK (k, Pcbnz X r1 lbl)) + else transl_cond_branch_default c args lbl k + | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) => + if Int64.eq n Int64.zero + then (do r1 <- ireg_of a1; OK (k, Pcbz X r1 lbl)) + else transl_cond_branch_default c args lbl k + | a1 :: nil, Cmaskzero n => + match Int.is_power2 n with + | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz W r1 bit lbl) + | None => transl_cond_branch_default c args lbl k + end + | a1 :: nil, Cmasknotzero n => + match Int.is_power2 n with + | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz W r1 bit lbl) + | None => transl_cond_branch_default c args lbl k + end + | a1 :: nil, Cmasklzero n => + match Int64.is_power2' n with + | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz X r1 bit lbl) + | None => transl_cond_branch_default c args lbl k + end + | a1 :: nil, Cmasklnotzero n => + match Int64.is_power2' n with + | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz X r1 bit lbl) + | None => transl_cond_branch_default c args lbl k + end + | _, _ => + transl_cond_branch_default c args lbl k + end. + +(** Translation of the arithmetic operation [r <- op(args)]. + The corresponding instructions are prepended to [k]. *) + +Definition transl_op + (op: operation) (args: list mreg) (res: mreg) (k: bcode) := + match op, args with + | Omove, a1 :: nil => + match preg_of res, preg_of a1 with + | IR r, IR a => OK (Pmov r a ::bi k) + | FR r, FR a => OK (Pfmov r a ::bi k) + | _ , _ => Error(msg "Asmgenblock.Omove") + end + | Ointconst n, nil => + do rd <- ireg_of res; + OK (loadimm32 rd n k) + | Olongconst n, nil => + do rd <- ireg_of res; + OK (loadimm64 rd n k) + | Ofloatconst f, nil => + do rd <- freg_of res; + OK (if Float.eq_dec f Float.zero + then Pfmovi D rd XZR ::bi k + else Pfmovimmd f rd ::bi k) + | Osingleconst f, nil => + do rd <- freg_of res; + OK (if Float32.eq_dec f Float32.zero + then Pfmovi S rd XZR ::bi k + else Pfmovimms f rd ::bi k) + | Oaddrsymbol id ofs, nil => + do rd <- ireg_of res; + OK (loadsymbol rd id ofs k) + | Oaddrstack ofs, nil => + do rd <- ireg_of res; + OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k) + (** 32-bit integer arithmetic *) + | Oshift s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porr W (transl_shift s a) rd XZR r1 ::bi k) + | Oadd, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Padd W SOnone rd r1 r2 ::bi k) + | Oaddshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Padd W (transl_shift s a) rd r1 r2 ::bi k) + | Oaddimm n, a1 :: nil => + do rd <- ireg_of res; do rs <- ireg_of a1; + OK (addimm32 rd rs n k) + | Oneg, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psub W SOnone rd XZR r1 ::bi k) + | Onegshift s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psub W (transl_shift s a) rd XZR r1 ::bi k) + | Osub, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psub W SOnone rd r1 r2 ::bi k) + | Osubshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psub W (transl_shift s a) rd r1 r2 ::bi k) + | Omul, a1 :: a2 :: nil => + do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2; + OK (Pmadd W rd rs1 rs2 XZR ::bi k) + | Omuladd, a1 :: a2 :: a3 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; + OK (Pmadd W rd r2 r3 r1 ::bi k) + | Omulsub, a1 :: a2 :: a3 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; + OK (Pmsub W rd r2 r3 r1 ::bi k) + | Odiv, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psdiv W rd r1 r2 ::bi k) + | Odivu, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pudiv W rd r1 r2 ::bi k) + | Oand, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pand W SOnone rd r1 r2 ::bi k) + | Oandshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pand W (transl_shift s a) rd r1 r2 ::bi k) + | Oandimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k) + | Oor, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porr W SOnone rd r1 r2 ::bi k) + | Oorshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porr W (transl_shift s a) rd r1 r2 ::bi k) + | Oorimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k) + | Oxor, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peor W SOnone rd r1 r2 ::bi k) + | Oxorshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peor W (transl_shift s a) rd r1 r2 ::bi k) + | Oxorimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k) + | Onot, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porn W SOnone rd XZR r1 ::bi k) + | Onotshift s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porn W (transl_shift s a) rd XZR r1 ::bi k) + | Obic, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pbic W SOnone rd r1 r2 ::bi k) + | Obicshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pbic W (transl_shift s a) rd r1 r2 ::bi k) + | Oorn, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porn W SOnone rd r1 r2 ::bi k) + | Oornshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porn W (transl_shift s a) rd r1 r2 ::bi k) + | Oeqv, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peon W SOnone rd r1 r2 ::bi k) + | Oeqvshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peon W (transl_shift s a) rd r1 r2 ::bi k) + | Oshl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Plslv W rd r1 r2 ::bi k) + | Oshr, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pasrv W rd r1 r2 ::bi k) + | Oshru, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Plsrv W rd r1 r2 ::bi k) + | Oshrximm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (shrx32 rd r1 n k) + | Ozext s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfiz W Int.zero s rd r1 ::bi k) + | Osext s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfiz W Int.zero s rd r1 ::bi k) + | Oshlzext s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Oshlsext s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Ozextshr a s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Osextshr a s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k) + (** 64-bit integer arithmetic *) + | Oshiftl s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porr X (transl_shift s a) rd XZR r1 ::bi k) + | Oextend x a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (move_extended rd r1 x a k) + (* [Omakelong] and [Ohighlong] should not occur *) + | Olowlong, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + assertion (ireg_eq rd r1); + OK (Pcvtx2w rd ::bi k) + | Oaddl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Padd X SOnone rd r1 r2 ::bi k) + | Oaddlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Padd X (transl_shift s a) rd r1 r2 ::bi k) + | Oaddlext x a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (arith_extended Paddext (Padd X) rd r1 r2 x a k) + | Oaddlimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (addimm64 rd r1 n k) + | Onegl, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psub X SOnone rd XZR r1 ::bi k) + | Oneglshift s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psub X (transl_shift s a) rd XZR r1 ::bi k) + | Osubl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psub X SOnone rd r1 r2 ::bi k) + | Osublshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psub X (transl_shift s a) rd r1 r2 ::bi k) + | Osublext x a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (arith_extended Psubext (Psub X) rd r1 r2 x a k) + | Omull, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pmadd X rd r1 r2 XZR ::bi k) + | Omulladd, a1 :: a2 :: a3 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; + OK (Pmadd X rd r2 r3 r1 ::bi k) + | Omullsub, a1 :: a2 :: a3 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; + OK (Pmsub X rd r2 r3 r1 ::bi k) + | Omullhs, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psmulh rd r1 r2 ::bi k) + | Omullhu, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pumulh rd r1 r2 ::bi k) + | Odivl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Psdiv X rd r1 r2 ::bi k) + | Odivlu, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pudiv X rd r1 r2 ::bi k) + | Oandl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pand X SOnone rd r1 r2 ::bi k) + | Oandlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pand X (transl_shift s a) rd r1 r2 ::bi k) + | Oandlimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k) + | Oorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porr X SOnone rd r1 r2 ::bi k) + | Oorlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porr X (transl_shift s a) rd r1 r2 ::bi k) + | Oorlimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k) + | Oxorl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peor X SOnone rd r1 r2 ::bi k) + | Oxorlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peor X (transl_shift s a) rd r1 r2 ::bi k) + | Oxorlimm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k) + | Onotl, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porn X SOnone rd XZR r1 ::bi k) + | Onotlshift s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Porn X (transl_shift s a) rd XZR r1 ::bi k) + | Obicl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pbic X SOnone rd r1 r2 ::bi k) + | Obiclshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pbic X (transl_shift s a) rd r1 r2 ::bi k) + | Oornl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porn X SOnone rd r1 r2 ::bi k) + | Oornlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Porn X (transl_shift s a) rd r1 r2 ::bi k) + | Oeqvl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peon X SOnone rd r1 r2 ::bi k) + | Oeqvlshift s a, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Peon X (transl_shift s a) rd r1 r2 ::bi k) + | Oshll, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Plslv X rd r1 r2 ::bi k) + | Oshrl, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Pasrv X rd r1 r2 ::bi k) + | Oshrlu, a1 :: a2 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (Plsrv X rd r1 r2 ::bi k) + | Oshrlximm n, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (shrx64 rd r1 n k) + | Ozextl s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfiz X Int.zero s rd r1 ::bi k) + | Osextl s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfiz X Int.zero s rd r1 ::bi k) + | Oshllzext s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Oshllsext s a, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Ozextshrl a s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Pubfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k) + | Osextshrl a s, a1 :: nil => + do rd <- ireg_of res; do r1 <- ireg_of a1; + OK (Psbfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k) + (** 64-bit floating-point arithmetic *) + | Onegf, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfneg D rd rs ::bi k) + | Oabsf, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfabs D rd rs ::bi k) + | Oaddf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfadd D rd rs1 rs2 ::bi k) + | Osubf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfsub D rd rs1 rs2 ::bi k) + | Omulf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmul D rd rs1 rs2 ::bi k) + | Odivf, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfdiv D rd rs1 rs2 ::bi k) + (** 32-bit floating-point arithmetic *) + | Onegfs, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfneg S rd rs ::bi k) + | Oabsfs, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfabs S rd rs ::bi k) + | Oaddfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfadd S rd rs1 rs2 ::bi k) + | Osubfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfsub S rd rs1 rs2 ::bi k) + | Omulfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfmul S rd rs1 rs2 ::bi k) + | Odivfs, a1 :: a2 :: nil => + do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; + OK (Pfdiv S rd rs1 rs2 ::bi k) + | Osingleoffloat, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfcvtsd rd rs ::bi k) + | Ofloatofsingle, a1 :: nil => + do rd <- freg_of res; do rs <- freg_of a1; + OK (Pfcvtds rd rs ::bi k) + (** Conversions between int and float *) + | Ointoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzs W D rd rs ::bi k) + | Ointuoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzu W D rd rs ::bi k) + | Ofloatofint, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pscvtf D W rd rs ::bi k) + | Ofloatofintu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pucvtf D W rd rs ::bi k) + | Ointofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzs W S rd rs ::bi k) + | Ointuofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzu W S rd rs ::bi k) + | Osingleofint, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pscvtf S W rd rs ::bi k) + | Osingleofintu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pucvtf S W rd rs ::bi k) + | Olongoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzs X D rd rs ::bi k) + | Olonguoffloat, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzu X D rd rs ::bi k) + | Ofloatoflong, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pscvtf D X rd rs ::bi k) + | Ofloatoflongu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pucvtf D X rd rs ::bi k) + | Olongofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzs X S rd rs ::bi k) + | Olonguofsingle, a1 :: nil => + do rd <- ireg_of res; do rs <- freg_of a1; + OK (Pfcvtzu X S rd rs ::bi k) + | Osingleoflong, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pscvtf S X rd rs ::bi k) + | Osingleoflongu, a1 :: nil => + do rd <- freg_of res; do rs <- ireg_of a1; + OK (Pucvtf S X rd rs ::bi k) + (** Boolean tests *) + | Ocmp c, _ => + do rd <- ireg_of res; + transl_cond c args (Pcset rd (cond_for_cond c) ::bi k) + (** Conditional move *) + | Osel cmp ty, a1 :: a2 :: args => + match preg_of res with + | IR r => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k) + | FR r => + do r1 <- freg_of a1; do r2 <- freg_of a2; + transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k) + | _ => + Error(msg "Asmgenblock.Osel") + end + | _, _ => Error(msg "Asmgenblock.transl_op") + end. + +(** Translation of addressing modes *) + +Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg) + (insn: Asm.addressing -> basic) (k: bcode) : res bcode := + match addr, args with + | Aindexed ofs, a1 :: nil => + do r1 <- ireg_of a1; + if offset_representable sz ofs then + OK (insn (ADimm r1 ofs) ::bi k) + else + OK (loadimm64 X16 ofs (insn (ADreg r1 X16) ::bi k)) + | Aindexed2, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + OK (insn (ADreg r1 r2) ::bi k) + | Aindexed2shift a, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + if Int.eq a Int.zero then + OK (insn (ADreg r1 r2) ::bi k) + else if Int.eq (Int.shl Int.one a) (Int.repr sz) then + OK (insn (ADlsl r1 r2 a) ::bi k) + else + OK (Padd X (SOlsl a) X16 r1 r2 ::bi insn (ADimm X16 Int64.zero) ::bi k) + | Aindexed2ext x a, a1 :: a2 :: nil => + do r1 <- ireg_of a1; do r2 <- ireg_of a2; + if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then + OK (insn (match x with Xsgn32 => ADsxt r1 r2 a + | Xuns32 => ADuxt r1 r2 a end) ::bi k) + else + OK (arith_extended Paddext (Padd X) X16 r1 r2 x a + (insn (ADimm X16 Int64.zero) ::bi k)) + | Aglobal id ofs, nil => + assertion (negb (Archi.pic_code tt)); + if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz + then OK (Padrp id ofs X16 ::bi insn (ADadr X16 id ofs) ::bi k) + else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) ::bi k)) + | Ainstack ofs, nil => + let ofs := Ptrofs.to_int64 ofs in + if offset_representable sz ofs then + OK (insn (ADimm XSP ofs) ::bi k) + else + OK (loadimm64 X16 ofs (insn (ADreg XSP X16) ::bi k)) + | _, _ => + Error(msg "Asmgen.transl_addressing") + end. + +(** Translation of loads and stores *) + +Definition transl_load (chunk: memory_chunk) (addr: Op.addressing) + (args: list mreg) (dst: mreg) (k: bcode) : res bcode := + match chunk with + | Mint8unsigned => + do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrb W) rd) k + | Mint8signed => + do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrsb W) rd) k + | Mint16unsigned => + do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrh W) rd) k + | Mint16signed => + do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrsh W) rd) k + | Mint32 => + do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw rd) k + | Mint64 => + do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx rd) k + | Mfloat32 => + do rd <- freg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrs rd) k + | Mfloat64 => + do rd <- freg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrd rd) k + | Many32 => + do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw_a rd) k + | Many64 => + do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx_a rd) k + end. + +Definition transl_store (chunk: memory_chunk) (addr: Op.addressing) + (args: list mreg) (src: mreg) (k: bcode) : res bcode := + match chunk with + | Mint8unsigned | Mint8signed => + do r1 <- ireg_of src; transl_addressing 1 addr args (PSt_rs_a Pstrb r1) k + | Mint16unsigned | Mint16signed => + do r1 <- ireg_of src; transl_addressing 2 addr args (PSt_rs_a Pstrh r1) k + | Mint32 => + do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw r1) k + | Mint64 => + do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx r1) k + | Mfloat32 => + do r1 <- freg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrs r1) k + | Mfloat64 => + do r1 <- freg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrd r1) k + | Many32 => + do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw_a r1) k + | Many64 => + do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx_a r1) k + end. + +(** Translation of a Machblock instruction. *) + +Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst) + (ep: bool) (k: bcode) := + match i with + | MBgetstack ofs ty dst => + loadind XSP ofs ty dst k + | MBsetstack src ofs ty => + storeind src XSP ofs ty k + | MBgetparam ofs ty dst => + do c <- loadind X29 ofs ty dst k; + OK (if ep then c else loadptr_bc XSP f.(fn_link_ofs) X29 c) + | MBop op args res => + transl_op op args res k + | MBload t chunk addr args dst => + match t with + | TRAP => transl_load chunk addr args dst k + | NOTRAP => Error(msg "Asmgenblock.transl_instr_basic: NOTRAP load not supported in aarch64.") + end + | MBstore chunk addr args src => + transl_store chunk addr args src k + end. + +(** Translation of a code sequence *) + +Definition it1_is_parent (before: bool) (i: Machblock.basic_inst) : bool := + match i with + (*| MBgetstack ofs ty dst => before && negb (mreg_eq dst R29)*) + | MBsetstack src ofs ty => before + | MBgetparam ofs ty dst => negb (mreg_eq dst R29) + | MBop op args res => before && negb (mreg_eq res R29) + (*| MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst R29)*) + | _ => false + end. + +Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) := + match il with + | nil => OK (nil) + | i1 :: il' => + do k1 <- transl_basic_code f il' (it1_is_parent it1p i1); + transl_instr_basic f i1 it1p k1 + end. + +Program Definition cons_bblocks (ll: list label) (bdy: list basic) (ex: option control): bblocks := + match ex with + | None => + match bdy with + | nil => {| header := ll; body:= Pnop::nil; exit := None |} :: nil + | _ => {| header := ll; body:= bdy; exit := None |} :: nil + end + | _ => + match bdy with + | nil => {| header := ll; body:= nil; exit := ex |} :: nil + | _ => {| header := ll; body:= bdy; exit := ex |} :: nil + end + end. +Next Obligation. + induction bdy. congruence. + simpl. auto. +Qed. +Next Obligation. + destruct ex. simpl. auto. + congruence. +Qed. +Next Obligation. + induction bdy. congruence. + simpl. auto. +Qed. + +Definition transl_control (f: Machblock.function) (ctl: control_flow_inst) : res (bcode*control) := + match ctl with + | MBcall sig (inl r) => do r1 <- ireg_of r; + OK (nil, PCtlFlow (Pblr r1 sig)) + | MBcall sig (inr symb) => OK (nil, PCtlFlow (Pbl symb sig)) + | MBtailcall sig (inr symb) => OK(make_epilogue f, PCtlFlow (Pbs symb sig)) + | MBtailcall sig (inl r) => do r1 <- ireg_of r; + OK (make_epilogue f, PCtlFlow (Pbr r1 sig)) + | MBbuiltin ef args res => OK (nil, Pbuiltin ef (List.map (map_builtin_arg dreg_of) args) (map_builtin_res dreg_of res)) + | MBgoto lbl => OK (nil, PCtlFlow (Pb lbl)) + | MBcond cond args lbl => do (bc, c) <- transl_cond_branch cond args lbl nil; OK (bc, PCtlFlow c) + | MBreturn => OK (make_epilogue f, PCtlFlow (Pret RA)) + | MBjumptable arg tbl => do r <- ireg_of arg; + OK (nil, PCtlFlow (Pbtbl r tbl)) + end. + +Definition transl_exit (f: Machblock.function) (ext: option control_flow_inst) : res (bcode*option control) := + match ext with + Some ctl => do (b,c) <- transl_control f ctl; OK (b, Some c) + | None => OK (nil, None) + end. + +Definition transl_block (f: Machblock.function) (fb: Machblock.bblock) (ep: bool) : res (list bblock) := + do (bdy2, ex) <- transl_exit f fb.(Machblock.exit); + do bdy1 <- transl_basic_code f fb.(Machblock.body) ep; + OK (cons_bblocks fb.(Machblock.header) (bdy1 @@ bdy2) ex) + . + +Fixpoint transl_blocks (f: Machblock.function) (lmb: list Machblock.bblock) (ep: bool) := + match lmb with + | nil => OK nil + | mb :: lmb => + do lb <- transl_block f mb (if Machblock.header mb then ep else false); + do lb' <- transl_blocks f lmb false; + OK (lb @@ lb') + end +. + +Program Definition make_prologue (f: Machblock.function) (k:bblocks) := + {| header := nil; body := Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::bi + ((PSt_rs_a Pstrx RA) (ADimm XSP (Ptrofs.to_int64 (f.(fn_retaddr_ofs))))) ::bi nil; + exit := None |} :: k. + +Definition transl_function (f: Machblock.function) : res Asmblock.function := + do lb <- transl_blocks f f.(Machblock.fn_code) true; + OK (mkfunction f.(Machblock.fn_sig) + (make_prologue f lb)). + +Definition transf_function (f: Machblock.function) : res Asmblock.function := + do tf <- transl_function f; + if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks)) + then Error (msg "code size exceeded") + else OK tf. + +Definition transf_fundef (f: Machblock.fundef) : res Asmblock.fundef := + transf_partial_fundef transf_function f. + +Definition transf_program (p: Machblock.program) : res Asmblock.program := + transform_partial_program transf_fundef p. + diff --git a/aarch64/Asmblockgenproof.v b/aarch64/Asmblockgenproof.v new file mode 100644 index 00000000..11219928 --- /dev/null +++ b/aarch64/Asmblockgenproof.v @@ -0,0 +1,1547 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Errors. +Require Import Integers Floats AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations Machblock Conventions Asmblock IterList. +Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops. +Require Import Lia. + +Module MB := Machblock. +Module AB := Asmblock. + +Definition match_prog (p: MB.program) (tp: AB.program) := + match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall p tp, transf_program p = OK tp -> match_prog p tp. +Proof. + intros. eapply match_transform_partial_program; eauto. +Qed. + +Section PRESERVATION. + +Variable lk: aarch64_linker. +Variable prog: Machblock.program. +Variable tprog: Asmblock.program. +Hypothesis TRANSF: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof (Genv.find_symbol_match TRANSF). + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof (Genv.senv_match TRANSF). + +Lemma functions_translated: + forall b f, + Genv.find_funct_ptr ge b = Some f -> + exists tf, + Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_ptr_transf_partial TRANSF). + +Lemma functions_transl: + forall fb f tf, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transf_function f = OK tf -> + Genv.find_funct_ptr tge fb = Some (Internal tf). +Proof. + intros. exploit functions_translated; eauto. intros [tf' [A B]]. + monadInv B. rewrite H0 in EQ; inv EQ; auto. +Qed. + +Lemma transf_function_no_overflow: + forall f tf, + transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned. +Proof. + intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. + lia. +Qed. + +Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs), + Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address tge id ofs. + +(** * Proof of semantic preservation *) + +(** Semantic preservation is proved using a complex simulation diagram + of the following form. +<< + MB.step + ----------------------------------------> + header body exit + st1 -----> st2 -----> st3 ------------------> st4 + | | | | + | (A) | (B) | (C) | + match_codestate | | | | + | header | body1 | body2 | match_states + cs1 -----> cs2 -----> cs3 ------> cs4 | + | / \ exit | + match_asmstate | --------------- --->--- | + | / match_asmstate \ | + st'1 ---------------------------------------> st'2 + AB.step * +>> + The invariant between each MB.step/AB.step is the [match_states] predicate below. + However, we also need to introduce an intermediary state [Codestate] which allows + us to reason on a finer grain, executing header, body and exit separately. + + This [Codestate] consists in a state like [Asmblock.State], except that the + code is directly stored in the state, much like [Machblock.State]. It also features + additional useful elements to keep track of while executing a bblock. +*) + +Inductive match_states: Machblock.state -> Asm.state -> Prop := + | match_states_intro: + forall s fb sp c ep ms m m' rs f tf tc + (STACKS: match_stack ge s) + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (MEXT: Mem.extends m m') + (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) + (AG: agree ms sp rs) + (DXP: ep = true -> rs#X29 = parent_sp s), + match_states (Machblock.State s fb sp c ms m) + (Asm.State rs m') + | match_states_call: + forall s fb ms m m' rs + (STACKS: match_stack ge s) + (MEXT: Mem.extends m m') + (AG: agree ms (parent_sp s) rs) + (ATPC: rs PC = Vptr fb Ptrofs.zero) + (ATLR: rs RA = parent_ra s), + match_states (Machblock.Callstate s fb ms m) + (Asm.State rs m') + | match_states_return: + forall s ms m m' rs + (STACKS: match_stack ge s) + (MEXT: Mem.extends m m') + (AG: agree ms (parent_sp s) rs) + (ATPC: rs PC = parent_ra s), + match_states (Machblock.Returnstate s ms m) + (Asm.State rs m'). + +Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *) + +Lemma cons_bblocks_label: + forall hd bdy ex tbb tc, + cons_bblocks hd bdy ex = tbb::tc -> + header tbb = hd. +Proof. + intros until tc. intros CONSB. unfold cons_bblocks in CONSB. + destruct ex; try destruct bdy; try destruct c; try destruct i. + all: inv CONSB; simpl; auto. +Qed. + +Lemma cons_bblocks_label2: + forall hd bdy ex tbb1 tbb2, + cons_bblocks hd bdy ex = tbb1::tbb2::nil -> + header tbb2 = nil. +Proof. + intros until tbb2. intros CONSB. unfold cons_bblocks in CONSB. + destruct ex; try destruct bdy; try destruct c; try destruct i. + all: inv CONSB; simpl; auto. +Qed. + +Remark in_dec_transl: + forall lbl hd, + (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false). +Proof. + intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto. +Qed. + +Lemma transl_is_label: + forall lbl bb tbb f ep tc, + transl_block f bb ep = OK (tbb::tc) -> + is_label lbl tbb = MB.is_label lbl bb. +Proof. + intros until tc. intros TLB. + destruct tbb as [thd tbdy tex]; simpl in *. + monadInv TLB. + unfold is_label. simpl. + apply cons_bblocks_label in H0. simpl in H0. subst. + rewrite in_dec_transl. auto. +Qed. + +Lemma transl_is_label_false2: + forall lbl bb f ep tbb1 tbb2, + transl_block f bb ep = OK (tbb1::tbb2::nil) -> + is_label lbl tbb2 = false. +Proof. + intros until tbb2. intros TLB. + destruct tbb2 as [thd tbdy tex]; simpl in *. + monadInv TLB. apply cons_bblocks_label2 in H0. simpl in H0. subst. + apply is_label_correct_false. simpl. auto. +Qed. + +Lemma transl_block_nonil: + forall f c ep tc, + transl_block f c ep = OK tc -> + tc <> nil. +Proof. + intros. monadInv H. unfold cons_bblocks. + destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i. + all: discriminate. +Qed. + +Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc, + ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc). +Proof. + intros. intro. monadInv H. + unfold cons_bblocks in H0. + destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i. + all: discriminate. +Qed. + +Lemma find_label_transl_false: + forall x f lbl bb ep x', + transl_block f bb ep = OK x -> + MB.is_label lbl bb = false -> + find_label lbl (x++x') = find_label lbl x'. +Proof. + intros until x'. intros TLB MBis; simpl; auto. + destruct x as [|x0 x1]; simpl; auto. + destruct x1 as [|x1 x2]; simpl; auto. + - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto. + - destruct x2 as [|x2 x3]; simpl; auto. + + erewrite <- transl_is_label in MBis; eauto. rewrite MBis. + erewrite transl_is_label_false2; eauto. + + apply transl_block_limit in TLB. destruct TLB. +Qed. + +Lemma transl_blocks_label: + forall lbl f c tc ep, + transl_blocks f c ep = OK tc -> + match MB.find_label lbl c with + | None => find_label lbl tc = None + | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc' + end. +Proof. + induction c; simpl; intros. + inv H. auto. + monadInv H. + destruct (MB.is_label lbl a) eqn:MBis. + - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. } + simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis. + rewrite ABis. + eexists. eexists. split; eauto. simpl transl_blocks. + assert (MB.header a <> nil). + { apply MB.is_label_correct_true in MBis. + destruct (MB.header a). contradiction. discriminate. } + destruct (MB.header a); try contradiction. + rewrite EQ. simpl. rewrite EQ1. simpl. auto. + - apply IHc in EQ1. destruct (MB.find_label lbl c). + + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto. + erewrite find_label_transl_false; eauto. + + erewrite find_label_transl_false; eauto. +Qed. + +Lemma find_label_nil: + forall bb lbl c, + header bb = nil -> + find_label lbl (bb::c) = find_label lbl c. +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. subst. + assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false). + { erewrite <- is_label_correct_false. simpl. auto. } + rewrite H. auto. +Qed. + +Theorem transl_find_label: + forall lbl f tf, + transf_function f = OK tf -> + match MB.find_label lbl f.(MB.fn_code) with + | None => find_label lbl tf.(fn_blocks) = None + | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc + end. +Proof. + intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g. + monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto. + eapply transl_blocks_label; eauto. +Qed. + +End TRANSL_LABEL. + +(** A valid branch in a piece of Machblock code translates to a valid ``go to'' + transition in the generated Asmblock code. *) + +Lemma find_label_goto_label: + forall f tf lbl rs m c' b ofs, + Genv.find_funct_ptr ge b = Some (Internal f) -> + transf_function f = OK tf -> + rs PC = Vptr b ofs -> + MB.find_label lbl f.(MB.fn_code) = Some c' -> + exists tc', exists rs', + goto_label tf lbl rs m = Next rs' m + /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2. + intros (tc & A & B). + exploit label_pos_code_tail; eauto. instantiate (1 := 0). + intros [pos' [P [Q R]]]. + exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))). + split. unfold goto_label. rewrite P. rewrite H1. auto. + split. rewrite Pregmap.gss. constructor; auto. + rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q. + auto. lia. + generalize (transf_function_no_overflow _ _ H0). lia. + intros. apply Pregmap.gso; auto. +Qed. + +(** Existence of return addresses *) + +Lemma return_address_exists: + forall b f c, is_tail (b :: c) f.(MB.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros. eapply Asmblockgenproof0.return_address_exists; eauto. + +- intros. monadInv H0. + destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl. + exists x; exists true; split; auto. + repeat constructor. +- exact transf_function_no_overflow. +Qed. + +(* Useful for dealing with the many cases in some proofs *) +Ltac exploreInst := + repeat match goal with + | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var + | [ H : OK _ = OK _ |- _ ] => monadInv H + | [ |- context[if ?b then _ else _] ] => destruct b + | [ |- context[match ?m with | _ => _ end] ] => destruct m + | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m + | [ H : bind _ _ = OK _ |- _ ] => monadInv H + | [ H : Error _ = OK _ |- _ ] => inversion H + end. + +(** Some translation properties *) + +Lemma transl_blocks_distrib: + forall c f bb tbb tc ep, + transl_blocks f (bb::c) ep = OK (tbb::tc) + -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil) + /\ transl_blocks f c false = OK tc. +Proof. + intros until ep. intros TLBS. + destruct bb as [hd bdy ex]. + monadInv TLBS. monadInv EQ. + unfold transl_block. + rewrite EQ0; simpl. + simpl in EQ; rewrite EQ; simpl. + unfold cons_bblocks in *. simpl in EQ0. + destruct ex. + - simpl in *. monadInv EQ0. + destruct (x3 @@ x1) eqn: CBDY; inv H0; inv EQ1; auto. + - simpl in *. inv EQ0. destruct (x3 @@ nil) eqn: CBDY; inv H0; inv EQ1; auto. +Qed. + +Lemma cons_bblocks_decomp: + forall thd tbdy tex tbb, + (tbdy <> nil \/ tex <> None) -> + cons_bblocks thd tbdy tex = tbb :: nil -> + header tbb = thd + /\ body tbb = tbdy + /\ exit tbb = tex. +Proof. + intros until tbb. intros Hnonil CONSB. unfold cons_bblocks in CONSB. + destruct (tex) eqn:ECTL. + - destruct tbdy; inv CONSB; simpl; auto. + - inversion Hnonil. + + destruct tbdy as [|bi tbdy]; [ contradiction H; auto | inv CONSB; auto]. + + contradict H; simpl; auto. +Qed. + +Lemma transl_blocks_nonil: + forall f bb c tc ep, + transl_blocks f (bb::c) ep = OK tc -> + exists tbb tc', tc = tbb :: tc'. +Proof. + intros until ep. intros TLBS. monadInv TLBS. monadInv EQ. unfold cons_bblocks. + destruct (x2); + destruct (x3 @@ x1); simpl; eauto. +Qed. + +Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}. + +Definition mb_remove_body (bb: MB.bblock) := + {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}. + +Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat. + +Lemma mbsize_eqz: + forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None. +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H. + remember (length _) as a. remember (length_opt _) as b. + assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H. + inv H0. inv H1. destruct bdy; destruct ex; auto. + all: try discriminate. +Qed. + +Lemma mbsize_neqz: + forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None). +Proof. + intros. destruct bb as [hd bdy ex]; simpl in *. + destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate). + contradict H. unfold mbsize. simpl. auto. +Qed. + +Record codestate := + Codestate { pstate: state; (**r projection to Asmblock.state *) + pheader: list label; + pbody1: list basic; (**r list of basic instructions coming from the translation of the Machblock body *) + pbody2: list basic; (**r list of basic instructions coming from the translation of the Machblock exit *) + pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *) + ep: bool; (**r reflects the [ep] variable used in the translation *) + rem: list AB.bblock; (**r remaining bblocks to execute *) + cur: bblock (**r current bblock to execute - to keep track of its size when incrementing PC *) + }. + +(* The part that deals with Machblock <-> Codestate agreement + * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *) +Inductive match_codestate fb: Machblock.state -> codestate -> Prop := + | match_codestate_intro: + forall s sp ms m rs0 m0 f tc ep c bb tbb tbc1 tbc2 ex + (STACKS: match_stack ge s) + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (MEXT: Mem.extends m m0) + (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc1) + (TIC: transl_exit f (MB.exit bb) = OK (tbc2, ex)) + (TBLS: transl_blocks f c false = OK tc) + (AG: agree ms sp rs0) + (DXP: (if MB.header bb then ep else false) = true -> rs0#X29 = parent_sp s) + , + match_codestate fb (Machblock.State s fb sp (bb::c) ms m) + {| pstate := (Asm.State rs0 m0); + pheader := (MB.header bb); + pbody1 := tbc1; + pbody2 := tbc2; + pctl := ex; + ep := ep; + rem := tc; + cur := tbb + |} +. + +(* The part ensuring that the code in Codestate actually resides at [rs PC] *) +Inductive match_asmstate fb: codestate -> Asm.state -> Prop := + | match_asmstate_some: + forall rs f tf tc m tbb ofs ep tbdy1 tbdy2 tex lhd + (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) + (TRANSF: transf_function f = OK tf) + (PCeq: rs PC = Vptr fb ofs) + (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc)) + , + match_asmstate fb + {| pstate := (Asm.State rs m); + pheader := lhd; + pbody1 := tbdy1; + pbody2 := tbdy2; + pctl := tex; + ep := ep; + rem := tc; + cur := tbb |} + (Asm.State rs m) +. + +Lemma indexed_memory_access_nonil: forall f ofs r i k, + indexed_memory_access_bc f ofs r i k <> nil. +Proof. + intros. + unfold indexed_memory_access_bc, loadimm64, loadimm, loadimm_z, loadimm_n; + desif; try congruence. + all: destruct decompose_int; try destruct p; try congruence. +Qed. + +Lemma loadimm_nonil: forall sz x n k, + loadimm sz x n k <> nil. +Proof. + intros. + unfold loadimm. desif; + unfold loadimm_n, loadimm_z. + all: destruct decompose_int; try destruct p; try congruence. +Qed. + +Lemma loadimm32_nonil: forall sz x n, + loadimm32 sz x n <> nil. +Proof. + intros. + unfold loadimm32. desif; try congruence. + apply loadimm_nonil. +Qed. + +Lemma loadimm64_nonil: forall sz x n, + loadimm64 sz x n <> nil. +Proof. + intros. + unfold loadimm64. desif; try congruence. + apply loadimm_nonil. +Qed. + +Lemma loadsymbol_nonil: forall sz x n k, + loadsymbol sz x n k <> nil. +Proof. + intros. + unfold loadsymbol. desif; try congruence. +Qed. + +Lemma move_extended_nonil: forall x0 x1 x2 a k, + move_extended x1 x2 x0 a k <> nil. +Proof. + intros. unfold move_extended, move_extended_base. + desif; try congruence. +Qed. + +Lemma arith_extended_nonil: forall insnX insnS x0 x1 x2 x3 a k, + arith_extended insnX insnS x1 x2 x3 x0 a k <> nil. +Proof. + intros. unfold arith_extended, move_extended_base. + desif; try congruence. +Qed. + +Lemma transl_instr_basic_nonil: + forall k f bi ep x, + transl_instr_basic f bi ep k = OK x -> + x <> nil. +Proof. + intros until x. intros TIB. + destruct bi. + - simpl in TIB. unfold loadind in TIB; + exploreInst; try discriminate; apply indexed_memory_access_nonil. + - simpl in TIB. unfold storeind in TIB; + exploreInst; try discriminate; apply indexed_memory_access_nonil. + - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate; + unfold loadptr_bc; apply indexed_memory_access_nonil. + - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate; + unfold addimm32, addimm64, shrx32, shrx64, + logicalimm32, logicalimm64, addimm_aux. + all: desif; try congruence; + try apply loadimm32_nonil; try apply loadimm64_nonil; try apply loadsymbol_nonil; + try apply move_extended_nonil; try apply arith_extended_nonil. + all: unfold transl_cond in *; exploreInst; try discriminate; + try apply loadimm32_nonil; try apply loadimm64_nonil. + - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate; + unfold transl_addressing in *; exploreInst; try discriminate. + all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil. + - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate; + unfold transl_addressing in *; exploreInst; try discriminate. + all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil. +Qed. + +Lemma transl_basic_code_nonil: + forall bdy f x ep, + bdy <> nil -> + transl_basic_code f bdy ep = OK x -> + x <> nil. +Proof. + induction bdy as [|bi bdy]. + intros. contradict H0; auto. + destruct bdy as [|bi2 bdy]. + - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto. + - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'. + monadInv TBC. + assert (x0 <> nil). + eapply IHbdy; eauto. subst bdy'. discriminate. + eapply transl_instr_basic_nonil; eauto. +Qed. + +Lemma transl_exit_nonil: + forall ex f bdy x, + ex <> None -> + transl_exit f ex = OK(bdy, x) -> + x <> None. +Proof. + intros ex f bdy x Hnonil TIC. + destruct ex as [ex|]. + - clear Hnonil. destruct ex. + all: try (simpl in TIC; try monadInv TIC; exploreInst; discriminate). + - contradict Hnonil; auto. +Qed. + +Theorem app_nonil: forall A (l1 l2: list A), + l1 <> nil -> + l1 @@ l2 <> nil. +Proof. + induction l2. + - intros; rewrite app_nil_r; auto. + - intros. unfold not. intros. symmetry in H0. + generalize (app_cons_not_nil); intros. unfold not in H1. + generalize (H0). apply H1. +Qed. + +Theorem match_state_codestate: + forall mbs abs s fb sp bb c ms m, + (MB.body bb <> nil \/ MB.exit bb <> None) -> + mbs = (Machblock.State s fb sp (bb::c) ms m) -> + match_states mbs abs -> + exists cs fb f tbb tc ep, + match_codestate fb mbs cs /\ match_asmstate fb cs abs + /\ Genv.find_funct_ptr ge fb = Some (Internal f) + /\ transl_blocks f (bb::c) ep = OK (tbb::tc) + /\ body tbb = pbody1 cs ++ pbody2 cs + /\ exit tbb = pctl cs + /\ cur cs = tbb /\ rem cs = tc + /\ pstate cs = abs. +Proof. + intros until m. intros Hnotempty Hmbs MS. subst. inv MS. + inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. + exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2. + monadInv TLB. exploit cons_bblocks_decomp; eauto. + { inversion Hnotempty. + - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail). + left. apply app_nonil. eapply transl_basic_code_nonil; eauto. + - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail). + right. eapply transl_exit_nonil; eauto. } + intros (Hth & Htbdy & Htexit). + exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x1; pbody2 := x; + pctl := x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0. + repeat split. 1-2: econstructor; eauto. + { destruct (MB.header bb). eauto. discriminate. } eauto. + unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl. + rewrite TLBS. simpl. rewrite H2. + all: simpl; auto. +Qed. + +Lemma exec_straight_body: + forall c c' rs1 m1 rs2 m2, + exec_straight tge lk c rs1 m1 c' rs2 m2 -> + exists l, + c = l ++ c' + /\ exec_body lk tge l rs1 m1 = Next rs2 m2. +Proof. + induction c; try (intros; inv H; fail). + intros until m2. intros EXES. inv EXES. + - exists (a :: nil). repeat (split; simpl; auto). rewrite H6. auto. + - eapply IHc in H7; eauto. destruct H7 as (l' & Hc & EXECB). subst. + exists (a :: l'). repeat (split; simpl; auto). + rewrite H1. auto. +Qed. + +Lemma exec_straight_body2: + forall c rs1 m1 c' rs2 m2, + exec_straight tge lk c rs1 m1 c' rs2 m2 -> + exists body, + exec_body lk tge body rs1 m1 = Next rs2 m2 + /\ body ++ c' = c. +Proof. + intros until m2. induction 1. + - exists (i1::nil). split; auto. simpl. rewrite H. auto. + - destruct IHexec_straight as (bdy & EXEB & BTC). + exists (i:: bdy). split; simpl. + + rewrite H. auto. + + congruence. +Qed. + +Lemma exec_straight_opt_body2: + forall c rs1 m1 c' rs2 m2, + exec_straight_opt tge lk c rs1 m1 c' rs2 m2 -> + exists body, + exec_body lk tge body rs1 m1 = Next rs2 m2 + /\ body ++ c' = c. +Proof. + intros until m2. intros EXES. + inv EXES. + - exists nil. split; auto. + - eapply exec_straight_body2. auto. +Qed. + +Lemma PC_not_data_preg: forall r , + data_preg r = true -> + r <> PC. +Proof. + intros. destruct (PregEq.eq r PC); [ rewrite e in H; simpl in H; discriminate | auto ]. +Qed. + +Lemma X30_not_data_preg: forall r , + data_preg r = true -> + r <> X30. +Proof. + intros. destruct (PregEq.eq r X30); [ rewrite e in H; simpl in H; discriminate | auto ]. +Qed. + +Lemma X16_not_data_preg: forall r , + data_preg r = true -> + r <> X16. +Proof. + intros. destruct (PregEq.eq r X16); [ rewrite e in H; simpl in H; discriminate | auto ]. +Qed. + +Lemma undef_regs_other_2': + forall r rl rs, + data_preg r = true -> + preg_notin r rl -> + undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of rl) rs r = rs r. +Proof. + intros. apply undef_regs_other. intros. simpl in H1. + destruct H1 as [HX16 | [HX30 | HDES]]; subst. + apply X16_not_data_preg; auto. apply X30_not_data_preg; auto. + exploit list_in_map_inv; eauto. intros [mr [A B]]. subst. + rewrite preg_notin_charact in H0. auto. +Qed. + +Ltac Simpl := + rewrite Pregmap.gso; try apply PC_not_data_preg; try apply X30_not_data_preg. + +(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are + unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by + yourself the steps *) +Theorem step_simu_control: + forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2, + MB.body bb' = nil -> + Genv.find_funct_ptr tge fb = Some (Internal fn) -> + pstate cs2 = (State rs2 m2) -> + pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex -> + cur cs2 = tbb -> + match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 -> + match_asmstate fb cs2 (State rs1 m1) -> + exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' -> + (exists rs3 m3 rs4 m4, + exec_body lk tge tbdy2 rs2 m2 = Next rs3 m3 + /\ exec_exit tge fn (Ptrofs.repr (size tbb)) rs3 m3 tex t rs4 m4 + /\ match_states S'' (State rs4 m4)). +Proof. + intros until cs2. intros Hbody FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP. + inv ESTEP. + - inv MCS. inv MAS. simpl in *. + inv Hpstate. + destruct ctl. + + (* MBcall *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + destruct s1 as [rf|fid]; simpl in H1. + * (* Indirect call *) + monadInv H1. monadInv EQ. + assert (ms' rf = Vptr f' Ptrofs.zero). + { unfold find_function_ptr in H12. destruct (ms' rf); try discriminate. + revert H12; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } + assert (rs2 x = Vptr f' Ptrofs.zero). + { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. } + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + remember (Ptrofs.add _ _) as ofs'. + assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc). + { econstructor; eauto. } + assert (f1 = f) by congruence. subst f1. + exploit return_address_offset_correct; eauto. intros; subst ra. + + repeat eexists. + econstructor; eauto. econstructor. + econstructor; eauto. econstructor; eauto. + eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. + unfold incrPC; repeat Simpl; auto. + simpl. unfold incrPC; rewrite Pregmap.gso; auto; try discriminate. + rewrite !Pregmap.gss. rewrite PCeq. rewrite Heqofs'. simpl. auto. + + * (* Direct call *) + monadInv H1. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + remember (Ptrofs.add _ _) as ofs'. + assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc). + econstructor; eauto. + assert (f1 = f) by congruence. subst f1. + exploit return_address_offset_correct; eauto. intros; subst ra. + repeat eexists. + econstructor; eauto. econstructor. + econstructor; eauto. econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros. + unfold incrPC; repeat Simpl; auto. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H12. rewrite H12. auto. + unfold incrPC; simpl; rewrite Pregmap.gso; try discriminate. rewrite !Pregmap.gss. + subst. unfold Val.offset_ptr. rewrite PCeq. auto. + + (* MBtailcall *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + exploit Mem.loadv_extends. eauto. eexact H13. auto. simpl. intros [parent' [A B]]. + destruct s1 as [rf|fid]; simpl in H11. + * monadInv H1. monadInv EQ. + assert (ms' rf = Vptr f' Ptrofs.zero). + { destruct (ms' rf); try discriminate. revert H11. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } + assert (rs2 x = Vptr f' Ptrofs.zero). + { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. } + + assert (f = f1) by congruence. subst f1. clear FIND1. clear H12. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + intros (l & MKEPI & EXEB). + repeat eexists. rewrite app_nil_r in MKEPI. + rewrite <- MKEPI in EXEB. + eauto. econstructor. simpl. unfold incrPC. + rewrite !Pregmap.gso; try discriminate. eauto. + econstructor; eauto. + { apply agree_set_other. + - econstructor; auto with asmgen. + + apply V. + + intro r. destruct r; apply V; auto. + - eauto with asmgen. } + rewrite Pregmap.gss. rewrite Z; auto; try discriminate. + eapply ireg_of_not_X30''; eauto. + eapply ireg_of_not_X16''; eauto. + * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H12. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + intros (l & MKEPI & EXEB). + repeat eexists. inv EQ. rewrite app_nil_r in MKEPI. + rewrite <- MKEPI in EXEB. + eauto. inv EQ. econstructor. simpl. unfold incrPC. + eauto. + econstructor; eauto. + { apply agree_set_other. + - econstructor; auto with asmgen. + + apply V. + + intro r. destruct r; apply V; auto. + - eauto with asmgen. } + { rewrite Pregmap.gss. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H11. auto. } + + (* MBbuiltin *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + remember (Ptrofs.add _ _) as ofs'. + assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc). + econstructor; eauto. + + monadInv TBC. monadInv TIC. inv H0. + + exploit builtin_args_match; eauto. intros [vargs' [P Q]]. + exploit external_call_mem_extends; eauto. + intros [vres' [m2' [A [B [C D]]]]]. + + repeat eexists. econstructor. erewrite <- sp_val by eauto. + eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. eauto. + econstructor; eauto. + unfold incrPC. rewrite Pregmap.gss. + rewrite set_res_other. rewrite undef_regs_other. unfold Val.offset_ptr. rewrite PCeq. + eauto. + intros; simpl in *; destruct H as [HX16 | [HX30 | HDES]]; subst; try discriminate; + exploit list_in_map_inv; eauto; intros [mr [E F]]; subst; discriminate. + auto with asmgen. apply agree_nextblock. eapply agree_set_res; auto. + eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2'; auto. + intros. discriminate. + + (* MBgoto *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H9. + remember (incrPC (Ptrofs.repr (size tbb)) rs2) as rs2'. + exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + exploit find_label_goto_label. + eauto. eauto. + instantiate (2 := rs2'). + { subst. unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. eauto. } + eauto. + intros (tc' & rs' & GOTO & AT2 & INV). + + eexists. eexists. repeat eexists. repeat split. + econstructor; eauto. + rewrite Heqrs2' in INV. unfold incrPC in INV. + rewrite Heqrs2' in GOTO; simpl; eauto. + econstructor; eauto. + eapply agree_exten; eauto with asmgen. + assert (forall r : preg, r <> PC -> rs' r = rs2 r). + { intros. rewrite Heqrs2' in INV. + rewrite INV; unfold incrPC; try rewrite Pregmap.gso; auto. } + eauto with asmgen. + congruence. + + (* MBcond *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. monadInv H1. monadInv EQ. + + * (* MBcond true *) + assert (f0 = f) by congruence. subst f0. + exploit eval_condition_lessdef. + eapply preg_vals; eauto. + all: eauto. + intros EC. + exploit transl_cbranch_correct_1; eauto. intros (rs', H). + destruct H as [ES [ECFI]]. + exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC). + assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. } + rewrite PCeq' in PCeq. + assert (f1 = f) by congruence. subst f1. + exploit find_label_goto_label. + 4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs')). + unfold incrPC, Val.offset_ptr. rewrite PCeq. rewrite Pregmap.gss. eauto. + intros (tc' & rs3 & GOTOL & TLPC & Hrs3). + exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + + repeat eexists. + rewrite <- BTC. simpl. rewrite app_nil_r. eauto. + rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto. + + econstructor; eauto. + eapply agree_exten with rs2; eauto with asmgen. + { intros. rewrite Hrs3; unfold incrPC. Simpl. rewrite H. all: auto. apply PC_not_data_preg; auto. } + intros. discriminate. + * (* MBcond false *) + assert (f0 = f) by congruence. subst f0. monadInv H1. monadInv EQ. + exploit eval_condition_lessdef. + eapply preg_vals; eauto. + all: eauto. + intros EC. + + exploit transl_cbranch_correct_1; eauto. intros (rs', H). + destruct H as [ES [ECFI]]. + exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC). + assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. } + rewrite PCeq' in PCeq. + exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'. + assert (tf = fn) by congruence. subst tf. + + assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. + + repeat eexists. + rewrite <- BTC. simpl. rewrite app_nil_r. eauto. + rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto. + + econstructor; eauto. + unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto. + eapply agree_exten with rs2; eauto with asmgen. + { intros. unfold incrPC. Simpl. rewrite H. all: auto. } + intros. discriminate. + + (* MBjumptable *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + monadInv H1. monadInv EQ. + generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV. + assert (f1 = f) by congruence. subst f1. + exploit find_label_goto_label. 4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs2) # X16 <- Vundef). + unfold incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity. discriminate. + exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn. + + intros [tc' [rs' [A [B C]]]]. + exploit ireg_val; eauto. rewrite H11. intros LD; inv LD. + + repeat eexists. econstructor. simpl. Simpl. 2: { eapply ireg_of_not_X16''; eauto. } + unfold incrPC. rewrite Pregmap.gso; try discriminate. rewrite <- H1. + simpl. unfold Mach.label in H12. unfold label. rewrite H12. eapply A. + econstructor; eauto. + eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen. + { unfold incrPC. repeat Simpl; auto. apply X16_not_data_preg; auto. } + discriminate. + + (* MBreturn *) + destruct bb' as [mhd' mbdy' mex']; simpl in *. subst. + inv TBC. inv TIC. inv H0. + + assert (f0 = f) by congruence. subst f0. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). + exploit exec_straight_body; eauto. + simpl. eauto. + intros EXEB. destruct EXEB as [l [MKEPI EXEB]]. + assert (f1 = f) by congruence. subst f1. + + repeat eexists. + rewrite app_nil_r in MKEPI. rewrite <- MKEPI in EXEB. eauto. + econstructor. simpl. reflexivity. + econstructor; eauto. + unfold incrPC. repeat apply agree_set_other; auto with asmgen. + + - inv MCS. inv MAS. simpl in *. subst. inv Hpstate. + destruct bb' as [hd' bdy' ex']; simpl in *. subst. + monadInv TBC. monadInv TIC. simpl in *. + simpl. repeat eexists. + econstructor. econstructor. 4: instantiate (3 := false). all:eauto. + unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + eapply transf_function_no_overflow; eauto. + assert (f = f0) by congruence. subst f0. econstructor; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto. + eapply agree_exten; eauto. intros. unfold incrPC; Simpl; auto. + discriminate. +Qed. + +(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *) +Theorem step_simu_basic: + forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy, + MB.header bb = nil -> MB.body bb = bi::(bdy) -> + bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} -> + basic_step ge s fb sp ms m bi ms' m' -> + pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists rs2 m2 l cs2 tbdy', + cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := it1_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |} + /\ tbdy = l ++ tbdy' + /\ exec_body lk tge l rs1 m1 = Next rs2 m2 + /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2). +Proof. + intros until bdy. intros Hheader Hbody (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS. + simpl in *. inv Hpstate. + rewrite Hbody in TBC. monadInv TBC. + inv BSTEP. + + - (* MBgetstack *) + simpl in EQ0. + unfold Mach.load_stack in H. + exploit Mem.loadv_extends; eauto. intros [v' [A B]]. + rewrite (sp_val _ _ _ AG) in A. + exploit loadind_correct; eauto with asmgen. + intros (rs2 & EXECS & Hrs'1 & Hrs'2). + eapply exec_straight_body in EXECS. + destruct EXECS as (l & Hlbi & EXECB). + exists rs2, m1, l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. } + subst. simpl in Hheadereq. + + eapply match_codestate_intro; eauto. + eapply agree_set_mreg; eauto with asmgen. + intro Hep. simpl in Hep. discriminate. + - (* MBsetstack *) + simpl in EQ0. + unfold Mach.store_stack in H. + assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. } + exploit Mem.storev_extends; eauto. intros [m2' [A B]]. + exploit storeind_correct; eauto with asmgen. + rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]]. + + eapply exec_straight_body in P. + destruct P as (l & ll & EXECB). + exists rs', m2', l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto. + eapply agree_undef_regs; eauto with asmgen. + simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto. + - (* MBgetparam *) + simpl in EQ0. + + assert (f0 = f) by congruence; subst f0. + unfold Mach.load_stack in *. + exploit Mem.loadv_extends. eauto. eexact H0. auto. + intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'. + exploit Mem.loadv_extends. eauto. eexact H1. auto. + intros [v' [C D]]. + + monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP. + destruct ep0 eqn:EPeq. + + (* X29 contains parent *) + + exploit loadind_correct. eexact EQ1. + instantiate (2 := rs1). rewrite DXP; eauto. discriminate. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + destruct P as (l & ll & EXECB). + exists rs2, m1, l. eexists. + eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. } + subst. + eapply match_codestate_intro; eauto. + + eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. + simpl; intros. rewrite R; auto with asmgen. unfold preg_of. + apply preg_of_not_X29; auto. + + (* X29 does not contain parent *) + + rewrite chunk_of_Tptr in A. + exploit loadptr_correct. eexact A. discriminate. intros [rs2 [P [Q R]]]. + exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto. + discriminate. + intros [rs3 [S [T U]]]. + + exploit exec_straight_trans. + eapply P. + eapply S. + intros EXES. + + eapply exec_straight_body in EXES. + destruct EXES as (l & ll & EXECB). + exists rs3, m1, l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. + eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. + instantiate (1 := rs2#X29 <- (rs3#X29)). intros. + rewrite Pregmap.gso; auto with asmgen. + congruence. + intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen. + simpl; intros. rewrite U; auto with asmgen. + apply preg_of_not_X29; auto. + - (* MBop *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (eval_operation tge sp op (map ms args) m' = Some v). + rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. + exploit eval_operation_lessdef. + eapply preg_vals; eauto. + 2: eexact H0. + all: eauto. + intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + destruct P as (l & ll & EXECB). + exists rs2, m1, l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + subst. + eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto. + apply agree_set_undef_mreg with rs1; auto. + apply Val.lessdef_trans with v'; auto. + simpl; intros. destruct (andb_prop _ _ H1); clear H1. + rewrite R; auto. apply preg_of_not_X29; auto. +Local Transparent destroyed_by_op. + destruct op; simpl; auto; try discriminate. + - (* MBload *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (Op.eval_addressing tge sp addr (map ms args) = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. + intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. + exploit Mem.loadv_extends; eauto. intros [v' [C D]]. destruct trap; try discriminate. + exploit transl_load_correct; eauto. + intros [rs2 [P [Q R]]]. + + eapply exec_straight_body in P. + destruct P as (l & ll & EXECB). + exists rs2, m1, l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. + eapply agree_set_mreg; eauto with asmgen. + intro Hep. simpl in Hep. discriminate. + - (* MBload notrap1 *) + simpl in EQ0. unfold transl_load in EQ0. discriminate. + - (* MBload notrap2 *) + simpl in EQ0. unfold transl_load in EQ0. discriminate. + - (* MBstore *) + simpl in EQ0. rewrite Hheader in DXP. + + assert (Op.eval_addressing tge sp addr (map ms args) = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. + intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. + assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto. + exploit Mem.storev_extends; eauto. intros [m2' [C D]]. + exploit transl_store_correct; eauto. intros [rs2 [P Q]]. + + eapply exec_straight_body in P. + destruct P as (l & ll & EXECB). + exists rs2, m2', l. + eexists. eexists. split. instantiate (1 := x). eauto. + repeat (split; auto). + remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'. + assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. } + subst. + eapply match_codestate_intro; eauto. + eapply agree_undef_regs; eauto with asmgen. + intro Hep. simpl in Hep. discriminate. +Qed. + +Lemma exec_body_trans: + forall l l' rs0 m0 rs1 m1 rs2 m2, + exec_body lk tge l rs0 m0 = Next rs1 m1 -> + exec_body lk tge l' rs1 m1 = Next rs2 m2 -> + exec_body lk tge (l++l') rs0 m0 = Next rs2 m2. +Proof. + induction l. + - simpl. induction l'. intros. + + simpl in *. congruence. + + intros. inv H. auto. + - intros until m2. intros EXEB1 EXEB2. + inv EXEB1. destruct (exec_basic _) eqn:EBI; try discriminate. + simpl. rewrite EBI. eapply IHl; eauto. +Qed. + +Lemma exec_body_control: + forall b t rs1 m1 rs2 m2 rs3 m3 fn, + exec_body lk tge (body b) rs1 m1 = Next rs2 m2 -> + exec_exit tge fn (Ptrofs.repr (size b)) rs2 m2 (exit b) t rs3 m3 -> + exec_bblock lk tge fn b rs1 m1 t rs3 m3. +Proof. + intros until fn. intros EXEB EXECTL. + econstructor; eauto. +Qed. + +Inductive exec_header: codestate -> codestate -> Prop := + | exec_header_cons: forall cs1, + exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1; + cur := cur cs1 |}. + +(* Theorem (A) in the diagram, the easiest of all *) +Theorem step_simu_header: + forall bb s fb sp c ms m rs1 m1 cs1, + pstate cs1 = (State rs1 m1) -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists cs1', + exec_header cs1 cs1' + /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1'). +Proof. + intros until cs1. intros Hpstate MCS. + eexists. split; eauto. + econstructor; eauto. + inv MCS. simpl in *. inv Hpstate. + econstructor; eauto. +Qed. + +(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *) +Theorem step_simu_body: + forall bb s fb sp c ms m rs1 m1 ms' cs1 m', + MB.header bb = nil -> + body_step ge s fb sp (MB.body bb) ms m ms' m' -> + pstate cs1 = (State rs1 m1) -> + match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 -> + (exists rs2 m2 cs2 ep, + cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1; + pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |} + /\ exec_body lk tge (pbody1 cs1) rs1 m1 = Next rs2 m2 + /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2). +Proof. + intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy]. + - intros until m'. intros Hheader BSTEP Hpstate MCS. + inv BSTEP. + exists rs1, m1, cs1, (ep cs1). + inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto). + econstructor; eauto. + - intros until m'. intros Hheader BSTEP Hpstate MCS. inv BSTEP. + rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'. + exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto. + intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS'). + simpl in *. + exploit IHbdy. auto. eapply H6. 2: eapply MCS'. all: eauto. subst; eauto. simpl; auto. + intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS''). + exists rs3, m3, cs3, ep. + repeat (split; simpl; auto). subst. simpl in *. auto. + rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto. +Qed. + +(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *) +(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *) +Lemma step_simulation_bblock': + forall t sf f sp bb bb' bb'' rs m rs' m' s'' c S1, + bb' = mb_remove_header bb -> + body_step ge sf f sp (Machblock.body bb') rs m rs' m' -> + bb'' = mb_remove_body bb' -> + exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') t s'' -> + match_states (Machblock.State sf f sp (bb :: c) rs m) S1 -> + exists S2 : state, plus (step lk) tge S1 t S2 /\ match_states s'' S2. +Proof. + intros until S1. intros Hbb' BSTEP Hbb'' ESTEP MS. + destruct (mbsize bb) eqn:SIZE. + - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit). + destruct bb as [hd bdy ex]; simpl in *; subst. + inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc. + monadInv H2. simpl in *. inv ESTEP. inv BSTEP. + eexists. split. + + eapply plus_one. + exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'. + assert (x = tf) by congruence. subst x. + eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto. + unfold exec_bblock. simpl. + eexists; eexists; split; eauto. + econstructor. + + econstructor. + 1,2,3: eauto. + * + unfold incrPC. rewrite Pregmap.gss. + unfold Val.offset_ptr. rewrite <- H. + assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned). + { eapply transf_function_no_overflow; eauto. } + econstructor; eauto. + generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto. + * + eapply agree_exten; eauto. intros. unfold incrPC. rewrite Pregmap.gso; auto. + unfold data_preg in H2. destruct r; try congruence. + * + intros. discriminate. + - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. } + intros Hnotempty. + + (* initial setting *) + exploit match_state_codestate. + eapply Hnotempty. + all: eauto. + intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate). + + (* step_simu_header part *) + assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. } + destruct H as (rs1 & m1 & Hpstate2). subst. + assert (f = fb). { inv MCS. auto. } subst fb. + exploit step_simu_header. + 2: eapply MCS. + all: eauto. + intros (cs1' & EXEH & MCS2). + + (* step_simu_body part *) + assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. } + exploit step_simu_body. + 2: eapply BSTEP. + 3: eapply MCS2. + all: eauto. rewrite Hpstate'. eauto. + intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS'). + + (* step_simu_control part *) + assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)). + { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. } + destruct H as (tf & FIND'). + inv EXEH. simpl in *. + subst. exploit step_simu_control. + 8: eapply MCS'. all: simpl. + 9: eapply ESTEP. + all: simpl; eauto. + { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto. + erewrite exec_body_pc; eauto. } + intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS'). + + (* bringing the pieces together *) + exploit exec_body_trans. + eapply EXEB. + eauto. + intros EXEB2. + exploit exec_body_control; eauto. + rewrite <- Hbody in EXEB2. eauto. + rewrite Hexit. eauto. + intros EXECB. (* inv EXECB. *) + exists (State rs4 m4). + split; auto. eapply plus_one. rewrite Hpstate2. + assert (exists ofs, rs1 PC = Vptr f ofs). + { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. } + destruct H as (ofs & Hrs1pc). + eapply exec_step_internal; eauto. + + (* proving the initial find_bblock *) + rewrite Hpstate2 in MAS. inv MAS. simpl in *. + assert (f1 = f0) by congruence. subst f0. + rewrite PCeq in Hrs1pc. inv Hrs1pc. + exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''. + inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ. + eapply find_bblock_tail; eauto. +Qed. + +Theorem step_simulation_bblock: + forall t sf f sp bb ms m ms' m' S2 c, + body_step ge sf f sp (Machblock.body bb) ms m ms' m' -> + exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') t S2 -> + forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' -> + exists S2' : state, plus (step lk) tge S1' t S2' /\ match_states S2 S2'. +Proof. + intros until c. intros BSTEP ESTEP S1' MS. + eapply step_simulation_bblock'; eauto. + all: destruct bb as [hd bdy ex]; simpl in *; eauto. + inv ESTEP. + - econstructor. inv H; try (econstructor; eauto; fail). + - econstructor. +Qed. + +(* Measure to prove finite stuttering, see the other backends *) +Definition measure (s: MB.state) : nat := + match s with + | MB.State _ _ _ _ _ _ => 0%nat + | MB.Callstate _ _ _ _ => 0%nat + | MB.Returnstate _ _ _ => 1%nat + end. + +Lemma next_sep: + forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'. +Proof. + congruence. +Qed. + +(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs + for the internal and external function cases *) +Theorem step_simulation: + forall S1 t S2, MB.step return_address_offset ge S1 t S2 -> + forall S1' (MS: match_states S1 S1'), + (exists S2', plus (step lk) tge S1' t S2' /\ match_states S2 S2') + \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. +Proof. + induction 1; intros. + +- (* bblock *) + left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0. + all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock; + try (rewrite MBE; try discriminate); eauto). + + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto. +- (* internal function *) + inv MS. + exploit functions_translated; eauto. intros [tf [A B]]. monadInv B. + generalize EQ; intros EQ'. monadInv EQ'. + destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0. + unfold Mach.store_stack in *. + exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. + intros [m1' [C D]]. + exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. + intros [m2' [F G]]. + simpl chunk_of_type in F. + exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. + intros [m3' [P Q]]. + (* Execution of function prologue *) + monadInv EQ0. + set (tfbody := make_prologue f x0) in *. + set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *. + set (rs2 := rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef). + exploit (storeptr_correct lk tge XSP (fn_retaddr_ofs f) RA nil m2' m3' rs2). + { rewrite chunk_of_Tptr in P. + assert (rs0 X30 = rs2 RA) by auto. + rewrite <- H3. + rewrite ATLR. + change (rs2 XSP) with sp. eexact P. } + 1-2: discriminate. + intros (rs3 & U & V). + assert (EXEC_PROLOGUE: exists rs3', + exec_straight_blocks tge lk tf + tf.(fn_blocks) rs0 m' + x0 rs3' m3' + /\ forall r, r <> PC -> r <> X16 -> rs3' r = rs3 r). + { eexists. split. + - change (fn_blocks tf) with tfbody; unfold tfbody. + econstructor; eauto. + assert (Archi.ptr64 = true) as SF; auto. + + unfold exec_bblock. simpl exec_body. + rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F. + assert (Mptr = Mint64) by auto. rewrite H3 in F. simpl in F. rewrite F. simpl. + unfold exec_store_rs_a. repeat Simpl; try discriminate. + exists rs2. exists m3'. split. + * unfold eval_addressing. Simpl; try discriminate. rewrite Pregmap.gss. + rewrite chunk_of_Tptr in P. rewrite H3 in P. + unfold Val.addl. unfold Val.offset_ptr in P. + destruct sp; simpl; try discriminate. rewrite SF; simpl. + rewrite Ptrofs.of_int64_to_int64. unfold Mem.storev in P. rewrite ATLR. + rewrite P. simpl. apply next_sep; eauto. apply SF. + * econstructor. + + eauto. + - intros. unfold incrPC. + rewrite Pregmap.gso; auto. rewrite V; auto. + } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3'). + exploit exec_straight_steps_2; eauto using functions_transl. + simpl fn_blocks. simpl fn_blocks in g. lia. constructor. + intros (ofs' & X & Y). + left; exists (State rs3' m3'); split. + eapply exec_straight_steps_1; eauto. + simpl fn_blocks. simpl fn_blocks in g. lia. + constructor. + econstructor; eauto. + rewrite X; econstructor; eauto. + apply agree_exten with rs2; eauto with asmgen. + unfold rs2. + apply agree_set_other; auto with asmgen. + apply agree_change_sp with (parent_sp s). + apply agree_undef_regs with rs0. auto. +Local Transparent destroyed_at_function_entry. + simpl; intros; Simpl. auto. + assert (r' <> X29). { contradict H3; rewrite H3; unfold data_preg; auto. } auto. + unfold sp; congruence. + + intros. + + rewrite Heqrs3'. rewrite V. 2-5: try apply X16_not_data_preg; try apply PC_not_data_preg; auto. + auto. + intros. rewrite Heqrs3'; try discriminate. rewrite V by auto with asmgen. reflexivity. +- (* external function *) + inv MS. + exploit functions_translated; eauto. + intros [tf [A B]]. simpl in B. inv B. + exploit extcall_arguments_match; eauto. + intros [args' [C D]]. + exploit external_call_mem_extends; eauto. + intros [res' [m2' [P [Q [R S]]]]]. + left; econstructor; split. + apply plus_one. eapply exec_step_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + econstructor; eauto. + unfold loc_external_result. + apply agree_set_other; auto. + apply agree_set_pair; auto. + apply agree_undef_caller_save_regs; auto. + +- (* return *) + inv MS. + inv STACKS. simpl in *. + right. split. lia. split. auto. + rewrite <- ATPC in H5. + econstructor; eauto. congruence. +Qed. + +Lemma transf_initial_states: + forall st1, MB.initial_state prog st1 -> + exists st2, AB.initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inversion H. unfold ge0 in *. + econstructor; split. + econstructor. + eapply (Genv.init_mem_transf_partial TRANSF); eauto. + replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero) + with (Vptr fb Ptrofs.zero). + econstructor; eauto. + constructor. + apply Mem.extends_refl. + split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence. + intros. rewrite Mach.Regmap.gi. auto. + unfold Genv.symbol_address. + rewrite (match_program_main TRANSF). + rewrite symbols_preserved. + unfold ge; rewrite H1. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r. +Proof. + intros. inv H0. inv H. constructor. assumption. + compute in H1. inv H1. + generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto. +Qed. + +Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop := + Asmblockgenproof0.return_address_offset. + +Lemma transf_program_correct: + forward_simulation (MB.semantics return_address_offset prog) (AB.semantics lk tprog). +Proof. + eapply forward_simulation_star with (measure := measure). + - apply senv_preserved. + - eexact transf_initial_states. + - eexact transf_final_states. + - exact step_simulation. +Qed. + +End PRESERVATION. diff --git a/aarch64/Asmblockgenproof0.v b/aarch64/Asmblockgenproof0.v new file mode 100644 index 00000000..004cfd5c --- /dev/null +++ b/aarch64/Asmblockgenproof0.v @@ -0,0 +1,885 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * "block" version of Asmgenproof0 + + This module is largely adapted from Asmgenproof0.v of the other backends + It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends + It has similar definitions than Asmgenproof0, but adapted to this new structure *) + +Require Import Coqlib. +Require Intv. +Require Import AST. +Require Import Errors. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Memory. +Require Import Globalenvs. +Require Import Events. +Require Import Smallstep. +Require Import Locations. +Require Import Machblock. +Require Import Asmblock. +Require Import Asmblockgen. +Require Import Conventions1. +Require Import Axioms. +Require Import Asmblockprops. +Require Import Lia. + +Module MB:=Machblock. +Module AB:=Asmblock. + +(** * Agreement between Mach registers and processor registers *) + +Hint Extern 2 (_ <> _) => congruence: asmgen. + +Lemma ireg_of_eq: + forall r r', ireg_of r = OK r' -> preg_of r = IR r'. +Proof. + unfold ireg_of; intros. destruct (preg_of r) as [[[rr1|]|]|xsp|]; inv H; auto. +Qed. + +Lemma freg_of_eq: + forall r r', freg_of r = OK r' -> preg_of r = FR r'. +Proof. + unfold freg_of; intros. destruct (preg_of r) as [[fr|]|xsp|]; inv H; auto. +Qed. + +Lemma ireg_of_eq': + forall r r', ireg_of r = OK r' -> dreg_of r = IR r'. +Proof. + unfold ireg_of; intros. destruct r; simpl in *; inv H; auto. +Qed. + +Lemma freg_of_eq': + forall r r', freg_of r = OK r' -> dreg_of r = FR r'. +Proof. + unfold freg_of; intros. destruct r; simpl in *; inv H; auto. +Qed. + +Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop := + match rl with + | nil => True + | r1 :: nil => r <> preg_of r1 + | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl + end. + +Remark preg_notin_charact: + forall r rl, + preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr). +Proof. + induction rl; simpl; intros. + tauto. + destruct rl. + simpl. split. intros. intuition congruence. auto. + rewrite IHrl. split. + intros [A B]. intros. destruct H. congruence. auto. + auto. +Qed. + +Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree { + agree_sp: rs#SP = sp; + agree_sp_def: sp <> Vundef; + agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r)) +}. + +Lemma agree_exten: + forall ms sp rs rs', + agree ms sp rs -> + (forall r, data_preg r = true -> rs'#r = rs#r) -> + agree ms sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite H0; auto. auto. + intros. rewrite H0; auto. apply preg_of_data. +Qed. + +Lemma preg_val: + forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r). +Proof. + intros. destruct H. auto. +Qed. + +Lemma preg_vals: + forall ms sp rs, agree ms sp rs -> + forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)). +Proof. + induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto. +Qed. + +Lemma preg_of_injective: + forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2. +Proof. + destruct r1; destruct r2; simpl; intros; reflexivity || discriminate. +Qed. + +Lemma sp_val: + forall ms sp rs, agree ms sp rs -> sp = rs#SP. +Proof. + intros. destruct H; auto. +Qed. + +Lemma ireg_val: + forall ms sp rs r r', + agree ms sp rs -> + ireg_of r = OK r' -> + Val.lessdef (ms r) rs#r'. +Proof. + intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto. +Qed. + +Lemma preg_of_not_X29: forall dst, + negb (mreg_eq dst R29) = true -> + DR (IR X29) <> preg_of dst. +Proof. + intros. destruct dst; try discriminate. +Qed. + +Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen. + +(** Preservation of register agreement under various assignments. *) + +Lemma agree_set_mreg: + forall ms sp rs r v rs', + agree ms sp rs -> + Val.lessdef v (rs'#(preg_of r)) -> + (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') -> + agree (Mach.Regmap.set r v ms) sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP. + intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence. + rewrite H1. auto. apply preg_of_data. + red; intros; elim n. eapply preg_of_injective; eauto. +Qed. + +Corollary agree_set_mreg_parallel: + forall ms sp rs r v v', + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs). +Proof. + intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto. +Qed. + +Lemma agree_set_other: + forall ms sp rs r v, + agree ms sp rs -> + data_preg r = false -> + agree ms sp (rs#r <- v). +Proof. + intros. apply agree_exten with rs. auto. + intros. apply Pregmap.gso. congruence. +Qed. + +Lemma agree_nextblock: + forall ms sp rs b, + agree ms sp rs -> agree ms sp (incrPC (Ptrofs.repr (size b)) rs). +Proof. + intros. unfold incrPC. apply agree_set_other. auto. auto. +Qed. + +Lemma agree_set_pair: + forall sp p v v' ms rs, + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs). +Proof. + intros. destruct p; simpl. + - apply agree_set_mreg_parallel; auto. + - apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto. + apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto. +Qed. + +Lemma agree_set_res: + forall res ms sp rs v v', + agree ms sp rs -> + Val.lessdef v v' -> + agree (Mach.set_res res v ms) sp (set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v' rs). +Proof. + induction res; simpl; intros. + - eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto. + intros. apply Pregmap.gso; auto. + - auto. + - apply IHres2. apply IHres1. auto. + apply Val.hiword_lessdef; auto. + apply Val.loword_lessdef; auto. +Qed. + +Lemma agree_undef_regs: + forall ms sp rl rs rs', + agree ms sp rs -> + (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.undef_regs rl ms) sp rs'. +Proof. + intros. destruct H. split; auto. + rewrite <- agree_sp0. apply H0; auto. + rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP. + intros. destruct (In_dec mreg_eq r rl). + rewrite Mach.undef_regs_same; auto. + rewrite Mach.undef_regs_other; auto. rewrite H0; auto. + apply preg_of_data. + rewrite preg_notin_charact. intros; red; intros. elim n. + exploit preg_of_injective; eauto. congruence. +Qed. + +Lemma agree_set_undef_mreg: + forall ms sp rs r v rl rs', + agree ms sp rs -> + Val.lessdef v (rs'#(preg_of r)) -> + (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') -> + agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'. +Proof. + intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto. + apply agree_undef_regs with rs; auto. + intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)). + congruence. auto. + intros. rewrite Pregmap.gso; auto. +Qed. + +Lemma agree_undef_caller_save_regs: + forall ms sp rs, + agree ms sp rs -> + agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs). +Proof. + intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split. + - unfold proj_sumbool; rewrite dec_eq_true. auto. + - auto. + - intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP). + destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl. + + apply list_in_map_inv in i. destruct i as (mr & A & B). + assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A. + apply List.filter_In in B. destruct B as [C D]. rewrite D. auto. + + destruct (is_callee_save r) eqn:CS; auto. + elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete. +Qed. + +Lemma agree_change_sp: + forall ms sp rs sp', + agree ms sp rs -> sp' <> Vundef -> + agree ms sp' (rs#SP <- sp'). +Proof. + intros. inv H. split; auto. + intros. rewrite Pregmap.gso; auto with asmgen. +Qed. + +Remark builtin_arg_match: + forall ge (rs: regset) sp m a v, + eval_builtin_arg ge (fun r => rs (dreg_of r)) sp m a v -> + eval_builtin_arg ge (fun r => rs (DR r)) sp m (map_builtin_arg dreg_of a) v. +Proof. + induction 1; simpl; eauto with barg. econstructor. +Qed. + +Lemma builtin_args_match: + forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' -> + forall al vl, eval_builtin_args ge ms sp m al vl -> + exists vl', eval_builtin_args ge (fun r => rs (DR r)) sp m' (map (map_builtin_arg dreg_of) al) vl' + /\ Val.lessdef_list vl vl'. +Proof. + induction 3; intros; simpl. + exists (@nil val); split; constructor. + exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto. + intros; eapply preg_val; eauto. + intros (v1' & A & B). + destruct IHlist_forall2 as [vl' [C D]]. + exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto. +Qed. + +(** Connection between Mach and Asm calling conventions for external + functions. *) + +Lemma extcall_arg_match: + forall ms sp rs m m' l v, + agree ms sp rs -> + Mem.extends m m' -> + Mach.extcall_arg ms m sp l v -> + exists v', extcall_arg rs m' l v' /\ Val.lessdef v v'. +Proof. + intros. inv H1. + exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto. + unfold Mach.load_stack in H2. + exploit Mem.loadv_extends; eauto. intros [v' [A B]]. + rewrite (sp_val _ _ _ H) in A. + exists v'; split; auto. + econstructor. eauto. assumption. +Qed. + +Lemma extcall_arg_pair_match: + forall ms sp rs m m' p v, + agree ms sp rs -> + Mem.extends m m' -> + Mach.extcall_arg_pair ms m sp p v -> + exists v', extcall_arg_pair rs m' p v' /\ Val.lessdef v v'. +Proof. + intros. inv H1. + - exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto. + - exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1). + exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2). + exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto. +Qed. + +Lemma extcall_args_match: + forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' -> + forall ll vl, + list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl -> + exists vl', list_forall2 (extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'. +Proof. + induction 3; intros. + exists (@nil val); split. constructor. constructor. + exploit extcall_arg_pair_match; eauto. intros [v1' [A B]]. + destruct IHlist_forall2 as [vl' [C D]]. + exists (v1' :: vl'); split; constructor; auto. +Qed. + +Lemma extcall_arguments_match: + forall ms m m' sp rs sg args, + agree ms sp rs -> Mem.extends m m' -> + Mach.extcall_arguments ms m sp sg args -> + exists args', extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'. +Proof. + unfold Mach.extcall_arguments, extcall_arguments; intros. + eapply extcall_args_match; eauto. +Qed. + +Lemma set_res_other: + forall r res v rs, + data_preg r = false -> + set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v rs r = rs r. +Proof. + induction res; simpl; intros. + - apply Pregmap.gso. red; intros; subst r. rewrite dreg_of_data in H; discriminate. + - auto. + - rewrite IHres2, IHres1; auto. +Qed. + +Lemma undef_regs_other: + forall r rl rs, + (forall r', In r' rl -> r <> r') -> + undef_regs rl rs r = rs r. +Proof. + induction rl; simpl; intros. auto. + rewrite IHrl by auto. rewrite Pregmap.gso; auto. +Qed. + +Lemma undef_regs_other_2: + forall r rl rs, + preg_notin r rl -> + undef_regs (map preg_of rl) rs r = rs r. +Proof. + intros. apply undef_regs_other. intros. + exploit list_in_map_inv; eauto. intros [mr [A B]]. subst. + rewrite preg_notin_charact in H. auto. +Qed. + +Inductive code_tail: Z -> bblocks -> bblocks -> Prop := + | code_tail_0: forall c, + code_tail 0 c c + | code_tail_S: forall pos bi c1 c2, + code_tail pos c1 c2 -> + code_tail (pos + (size bi)) (bi :: c1) c2. + +Lemma code_tail_pos: + forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0. +Proof. + induction 1. lia. generalize (size_positive bi); intros; lia. +Qed. + +Lemma find_bblock_tail: + forall c1 bi c2 pos, + code_tail pos c1 (bi :: c2) -> + find_bblock pos c1 = Some bi. +Proof. + induction c1; simpl; intros. + inversion H. + destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; lia. + destruct (zeq pos 0). subst pos. + inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; lia. + inv H. congruence. replace (pos0 + size a - size a) with pos0 by lia. + eauto. +Qed. + +Local Hint Resolve code_tail_0 code_tail_S: core. + +Lemma code_tail_next: + forall fn ofs c0, + code_tail ofs fn c0 -> + forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1. +Proof. + induction 1; intros. + - subst; eauto. + - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto. + lia. +Qed. + +Lemma size_blocks_pos c: 0 <= size_blocks c. +Proof. + induction c as [| a l ]; simpl; try lia. + generalize (size_positive a); lia. +Qed. + +Remark code_tail_positive: + forall fn ofs c, + code_tail ofs fn c -> 0 <= ofs. +Proof. + induction 1; intros; simpl. + - lia. + - generalize (size_positive bi). lia. +Qed. + +Remark code_tail_size: + forall fn ofs c, + code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c. +Proof. + induction 1; intros; simpl; try lia. +Qed. + +Remark code_tail_bounds fn ofs c: + code_tail ofs fn c -> 0 <= ofs <= size_blocks fn. +Proof. + intro H; + exploit code_tail_size; eauto. + generalize (code_tail_positive _ _ _ H), (size_blocks_pos c). + lia. +Qed. + +Local Hint Resolve code_tail_next: core. + +Lemma code_tail_next_int: + forall fn ofs bi c, + size_blocks fn <= Ptrofs.max_unsigned -> + code_tail (Ptrofs.unsigned ofs) fn (bi :: c) -> + code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c. +Proof. + intros. + exploit code_tail_size; eauto. + simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c). + intros. + rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr. + - rewrite Ptrofs.unsigned_repr; eauto. + lia. + - rewrite Ptrofs.unsigned_repr; lia. +Qed. + +(** The [find_label] function returns the code tail starting at the + given label. A connection with [code_tail] is then established. *) + +Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks := + match c with + | nil => None + | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl + end. + +(* inspired from Mach *) + +Lemma find_label_tail: + forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c. +Proof. + induction c; simpl; intros. discriminate. + destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib. +Qed. + +Lemma label_pos_code_tail: + forall lbl c pos c', + find_label lbl c = Some c' -> + exists pos', + label_pos lbl pos c = Some pos' + /\ code_tail (pos' - pos) c c' + /\ pos <= pos' <= pos + size_blocks c. +Proof. + induction c. + simpl; intros. discriminate. + simpl; intros until c'. + case (is_label lbl a). + - intros. inv H. exists pos. split; auto. split. + replace (pos - pos) with 0 by lia. constructor. constructor; try lia. + generalize (size_blocks_pos c). generalize (size_positive a). lia. + - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]]. + exists pos'. split. auto. split. + replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by lia. + constructor. auto. generalize (size_positive a). lia. +Qed. + +(** Predictor for return addresses in generated Asm code. + + The [return_address_offset] predicate defined here is used in the + semantics for Mach to determine the return addresses that are + stored in activation records. *) + +(** Consider a Mach function [f] and a sequence [c] of Mach instructions + representing the Mach code that remains to be executed after a + function call returns. The predicate [return_address_offset f c ofs] + holds if [ofs] is the integer offset of the PPC instruction + following the call in the Asm code obtained by translating the + code of [f]. Graphically: +<< + Mach function f |--------- Mcall ---------| + Mach code c | |--------| + | \ \ + | \ \ + | \ \ + Asm code | |--------| + Asm function |------------- Pcall ---------| + + <-------- ofs -------> +>> +*) + +Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop := + forall tf tc, + transf_function f = OK tf -> + transl_blocks f c false = OK tc -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc. + +Lemma transl_blocks_tail: + forall f c1 c2, is_tail c1 c2 -> + forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 -> + exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2. +Proof. + induction 1; simpl; intros. + exists tc2; exists ep2; split; auto with coqlib. + monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B). + exists tc1; exists ep1; split. auto. + eapply is_tail_trans with x0; eauto with coqlib. +Qed. + +Lemma is_tail_code_tail: + forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1. +Proof. + induction 1; eauto. + destruct IHis_tail; eauto. +Qed. + +Section RETADDR_EXISTS. + +Hypothesis transf_function_inv: + forall f tf, transf_function f = OK tf -> + exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf). + +Hypothesis transf_function_len: + forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned. + + +Lemma return_address_exists: + forall b f c, is_tail (b :: c) f.(MB.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros. destruct (transf_function f) as [tf|] eqn:TF. + + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1). + exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2). + monadInv TR2. + assert (TL3: is_tail x0 (fn_blocks tf)). + { apply is_tail_trans with tc1; auto. + apply is_tail_trans with (x++x0); auto. eapply is_tail_app. + } + exploit is_tail_code_tail. eexact TL3. intros [ofs CT]. + exists (Ptrofs.repr ofs). red; intros. + rewrite Ptrofs.unsigned_repr. congruence. + exploit code_tail_bounds; eauto. + intros; apply transf_function_len in TF. lia. + + exists Ptrofs.zero; red; intros. congruence. +Qed. + +End RETADDR_EXISTS. + +(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points + within the Asmblock code generated by translating Machblock function [f], + and [tc] is the tail of the generated code at the position corresponding + to the code pointer [pc]. *) + +Inductive transl_code_at_pc (ge: MB.genv): + val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop := + transl_code_at_pc_intro: + forall b ofs f c ep tf tc, + Genv.find_funct_ptr ge b = Some(Internal f) -> + transf_function f = Errors.OK tf -> + transl_blocks f c ep = OK tc -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc -> + transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc. + +Remark code_tail_no_bigger: + forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat. +Proof. + induction 1; simpl; lia. +Qed. + +Remark code_tail_unique: + forall fn c pos pos', + code_tail pos fn c -> code_tail pos' fn c -> pos = pos'. +Proof. + induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. + generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia. + f_equal. eauto. +Qed. + +Lemma return_address_offset_correct: + forall ge b ofs fb f c tf tc ofs', + transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc -> + return_address_offset f c ofs' -> + ofs' = ofs. +Proof. + intros. inv H. red in H0. + exploit code_tail_unique. eexact H12. eapply H0; eauto. intro. + rewrite <- (Ptrofs.repr_unsigned ofs). + rewrite <- (Ptrofs.repr_unsigned ofs'). + congruence. +Qed. + +Section STRAIGHTLINE. + +Variable ge: genv. +Variable lk: aarch64_linker. +Variable fn: function. + +(** Straight-line code is composed of processor instructions that execute + in sequence (no branches, no function calls and returns). + The following inductive predicate relates the machine states + before and after executing a straight-line sequence of instructions. + Instructions are taken from the first list instead of being fetched + from memory. *) + +Inductive exec_straight: list basic -> regset -> mem -> + list basic -> regset -> mem -> Prop := + | exec_straight_one: + forall i1 c rs1 m1 rs2 m2, + exec_basic lk ge i1 rs1 m1 = Next rs2 m2 -> + exec_straight (i1 :: c) rs1 m1 c rs2 m2 + | exec_straight_step: + forall i c rs1 m1 rs2 m2 c' rs3 m3, + exec_basic lk ge i rs1 m1 = Next rs2 m2 -> + exec_straight c rs2 m2 c' rs3 m3 -> + exec_straight (i :: c) rs1 m1 c' rs3 m3. + +Lemma exec_straight_trans: + forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_straight c1 rs1 m1 c2 rs2 m2 -> + exec_straight c2 rs2 m2 c3 rs3 m3 -> + exec_straight c1 rs1 m1 c3 rs3 m3. +Proof. + induction 1; intros. + apply exec_straight_step with rs2 m2; auto. + apply exec_straight_step with rs2 m2; auto. +Qed. + +Lemma exec_straight_two: + forall i1 i2 c rs1 m1 rs2 m2 rs3 m3, + exec_basic lk ge i1 rs1 m1 = Next rs2 m2 -> + exec_basic lk ge i2 rs2 m2 = Next rs3 m3 -> + exec_straight (i1 :: i2 :: c) rs1 m1 c rs3 m3. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + apply exec_straight_one; auto. +Qed. + +Lemma exec_straight_three: + forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4, + exec_basic lk ge i1 rs1 m1 = Next rs2 m2 -> + exec_basic lk ge i2 rs2 m2 = Next rs3 m3 -> + exec_basic lk ge i3 rs3 m3 = Next rs4 m4 -> + exec_straight (i1 :: i2 :: i3 :: c) rs1 m1 c rs4 m4. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + eapply exec_straight_two; eauto. +Qed. + +Inductive exec_straight_opt: list basic -> regset -> mem -> list basic -> regset -> mem -> Prop := + | exec_straight_opt_refl: forall c rs m, + exec_straight_opt c rs m c rs m + | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2, + exec_straight c1 rs1 m1 c2 rs2 m2 -> + exec_straight_opt c1 rs1 m1 c2 rs2 m2. + +Remark exec_straight_opt_right: + forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2, + exec_straight_opt c1 rs1 m1 c2 rs2 m2 -> + exec_straight c2 rs2 m2 c3 rs3 m3 -> + exec_straight c1 rs1 m1 c3 rs3 m3. +Proof. + destruct 1; intros. auto. eapply exec_straight_trans; eauto. +Qed. + +Lemma exec_straight_opt_step: + forall i c rs1 m1 rs2 m2 c' rs3 m3, + exec_basic lk ge i rs1 m1 = Next rs2 m2 -> + exec_straight_opt c rs2 m2 c' rs3 m3 -> + exec_straight (i :: c) rs1 m1 c' rs3 m3. +Proof. + intros. inv H0. + - apply exec_straight_one; auto. + - eapply exec_straight_step; eauto. +Qed. + +Lemma exec_straight_opt_step_opt: + forall i c rs1 m1 rs2 m2 c' rs3 m3, + exec_basic lk ge i rs1 m1 = Next rs2 m2 -> + exec_straight_opt c rs2 m2 c' rs3 m3 -> + exec_straight_opt (i :: c) rs1 m1 c' rs3 m3. +Proof. + intros. apply exec_straight_opt_intro. eapply exec_straight_opt_step; eauto. +Qed. + +(** Like exec_straight predicate, but on blocks *) + +Inductive exec_straight_blocks: bblocks -> regset -> mem -> + bblocks -> regset -> mem -> Prop := + | exec_straight_blocks_one: + forall b1 c rs1 m1 rs2 m2, + exec_bblock lk ge fn b1 rs1 m1 E0 rs2 m2 -> + rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) -> + exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2 + | exec_straight_blocks_step: + forall b c rs1 m1 rs2 m2 c' rs3 m3, + exec_bblock lk ge fn b rs1 m1 E0 rs2 m2 -> + rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) -> + exec_straight_blocks c rs2 m2 c' rs3 m3 -> + exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3. + +Lemma exec_straight_blocks_trans: + forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_straight_blocks c1 rs1 m1 c2 rs2 m2 -> + exec_straight_blocks c2 rs2 m2 c3 rs3 m3 -> + exec_straight_blocks c1 rs1 m1 c3 rs3 m3. +Proof. + induction 1; intros. + eapply exec_straight_blocks_step; eauto. + eapply exec_straight_blocks_step; eauto. +Qed. + +(** Linking exec_straight with exec_straight_blocks *) + +Lemma exec_straight_pc: + forall c c' rs1 m1 rs2 m2, + exec_straight c rs1 m1 c' rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + induction c; intros; try (inv H; fail). + inv H. + - eapply exec_basic_instr_pc; eauto. + - rewrite (IHc c' rs3 m3 rs2 m2); auto. + erewrite exec_basic_instr_pc; eauto. +Qed. + +Lemma exec_body_pc: + forall ge l rs1 m1 rs2 m2, + exec_body lk ge l rs1 m1 = Next rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + induction l. + - intros. inv H. auto. + - intros until m2. intro EXEB. + inv EXEB. destruct (exec_basic _ _ _ _ _) eqn:EBI; try discriminate. + eapply IHl in H0. rewrite H0. + destruct s. + erewrite exec_basic_instr_pc; eauto. +Qed. + +(** The following lemmas show that straight-line executions + (predicate [exec_straight_blocks]) correspond to correct Asm executions. *) + +Lemma exec_straight_steps_1: + forall c rs m c' rs' m', + exec_straight_blocks c rs m c' rs' m' -> + size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal fn) -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c -> + plus (step lk) ge (State rs m) E0 (State rs' m'). +Proof. + induction 1; intros. + apply plus_one. + econstructor; eauto. + eapply find_bblock_tail. eauto. + eapply plus_left'. + econstructor; eauto. + eapply find_bblock_tail. eauto. + apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))). + auto. rewrite H0. rewrite H3. reflexivity. + auto. + apply code_tail_next_int; auto. + traceEq. +Qed. + +Lemma exec_straight_steps_2: + forall c rs m c' rs' m', + exec_straight_blocks c rs m c' rs' m' -> + size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some (Internal fn) -> + code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c -> + exists ofs', + rs'#PC = Vptr b ofs' + /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'. +Proof. + induction 1; intros. + exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split. + rewrite H0. rewrite H2. auto. + apply code_tail_next_int; auto. + apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))). + auto. rewrite H0. rewrite H3. reflexivity. auto. + apply code_tail_next_int; auto. +Qed. + +End STRAIGHTLINE. + +(** * Properties of the Machblock call stack *) + +Section MATCH_STACK. + +Variable ge: MB.genv. + +Inductive match_stack: list MB.stackframe -> Prop := + | match_stack_nil: + match_stack nil + | match_stack_cons: forall fb sp ra c s f tf tc, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transl_code_at_pc ge ra fb f c false tf tc -> + sp <> Vundef -> + match_stack s -> + match_stack (Stackframe fb sp ra c :: s). + +Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef. +Proof. + induction 1; simpl. + unfold Vnullptr; destruct Archi.ptr64; congruence. + auto. +Qed. + +Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef. +Proof. + induction 1; simpl. + unfold Vnullptr; destruct Archi.ptr64; congruence. + inv H0. congruence. +Qed. + +Lemma lessdef_parent_sp: + forall s v, + match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s. +Proof. + intros. inv H0. auto. exploit parent_sp_def; eauto. tauto. +Qed. + +Lemma lessdef_parent_ra: + forall s v, + match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s. +Proof. + intros. inv H0. auto. exploit parent_ra_def; eauto. tauto. +Qed. + +End MATCH_STACK. diff --git a/aarch64/Asmblockgenproof1.v b/aarch64/Asmblockgenproof1.v new file mode 100644 index 00000000..61d77881 --- /dev/null +++ b/aarch64/Asmblockgenproof1.v @@ -0,0 +1,1955 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** * Proof of correctness for individual instructions *) + +Require Import Coqlib Errors Maps Zbits. +Require Import AST Integers Floats Values Memory Globalenvs Linking. +Require Import Op Locations Machblock Conventions Lia. +Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops. + +Module MB := Machblock. +Module AB := Asmblock. + +Section CONSTRUCTORS. + +Variable lk: aarch64_linker. +Variable ge: genv. +Variable fn: function. + +Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs), + Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address ge id ofs. + +Ltac Simplif := + ((rewrite Pregmap.gss) + || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +Ltac ArgsInv := + repeat (match goal with + | [ H: Error _ = OK _ |- _ ] => discriminate + | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args + | [ H: bind _ _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + end); + subst; + repeat (match goal with + | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in * + | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in * + end). + +Ltac SimplEval H := + match type of H with + | Some _ = None _ => discriminate + | Some _ = Some _ => inversion H; subst + | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity) +end. + +Ltac TranslOpSimpl := + econstructor; split; + [ apply exec_straight_one; reflexivity + | split; [ apply Val.lessdef_same; simpl; Simpl; fail | intros; simpl; Simpl; fail ] ]. + +Ltac TranslOpSimplN := + econstructor; split; + try apply exec_straight_one; try reflexivity; try split; try apply Val.lessdef_same; + Simpl; simpl; try destruct negb; Simpl; try intros; Simpl; simpl; try destruct negb; Simpl. + +Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC. +Proof. + destruct r; simpl; try discriminate. +Qed. +Hint Resolve preg_of_iregsp_not_PC: asmgen. + +Lemma preg_of_not_X16: forall r, preg_of r <> X16. +Proof. + destruct r; simpl; try discriminate. +Qed. + +Lemma preg_of_not_X30: forall r, preg_of r <> X30. +Proof. + destruct r; simpl; try discriminate. +Qed. + +Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16. +Proof. + unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H. + red; intros; subst x. elim (preg_of_not_X16 r); auto. + destruct d. destruct i. inv H1; auto. + all: discriminate. +Qed. + +Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16. +Proof. + intros. apply ireg_of_not_X16 in H. congruence. +Qed. + +Lemma ireg_of_not_X16'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X16). +Proof. + intros. apply ireg_of_not_X16 in H. congruence. +Qed. + +Lemma ireg_of_not_X30: forall r x, ireg_of r = OK x -> x <> X30. +Proof. + unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H. + red; intros; subst x. elim (preg_of_not_X30 r); auto. + destruct d. destruct i. inv H1; auto. + all: discriminate. +Qed. + +Lemma ireg_of_not_X30': forall r x, ireg_of r = OK x -> IR x <> IR X30. +Proof. + intros. apply ireg_of_not_X30 in H. congruence. +Qed. + +Lemma ireg_of_not_X30'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X30). +Proof. + intros. apply ireg_of_not_X30 in H. congruence. +Qed. + +Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16' ireg_of_not_X16'': asmgen. +Hint Resolve preg_of_not_X30 ireg_of_not_X30 ireg_of_not_X30' ireg_of_not_X30'': asmgen. + +Inductive wf_decomposition: list (Z * Z) -> Prop := + | wf_decomp_nil: + wf_decomposition nil + | wf_decomp_cons: forall m n p l, + n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l -> + wf_decomposition ((n, p) :: l). + +Lemma decompose_int_wf: + forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p). +Proof. +Local Opaque Zzero_ext. + induction N as [ | N]; simpl; intros. + - constructor. + - set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0). + + apply IHN. lia. + + econstructor. reflexivity. lia. apply IHN; lia. +Qed. + +Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z := + match l with + | nil => accu + | (n, p) :: l => recompose_int (Zinsert accu n p 16) l + end. + +Lemma decompose_int_correct: + forall N n p accu, + 0 <= p -> + (forall i, p <= i -> Z.testbit accu i = false) -> + (forall i, 0 <= i < p + Z.of_nat N * 16 -> + Z.testbit (recompose_int accu (decompose_int N n p)) i = + if zlt i p then Z.testbit accu i else Z.testbit n i). +Proof. + induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE. + - simpl. rewrite zlt_true; auto. lia. + - rewrite inj_S in RANGE. simpl. + set (frag := Zzero_ext 16 (Z.shiftr n p)). + assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)). + { unfold frag; intros. rewrite Zzero_ext_spec by lia. rewrite zlt_true by lia. + rewrite Z.shiftr_spec by lia. f_equal; lia. } + destruct (Z.eqb_spec frag 0). + + rewrite IHN. + * destruct (zlt i p). rewrite zlt_true by lia. auto. + destruct (zlt i (p + 16)); auto. + rewrite ABOVE by lia. rewrite FRAG by lia. rewrite e, Z.testbit_0_l. auto. + * lia. + * intros; apply ABOVE; lia. + * lia. + + simpl. rewrite IHN. + * destruct (zlt i (p + 16)). + ** rewrite Zinsert_spec by lia. unfold proj_sumbool. + rewrite zlt_true by lia. + destruct (zlt i p). + rewrite zle_false by lia. auto. + rewrite zle_true by lia. simpl. symmetry; apply FRAG; lia. + ** rewrite Z.ldiff_spec, Z.shiftl_spec by lia. + change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by lia. + rewrite zlt_false by lia. rewrite zlt_false by lia. apply andb_true_r. + * lia. + * intros. rewrite Zinsert_spec by lia. unfold proj_sumbool. + rewrite zle_true by lia. rewrite zlt_false by lia. simpl. + apply ABOVE. lia. + * lia. +Qed. + +Corollary decompose_int_eqmod: forall N n, + eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n. +Proof. + intros; apply eqmod_same_bits; intros. + rewrite decompose_int_correct. apply zlt_false; lia. + lia. intros; apply Z.testbit_0_l. lia. +Qed. + +Corollary decompose_notint_eqmod: forall N n, + eqmod (two_power_nat (N * 16)%nat) + (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n. +Proof. + intros; apply eqmod_same_bits; intros. + rewrite Z.lnot_spec, decompose_int_correct. + rewrite zlt_false by lia. rewrite Z.lnot_spec by lia. apply negb_involutive. + lia. intros; apply Z.testbit_0_l. lia. lia. +Qed. + +Lemma negate_decomposition_wf: + forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l). +Proof. + induction 1; simpl; econstructor; auto. + instantiate (1 := (Z.lnot m)). + apply equal_same_bits; intros. + rewrite H. change 65535 with (two_p 16 - 1). + rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by lia. + destruct (zlt i 16). + apply xorb_true_r. + auto. +Qed. + +Lemma Zinsert_eqmod: + forall n x1 x2 y p l, 0 <= p -> 0 <= l -> + eqmod (two_power_nat n) x1 x2 -> + eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l). +Proof. + intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by lia. + destruct (zle p i && zlt i (p + l)); auto. + apply same_bits_eqmod with n; auto. +Qed. + +Lemma Zinsert_0_l: + forall y p l, + 0 <= p -> 0 <= l -> + Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l. +Proof. + intros. apply equal_same_bits; intros. + rewrite Zinsert_spec by lia. unfold proj_sumbool. + destruct (zlt i p); [rewrite zle_false by lia|rewrite zle_true by lia]; simpl. + - rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto. + - rewrite Z.shiftl_spec by lia. + destruct (zlt i (p + l)); auto. + rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by lia. auto. +Qed. + +Lemma recompose_int_negated: + forall l, wf_decomposition l -> + forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l). +Proof. + induction 1; intros accu; simpl. + - auto. + - rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros. + rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by lia. + unfold proj_sumbool. + destruct (zle p i); simpl; auto. + destruct (zlt i (p + 16)); simpl; auto. + change 65535 with (two_p 16 - 1). + rewrite Ztestbit_two_p_m1 by lia. rewrite zlt_true by lia. + apply xorb_true_r. +Qed. + +Lemma exec_loadimm_k_w: + forall (rd: ireg) k m l, + wf_decomposition l -> + forall (rs: regset) accu, + rs#rd = Vint (Int.repr accu) -> + exists rs', + exec_straight_opt ge lk (loadimm_k W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (recompose_int accu l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + induction 1; intros rs accu ACCU; simpl. + - exists rs; split. apply exec_straight_opt_refl. auto. + - destruct (IHwf_decomposition + ((rs#rd <- (insert_in_int rs#rd n p 16))) + (Zinsert accu n p 16)) + as (rs' & P & Q & R). + Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. + apply Zinsert_eqmod. auto. lia. apply Int.eqm_sym; apply Int.eqm_unsigned_repr. + exists rs'; split. + eapply exec_straight_opt_step_opt. simpl. eauto. auto. + split. exact Q. intros; Simpl. rewrite R by auto. Simpl. +Qed. + +Lemma exec_loadimm_z_w: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge lk (loadimm_z W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (recompose_int 0 l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_z; destruct 1. + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. + - set (accu0 := Zinsert 0 n p 16). + set (rs1 := rs#rd <- (Vint (Int.repr accu0))). + destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia. + split. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm_n_w: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge lk (loadimm_n W rd l k) rs m k rs' m + /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l))) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_n; destruct 1. + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. + - set (accu0 := Z.lnot (Zinsert 0 n p 16)). + set (rs1 := rs#rd <- (Vint (Int.repr accu0))). + destruct (exec_loadimm_k_w rd k m (negate_decomposition l) + (negate_decomposition_wf l H1) + rs1 accu0) as (rs2 & P & Q & R). + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. + unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia. + split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm32: + forall rd n k rs m, + exists rs', + exec_straight ge lk (loadimm32 rd n k) rs m k rs' m + /\ rs'#rd = Vint n + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm32, loadimm; intros. + destruct (is_logical_imm32 n). + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto. + intros; Simpl. + - set (dz := decompose_int 2%nat (Int.unsigned n) 0). + set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0). + assert (A: Int.repr (recompose_int 0 dz) = n). + { transitivity (Int.repr (Int.unsigned n)). + apply Int.eqm_samerepr. apply decompose_int_eqmod. + apply Int.repr_unsigned. } + assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n). + { transitivity (Int.repr (Int.unsigned n)). + apply Int.eqm_samerepr. apply decompose_notint_eqmod. + apply Int.repr_unsigned. } + destruct Nat.leb. + + rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; lia. + + rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; lia. +Qed. + +Lemma exec_loadimm_k_x: + forall (rd: ireg) k m l, + wf_decomposition l -> + forall (rs: regset) accu, + rs#rd = Vlong (Int64.repr accu) -> + exists rs', + exec_straight_opt ge lk (loadimm_k X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + induction 1; intros rs accu ACCU; simpl. + - exists rs; split. apply exec_straight_opt_refl. auto. + - destruct (IHwf_decomposition + (rs#rd <- (insert_in_long rs#rd n p 16)) + (Zinsert accu n p 16)) + as (rs' & P & Q & R). + Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr. + apply Zinsert_eqmod. auto. lia. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr. + exists rs'; split. + eapply exec_straight_opt_step_opt. simpl; eauto. auto. + split. exact Q. intros; Simpl. rewrite R by auto. Simpl. +Qed. + +Lemma exec_loadimm_z_x: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge lk (loadimm_z X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l)) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_z; destruct 1. + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. + - set (accu0 := Zinsert 0 n p 16). + set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))). + destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia. + split. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm_n_x: + forall rd l k rs m, + wf_decomposition l -> + exists rs', + exec_straight ge lk (loadimm_n X rd l k) rs m k rs' m + /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l))) + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm_n; destruct 1. + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. + intros; Simpl. + - set (accu0 := Z.lnot (Zinsert 0 n p 16)). + set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))). + destruct (exec_loadimm_k_x rd k m (negate_decomposition l) + (negate_decomposition_wf l H1) + rs1 accu0) as (rs2 & P & Q & R). + unfold rs1; Simpl. + exists rs2; split. + eapply exec_straight_opt_step; eauto. + simpl. unfold rs1. do 5 f_equal. + unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia. + split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q. + intros. rewrite R by auto. unfold rs1; Simpl. +Qed. + +Lemma exec_loadimm64: + forall rd n k rs m, + exists rs', + exec_straight ge lk (loadimm64 rd n k) rs m k rs' m + /\ rs'#rd = Vlong n + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadimm64, loadimm; intros. + destruct (is_logical_imm64 n). + - econstructor; split. + apply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto. + intros; Simpl. + - set (dz := decompose_int 4%nat (Int64.unsigned n) 0). + set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0). + assert (A: Int64.repr (recompose_int 0 dz) = n). + { transitivity (Int64.repr (Int64.unsigned n)). + apply Int64.eqm_samerepr. apply decompose_int_eqmod. + apply Int64.repr_unsigned. } + assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n). + { transitivity (Int64.repr (Int64.unsigned n)). + apply Int64.eqm_samerepr. apply decompose_notint_eqmod. + apply Int64.repr_unsigned. } + destruct Nat.leb. + + rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; lia. + + rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; lia. +Qed. + +(** Add immediate *) + +Lemma exec_addimm_aux_32: + forall (insn: Z -> arith_pp) (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m = + Next (rs#rd <- (sem rs#r1 (Vint (Int.repr n)))) m) -> + (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) -> + forall rd r1 n k rs m, + exists rs', + exec_straight ge lk (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros insn sem SEM ASSOC; intros. unfold addimm_aux. + set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo). + assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; lia). + rewrite <- (Int.repr_unsigned n). + destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. + - econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. + - econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. + - econstructor; split. eapply exec_straight_two. + apply SEM. apply SEM. simpl. Simpl. + split. Simpl. simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr. + rewrite E. auto with ints. + intros; Simpl. +Qed. + +Lemma exec_addimm32: + forall (rd r1: ireg) n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge lk (addimm32 rd r1 n k) rs m k rs' m + /\ rs'#rd = Val.add rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros. unfold addimm32. set (nn := Int.neg n). + destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))]. + - apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. + - rewrite <- Val.sub_opp_add. + apply exec_addimm_aux_32 with (sem := Val.sub). auto. + intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto. + - destruct (Int.lt n Int.zero). + + rewrite <- Val.sub_opp_add; fold nn. + edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite B, C; eauto with asmgen. + intros; Simpl. + + edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. + split. Simpl. rewrite B, C; eauto with asmgen. + intros; Simpl. +Qed. + +Lemma exec_addimm_aux_64: + forall (insn: Z -> arith_pp) (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m = + Next (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n)))) m) -> + (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) -> + forall rd r1 n k rs m, + exists rs', + exec_straight ge lk (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros insn sem SEM ASSOC; intros. unfold addimm_aux. + set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo). + assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; lia). + rewrite <- (Int64.repr_unsigned n). + destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. + - econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. + - econstructor; split. apply exec_straight_one. apply SEM. Simpl. + split. Simpl. do 3 f_equal; lia. + intros; Simpl. + - econstructor; split. eapply exec_straight_two. + apply SEM. apply SEM. Simpl. Simpl. + split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr. + rewrite E. auto with ints. + intros; Simpl. +Qed. + +Lemma exec_addimm64: + forall (rd r1: iregsp) n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge lk (addimm64 rd r1 n k) rs m k rs' m + /\ rs'#rd = Val.addl rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros. + unfold addimm64. set (nn := Int64.neg n). + destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))]. + - apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. + - rewrite <- Val.subl_opp_addl. + apply exec_addimm_aux_64 with (sem := Val.subl). auto. + intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto. + - destruct (Int64.lt n Int64.zero). + + rewrite <- Val.subl_opp_addl; fold nn. + edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. + split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. + intros; Simpl. + + edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. + split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. + intros; Simpl. +Qed. + +(** Logical immediate *) + +Lemma exec_logicalimm32: + forall (insn1: Z -> arith_rr0) + (insn2: shift_op -> arith_rr0r) + (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m = + Next (rs#rd <- (sem rs##r1 (Vint (Int.repr n)))) m) -> + (forall rd r1 r2 s rs m, + exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m = + Next (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s))) m) -> + forall rd r1 n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge lk (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vint n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32. + destruct (is_logical_imm32 n). + - econstructor; split. + apply exec_straight_one. apply SEM1. + split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl. + - edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. apply SEM2. + split. Simpl. f_equal; auto. apply C; auto with asmgen. + intros; Simpl. +Qed. + +Lemma exec_logicalimm64: + forall (insn1: Z -> arith_rr0) + (insn2: shift_op -> arith_rr0r) + (sem: val -> val -> val), + (forall rd r1 n rs m, + exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m = + Next (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n)))) m) -> + (forall rd r1 r2 s rs m, + exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m = + Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) -> + forall rd r1 n k rs m, + r1 <> X16 -> + exists rs', + exec_straight ge lk (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Vlong n) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64. + destruct (is_logical_imm64 n). + - econstructor; split. + apply exec_straight_one. apply SEM1. + split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl. + - edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. apply SEM2. + split. Simpl. f_equal; auto. apply C; auto with asmgen. + intros; Simpl. +Qed. + +(** Load address of symbol *) + +Lemma exec_loadsymbol: forall rd s ofs k rs m, + rd <> X16 \/ Archi.pic_code tt = false -> + exists rs', + exec_straight ge lk (loadsymbol rd s ofs k) rs m k rs' m + /\ rs'#rd = Genv.symbol_address ge s ofs + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold loadsymbol; intros. destruct (Archi.pic_code tt). + - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero. + + subst ofs. econstructor; split. + apply exec_straight_one. simpl; eauto. + split. Simpl. intros; Simpl. + + exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence. + intros (rs1 & A & B & C). + econstructor; split. + econstructor. simpl; eauto. auto. eexact A. + split. simpl in B; rewrite B. Simpl. + rewrite <- Genv.shift_symbol_address_64 by auto. + rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto. + intros. rewrite C by auto. Simpl. + - econstructor; split. + eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. + split. Simpl. + intros; Simpl. +Qed. + +(** Shifted operands *) + +Remark transl_shift_not_none: + forall s a, transl_shift s a <> SOnone. +Proof. + destruct s; intros; simpl; congruence. +Qed. + +Remark or_zero_eval_shift_op_int: + forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto. +Qed. + +Remark or_zero_eval_shift_op_long: + forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto. +Qed. + +Remark add_zero_eval_shift_op_long: + forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s. +Proof. + intros; destruct s; try congruence; destruct v; auto; simpl; + destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto. +Qed. + +Lemma transl_eval_shift: forall s v (a: amount32), + eval_shift_op_int v (transl_shift s a) = eval_shift s v a. +Proof. + intros. destruct s; simpl; auto. +Qed. + +Lemma transl_eval_shift': forall s v (a: amount32), + Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a. +Proof. + intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none). + apply transl_eval_shift. +Qed. + +Lemma transl_eval_shiftl: forall s v (a: amount64), + eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a. +Proof. + intros. destruct s; simpl; auto. +Qed. + +Lemma transl_eval_shiftl': forall s v (a: amount64), + Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a. +Proof. + intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none). + apply transl_eval_shiftl. +Qed. + +Lemma transl_eval_shiftl'': forall s v (a: amount64), + Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a. +Proof. + intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none). + apply transl_eval_shiftl. +Qed. + +(** Zero- and Sign- extensions *) + +Lemma exec_move_extended_base: forall rd r1 ex k rs m, + exists rs', + exec_straight ge lk (move_extended_base rd r1 ex k) rs m k rs' m + /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold move_extended_base; destruct ex; econstructor; + (split; [apply exec_straight_one; simpl; eauto | split; [Simpl|intros;Simpl]]). +Qed. + +Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m, + exists rs', + exec_straight ge lk (move_extended rd r1 ex a k) rs m k rs' m + /\ rs' rd = Op.eval_extend ex rs#r1 a + /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. +Proof. + unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero. + - exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C). + exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B. + destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto. + auto. + - Local Opaque Val.addl. + exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto. + split. Simpl. rewrite B. auto. + intros; Simpl. +Qed. + +Lemma exec_arith_extended: + forall (sem: val -> val -> val) + (insnX: extend_op -> arith_ppp) + (insnS: shift_op -> arith_rr0r), + (forall rd r1 r2 x rs m, + exec_basic lk ge (PArith (PArithPPP (insnX x) rd r1 r2)) rs m = + Next (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x))) m) -> + (forall rd r1 r2 s rs m, + exec_basic lk ge (PArith (PArithRR0R (insnS s) rd r1 r2)) rs m = + Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) -> + forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: bcode) rs m, + r1 <> X16 -> + exists rs', + exec_straight ge lk (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m + /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a) + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)). + - econstructor; split. + apply exec_straight_one. rewrite EX; eauto. auto. + split. Simpl. f_equal. destruct ex; auto. + intros; Simpl. + - exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + rewrite ES. eauto. auto. + split. Simpl. unfold ir0. rewrite C by eauto with asmgen. f_equal. + rewrite B. destruct ex; auto. + intros; Simpl. +Qed. + +(** Extended right shift *) + +Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m, + Val.shrx rs#r1 (Vint n) = Some v -> + r1 <> X16 -> + exists rs', + exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m + /\ rs'#rd = v + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold shrx32; intros. apply Val.shrx_shr_2 in H. + destruct (Int.eq n Int.zero) eqn:E0. + - econstructor; split. apply exec_straight_one; simpl; eauto. + split. Simpl. subst v; auto. intros; Simpl. + - econstructor; split. eapply exec_straight_three. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_int by congruence. eauto. + simpl; eauto. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_int by congruence. eauto. + split. subst v; Simpl. intros; Simpl. +Qed. + +Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m, + Val.shrx rs#r1 (Vint n) = None -> + r1 <> X16 -> + exists rs', + exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m + /\ Val.lessdef (Val.maketotal None) (rs' x) + /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r. +Proof. + unfold shrx32; intros. + destruct (Int.eq n Int.zero) eqn:E0. + - econstructor; split. apply exec_straight_one; simpl; eauto. + split. Simpl. auto. intros; Simpl. + - econstructor; split. eapply exec_straight_three. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_int by congruence. eauto. + simpl; eauto. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_int by congruence. eauto. + split. Simpl. intros; Simpl. +Qed. + +Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m, + Val.shrxl rs#r1 (Vint n) = Some v -> + r1 <> X16 -> + exists rs', + exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m + /\ rs'#rd = v + /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + unfold shrx64; intros. apply Val.shrxl_shrl_2 in H. + destruct (Int.eq n Int.zero) eqn:E. + - econstructor; split. apply exec_straight_one; simpl; eauto. + split. Simpl. subst v; auto. intros; Simpl. + - econstructor; split. eapply exec_straight_three. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_long by congruence. eauto. + simpl; eauto. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_long by congruence. eauto. + split. subst v; Simpl. intros; Simpl. +Qed. + +Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m, + Val.shrxl rs#r1 (Vint n) = None -> + r1 <> X16 -> + exists rs', + exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m + /\ Val.lessdef (Val.maketotal None) (rs' x) + /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r. +Proof. + unfold shrx64; intros. + destruct (Int.eq n Int.zero) eqn:E. + - econstructor; split. apply exec_straight_one; simpl; eauto. + split. Simpl. auto. intros; Simpl. + - econstructor; split. eapply exec_straight_three. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_long by congruence. eauto. + simpl; eauto. + unfold exec_basic, exec_arith_instr, arith_eval_rr0r. + rewrite or_zero_eval_shift_op_long by congruence. eauto. + split. Simpl. intros; Simpl. +Qed. + +Ltac TranslOpBase := + econstructor; split; + [ apply exec_straight_one; [simpl; eauto ] + | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl + | intros; Simpl; fail ] ]. + +(** Condition bits *) + +Lemma compare_int_spec: forall rs v1 v2, + let rs' := compare_int rs v1 v2 in + rs'#CN = (Val.negative (Val.sub v1 v2)) + /\ rs'#CZ = (Val.mxcmpu Ceq v1 v2) + /\ rs'#CC = (Val.mxcmpu Cge v1 v2) + /\ rs'#CV = (Val.sub_overflow v1 v2). +Proof. + intros; unfold rs'; auto. +Qed. + +Lemma eval_testcond_compare_sint: forall c v1 v2 b rs, + Val.cmp_bool c v1 v2 = Some b -> + eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2) = Some b. +Proof. + intros. generalize (compare_int_spec rs v1 v2). + set (rs' := compare_int rs v1 v2). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.mxcmpu; simpl. destruct c; simpl. + - destruct (Int.eq i i0); auto. + - destruct (Int.eq i i0); auto. + - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto. + - rewrite Int.lt_sub_overflow, Int.not_lt. + destruct (Int.eq i i0), (Int.lt i i0); auto. + - rewrite Int.lt_sub_overflow, (Int.lt_not i). + destruct (Int.eq i i0), (Int.lt i i0); auto. + - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto. +Qed. + +Lemma eval_testcond_compare_uint: forall c v1 v2 b rs, + Val.mxcmpu_bool c v1 v2 = Some b -> + eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2) = Some b. +Proof. + intros. generalize (compare_int_spec rs v1 v2). + set (rs' := compare_int rs v1 v2). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.mxcmpu; simpl. destruct c; simpl. + - destruct (Int.eq i i0); auto. + - destruct (Int.eq i i0); auto. + - destruct (Int.ltu i i0); auto. + - rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto. + - rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto. + - destruct (Int.ltu i i0); auto. +Qed. + +Lemma compare_long_spec: forall rs v1 v2, + let rs' := compare_long rs v1 v2 in + rs'#CN = (Val.negativel (Val.subl v1 v2)) + /\ rs'#CZ = (Val.mxcmplu Ceq v1 v2) + /\ rs'#CC = (Val.mxcmplu Cge v1 v2) + /\ rs'#CV = (Val.subl_overflow v1 v2). +Proof. + intros; unfold rs'; auto. +Qed. + +Remark int64_sub_overflow: + forall x y, + Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero))) + (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) = + (if Int64.lt x y then Int.one else Int.zero). +Proof. + intros. + transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))). + rewrite <- (Int64.lt_sub_overflow x y). + unfold Int64.sub_overflow, Int64.negative. + set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero). + destruct (zle Int64.min_signed s && zle s Int64.max_signed); + destruct (Int64.lt (Int64.sub x y) Int64.zero); + auto. + destruct (Int64.lt x y); auto. +Qed. + +Lemma eval_testcond_compare_slong: forall c v1 v2 b rs, + Val.cmpl_bool c v1 v2 = Some b -> + eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2) = Some b. +Proof. + intros. generalize (compare_long_spec rs v1 v2). + set (rs' := compare_long rs v1 v2). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. + destruct v1; try discriminate; destruct v2; try discriminate. + simpl in H; inv H. + unfold Val.mxcmplu; simpl. destruct c; simpl. + - destruct (Int64.eq i i0); auto. + - destruct (Int64.eq i i0); auto. + - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto. + - rewrite int64_sub_overflow, Int64.not_lt. + destruct (Int64.eq i i0), (Int64.lt i i0); auto. + - rewrite int64_sub_overflow, (Int64.lt_not i). + destruct (Int64.eq i i0), (Int64.lt i i0); auto. + - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto. +Qed. + +Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs, + Val.mxcmplu_bool c v1 v2 = Some b -> + eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2) = Some b. +Proof. + intros. generalize (compare_long_spec rs v1 v2). + set (rs' := compare_long rs v1 v2). intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E; unfold Val.mxcmplu. + destruct v1; try discriminate; destruct v2; try discriminate; simpl in H. + - (* int-int *) + inv H. destruct c; simpl. + + destruct (Int64.eq i i0); auto. + + destruct (Int64.eq i i0); auto. + + destruct (Int64.ltu i i0); auto. + + rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto. + + rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto. + + destruct (Int64.ltu i i0); auto. + - (* int-ptr *) + simpl. + destruct (Archi.ptr64); simpl; try discriminate. + destruct (Int64.eq i Int64.zero); simpl; try discriminate. + destruct c; simpl in H; inv H; reflexivity. + - (* ptr-int *) + simpl. + destruct (Archi.ptr64); simpl; try discriminate. + destruct (Int64.eq i0 Int64.zero); try discriminate. + destruct c; simpl in H; inv H; reflexivity. + - (* ptr-ptr *) + simpl. + destruct (eq_block b0 b1). + destruct (Archi.ptr64); simpl; try discriminate. + inv H. + destruct c; simpl. + * destruct (Ptrofs.eq i i0); auto. + * destruct (Ptrofs.eq i i0); auto. + * destruct (Ptrofs.ltu i i0); auto. + * rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto. + * rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto. + * destruct (Ptrofs.ltu i i0); auto. + * destruct c; simpl in H; inv H; reflexivity. +Qed. + +Lemma compare_float_spec: forall rs f1 f2, + let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in + rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2)) + /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2)) + /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2))) + /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))). +Proof. + intros; auto. +Qed. + +Lemma eval_testcond_compare_float: forall c v1 v2 b rs, + Val.cmpf_bool c v1 v2 = Some b -> + eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_float_spec rs f f0). + set (rs' := compare_float rs (Vfloat f) (Vfloat f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float.cmp Float.ordered. + unfold Float.cmp, Float.ordered; + destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs, + option_map negb (Val.cmpf_bool c v1 v2) = Some b -> + eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_float_spec rs f f0). + set (rs' := compare_float rs (Vfloat f) (Vfloat f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float.cmp Float.ordered. + unfold Float.cmp, Float.ordered; + destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma compare_single_spec: forall rs f1 f2, + let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in + rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2)) + /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2)) + /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2))) + /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))). +Proof. + intros; auto. +Qed. + +Lemma eval_testcond_compare_single: forall c v1 v2 b rs, + Val.cmpfs_bool c v1 v2 = Some b -> + eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_single_spec rs f f0). + set (rs' := compare_single rs (Vsingle f) (Vsingle f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float32.cmp Float32.ordered. + unfold Float32.cmp, Float32.ordered; + destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity. +Qed. + +Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs, + option_map negb (Val.cmpfs_bool c v1 v2) = Some b -> + eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b. +Proof. + intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H. + generalize (compare_single_spec rs f f0). + set (rs' := compare_single rs (Vsingle f) (Vsingle f0)). + intros (B & C & D & E). + unfold eval_testcond; rewrite B, C, D, E. +Local Transparent Float32.cmp Float32.ordered. + unfold Float32.cmp, Float32.ordered; + destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity. +Qed. + +Remark compare_float_inv: forall rs v1 v2 r, + match r with CR _ => False | _ => True end -> + (compare_float rs v1 v2)#r = rs#r. +Proof. + intros; unfold compare_float. + destruct r; try contradiction; destruct v1; auto; destruct v2; auto. +Qed. + +Remark compare_single_inv: forall rs v1 v2 r, + match r with CR _ => False | _ => True end -> + (compare_single rs v1 v2)#r = rs#r. +Proof. + intros; unfold compare_single. + destruct r; try contradiction; destruct v1; auto; destruct v2; auto. +Qed. + +Lemma transl_cond_correct: + forall cond args k c rs m, + transl_cond cond args k = OK c -> + exists rs', + exec_straight ge lk c rs m k rs' m + /\ (forall b, + eval_condition cond (map rs (map preg_of args)) m = Some b -> + eval_testcond (cond_for_cond cond) rs' = Some b) + /\ forall r, data_preg r = true -> rs'#r = rs#r. +Proof. + intros until m; intros TR. destruct cond; simpl in TR; ArgsInv. + - (* Ccomp *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. + - (* Ccompu *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. + - (* Ccompimm *) + destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. + + econstructor; split. + apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. + split; intros. apply eval_testcond_compare_sint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccompuimm *) + destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. + + econstructor; split. + apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_uint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccompshift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. + destruct r; reflexivity || discriminate. + - (* Ccompushift *) + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. + destruct r; reflexivity || discriminate. + - (* Cmaskzero *) + destruct (is_logical_imm32 n). + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_sint Ceq); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Cmasknotzero *) + destruct (is_logical_imm32 n). + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply (eval_testcond_compare_sint Cne); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccompl *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. + - (* Ccomplu *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_ulong; auto. + erewrite Val.mxcmplu_bool_correct; eauto. + destruct r; reflexivity || discriminate. + - (* Ccomplimm *) + destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. + + econstructor; split. + apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. + split; intros. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. auto. + split; intros. apply eval_testcond_compare_slong; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccompluimm *) + destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + + econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. + erewrite Val.mxcmplu_bool_correct; eauto. + destruct r; reflexivity || discriminate. + + econstructor; split. + apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. + split; intros. apply eval_testcond_compare_ulong; auto. + erewrite Val.mxcmplu_bool_correct; eauto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. + simpl. rewrite B, C by eauto with asmgen. eauto. + split; intros. apply eval_testcond_compare_ulong; auto. + erewrite Val.mxcmplu_bool_correct; eauto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccomplshift *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. + destruct r; reflexivity || discriminate. + - (* Ccomplushift *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. + erewrite Val.mxcmplu_bool_correct; eauto. + destruct r; reflexivity || discriminate. + - (* Cmasklzero *) + destruct (is_logical_imm64 n). + + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. + split; intros. apply (eval_testcond_compare_slong Ceq); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Cmasknotzero *) + destruct (is_logical_imm64 n). + + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. + destruct r; reflexivity || discriminate. + + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. + apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. + split; intros. apply (eval_testcond_compare_slong Cne); auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + - (* Ccompf *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. + - (* Cnotcompf *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_not_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. + - (* Ccompfzero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. + - (* Cnotcompfzero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_not_float; auto. + destruct r; discriminate || rewrite compare_float_inv; auto. + - (* Ccompfs *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. + - (* Cnotcompfs *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_not_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. + - (* Ccompfszero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. + - (* Cnotcompfszero *) + econstructor; split. apply exec_straight_one. simpl; eauto. + split; intros. apply eval_testcond_compare_not_single; auto. + destruct r; discriminate || rewrite compare_single_inv; auto. +Qed. + +Lemma transl_cond_correct': + forall cond args k c tbb rs m, + transl_cond cond args k = OK c -> + exists rs', + exec_straight ge lk c rs m k rs' m + /\ (forall b, + eval_condition cond (map rs (map preg_of args)) m = Some b -> + eval_testcond (cond_for_cond cond) (incrPC (Ptrofs.repr (size tbb)) rs') = Some b) + /\ forall r, data_preg r = true -> rs'#r = rs#r. +Proof. + intros until m; intros TR. + eapply transl_cond_correct; eauto. +Qed. + +Lemma transl_cbranch_correct_1: + forall cond args lbl c k b m rs tbb bdy, + transl_cond_branch cond args lbl k = OK (bdy,c) -> + eval_condition cond (map rs (map preg_of args)) m = Some b -> + exists rs', + exec_straight_opt ge lk bdy rs m k rs' m + /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m = + (if b then goto_label fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m else + Next (incrPC (Ptrofs.repr (size tbb)) rs') m) + /\ forall r, data_preg r = true -> rs'#r = rs#r. +Proof. +intros until bdy; intros TR EV. + assert (Archi.ptr64 = true) as SF; auto. + assert (DFL: + transl_cond_branch_default cond args lbl k = OK (bdy,c) -> + exists rs', + exec_straight_opt ge lk bdy rs m k rs' m + /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m = eval_branch fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m (Some b) + /\ forall r, data_preg r = true -> rs'#r = rs#r). + { + unfold transl_cond_branch_default; intros. monadInv H. + exploit transl_cond_correct'; eauto. intros (rs' & A & B & C). + eexists; split. + apply exec_straight_opt_intro. eexact A. + split; auto. simpl. rewrite (B b) by auto. auto. + } +Local Opaque transl_cond transl_cond_branch_default. + destruct args as [ | a1 args]; simpl in TR; auto. + destruct args as [ | a2 args]; simpl in TR; auto. + destruct cond; simpl in TR; auto. + - (* Ccompimm *) + destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto; + apply Int.same_if_eq in N0; subst n; ArgsInv. + + (* Ccompimm Cne 0 *) + econstructor; split. econstructor. + split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. + unfold incrPC. Simpl. rewrite EQRSX. simpl. auto. + + (* Ccompimm Ceq 0 *) + econstructor; split. econstructor. + split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl. + unfold incrPC. Simpl. rewrite EQRSX. simpl. + destruct (Int.eq i Int.zero); auto. + - (* Ccompuimm *) + destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto. + apply Int.same_if_eq in N0; subst n; ArgsInv. + + (* Ccompuimm Cne 0 *) + econstructor; split. econstructor. + split; auto. simpl. apply Val.mxcmpu_bool_correct in EV. + unfold incrPC. Simpl. rewrite EV. auto. + + (* Ccompuimm Ceq 0 *) + monadInv TR. ArgsInv. simpl in *. + econstructor; split. econstructor. + split; auto. simpl. unfold incrPC. Simpl. + apply Int.same_if_eq in N0; subst. + rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV. + destruct b; auto. + - (* Cmaskzero *) + destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv. + econstructor; split. econstructor. + split; auto. simpl. + erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto. + unfold incrPC. Simpl. + rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto. + - (* Cmasknotzero *) + destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv. + econstructor; split. econstructor. + split; auto. simpl. + erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto. + unfold incrPC. Simpl. + rewrite EV. auto. + - (* Ccomplimm *) + destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; + apply Int64.same_if_eq in N0; subst n; ArgsInv. + + (* Ccomplimm Cne 0 *) + econstructor; split. econstructor. + split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. + unfold incrPC. Simpl. rewrite EQRSX. simpl. auto. + + (* Ccomplimm Ceq 0 *) + econstructor; split. econstructor. + split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl. + unfold incrPC. Simpl. rewrite EQRSX. simpl. + destruct (Int64.eq i Int64.zero); auto. + - (* Ccompluimm *) + destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto; + apply Int64.same_if_eq in N0; subst n; ArgsInv. + + (* Ccompluimm Cne 0 *) + econstructor; split. econstructor. + split; auto. simpl. apply Val.mxcmplu_bool_correct in EV. + unfold incrPC. Simpl. rewrite EV. auto. + + (* Ccompluimm Ceq 0 *) + econstructor; split. econstructor. + split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl. + unfold incrPC. Simpl. rewrite EQRSX. simpl. + destruct (Int64.eq i Int64.zero); auto. + unfold incrPC. Simpl. rewrite EQRSX. simpl. + rewrite SF in *; simpl in *. + rewrite Int64.eq_true in *. + destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i) || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); simpl in *. + assert (b = true). { destruct b; try congruence. } + rewrite H; auto. discriminate. + - (* Cmasklzero *) + destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv. + econstructor; split. econstructor. + split; auto. + erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto. + unfold incrPC. Simpl. + rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto. + - (* Cmasklnotzero *) + destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv. + econstructor; split. econstructor. + split; auto. + erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto. + unfold incrPC. Simpl. + rewrite EV. auto. +Qed. + +Lemma transl_op_correct: + forall op args res k (rs: regset) m v c, + transl_op op args res k = OK c -> + eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v -> + exists rs', + exec_straight ge lk c rs m k rs' m + /\ Val.lessdef v rs'#(preg_of res) + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. +Proof. + (* assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. } *) +Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. + intros until c; intros TR EV. + unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl; + try (rewrite <- transl_eval_shift; TranslOpSimpl). + - (* move *) + destruct (preg_of res), (preg_of m0); try destruct d; try destruct d0; inv TR; TranslOpSimpl. + - (* intconst *) + exploit exec_loadimm32. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + - (* longconst *) + exploit exec_loadimm64. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + - (* floatconst *) + destruct (Float.eq_dec n Float.zero). + + subst n. TranslOpSimpl. + + TranslOpSimplN. + - (* singleconst *) + destruct (Float32.eq_dec n Float32.zero). + + subst n. TranslOpSimpl. + + TranslOpSimplN. + - (* loadsymbol *) + exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* addrstack *) + exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)); try discriminate. simpl; eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. replace (DR XSP) with (SP) in B by auto. rewrite B. + Local Transparent Val.addl. + destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto. + auto. + - (* shift *) + rewrite <- transl_eval_shift'. TranslOpSimpl. + - (* addimm *) + exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* mul *) + TranslOpBase. + Local Transparent Val.add. + destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto. + - (* andimm *) + exploit (exec_logicalimm32 (Pandimm W) (Pand W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* orimm *) + exploit (exec_logicalimm32 (Porrimm W) (Porr W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* xorimm *) + exploit (exec_logicalimm32 (Peorimm W) (Peor W)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* not *) + TranslOpBase. + destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto. + - (* notshift *) + TranslOpBase. + destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. + - (* shrx *) + assert (Val.maketotal (Val.shrx (rs x0) (Vint n)) = Val.maketotal (Val.shrx (rs x0) (Vint n))) by eauto. + destruct (Val.shrx) eqn:E. + + exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. + + exploit (exec_shrx32_none x x0 n); eauto with asmgen. + - (* zero-ext *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. + - (* sign-ext *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. + - (* shlzext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range. + - (* shlsext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range. + - (* zextshr *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range. + - (* sextshr *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range. + - (* shiftl *) + rewrite <- transl_eval_shiftl'. TranslOpSimpl. + - (* extend *) + exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C). + econstructor; split. eexact A. + split. rewrite B; auto. eauto with asmgen. + - (* addlshift *) + TranslOpBase. + - (* addext *) + exploit (exec_arith_extended Val.addl Paddext (Padd X)). + auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. + - (* addlimm *) + exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto. + - (* neglshift *) + TranslOpBase. + - (* sublshift *) + TranslOpBase. + - (* subext *) + exploit (exec_arith_extended Val.subl Psubext (Psub X)). + auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. + - (* mull *) + TranslOpBase. + destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto. + - (* andlshift *) + TranslOpBase. + - (* andlimm *) + exploit (exec_logicalimm64 (Pandimm X) (Pand X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* orlshift *) + TranslOpBase. + - (* orlimm *) + exploit (exec_logicalimm64 (Porrimm X) (Porr X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* orlshift *) + TranslOpBase. + - (* xorlimm *) + exploit (exec_logicalimm64 (Peorimm X) (Peor X)). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. auto. + - (* notl *) + TranslOpBase. + destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto. + - (* notlshift *) + TranslOpBase. + destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto. + - (* biclshift *) + TranslOpBase. + - (* ornlshift *) + TranslOpBase. + - (* eqvlshift *) + TranslOpBase. + - (* shrx *) + assert (Val.maketotal (Val.shrxl (rs x0) (Vint n)) = Val.maketotal (Val.shrxl (rs x0) (Vint n))) by eauto. + destruct (Val.shrxl) eqn:E. + + exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + econstructor; split. eexact A. split. rewrite B; auto. auto. + + exploit (exec_shrx64_none x x0 n); eauto with asmgen. + - (* zero-ext-l *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. + - (* sign-ext-l *) + TranslOpBase. + destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. + - (* shllzext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range. + - (* shllsext *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range. + - (* zextshrl *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range. + - (* sextshrl *) + TranslOpBase. + destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range. + - (* condition *) + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. auto. + auto. + intros; Simpl. + - (* select *) + destruct (preg_of res) as [[ir|fr]|cr|] eqn:RES; monadInv TR. + + (* integer *) + generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. + rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. + auto. + intros; Simpl. + + (* FP *) + generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. + split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. + rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. + rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. + auto. + intros; Simpl. +Qed. + +(** Translation of addressing modes, loads, stores *) + +Lemma transl_addressing_correct: + forall sz addr args (insn: Asm.addressing -> basic) k (rs: regset) m c b o, + transl_addressing sz addr args insn k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) -> + exists ad rs', + exec_straight_opt ge lk c rs m (insn ad :: k) rs' m + /\ eval_addressing lk ad rs' = Vptr b o + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros until o; intros TR EV. + unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV. + - (* Aindexed *) + destruct (offset_representable sz ofs); inv EQ0. + + econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. + + exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C). + econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A. + split. simpl. rewrite B, C by eauto with asmgen. auto. + eauto with asmgen. + - (* Aindexed2 *) + econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. + - (* Aindexed2shift *) + destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2. + + apply Int.same_if_eq in E. rewrite E. + econstructor; econstructor; split. apply exec_straight_opt_refl. + split; auto. simpl. + rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate. + unfold Val.shll. rewrite Int64.shl'_zero. auto. + + econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. + + econstructor; econstructor; split. + apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. + split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto. + intros; Simpl. + - (* Aindexed2ext *) + destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2. + + econstructor; econstructor; split. apply exec_straight_opt_refl. + split; auto. destruct x; auto. + + exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto. + instantiate (1 := x0). eauto with asmgen. + intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal. + unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range; + simpl; rewrite Int64.add_zero; auto. + intros. apply C; eauto with asmgen. + - (* Aglobal *) + destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR. + + econstructor; econstructor; split. + apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. + split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence. + intros; Simpl. + + exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. + rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. + simpl in EV. congruence. + auto with asmgen. + - (* Ainstrack *) + assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o). + { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. + rewrite Ptrofs.of_int64_to_int64 by auto. auto. } + destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR. + + econstructor; econstructor; split. apply exec_straight_opt_refl. + auto. + + exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C). + econstructor; exists rs'; split. + apply exec_straight_opt_intro. eexact A. + split. simpl. rewrite B, C by eauto with asmgen. auto. + auto with asmgen. +Qed. + +Lemma transl_load_correct: + forall chunk addr args dst k c (rs: regset) m vaddr v, + transl_load chunk addr args dst k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr -> + Mem.loadv chunk m vaddr = Some v -> + exists rs', + exec_straight ge lk c rs m k rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. +Proof. + intros. destruct vaddr; try discriminate. + assert (A: exists sz insn, + transl_addressing sz addr args insn k = OK c + /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m = + exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m)). + { + destruct chunk; monadInv H; + try rewrite (ireg_of_eq' _ _ EQ); try rewrite (freg_of_eq' _ _ EQ); + do 2 econstructor; (split; [eassumption|auto]). + } + destruct A as (sz & insn & B & C). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + assert (X: exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m = + Next (rs'#(preg_of dst) <- v) m). + { unfold exec_load_rd_a. rewrite Q, H1. auto. } + econstructor; split. + eapply exec_straight_opt_right. eexact P. + apply exec_straight_one. rewrite C, X; eauto. Simpl. + split. auto. intros; Simpl. +Qed. + +Lemma transl_store_correct: + forall chunk addr args src k c (rs: regset) m vaddr m', + transl_store chunk addr args src k = OK c -> + Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr -> + Mem.storev chunk m vaddr rs#(preg_of src) = Some m' -> + exists rs', + exec_straight ge lk c rs m k rs' m' + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros. destruct vaddr; try discriminate. + set (chunk' := match chunk with Mint8signed => Mint8unsigned + | Mint16signed => Mint16unsigned + | _ => chunk end). + assert (A: exists sz insn, + transl_addressing sz addr args insn k = OK c + /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m = + exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m)). + { + unfold chunk'; destruct chunk; monadInv H; + try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ); + do 2 econstructor; (split; [eassumption|auto]). + } + destruct A as (sz & insn & B & C). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m'). + { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry. + apply Mem.store_signed_unsigned_8. + apply Mem.store_signed_unsigned_16. } + assert (Y: exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m = + Next rs' m'). + { unfold exec_store_rs_a. rewrite Q, R, X by auto with asmgen. auto. } + econstructor; split. + eapply exec_straight_opt_right. eexact P. + apply exec_straight_one. rewrite C, Y; eauto. + intros; Simpl. rewrite R; auto. +Qed. + +(** Memory accesses *) + +Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i, + preg_of_iregsp base <> X16 -> + Val.offset_ptr rs#base ofs = Vptr b i -> + exists ad rs', + exec_straight_opt ge lk (indexed_memory_access_bc insn sz base ofs k) rs m (insn ad :: k) rs' m + /\ eval_addressing lk ad rs' = Vptr b i + /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. +Proof. + unfold indexed_memory_access_bc; intros. + assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i). + { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. } + destruct offset_representable. + - econstructor; econstructor; split. apply exec_straight_opt_refl. auto. + - exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C). + econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A. + split. simpl. rewrite B, C; eauto; try discriminate. + unfold preg_of_iregsp in H. destruct base; auto. auto. +Qed. + +Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset), + Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge lk (loadptr_bc base ofs dst k) rs m k rs' m + /\ rs'#dst = v + /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. simpl. unfold exec_load_rd_a. rewrite B, H. eauto. + split. Simpl. intros; Simpl. +Qed. + +Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset), + Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' -> + preg_of_iregsp base <> IR X16 -> + (DR (IR (RR1 src))) <> (DR (IR (RR1 X16))) -> + exists rs', + exec_straight ge lk (storeptr_bc src base ofs k) rs m k rs' m' + /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. simpl. unfold exec_store_rs_a. rewrite B, C, H. eauto. + discriminate. auto. + intros; Simpl. rewrite C; auto. +Qed. + +Lemma loadind_correct: + forall (base: iregsp) ofs ty dst k c (rs: regset) m v, + loadind base ofs ty dst k = OK c -> + Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge lk c rs m k rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + assert (X: exists sz (insn: addressing -> ld_instruction), + c = indexed_memory_access_bc insn sz base ofs k + /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m = + exec_load_rd_a lk (chunk_of_type ty) (fun v => v) ad (dreg_of dst) rs' m)). + { + unfold loadind in H; destruct ty; destruct (dst); inv H; + do 2 econstructor; split; eauto. + } + destruct X as (sz & insn & EQ & SEM). subst c. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. rewrite SEM. unfold exec_load. + unfold exec_load_rd_a. rewrite B, H0. eauto. Simpl. + split. auto. intros; Simpl. +Qed. + +Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', + storeind src base ofs ty k = OK c -> + Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' -> + preg_of_iregsp base <> IR X16 -> + exists rs', + exec_straight ge lk c rs m k rs' m' + /\ forall r, data_preg r = true -> rs' r = rs r. +Proof. + intros. + destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. + assert (X: exists sz (insn: addressing -> st_instruction), + c = indexed_memory_access_bc insn sz base ofs k + /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m = + exec_store_rs_a lk (chunk_of_type ty) ad rs'#(preg_of src) rs' m)). + { + unfold storeind in H; destruct ty; destruct (preg_of src) as [[ir|fr]|cr|]; inv H; do 2 econstructor; split; eauto. + } + destruct X as (sz & insn & EQ & SEM). subst c. + exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C). + econstructor; split. + eapply exec_straight_opt_right. eexact A. + apply exec_straight_one. rewrite SEM. + unfold exec_store. unfold exec_store_rs_a. + rewrite B, C, H0 by eauto with asmgen. eauto. + intros; Simpl. unfold data_preg in H2. destruct r as [[[ir|]|fr]|cr|]. + all: rewrite C; auto; try discriminate; + destruct ir; try discriminate. +Qed. + +Lemma make_epilogue_correct: + forall ge0 f m stk soff cs m' ms rs tm, + Mach.load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) -> + Mach.load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) -> + Mem.free m stk 0 f.(fn_stacksize) = Some m' -> + agree ms (Vptr stk soff) rs -> + Mem.extends m tm -> + match_stack ge0 cs -> + exists rs', exists tm', + exec_straight ge lk (make_epilogue f) rs tm nil rs' tm' + /\ agree ms (parent_sp cs) rs' + /\ Mem.extends m' tm' + /\ rs'#RA = parent_ra cs + /\ rs'#SP = parent_sp cs + /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r). +Proof. + assert (Archi.ptr64 = true) as SF; auto. + intros until tm; intros LP LRA FREE AG MEXT MCS. + exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). + exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA'). + exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. + exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'. + exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). + unfold make_epilogue. + rewrite chunk_of_Tptr in *. unfold Mptr in *. rewrite SF in *. + + exploit (loadptr_correct XSP (fn_retaddr_ofs f)). + instantiate (2 := rs). simpl. + replace (rs XSP) with (rs SP) by auto. + rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; discriminate. + + intros (rs1 & A1 & B1 & C1). + econstructor; econstructor; split. + eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. + simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. + rewrite FREE'. eauto. + split. apply agree_set_other; auto. + apply agree_change_sp with (Vptr stk soff). + apply agree_exten with rs; auto. intros; apply C1; auto with asmgen. + eapply parent_sp_def; eauto. + split. auto. + split. Simpl. + split. Simpl. + intros. Simpl. +Qed. + +End CONSTRUCTORS. diff --git a/aarch64/Asmblockprops.v b/aarch64/Asmblockprops.v new file mode 100644 index 00000000..38fbd6d3 --- /dev/null +++ b/aarch64/Asmblockprops.v @@ -0,0 +1,119 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Common definition and proofs on Asmblock required by various modules *) + +Require Import Coqlib. +Require Import Integers. +Require Import Memory. +Require Import Globalenvs. +Require Import Values. +Require Import Asmblock. +Require Import Axioms. + +Definition bblock_simu (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) := + forall rs m rs' m' t, + exec_bblock lk ge f bb rs m t rs' m' -> exec_bblock lk ge f bb' rs m t rs' m'. + +Definition bblock_simu_aux (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) := + forall rs m, + bbstep lk ge f bb rs m <> Stuck -> + bbstep lk ge f bb rs m = bbstep lk ge f bb' rs m. + +Hint Extern 2 (_ <> _) => congruence: asmgen. + +Lemma preg_of_data: + forall r, data_preg (preg_of r) = true. +Proof. + intros. destruct r; reflexivity. +Qed. + +Lemma dreg_of_data: + forall r, data_preg (dreg_of r) = true. +Proof. + intros. destruct r; reflexivity. +Qed. +Hint Resolve preg_of_data dreg_of_data: asmgen. + +Lemma data_diff: + forall r r', + data_preg r = true -> data_preg r' = false -> r <> r'. +Proof. + congruence. +Qed. +Hint Resolve data_diff: asmgen. + +Lemma preg_of_not_PC: + forall r, preg_of r <> PC. +Proof. + intros. apply data_diff; auto with asmgen. +Qed. + +Lemma preg_of_not_SP: + forall r, preg_of r <> SP. +Proof. + intros. unfold preg_of; destruct r; simpl; try discriminate. +Qed. + +Ltac desif := + repeat match goal with + | [ |- context[ if ?f then _ else _ ] ] => destruct f + end. + +Ltac decomp := + repeat match goal with + | [ |- context[ match (?rs ?r) with | _ => _ end ] ] => destruct (rs r) + end. + +Ltac Simplif := + ((desif) + || (try unfold compare_long) + || (try unfold compare_int) + || (try unfold compare_float) + || (try unfold compare_single) + || decomp + || (rewrite Pregmap.gss) + || (rewrite Pregmap.gso by eauto with asmgen) + ); auto with asmgen. + +Ltac Simpl := repeat Simplif. + +Section EPC. + +Variable lk: aarch64_linker. + +(* For Asmblockgenproof0 *) + +Theorem exec_basic_instr_pc: + forall ge b rs1 m1 rs2 m2, + exec_basic lk ge b rs1 m1 = Next rs2 m2 -> + rs2 PC = rs1 PC. +Proof. + intros. destruct b; try destruct i; try destruct i; try destruct ld; try destruct ld; + try destruct st; try destruct st; try destruct a. + all: try (inv H; simpl in *; auto; Simpl). + all: try (try unfold exec_load_rd_a in H1; try destruct (Mem.loadv _ _ _); inv H1; Simpl). + all: try (try unfold exec_load_double in H0; destruct (Mem.loadv _ _ _); simpl in *; + try destruct (Mem.loadv _ _ _); simpl in *; inv H0; Simpl). + all: try (try unfold exec_store_rs_a in H0; try destruct (Mem.storev _ _ _); inv H0; auto; Simpl). + all: try (try unfold exec_store_double in H1; destruct (Mem.storev _ _ _); simpl in *; + try destruct (Mem.storev _ _ _); simpl in *; inv H1; auto; Simpl). + - (* alloc *) + destruct (Mem.alloc _ _ _); destruct (Mem.store _ _ _); inv H1; auto; Simpl. + - (* free *) + destruct (rs1 SP); try discriminate; destruct (Mem.free _ _ _ _); inv H1; Simpl. +Qed. + +End EPC. diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index 573e8b92..eb9ec600 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -34,13 +34,13 @@ let _m1 = Z.of_sint (-1) (* Emit instruction sequences that set or offset a register by a constant. *) let expand_loadimm32 (dst: ireg) n = - List.iter emit (Asmgen.loadimm32 dst n []) + List.iter emit (Asmgen.Asmgen_expand.loadimm32 dst n []) let expand_addimm64 (dst: iregsp) (src: iregsp) n = - List.iter emit (Asmgen.addimm64 dst src n []) + List.iter emit (Asmgen.Asmgen_expand.addimm64 dst src n []) let expand_storeptr (src: ireg) (base: iregsp) ofs = - List.iter emit (Asmgen.storeptr src base ofs []) + List.iter emit (Asmgen.Asmgen_expand.storeptr src base ofs []) (* Handling of varargs *) @@ -84,8 +84,8 @@ let save_parameter_registers ir fr = while !i < 8 do let pos = 8*16 + !i*8 in if !i land 1 = 0 then begin - emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1), - ADimm(XSP, Z.of_uint pos))); + emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1), + Mint64, Mint64, ADimm(XSP, Z.of_uint pos))); i := !i + 2 end else begin emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos))); @@ -160,9 +160,9 @@ let expand_builtin_va_start r = let expand_annot_val kind txt targ args res = emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none)); match args, res with - | [BA(IR src)], BR(IR dst) -> - if dst <> src then emit (Pmov (RR1 dst, RR1 src)) - | [BA(FR src)], BR(FR dst) -> + | [BA(DR(IR src))], BR(DR(IR dst)) -> + if dst <> src then emit (Pmov (dst, src)) + | [BA(DR(FR src))], BR(DR(FR dst)) -> if dst <> src then emit (Pfmov (dst, src)) | _, _ -> raise (Error "ill-formed __builtin_annot_val") @@ -180,8 +180,8 @@ let offset_in_range ofs = let memcpy_small_arg sz arg tmp = match arg with - | BA (IR r) -> - (RR1 r, _0) + | BA (DR(IR r)) -> + (r, _0) | BA_addrstack ofs -> if offset_in_range ofs && offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz))) @@ -192,14 +192,14 @@ let memcpy_small_arg sz arg tmp = assert false let expand_builtin_memcpy_small sz al src dst = - let tsrc = if dst <> BA (IR X17) then X17 else X29 in - let tdst = if src <> BA (IR X29) then X29 else X17 in + let tsrc = if dst <> BA (DR(IR(RR1 X17))) then X17 else X29 in + let tdst = if src <> BA (DR(IR(RR1 X29))) then X29 else X17 in let (rsrc, osrc) = memcpy_small_arg sz src tsrc in let (rdst, odst) = memcpy_small_arg sz dst tdst in let rec copy osrc odst sz = if sz >= 16 then begin - emit (Pldp(X16, X30, ADimm(rsrc, osrc))); - emit (Pstp(X16, X30, ADimm(rdst, odst))); + emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc))); + emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst))); copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16) end else if sz >= 8 then begin @@ -226,7 +226,7 @@ let expand_builtin_memcpy_small sz al src dst = let memcpy_big_arg arg tmp = match arg with - | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r)) + | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r)) | BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs | _ -> assert false @@ -237,8 +237,8 @@ let expand_builtin_memcpy_big sz al src dst = let lbl = new_label () in expand_loadimm32 X15 (Z.of_uint (sz / 16)); emit (Plabel lbl); - emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16))); - emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16))); + emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16))); + emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16))); emit (Psubimm(W, RR1 X15, RR1 X15, _1)); emit (Pcbnz(W, X15, lbl)); if sz mod 16 >= 8 then begin @@ -270,29 +270,29 @@ let expand_builtin_memcpy sz al args = let expand_builtin_vload_common chunk base ofs res = let addr = ADimm(base, ofs) in match chunk, res with - | Mint8unsigned, BR(IR res) -> + | Mint8unsigned, BR(DR(IR(RR1 res))) -> emit (Pldrb(W, res, addr)) - | Mint8signed, BR(IR res) -> + | Mint8signed, BR(DR(IR(RR1 res))) -> emit (Pldrsb(W, res, addr)) - | Mint16unsigned, BR(IR res) -> + | Mint16unsigned, BR(DR(IR(RR1 res))) -> emit (Pldrh(W, res, addr)) - | Mint16signed, BR(IR res) -> + | Mint16signed, BR(DR(IR(RR1 res))) -> emit (Pldrsh(W, res, addr)) - | Mint32, BR(IR res) -> + | Mint32, BR(DR(IR(RR1 res))) -> emit (Pldrw(res, addr)) - | Mint64, BR(IR res) -> + | Mint64, BR(DR(IR(RR1 res))) -> emit (Pldrx(res, addr)) - | Mfloat32, BR(FR res) -> + | Mfloat32, BR(DR(FR res)) -> emit (Pldrs(res, addr)) - | Mfloat64, BR(FR res) -> + | Mfloat64, BR(DR(FR res)) -> emit (Pldrd(res, addr)) | _ -> assert false let expand_builtin_vload chunk args res = match args with - | [BA(IR addr)] -> - expand_builtin_vload_common chunk (RR1 addr) _0 res + | [BA(DR(IR addr))] -> + expand_builtin_vload_common chunk addr _0 res | [BA_addrstack ofs] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then expand_builtin_vload_common chunk XSP ofs res @@ -300,11 +300,11 @@ let expand_builtin_vload chunk args res = expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *) expand_builtin_vload_common chunk (RR1 X16) _0 res end - | [BA_addptr(BA(IR addr), BA_long ofs)] -> + | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then - expand_builtin_vload_common chunk (RR1 addr) ofs res + expand_builtin_vload_common chunk addr ofs res else begin - expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *) + expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *) expand_builtin_vload_common chunk (RR1 X16) _0 res end | _ -> @@ -313,25 +313,25 @@ let expand_builtin_vload chunk args res = let expand_builtin_vstore_common chunk base ofs src = let addr = ADimm(base, ofs) in match chunk, src with - | (Mint8signed | Mint8unsigned), BA(IR src) -> + | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) -> emit (Pstrb(src, addr)) - | (Mint16signed | Mint16unsigned), BA(IR src) -> + | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) -> emit (Pstrh(src, addr)) - | Mint32, BA(IR src) -> + | Mint32, BA(DR(IR(RR1 src))) -> emit (Pstrw(src, addr)) - | Mint64, BA(IR src) -> + | Mint64, BA(DR(IR(RR1 src))) -> emit (Pstrx(src, addr)) - | Mfloat32, BA(FR src) -> + | Mfloat32, BA(DR(FR src)) -> emit (Pstrs(src, addr)) - | Mfloat64, BA(FR src) -> + | Mfloat64, BA(DR(FR src)) -> emit (Pstrd(src, addr)) | _ -> assert false let expand_builtin_vstore chunk args = match args with - | [BA(IR addr); src] -> - expand_builtin_vstore_common chunk (RR1 addr) _0 src + | [BA(DR(IR addr)); src] -> + expand_builtin_vstore_common chunk addr _0 src | [BA_addrstack ofs; src] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then expand_builtin_vstore_common chunk XSP ofs src @@ -339,11 +339,11 @@ let expand_builtin_vstore chunk args = expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *) expand_builtin_vstore_common chunk (RR1 X16) _0 src end - | [BA_addptr(BA(IR addr), BA_long ofs); src] -> + | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then - expand_builtin_vstore_common chunk (RR1 addr) ofs src + expand_builtin_vstore_common chunk addr ofs src else begin - expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *) + expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *) expand_builtin_vstore_common chunk (RR1 X16) _0 src end | _ -> @@ -363,45 +363,47 @@ let expand_builtin_inline name args res = | "__builtin_unreachable", [], _ -> () (* Byte swap *) - | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev(W, res, a1)) - | "__builtin_bswap64", [BA(IR a1)], BR(IR res) -> + | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev(X, res, a1)) - | "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> + | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev16(W, res, a1)); emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF)) (* Count leading zeros, leading sign bits, trailing zeros *) - | "__builtin_clz", [BA(IR a1)], BR(IR res) -> + | "__builtin_clz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pclz(W, res, a1)) - | ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pclz(X, res, a1)) - | "__builtin_cls", [BA(IR a1)], BR(IR res) -> + | "__builtin_cls", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pcls(W, res, a1)) - | ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pcls(X, res, a1)) - | "__builtin_ctz", [BA(IR a1)], BR(IR res) -> + | "__builtin_ctz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prbit(W, res, a1)); emit (Pclz(W, res, res)) - | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prbit(X, res, a1)); emit (Pclz(X, res, res)) (* Float arithmetic *) - | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) -> + | "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) -> + emit (Pfabs(D, res, a1)) + | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(DR(FR a1))], BR(DR(FR res)) -> emit (Pfsqrt(D, res, a1)) - | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfmadd(D, res, a1, a2, a3)) - | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfmsub(D, res, a1, a2, a3)) - | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfnmadd(D, res, a1, a2, a3)) - | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfnmsub(D, res, a1, a2, a3)) - | "__builtin_fmax", [BA (FR a1); BA (FR a2)], BR (FR res) -> + | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) -> emit (Pfmax (D, res, a1, a2)) - | "__builtin_fmin", [BA (FR a1); BA (FR a2)], BR (FR res) -> + | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) -> emit (Pfmin (D, res, a1, a2)) (* Vararg *) - | "__builtin_va_start", [BA(IR a)], _ -> + | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ -> expand_builtin_va_start a (* Catch-all *) | _ -> @@ -441,7 +443,7 @@ let expand_instruction instr = expand_annot_val kind txt targ args res | EF_memcpy(sz, al) -> expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args - | EF_annot _ | EF_debug _ | EF_inline_asm _ -> + | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ -> emit instr | _ -> assert false @@ -468,9 +470,9 @@ let float_reg_to_dwarf = function | D30 -> 94 | D31 -> 95 let preg_to_dwarf = function - | IR r -> int_reg_to_dwarf r - | FR r -> float_reg_to_dwarf r - | SP -> 31 + | DR(IR(RR1 r)) -> int_reg_to_dwarf r + | DR(FR r) -> float_reg_to_dwarf r + | DR(IR(XSP)) -> 31 | _ -> assert false let expand_function id fn = diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index baaab6c4..6bb791c4 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -1,168 +1,46 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, Collège de France and INRIA Paris *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Translation from Mach to AArch64. *) +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Justus Fasse UGA, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) Require Import Recdef Coqlib Zwf Zbits. Require Import Errors AST Integers Floats Op. -Require Import Locations Mach Asm. -Require SelectOp. +Require Import Locations Compopts. +Require Import Mach Asm Asmblock Asmblockgen Machblockgen PostpassScheduling. -Local Open Scope string_scope. -Local Open Scope list_scope. Local Open Scope error_monad_scope. -(** Alignment check for symbols *) - -Parameter symbol_is_aligned : ident -> Z -> bool. -(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *) - -(** Extracting integer or float registers. *) - -Definition ireg_of (r: mreg) : res ireg := - match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgen.ireg_of") end. - -Definition freg_of (r: mreg) : res freg := - match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end. - -(** Recognition of immediate arguments for logical integer operations.*) - -(** Valid immediate arguments are repetitions of a bit pattern [B] - of length [e] = 2, 4, 8, 16, 32 or 64. - The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*] - but must not be all zeros or all ones. *) - -(** The following automaton recognizes [0*1*0*|1*0*1*]. -<< - 0 1 0 - / \ / \ / \ - \ / \ / \ / - -0--> [B] --1--> [D] --0--> [F] - / - [A] - \ - -1--> [C] --0--> [E] --1--> [G] - / \ / \ / \ - \ / \ / \ / - 1 0 1 ->> -*) - -Module Automaton. - -Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad. - -Definition start := SA. - -Definition next (s: state) (b: bool) := - match s, b with - | SA,false => SB | SA,true => SC - | SB,false => SB | SB,true => SD - | SC,false => SE | SC,true => SC - | SD,false => SF | SD,true => SD - | SE,false => SE | SE,true => SG - | SF,false => SF | SF,true => Sbad - | SG,false => Sbad | SG,true => SG - | Sbad,_ => Sbad - end. - -Definition accepting (s: state) := - match s with - | SA | SB | SC | SD | SE | SF | SG => true - | Sbad => false - end. - -Fixpoint run (len: nat) (s: state) (x: Z) : bool := - match len with - | Datatypes.O => accepting s - | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x) - end. - -End Automaton. - -(** The following function determines the candidate length [e], - ensuring that [x] is a repetition [BB...B] - of a bit pattern [B] of length [e]. *) - -Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat := - (** [test n] checks that the low [2n] bits of [x] are of the - form [BB], that is, two occurrences of the same [n] bits *) - let test (n: Z) : bool := - Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in - (** If [test n] fails, we know that the candidate length [e] is - at least [2n]. Hence we test with decreasing values of [n]: - 32, 16, 8, 4, 2. *) - if sixtyfour && negb (test 32) then 64%nat - else if negb (test 16) then 32%nat - else if negb (test 8) then 16%nat - else if negb (test 4) then 8%nat - else if negb (test 2) then 4%nat - else 2%nat. +(** Functions called by the Asmexpand ocaml file, inspired and adapted from Asmblockgen.v *) -(** A valid logical immediate is -- neither [0] nor [-1]; -- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e] -- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*]. -*) +Module Asmgen_expand. -Definition is_logical_imm32 (x: int) : bool := - negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) && - Automaton.run (logical_imm_length (Int.unsigned x) false) - Automaton.start (Int.unsigned x). - -Definition is_logical_imm64 (x: int64) : bool := - negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) && - Automaton.run (logical_imm_length (Int64.unsigned x) true) - Automaton.start (Int64.unsigned x). - -(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *) - -Definition is_arith_imm32 (x: int) : bool := - Int.eq x (Int.zero_ext 12 x) - || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)). - -Definition is_arith_imm64 (x: int64) : bool := - Int64.eq x (Int64.zero_ext 12 x) - || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)). - -(** Decompose integer literals into 16-bit fragments *) - -Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) := - match N with - | Datatypes.O => nil - | Datatypes.S N => - let frag := Zzero_ext 16 (Z.shiftr n p) in - if Z.eqb frag 0 then - decompose_int N n (p + 16) - else - (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16) - end. - -Definition negate_decomposition (l: list (Z * Z)) := - List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l. +(* Load immediate *) Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code := - List.fold_right (fun np k => Pmovk sz rd (fst np) (snd np) :: k) k l. + List.fold_right (fun np k => Asm.Pmovk sz rd (fst np) (snd np) :: k) k l. Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code := match l with - | nil => Pmovz sz rd 0 0 :: k - | (n1, p1) :: l => Pmovz sz rd n1 p1 :: loadimm_k sz rd l k + | nil => Asm.Pmovz sz rd 0 0 :: k + | (n1, p1) :: l => Asm.Pmovz sz rd n1 p1 :: loadimm_k sz rd l k end. Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code := match l with - | nil => Pmovn sz rd 0 0 :: k - | (n1, p1) :: l => Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k + | nil => Asm.Pmovn sz rd 0 0 :: k + | (n1, p1) :: l => Asm.Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k end. Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code := @@ -175,15 +53,15 @@ Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code := Definition loadimm32 (rd: ireg) (n: int) (k: code) : code := if is_logical_imm32 n - then Porrimm W rd XZR (Int.unsigned n) :: k + then Asm.Porrimm W rd XZR (Int.unsigned n) :: k else loadimm W rd (Int.unsigned n) k. Definition loadimm64 (rd: ireg) (n: int64) (k: code) : code := if is_logical_imm64 n - then Porrimm X rd XZR (Int64.unsigned n) :: k + then Asm.Porrimm X rd XZR (Int64.unsigned n) :: k else loadimm X rd (Int64.unsigned n) k. -(** Add immediate *) +(* Add immediate *) Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction) (rd r1: iregsp) (n: Z) (k: code) := @@ -196,962 +74,393 @@ Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction) else insn rd r1 nhi :: insn rd rd nlo :: k. -Definition addimm32 (rd r1: ireg) (n: int) (k: code) : code := - let m := Int.neg n in - if Int.eq n (Int.zero_ext 24 n) then - addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k - else if Int.eq m (Int.zero_ext 24 m) then - addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k - else if Int.lt n Int.zero then - loadimm32 X16 m (Psub W rd r1 X16 SOnone :: k) - else - loadimm32 X16 n (Padd W rd r1 X16 SOnone :: k). - Definition addimm64 (rd r1: iregsp) (n: int64) (k: code) : code := let m := Int64.neg n in if Int64.eq n (Int64.zero_ext 24 n) then - addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k + addimm_aux (Asm.Paddimm X) rd r1 (Int64.unsigned n) k else if Int64.eq m (Int64.zero_ext 24 m) then - addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k + addimm_aux (Asm.Psubimm X) rd r1 (Int64.unsigned m) k else if Int64.lt n Int64.zero then - loadimm64 X16 m (Psubext rd r1 X16 (EOuxtx Int.zero) :: k) - else - loadimm64 X16 n (Paddext rd r1 X16 (EOuxtx Int.zero) :: k). - -(** Logical immediate *) - -Definition logicalimm32 - (insn1: ireg -> ireg0 -> Z -> instruction) - (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction) - (rd r1: ireg) (n: int) (k: code) : code := - if is_logical_imm32 n - then insn1 rd r1 (Int.unsigned n) :: k - else loadimm32 X16 n (insn2 rd r1 X16 SOnone :: k). - -Definition logicalimm64 - (insn1: ireg -> ireg0 -> Z -> instruction) - (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction) - (rd r1: ireg) (n: int64) (k: code) : code := - if is_logical_imm64 n - then insn1 rd r1 (Int64.unsigned n) :: k - else loadimm64 X16 n (insn2 rd r1 X16 SOnone :: k). - -(** Sign- or zero-extended arithmetic *) - -Definition transl_extension (ex: extension) (a: int) : extend_op := - match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end. - -Definition move_extended_base - (rd: ireg) (r1: ireg) (ex: extension) (k: code) : code := - match ex with - | Xsgn32 => Pcvtsw2x rd r1 :: k - | Xuns32 => Pcvtuw2x rd r1 :: k - end. - -Definition move_extended - (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: code) : code := - if Int.eq a Int.zero then - move_extended_base rd r1 ex k - else - move_extended_base rd r1 ex (Padd X rd XZR rd (SOlsl a) :: k). - -Definition arith_extended - (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction) - (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction) - (rd r1 r2: ireg) (ex: extension) (a: int) (k: code) : code := - if Int.ltu a (Int.repr 5) then - insnX rd r1 r2 (transl_extension ex a) :: k - else - move_extended_base X16 r2 ex (insnS rd r1 X16 (SOlsl a) :: k). - -(** Extended right shift *) - -Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code := - if Int.eq n Int.zero then - Pmov rd r1 :: k - else - Porr W X16 XZR r1 (SOasr (Int.repr 31)) :: - Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) :: - Porr W rd XZR X16 (SOasr n) :: k. - -Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code := - if Int.eq n Int.zero then - Pmov rd r1 :: k - else - Porr X X16 XZR r1 (SOasr (Int.repr 63)) :: - Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) :: - Porr X rd XZR X16 (SOasr n) :: k. - -(** Load the address [id + ofs] in [rd] *) - -Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code := - if SelectOp.symbol_is_relocatable id then - if Ptrofs.eq ofs Ptrofs.zero then - Ploadsymbol rd id :: k - else - Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k + loadimm64 X16 m (Asm.Psubext rd r1 X16 (EOuxtx Int.zero) :: k) else - Padrp rd id ofs :: Paddadr rd rd id ofs :: k. + loadimm64 X16 n (Asm.Paddext rd r1 X16 (EOuxtx Int.zero) :: k). -(** Translate a shifted operand *) +(** Register-indexed stores *) -Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op := - match s with - | Slsl => SOlsl a - | Slsr => SOlsr a - | Sasr => SOasr a - | Sror => SOror a - end. - -(** Translation of a condition. Prepends to [k] the instructions - that evaluate the condition and leave its boolean result in one of - the bits of the condition register. The bit in question is - determined by the [crbit_for_cond] function. *) +Definition indexed_memory_access (insn: Asm.addressing -> instruction) + (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) := + let ofs := Ptrofs.to_int64 ofs in + if offset_representable sz ofs + then insn (ADimm base ofs) :: k + else loadimm64 X16 ofs (insn (ADreg base X16) :: k). -Definition transl_cond - (cond: condition) (args: list mreg) (k: code) := - match cond, args with - | (Ccomp c | Ccompu c), a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pcmp W r1 r2 SOnone :: k) - | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pcmp W r1 r2 (transl_shift s a) :: k) - | (Ccompimm c n | Ccompuimm c n), a1 :: nil => - do r1 <- ireg_of a1; - OK (if is_arith_imm32 n then - Pcmpimm W r1 (Int.unsigned n) :: k - else if is_arith_imm32 (Int.neg n) then - Pcmnimm W r1 (Int.unsigned (Int.neg n)) :: k - else - loadimm32 X16 n (Pcmp W r1 X16 SOnone :: k)) - | (Cmaskzero n | Cmasknotzero n), a1 :: nil => - do r1 <- ireg_of a1; - OK (if is_logical_imm32 n then - Ptstimm W r1 (Int.unsigned n) :: k - else - loadimm32 X16 n (Ptst W r1 X16 SOnone :: k)) - | (Ccompl c | Ccomplu c), a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pcmp X r1 r2 SOnone :: k) - | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pcmp X r1 r2 (transl_shift s a) :: k) - | (Ccomplimm c n | Ccompluimm c n), a1 :: nil => - do r1 <- ireg_of a1; - OK (if is_arith_imm64 n then - Pcmpimm X r1 (Int64.unsigned n) :: k - else if is_arith_imm64 (Int64.neg n) then - Pcmnimm X r1 (Int64.unsigned (Int64.neg n)) :: k - else - loadimm64 X16 n (Pcmp X r1 X16 SOnone :: k)) - | (Cmasklzero n | Cmasklnotzero n), a1 :: nil => - do r1 <- ireg_of a1; - OK (if is_logical_imm64 n then - Ptstimm X r1 (Int64.unsigned n) :: k - else - loadimm64 X16 n (Ptst X r1 X16 SOnone :: k)) - | Ccompf cmp, a1 :: a2 :: nil => - do r1 <- freg_of a1; do r2 <- freg_of a2; - OK (Pfcmp D r1 r2 :: k) - | Cnotcompf cmp, a1 :: a2 :: nil => - do r1 <- freg_of a1; do r2 <- freg_of a2; - OK (Pfcmp D r1 r2 :: k) - | Ccompfzero cmp, a1 :: nil => - do r1 <- freg_of a1; - OK (Pfcmp0 D r1 :: k) - | Cnotcompfzero cmp, a1 :: nil => - do r1 <- freg_of a1; - OK (Pfcmp0 D r1 :: k) - | Ccompfs cmp, a1 :: a2 :: nil => - do r1 <- freg_of a1; do r2 <- freg_of a2; - OK (Pfcmp S r1 r2 :: k) - | Cnotcompfs cmp, a1 :: a2 :: nil => - do r1 <- freg_of a1; do r2 <- freg_of a2; - OK (Pfcmp S r1 r2 :: k) - | Ccompfszero cmp, a1 :: nil => - do r1 <- freg_of a1; - OK (Pfcmp0 S r1 :: k) - | Cnotcompfszero cmp, a1 :: nil => - do r1 <- freg_of a1; - OK (Pfcmp0 S r1 :: k) - | _, _ => - Error(msg "Asmgen.transl_cond") - end. +Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) := + indexed_memory_access (Asm.Pstrx src) 8 base ofs k. -Definition cond_for_signed_cmp (cmp: comparison) := - match cmp with - | Ceq => TCeq - | Cne => TCne - | Clt => TClt - | Cle => TCle - | Cgt => TCgt - | Cge => TCge - end. +End Asmgen_expand. -Definition cond_for_unsigned_cmp (cmp: comparison) := - match cmp with - | Ceq => TCeq - | Cne => TCne - | Clt => TClo - | Cle => TCls - | Cgt => TChi - | Cge => TChs - end. +(** * Translation from Asmblock to assembly language + Inspired from the KVX backend (see kvx/Asm.v and kvx/Asmgen.v) *) -Definition cond_for_float_cmp (cmp: comparison) := - match cmp with - | Ceq => TCeq - | Cne => TCne - | Clt => TCmi - | Cle => TCls - | Cgt => TCgt - | Cge => TCge - end. +Module Asmblock_TRANSF. +(* STUB *) -Definition cond_for_float_not_cmp (cmp: comparison) := - match cmp with - | Ceq => TCne - | Cne => TCeq - | Clt => TCpl - | Cle => TChi - | Cgt => TCle - | Cge => TClt +Definition ireg_of_preg (p : Asm.preg) : res ireg := + match p with + | DR (IR (RR1 r)) => OK r + | _ => Error (msg "Asmgen.ireg_of_preg") end. -Definition cond_for_cond (cond: condition) := - match cond with - | Ccomp cmp => cond_for_signed_cmp cmp - | Ccompu cmp => cond_for_unsigned_cmp cmp - | Ccompshift cmp s a => cond_for_signed_cmp cmp - | Ccompushift cmp s a => cond_for_unsigned_cmp cmp - | Ccompimm cmp n => cond_for_signed_cmp cmp - | Ccompuimm cmp n => cond_for_unsigned_cmp cmp - | Cmaskzero n => TCeq - | Cmasknotzero n => TCne - | Ccompl cmp => cond_for_signed_cmp cmp - | Ccomplu cmp => cond_for_unsigned_cmp cmp - | Ccomplshift cmp s a => cond_for_signed_cmp cmp - | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp - | Ccomplimm cmp n => cond_for_signed_cmp cmp - | Ccompluimm cmp n => cond_for_unsigned_cmp cmp - | Cmasklzero n => TCeq - | Cmasklnotzero n => TCne - | Ccompf cmp => cond_for_float_cmp cmp - | Cnotcompf cmp => cond_for_float_not_cmp cmp - | Ccompfzero cmp => cond_for_float_cmp cmp - | Cnotcompfzero cmp => cond_for_float_not_cmp cmp - | Ccompfs cmp => cond_for_float_cmp cmp - | Cnotcompfs cmp => cond_for_float_not_cmp cmp - | Ccompfszero cmp => cond_for_float_cmp cmp - | Cnotcompfszero cmp => cond_for_float_not_cmp cmp +Definition freg_of_preg (p : Asm.preg) : res freg := + match p with + | DR (FR r) => OK r + | _ => Error (msg "Asmgen.freg_of_preg") end. -(** Translation of a conditional branch. Prepends to [k] the instructions - that evaluate the condition and ranch to [lbl] if it holds. - We recognize some conditional branches that can be implemented - without setting then testing condition flags. *) - -Definition transl_cond_branch_default - (c: condition) (args: list mreg) (lbl: label) (k: code) := - transl_cond c args (Pbc (cond_for_cond c) lbl :: k). - -Definition transl_cond_branch - (c: condition) (args: list mreg) (lbl: label) (k: code) := - match args, c with - | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) => - if Int.eq n Int.zero - then (do r1 <- ireg_of a1; OK (Pcbnz W r1 lbl :: k)) - else transl_cond_branch_default c args lbl k - | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) => - if Int.eq n Int.zero - then (do r1 <- ireg_of a1; OK (Pcbz W r1 lbl :: k)) - else transl_cond_branch_default c args lbl k - | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) => - if Int64.eq n Int64.zero - then (do r1 <- ireg_of a1; OK (Pcbnz X r1 lbl :: k)) - else transl_cond_branch_default c args lbl k - | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) => - if Int64.eq n Int64.zero - then (do r1 <- ireg_of a1; OK (Pcbz X r1 lbl :: k)) - else transl_cond_branch_default c args lbl k - | a1 :: nil, Cmaskzero n => - match Int.is_power2 n with - | Some bit => do r1 <- ireg_of a1; OK (Ptbz W r1 bit lbl :: k) - | None => transl_cond_branch_default c args lbl k - end - | a1 :: nil, Cmasknotzero n => - match Int.is_power2 n with - | Some bit => do r1 <- ireg_of a1; OK (Ptbnz W r1 bit lbl :: k) - | None => transl_cond_branch_default c args lbl k - end - | a1 :: nil, Cmasklzero n => - match Int64.is_power2' n with - | Some bit => do r1 <- ireg_of a1; OK (Ptbz X r1 bit lbl :: k) - | None => transl_cond_branch_default c args lbl k - end - | a1 :: nil, Cmasklnotzero n => - match Int64.is_power2' n with - | Some bit => do r1 <- ireg_of a1; OK (Ptbnz X r1 bit lbl :: k) - | None => transl_cond_branch_default c args lbl k - end - | _, _ => - transl_cond_branch_default c args lbl k +Definition iregsp_of_preg (p : Asm.preg) : res iregsp := + match p with + | DR (IR r) => OK r + | _ => Error (msg "Asmgen.iregsp_of_preg") end. - -(** Translation of the arithmetic operation [res <- op(args)]. - The corresponding instructions are prepended to [k]. *) -Definition transl_op - (op: operation) (args: list mreg) (res: mreg) (k: code) := - match op, args with - | Omove, a1 :: nil => - match preg_of res, preg_of a1 with - | IR r, IR a => OK (Pmov r a :: k) - | FR r, FR a => OK (Pfmov r a :: k) - | _ , _ => Error(msg "Asmgen.Omove") - end - | Ointconst n, nil => - do rd <- ireg_of res; - OK (loadimm32 rd n k) - | Olongconst n, nil => - do rd <- ireg_of res; - OK (loadimm64 rd n k) - | Ofloatconst f, nil => - do rd <- freg_of res; - OK (if Float.eq_dec f Float.zero - then Pfmovi D rd XZR :: k - else Pfmovimmd rd f :: k) - | Osingleconst f, nil => - do rd <- freg_of res; - OK (if Float32.eq_dec f Float32.zero - then Pfmovi S rd XZR :: k - else Pfmovimms rd f :: k) - | Oaddrsymbol id ofs, nil => - do rd <- ireg_of res; - OK (loadsymbol rd id ofs k) - | Oaddrstack ofs, nil => - do rd <- ireg_of res; - OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k) -(** 32-bit integer arithmetic *) - | Oshift s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porr W rd XZR r1 (transl_shift s a) :: k) - | Oadd, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Padd W rd r1 r2 SOnone :: k) - | Oaddshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Padd W rd r1 r2 (transl_shift s a) :: k) - | Oaddimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (addimm32 rd r1 n k) - | Oneg, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psub W rd XZR r1 SOnone :: k) - | Onegshift s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psub W rd XZR r1 (transl_shift s a) :: k) - | Osub, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psub W rd r1 r2 SOnone :: k) - | Osubshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psub W rd r1 r2 (transl_shift s a) :: k) - | Omul, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pmadd W rd r1 r2 XZR :: k) - | Omuladd, a1 :: a2 :: a3 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; - OK (Pmadd W rd r2 r3 r1 :: k) - | Omulsub, a1 :: a2 :: a3 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; - OK (Pmsub W rd r2 r3 r1 :: k) - | Odiv, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psdiv W rd r1 r2 :: k) - | Odivu, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pudiv W rd r1 r2 :: k) - | Oand, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pand W rd r1 r2 SOnone :: k) - | Oandshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pand W rd r1 r2 (transl_shift s a) :: k) - | Oandimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k) - | Oor, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porr W rd r1 r2 SOnone :: k) - | Oorshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porr W rd r1 r2 (transl_shift s a) :: k) - | Oorimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k) - | Oxor, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peor W rd r1 r2 SOnone :: k) - | Oxorshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peor W rd r1 r2 (transl_shift s a) :: k) - | Oxorimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k) - | Onot, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porn W rd XZR r1 SOnone :: k) - | Onotshift s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porn W rd XZR r1 (transl_shift s a) :: k) - | Obic, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pbic W rd r1 r2 SOnone :: k) - | Obicshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pbic W rd r1 r2 (transl_shift s a) :: k) - | Oorn, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porn W rd r1 r2 SOnone :: k) - | Oornshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porn W rd r1 r2 (transl_shift s a) :: k) - | Oeqv, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peon W rd r1 r2 SOnone :: k) - | Oeqvshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peon W rd r1 r2 (transl_shift s a) :: k) - | Oshl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Plslv W rd r1 r2 :: k) - | Oshr, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pasrv W rd r1 r2 :: k) - | Oshru, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Plsrv W rd r1 r2 :: k) - | Oshrximm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (shrx32 rd r1 n k) - | Ozext s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfiz W rd r1 Int.zero s :: k) - | Osext s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfiz W rd r1 Int.zero s :: k) - | Oshlzext s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k) - | Oshlsext s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k) - | Ozextshr a s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k) - | Osextshr a s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k) -(** 64-bit integer arithmetic *) - | Oshiftl s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porr X rd XZR r1 (transl_shift s a) :: k) - | Oextend x a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (move_extended rd r1 x a k) - (* [Omakelong] and [Ohighlong] should not occur *) - | Olowlong, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - assertion (ireg_eq rd r1); - OK (Pcvtx2w rd :: k) - | Oaddl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Padd X rd r1 r2 SOnone :: k) - | Oaddlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Padd X rd r1 r2 (transl_shift s a) :: k) - | Oaddlext x a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (arith_extended Paddext (Padd X) rd r1 r2 x a k) - | Oaddlimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (addimm64 rd r1 n k) - | Onegl, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psub X rd XZR r1 SOnone :: k) - | Oneglshift s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psub X rd XZR r1 (transl_shift s a) :: k) - | Osubl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psub X rd r1 r2 SOnone :: k) - | Osublshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psub X rd r1 r2 (transl_shift s a) :: k) - | Osublext x a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (arith_extended Psubext (Psub X) rd r1 r2 x a k) - | Omull, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pmadd X rd r1 r2 XZR :: k) - | Omulladd, a1 :: a2 :: a3 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; - OK (Pmadd X rd r2 r3 r1 :: k) - | Omullsub, a1 :: a2 :: a3 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; - OK (Pmsub X rd r2 r3 r1 :: k) - | Omullhs, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psmulh rd r1 r2 :: k) - | Omullhu, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pumulh rd r1 r2 :: k) - | Odivl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psdiv X rd r1 r2 :: k) - | Odivlu, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pudiv X rd r1 r2 :: k) - | Oandl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pand X rd r1 r2 SOnone :: k) - | Oandlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pand X rd r1 r2 (transl_shift s a) :: k) - | Oandlimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k) - | Oorl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porr X rd r1 r2 SOnone :: k) - | Oorlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porr X rd r1 r2 (transl_shift s a) :: k) - | Oorlimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k) - | Oxorl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peor X rd r1 r2 SOnone :: k) - | Oxorlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peor X rd r1 r2 (transl_shift s a) :: k) - | Oxorlimm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k) - | Onotl, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porn X rd XZR r1 SOnone :: k) - | Onotlshift s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Porn X rd XZR r1 (transl_shift s a) :: k) - | Obicl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pbic X rd r1 r2 SOnone :: k) - | Obiclshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pbic X rd r1 r2 (transl_shift s a) :: k) - | Oornl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porn X rd r1 r2 SOnone :: k) - | Oornlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Porn X rd r1 r2 (transl_shift s a) :: k) - | Oeqvl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peon X rd r1 r2 SOnone :: k) - | Oeqvlshift s a, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Peon X rd r1 r2 (transl_shift s a) :: k) - | Oshll, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Plslv X rd r1 r2 :: k) - | Oshrl, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pasrv X rd r1 r2 :: k) - | Oshrlu, a1 :: a2 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Plsrv X rd r1 r2 :: k) - | Oshrlximm n, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (shrx64 rd r1 n k) - | Ozextl s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfiz X rd r1 Int.zero s :: k) - | Osextl s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfiz X rd r1 Int.zero s :: k) - | Oshllzext s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k) - | Oshllsext s a, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k) - | Ozextshrl a s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Pubfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k) - | Osextshrl a s, a1 :: nil => - do rd <- ireg_of res; do r1 <- ireg_of a1; - OK (Psbfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k) -(** 64-bit floating-point arithmetic *) - | Onegf, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfneg D rd rs :: k) - | Oabsf, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfabs D rd rs :: k) - | Oaddf, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfadd D rd rs1 rs2 :: k) - | Osubf, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfsub D rd rs1 rs2 :: k) - | Omulf, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfmul D rd rs1 rs2 :: k) - | Odivf, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfdiv D rd rs1 rs2 :: k) -(** 32-bit floating-point arithmetic *) - | Onegfs, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfneg S rd rs :: k) - | Oabsfs, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfabs S rd rs :: k) - | Oaddfs, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfadd S rd rs1 rs2 :: k) - | Osubfs, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfsub S rd rs1 rs2 :: k) - | Omulfs, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfmul S rd rs1 rs2 :: k) - | Odivfs, a1 :: a2 :: nil => - do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2; - OK (Pfdiv S rd rs1 rs2 :: k) - | Osingleoffloat, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfcvtsd rd rs :: k) - | Ofloatofsingle, a1 :: nil => - do rd <- freg_of res; do rs <- freg_of a1; - OK (Pfcvtds rd rs :: k) -(** Conversions between int and float *) - | Ointoffloat, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzs W D rd rs :: k) - | Ointuoffloat, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzu W D rd rs :: k) - | Ofloatofint, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pscvtf D W rd rs :: k) - | Ofloatofintu, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pucvtf D W rd rs :: k) - | Ointofsingle, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzs W S rd rs :: k) - | Ointuofsingle, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzu W S rd rs :: k) - | Osingleofint, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pscvtf S W rd rs :: k) - | Osingleofintu, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pucvtf S W rd rs :: k) - | Olongoffloat, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzs X D rd rs :: k) - | Olonguoffloat, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzu X D rd rs :: k) - | Ofloatoflong, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pscvtf D X rd rs :: k) - | Ofloatoflongu, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pucvtf D X rd rs :: k) - | Olongofsingle, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzs X S rd rs :: k) - | Olonguofsingle, a1 :: nil => - do rd <- ireg_of res; do rs <- freg_of a1; - OK (Pfcvtzu X S rd rs :: k) - | Osingleoflong, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pscvtf S X rd rs :: k) - | Osingleoflongu, a1 :: nil => - do rd <- freg_of res; do rs <- ireg_of a1; - OK (Pucvtf S X rd rs :: k) -(** Boolean tests *) - | Ocmp c, _ => - do rd <- ireg_of res; - transl_cond c args (Pcset rd (cond_for_cond c) :: k) -(** Conditional move *) - | Osel cmp ty, a1 :: a2 :: args => - match preg_of res with - | IR r => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) :: k) - | FR r => - do r1 <- freg_of a1; do r2 <- freg_of a2; - transl_cond cmp args (Pfsel r r1 r2 (cond_for_cond cmp) :: k) - | _ => - Error(msg "Asmgen.Osel") +Definition basic_to_instruction (b: basic) : res Asm.instruction := + match b with + (* Aithmetic instructions *) + | PArith (PArithP (Padrp id ofs) rd) => do rd' <- ireg_of_preg rd; + OK (Asm.Padrp rd' id ofs) + | PArith (PArithP (Pmovz sz n pos) rd) => do rd' <- ireg_of_preg rd; + OK (Asm.Pmovz sz rd' n pos) + | PArith (PArithP (Pmovn sz n pos) rd) => do rd' <- ireg_of_preg rd; + OK (Asm.Pmovn sz rd' n pos) + | PArith (PArithP (Pfmovimms f) rd) => do rd' <- freg_of_preg rd; + OK (Asm.Pfmovimms rd' f) + | PArith (PArithP (Pfmovimmd f) rd) => do rd' <- freg_of_preg rd; + OK (Asm.Pfmovimmd rd' f) + + | PArith (PArithPP (Pmovk sz n pos) rd rs) => + if (Asm.preg_eq rd rs) then ( + do rd' <- ireg_of_preg rd; + OK (Asm.Pmovk sz rd' n pos) + ) else + Error (msg "Asmgen.basic_to_instruction: Pmovk uses a single register as both source and target") + | PArith (PArithPP Pmov rd rs) => do rd' <- iregsp_of_preg rd; + do rs' <- iregsp_of_preg rs; + OK (Asm.Pmov rd' rs') + | PArith (PArithPP (Paddadr id ofs) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Paddadr rd' rs' id ofs) + | PArith (PArithPP (Psbfiz sz r s) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Psbfiz sz rd' rs' r s) + | PArith (PArithPP (Psbfx sz r s) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Psbfx sz rd' rs' r s) + | PArith (PArithPP (Pubfiz sz r s) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Pubfiz sz rd' rs' r s) + | PArith (PArithPP (Pubfx sz r s) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Pubfx sz rd' rs' r s) + | PArith (PArithPP Pfmov rd rs) => do rd' <- freg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfmov rd' rs') + | PArith (PArithPP Pfcvtds rd rs) => do rd' <- freg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfcvtds rd' rs') + | PArith (PArithPP Pfcvtsd rd rs) => do rd' <- freg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfcvtsd rd' rs') + | PArith (PArithPP (Pfabs sz) rd rs) => do rd' <- freg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfabs sz rd' rs') + | PArith (PArithPP (Pfneg sz) rd rs) => do rd' <- freg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfneg sz rd' rs') + | PArith (PArithPP (Pscvtf fsz isz) rd rs) => do rd' <- freg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Pscvtf fsz isz rd' rs') + | PArith (PArithPP (Pucvtf fsz isz) rd rs) => do rd' <- freg_of_preg rd; + do rs' <- ireg_of_preg rs; + OK (Asm.Pucvtf fsz isz rd' rs') + | PArith (PArithPP (Pfcvtzs isz fsz) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfcvtzs isz fsz rd' rs') + | PArith (PArithPP (Pfcvtzu isz fsz) rd rs) => do rd' <- ireg_of_preg rd; + do rs' <- freg_of_preg rs; + OK (Asm.Pfcvtzu isz fsz rd' rs') + | PArith (PArithPP (Paddimm sz n) rd rs) => do rd' <- iregsp_of_preg rd; + do rs' <- iregsp_of_preg rs; + OK (Asm.Paddimm sz rd' rs' n) + | PArith (PArithPP (Psubimm sz n) rd rs) => do rd' <- iregsp_of_preg rd; + do rs' <- iregsp_of_preg rs; + OK (Asm.Psubimm sz rd' rs' n) + + | PArith (PArithPPP (Pasrv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Pasrv sz rd' r1' r2') + | PArith (PArithPPP (Plslv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Plslv sz rd' r1' r2') + | PArith (PArithPPP (Plsrv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Plsrv sz rd' r1' r2') + | PArith (PArithPPP (Prorv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Prorv sz rd' r1' r2') + | PArith (PArithPPP Psmulh rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Psmulh rd' r1' r2') + | PArith (PArithPPP Pumulh rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Pumulh rd' r1' r2') + | PArith (PArithPPP (Psdiv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Psdiv sz rd' r1' r2') + | PArith (PArithPPP (Pudiv sz) rd r1 r2) => do rd' <- ireg_of_preg rd; + do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Pudiv sz rd' r1' r2') + | PArith (PArithPPP (Paddext x) rd r1 r2) => do rd' <- iregsp_of_preg rd; + do r1' <- iregsp_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Paddext rd' r1' r2' x) + | PArith (PArithPPP (Psubext x) rd r1 r2) => do rd' <- iregsp_of_preg rd; + do r1' <- iregsp_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Psubext rd' r1' r2' x) + | PArith (PArithPPP (Pfadd sz) rd r1 r2) => do rd' <- freg_of_preg rd; + do r1' <- freg_of_preg r1; + do r2' <- freg_of_preg r2; + OK (Asm.Pfadd sz rd' r1' r2') + | PArith (PArithPPP (Pfdiv sz) rd r1 r2) => do rd' <- freg_of_preg rd; + do r1' <- freg_of_preg r1; + do r2' <- freg_of_preg r2; + OK (Asm.Pfdiv sz rd' r1' r2') + | PArith (PArithPPP (Pfmul sz) rd r1 r2) => do rd' <- freg_of_preg rd; + do r1' <- freg_of_preg r1; + do r2' <- freg_of_preg r2; + OK (Asm.Pfmul sz rd' r1' r2') + | PArith (PArithPPP (Pfsub sz) rd r1 r2) => do rd' <- freg_of_preg rd; + do r1' <- freg_of_preg r1; + do r2' <- freg_of_preg r2; + OK (Asm.Pfsub sz rd' r1' r2') + + | PArith (PArithRR0 (Pandimm sz n) rd r1) => OK (Asm.Pandimm sz rd r1 n) + | PArith (PArithRR0 (Peorimm sz n) rd r1) => OK (Asm.Peorimm sz rd r1 n) + | PArith (PArithRR0 (Porrimm sz n) rd r1) => OK (Asm.Porrimm sz rd r1 n) + + + | PArith (PArithRR0R (Padd sz s) rd r1 r2) => OK (Asm.Padd sz rd r1 r2 s) + | PArith (PArithRR0R (Psub sz s) rd r1 r2) => OK (Asm.Psub sz rd r1 r2 s) + | PArith (PArithRR0R (Pand sz s) rd r1 r2) => OK (Asm.Pand sz rd r1 r2 s) + | PArith (PArithRR0R (Pbic sz s) rd r1 r2) => OK (Asm.Pbic sz rd r1 r2 s) + | PArith (PArithRR0R (Peon sz s) rd r1 r2) => OK (Asm.Peon sz rd r1 r2 s) + | PArith (PArithRR0R (Peor sz s) rd r1 r2) => OK (Asm.Peor sz rd r1 r2 s) + | PArith (PArithRR0R (Porr sz s) rd r1 r2) => OK (Asm.Porr sz rd r1 r2 s) + | PArith (PArithRR0R (Porn sz s) rd r1 r2) => OK (Asm.Porn sz rd r1 r2 s) + + | PArith (PArithARRRR0 (Pmadd sz) rd r1 r2 r3) => OK (Asm.Pmadd sz rd r1 r2 r3) + | PArith (PArithARRRR0 (Pmsub sz) rd r1 r2 r3) => OK (Asm.Pmsub sz rd r1 r2 r3) + + | PArith (PArithComparisonPP (Pcmpext x) r1 r2) => do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Pcmpext r1' r2' x) + | PArith (PArithComparisonPP (Pcmnext x) r1 r2) => do r1' <- ireg_of_preg r1; + do r2' <- ireg_of_preg r2; + OK (Asm.Pcmnext r1' r2' x) + | PArith (PArithComparisonPP (Pfcmp sz) r1 r2) => do r1' <- freg_of_preg r1; + do r2' <- freg_of_preg r2; + OK (Asm.Pfcmp sz r1' r2') + + | PArith (PArithComparisonR0R (Pcmp is s) r1 r2) => OK (Asm.Pcmp is r1 r2 s) + | PArith (PArithComparisonR0R (Pcmn is s) r1 r2) => OK (Asm.Pcmn is r1 r2 s) + | PArith (PArithComparisonR0R (Ptst is s) r1 r2) => OK (Asm.Ptst is r1 r2 s) + + | PArith (PArithComparisonP (Pcmpimm sz n) r1) => do r1' <- ireg_of_preg r1; + OK (Asm.Pcmpimm sz r1' n) + | PArith (PArithComparisonP (Pcmnimm sz n) r1) => do r1' <- ireg_of_preg r1; + OK (Asm.Pcmnimm sz r1' n) + | PArith (PArithComparisonP (Ptstimm sz n) r1) => do r1' <- ireg_of_preg r1; + OK (Asm.Ptstimm sz r1' n) + | PArith (PArithComparisonP (Pfcmp0 sz) r1) => do r1' <- freg_of_preg r1; + OK (Asm.Pfcmp0 sz r1') + + | PArith (Pcset rd c) => OK (Asm.Pcset rd c) + | PArith (Pfmovi fsz rd r1) => OK (Asm.Pfmovi fsz rd r1) + | PArith (Pcsel rd r1 r2 c) => + match r1, r2 with + | IR r1', IR r2' => do rd' <- ireg_of_preg rd; + do r1'' <- ireg_of_preg r1'; + do r2'' <- ireg_of_preg r2'; + OK (Asm.Pcsel rd' r1'' r2'' c) + | FR r1', FR r2' => do rd' <- freg_of_preg rd; + do r1'' <- freg_of_preg r1'; + do r2'' <- freg_of_preg r2'; + OK (Asm.Pfsel rd' r1'' r2'' c) + | _, _ => Error (msg "Asmgen.basic_to_instruction: Pcsel is only defind on iregs and fregs.") end - | _, _ => - Error(msg "Asmgen.transl_op") + | PArith (Pfnmul fsz rd r1 r2) => OK (Asm.Pfnmul fsz rd r1 r2) + + | PLoad (PLd_rd_a Pldrw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw rd' a) + | PLoad (PLd_rd_a Pldrw_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw_a rd' a) + | PLoad (PLd_rd_a Pldrx rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx rd' a) + | PLoad (PLd_rd_a Pldrx_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx_a rd' a) + | PLoad (PLd_rd_a (Pldrb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrb sz rd' a) + | PLoad (PLd_rd_a (Pldrsb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsb sz rd' a) + | PLoad (PLd_rd_a (Pldrh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrh sz rd' a) + | PLoad (PLd_rd_a (Pldrsh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsh sz rd' a) + | PLoad (PLd_rd_a Pldrzw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrzw rd' a) + | PLoad (PLd_rd_a Pldrsw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsw rd' a) + + | PLoad (PLd_rd_a Pldrs rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrs rd' a) + | PLoad (PLd_rd_a Pldrd rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a) + | PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a) + + | PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1; + do rd2' <- ireg_of_preg rd2; + OK (Asm.Pldpw rd1' rd2' chk1 chk2 a) + | PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1; + do rd2' <- ireg_of_preg rd2; + OK (Asm.Pldpx rd1' rd2' chk1 chk2 a) + | PLoad (Pldp Pldps rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1; + do rd2' <- freg_of_preg rd2; + OK (Asm.Pldps rd1' rd2' chk1 chk2 a) + | PLoad (Pldp Pldpd rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1; + do rd2' <- freg_of_preg rd2; + OK (Asm.Pldpd rd1' rd2' chk1 chk2 a) + + | PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a) + | PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a) + | PStore (PSt_rs_a Pstrx r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx r' a) + | PStore (PSt_rs_a Pstrx_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx_a r' a) + | PStore (PSt_rs_a Pstrb r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrb r' a) + | PStore (PSt_rs_a Pstrh r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrh r' a) + + | PStore (PSt_rs_a Pstrs r a) => do r' <- freg_of_preg r; OK (Asm.Pstrs r' a) + | PStore (PSt_rs_a Pstrd r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a) + | PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a) + + | PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1; + do rs2' <- ireg_of_preg rs2; + OK (Asm.Pstpw rs1' rs2' chk1 chk2 a) + | PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1; + do rs2' <- ireg_of_preg rs2; + OK (Asm.Pstpx rs1' rs2' chk1 chk2 a) + | PStore (Pstp Pstps rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1; + do rs2' <- freg_of_preg rs2; + OK (Asm.Pstps rs1' rs2' chk1 chk2 a) + | PStore (Pstp Pstpd rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1; + do rs2' <- freg_of_preg rs2; + OK (Asm.Pstpd rs1' rs2' chk1 chk2 a) + + | Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs) + | Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs) + + | Ploadsymbol rd id => OK (Asm.Ploadsymbol rd id) + + | Pcvtsw2x rd r1 => OK (Asm.Pcvtsw2x rd r1) + + | Pcvtuw2x rd r1 => OK (Asm.Pcvtuw2x rd r1) + + | Pcvtx2w rd => OK (Asm.Pcvtx2w rd) + | Pnop => OK (Asm.Pnop) end. -(** Translation of addressing modes *) - -Definition offset_representable (sz: Z) (ofs: int64) : bool := - let isz := Int64.repr sz in - (** either unscaled 9-bit signed *) - Int64.eq ofs (Int64.sign_ext 9 ofs) || - (** or scaled 12-bit unsigned *) - (Int64.eq (Int64.modu ofs isz) Int64.zero - && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))). - -Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg) - (insn: Asm.addressing -> instruction) (k: code) : res code := - match addr, args with - | Aindexed ofs, a1 :: nil => - do r1 <- ireg_of a1; - if offset_representable sz ofs then - OK (insn (ADimm r1 ofs) :: k) - else - OK (loadimm64 X16 ofs (insn (ADreg r1 X16) :: k)) - | Aindexed2, a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (insn (ADreg r1 r2) :: k) - | Aindexed2shift a, a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - if Int.eq a Int.zero then - OK (insn (ADreg r1 r2) :: k) - else if Int.eq (Int.shl Int.one a) (Int.repr sz) then - OK (insn (ADlsl r1 r2 a) :: k) - else - OK (Padd X X16 r1 r2 (SOlsl a) :: insn (ADimm X16 Int64.zero) :: k) - | Aindexed2ext x a, a1 :: a2 :: nil => - do r1 <- ireg_of a1; do r2 <- ireg_of a2; - if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then - OK (insn (match x with Xsgn32 => ADsxt r1 r2 a - | Xuns32 => ADuxt r1 r2 a end) :: k) - else - OK (arith_extended Paddext (Padd X) X16 r1 r2 x a - (insn (ADimm X16 Int64.zero) :: k)) - | Aglobal id ofs, nil => - assertion (negb (SelectOp.symbol_is_relocatable id)); - if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz - then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k) - else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k)) - | Ainstack ofs, nil => - let ofs := Ptrofs.to_int64 ofs in - if offset_representable sz ofs then - OK (insn (ADimm XSP ofs) :: k) - else - OK (loadimm64 X16 ofs (insn (ADreg XSP X16) :: k)) - | _, _ => - Error(msg "Asmgen.transl_addressing") +Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction := + match cfi with + | Pb l => Asm.Pb l + | Pbc c lbl => Asm.Pbc c lbl + | Pbl id sg => Asm.Pbl id sg + | Pbs id sg => Asm.Pbs id sg + | Pblr r sg => Asm.Pblr r sg + | Pbr r sg => Asm.Pbr r sg + | Pret r => Asm.Pret r + | Pcbnz sz r lbl => Asm.Pcbnz sz r lbl + | Pcbz sz r lbl => Asm.Pcbz sz r lbl + | Ptbnz sz r n lbl => Asm.Ptbnz sz r n lbl + | Ptbz sz r n lbl => Asm.Ptbz sz r n lbl + | Pbtbl r1 tbl => Asm.Pbtbl r1 tbl end. -(** Translation of loads and stores *) - -Definition transl_load (chunk: memory_chunk) (addr: Op.addressing) - (args: list mreg) (dst: mreg) (k: code) : res code := - match chunk with - | Mint8unsigned => - do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrb W rd) k - | Mint8signed => - do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrsb W rd) k - | Mint16unsigned => - do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrh W rd) k - | Mint16signed => - do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrsh W rd) k - | Mint32 => - do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw rd) k - | Mint64 => - do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx rd) k - | Mfloat32 => - do rd <- freg_of dst; transl_addressing 4 addr args (Pldrs rd) k - | Mfloat64 => - do rd <- freg_of dst; transl_addressing 8 addr args (Pldrd rd) k - | Many32 => - do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw_a rd) k - | Many64 => - do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx_a rd) k +Definition control_to_instruction (c: control) := + match c with + | PCtlFlow i => cf_instruction_to_instruction i + | Pbuiltin ef args res => Asm.Pbuiltin ef (List.map (map_builtin_arg DR) args) (map_builtin_res DR res) end. -Definition transl_store (chunk: memory_chunk) (addr: Op.addressing) - (args: list mreg) (src: mreg) (k: code) : res code := - match chunk with - | Mint8unsigned | Mint8signed => - do r1 <- ireg_of src; transl_addressing 1 addr args (Pstrb r1) k - | Mint16unsigned | Mint16signed => - do r1 <- ireg_of src; transl_addressing 2 addr args (Pstrh r1) k - | Mint32 => - do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw r1) k - | Mint64 => - do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx r1) k - | Mfloat32 => - do r1 <- freg_of src; transl_addressing 4 addr args (Pstrs r1) k - | Mfloat64 => - do r1 <- freg_of src; transl_addressing 8 addr args (Pstrd r1) k - | Many32 => - do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw_a r1) k - | Many64 => - do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx_a r1) k +Fixpoint unfold_label (ll: list label) := + match ll with + | nil => nil + | l :: ll => Plabel l :: unfold_label ll end. -(** Register-indexed loads and stores *) - -Definition indexed_memory_access (insn: Asm.addressing -> instruction) - (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) := - let ofs := Ptrofs.to_int64 ofs in - if offset_representable sz ofs - then insn (ADimm base ofs) :: k - else loadimm64 X16 ofs (insn (ADreg base X16) :: k). - -Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) := - match ty, preg_of dst with - | Tint, IR rd => OK (indexed_memory_access (Pldrw rd) 4 base ofs k) - | Tlong, IR rd => OK (indexed_memory_access (Pldrx rd) 8 base ofs k) - | Tsingle, FR rd => OK (indexed_memory_access (Pldrs rd) 4 base ofs k) - | Tfloat, FR rd => OK (indexed_memory_access (Pldrd rd) 8 base ofs k) - | Tany32, IR rd => OK (indexed_memory_access (Pldrw_a rd) 4 base ofs k) - | Tany64, IR rd => OK (indexed_memory_access (Pldrx_a rd) 8 base ofs k) - | Tany64, FR rd => OK (indexed_memory_access (Pldrd_a rd) 8 base ofs k) - | _, _ => Error (msg "Asmgen.loadind") - end. - -Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: code) := - match ty, preg_of src with - | Tint, IR rd => OK (indexed_memory_access (Pstrw rd) 4 base ofs k) - | Tlong, IR rd => OK (indexed_memory_access (Pstrx rd) 8 base ofs k) - | Tsingle, FR rd => OK (indexed_memory_access (Pstrs rd) 4 base ofs k) - | Tfloat, FR rd => OK (indexed_memory_access (Pstrd rd) 8 base ofs k) - | Tany32, IR rd => OK (indexed_memory_access (Pstrw_a rd) 4 base ofs k) - | Tany64, IR rd => OK (indexed_memory_access (Pstrx_a rd) 8 base ofs k) - | Tany64, FR rd => OK (indexed_memory_access (Pstrd_a rd) 8 base ofs k) - | _, _ => Error (msg "Asmgen.storeind") - end. - -Definition loadptr (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: code) := - indexed_memory_access (Pldrx dst) 8 base ofs k. - -Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) := - indexed_memory_access (Pstrx src) 8 base ofs k. - -(** Function epilogue *) - -Definition make_epilogue (f: Mach.function) (k: code) := - loadptr XSP f.(fn_retaddr_ofs) RA - (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k). - -(** Translation of a Mach instruction. *) - -Definition transl_instr (f: Mach.function) (i: Mach.instruction) - (r29_is_parent: bool) (k: code) : res code := - match i with - | Mgetstack ofs ty dst => - loadind XSP ofs ty dst k - | Msetstack src ofs ty => - storeind src XSP ofs ty k - | Mgetparam ofs ty dst => - (* load via the frame pointer if it is valid *) - do c <- loadind X29 ofs ty dst k; - OK (if r29_is_parent then c else loadptr XSP f.(fn_link_ofs) X29 c) - | Mop op args res => - transl_op op args res k - | Mload chunk addr args dst => - transl_load chunk addr args dst k - | Mstore chunk addr args src => - transl_store chunk addr args src k - | Mcall sig (inl r) => - do r1 <- ireg_of r; OK (Pblr r1 sig :: k) - | Mcall sig (inr symb) => - OK (Pbl symb sig :: k) - | Mtailcall sig (inl r) => - do r1 <- ireg_of r; - OK (make_epilogue f (Pbr r1 sig :: k)) - | Mtailcall sig (inr symb) => - OK (make_epilogue f (Pbs symb sig :: k)) - | Mbuiltin ef args res => - OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k) - | Mlabel lbl => - OK (Plabel lbl :: k) - | Mgoto lbl => - OK (Pb lbl :: k) - | Mcond cond args lbl => - transl_cond_branch cond args lbl k - | Mjumptable arg tbl => - do r <- ireg_of arg; - OK (Pbtbl r tbl :: k) - | Mreturn => - OK (make_epilogue f (Pret RA :: k)) - end. - -(** Translation of a code sequence *) - -Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := - match i with - | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst R29) - | Mop op args res => before && negb (mreg_eq res R29) - | _ => false - end. - -(** This is the naive definition that we no longer use because it - is not tail-recursive. It is kept as specification. *) - -Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (it1p: bool) := - match il with +Fixpoint unfold_body (lb: list basic) : res Asm.code := + match lb with | nil => OK nil - | i1 :: il' => - do k <- transl_code f il' (it1_is_parent it1p i1); - transl_instr f i1 it1p k + | b :: lb => + (* x_is: x's instructions *) + do b_is <- basic_to_instruction b; + do lb_is <- unfold_body lb; + OK (b_is :: lb_is) end. -(** This is an equivalent definition in continuation-passing style - that runs in constant stack space. *) - -Fixpoint transl_code_rec (f: Mach.function) (il: list Mach.instruction) - (it1p: bool) (k: code -> res code) := - match il with - | nil => k nil - | i1 :: il' => - transl_code_rec f il' (it1_is_parent it1p i1) - (fun c1 => do c2 <- transl_instr f i1 it1p c1; k c2) +Definition unfold_exit (oc: option control) := + match oc with + | None => nil + | Some c => control_to_instruction c :: nil end. -Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bool) := - transl_code_rec f il it1p (fun c => OK c). +Definition unfold_bblock (bb: bblock) := + let lbl := unfold_label (header bb) in + (* + * With this dynamically checked assumption on a previous optimization we + * can show that [Asmblock.label_pos] and [Asm.label_pos] retrieve the same + * exact address. Maintaining this property allows us to use the simple + * formulation of match_states defined as equality. + * Otherwise we would have to deal with the case of a basic block header + * that has multiple labels. Asmblock.label_pos will, for all labels, point + * to the same location at the beginning of the basic block. Asm.label_pos + * on the other hand could return a position pointing into the original + * basic block. + *) + if zle (list_length_z (header bb)) 1 then + do bo_is <- unfold_body (body bb); + OK (lbl ++ bo_is ++ unfold_exit (exit bb)) + else + Error (msg "Asmgen.unfold_bblock: Multiple labels were generated."). + +Fixpoint unfold (bbs: Asmblock.bblocks) : res Asm.code := + match bbs with + | nil => OK (nil) + | bb :: bbs' => + do bb_is <- unfold_bblock bb; + do bbs'_is <- unfold bbs'; + OK (bb_is ++ bbs'_is) + end. -(** Translation of a whole function. Note that we must check - that the generated code contains less than [2^32] instructions, - otherwise the offset part of the [PC] code pointer could wrap - around, leading to incorrect executions. *) +Definition transf_function (f: Asmblock.function) : res Asm.function := + do c <- unfold (Asmblock.fn_blocks f); + if zlt Ptrofs.max_unsigned (list_length_z c) + then Error (msg "Asmgen.trans_function: code size exceeded") + else OK {| Asm.fn_sig := Asmblock.fn_sig f; Asm.fn_code := c |}. -Definition transl_function (f: Mach.function) := - do c <- transl_code' f f.(Mach.fn_code) true; - OK (mkfunction f.(Mach.fn_sig) - (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - storeptr RA XSP f.(fn_retaddr_ofs) c)). +Definition transf_fundef (f: Asmblock.fundef) : res Asm.fundef := + transf_partial_fundef transf_function f. -Definition transf_function (f: Mach.function) : res Asm.function := - do tf <- transl_function f; - if zlt Ptrofs.max_unsigned (list_length_z tf.(fn_code)) - then Error (msg "code size exceeded") - else OK tf. +Definition transf_program (p: Asmblock.program) : res Asm.program := + transform_partial_program transf_fundef p. -Definition transf_fundef (f: Mach.fundef) : res Asm.fundef := - transf_partial_fundef transf_function f. +End Asmblock_TRANSF. Definition transf_program (p: Mach.program) : res Asm.program := - transform_partial_program transf_fundef p. + let mbp := Machblockgen.transf_program p in + do abp <- Asmblockgen.transf_program mbp; + do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp; + Asmblock_TRANSF.transf_program abp'. diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index dc0bc509..d27b3f8c 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -1,24 +1,35 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, Collège de France and INRIA Paris *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Correctness proof for AArch64 code generation. *) +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* Léo Gourdin UGA, VERIMAG *) +(* Justus Fasse UGA, VERIMAG *) +(* Xavier Leroy INRIA Paris-Rocquencourt *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) Require Import Coqlib Errors. Require Import Integers Floats AST Linking. Require Import Values Memory Events Globalenvs Smallstep. -Require Import Op Locations Mach Conventions Asm. -Require Import Asmgen Asmgenproof0 Asmgenproof1. +Require Import Op Locations Machblock Conventions Asm Asmblock. +Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof. +Require Import Asmgen. +Require Import Axioms. +Require Import IterList. +Require Import Ring Lia. -Definition match_prog (p: Mach.program) (tp: Asm.program) := +Module Asmblock_PRESERVATION. + +Import Asmblock_TRANSF. + +Definition match_prog (p: Asmblock.program) (tp: Asm.program) := match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. Lemma transf_program_match: @@ -29,20 +40,36 @@ Qed. Section PRESERVATION. -Variable prog: Mach.program. +Variable prog: Asmblock.program. Variable tprog: Asm.program. Hypothesis TRANSF: match_prog prog tprog. Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. +Definition lk :aarch64_linker := {| Asmblock.symbol_low:=Asm.symbol_low tge; Asmblock.symbol_high:=Asm.symbol_high tge|}. + Lemma symbols_preserved: forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. Proof (Genv.find_symbol_match TRANSF). +Lemma symbol_addresses_preserved: + forall (s: ident) (ofs: ptrofs), + Genv.symbol_address tge s ofs = Genv.symbol_address ge s ofs. +Proof. + intros; unfold Genv.symbol_address; rewrite symbols_preserved; reflexivity. +Qed. + Lemma senv_preserved: Senv.equiv ge tge. Proof (Genv.senv_match TRANSF). +Lemma symbol_high_low: forall (id: ident) (ofs: ptrofs), + Val.addl (Asmblock.symbol_high lk id ofs) (Asmblock.symbol_low lk id ofs) = Genv.symbol_address ge id ofs. +Proof. + unfold lk; simpl. intros; rewrite Asm.symbol_high_low; unfold Genv.symbol_address; + rewrite symbols_preserved; reflexivity. +Qed. + Lemma functions_translated: forall b f, Genv.find_funct_ptr ge b = Some f -> @@ -50,980 +77,2242 @@ Lemma functions_translated: Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf. Proof (Genv.find_funct_ptr_transf_partial TRANSF). -Lemma functions_transl: - forall fb f tf, +Lemma internal_functions_translated: + forall b f, + Genv.find_funct_ptr ge b = Some (Internal f) -> + exists tf, + Genv.find_funct_ptr tge b = Some (Internal tf) /\ transf_function f = OK tf. +Proof. + intros; exploit functions_translated; eauto. + intros (x & FIND & TRANSf). + apply bind_inversion in TRANSf. + destruct TRANSf as (tf & TRANSf & X). + inv X. + eauto. +Qed. + +Lemma internal_functions_unfold: + forall b f, + Genv.find_funct_ptr ge b = Some (Internal f) -> + exists tc, + Genv.find_funct_ptr tge b = Some (Internal (Asm.mkfunction (fn_sig f) tc)) + /\ unfold (fn_blocks f) = OK tc + /\ list_length_z tc <= Ptrofs.max_unsigned. +Proof. + intros. + exploit internal_functions_translated; eauto. + intros (tf & FINDtf & TRANStf). + unfold transf_function in TRANStf. + monadInv TRANStf. + destruct (zlt _ _); try congruence. + inv EQ. inv EQ0. + eexists; intuition eauto. + lia. +Qed. + + +Inductive is_nth_inst (bb: bblock) (n:Z) (i:Asm.instruction): Prop := + | is_nth_label l: + list_nth_z (header bb) n = Some l -> + i = Asm.Plabel l -> + is_nth_inst bb n i + | is_nth_basic bi: + list_nth_z (body bb) (n - list_length_z (header bb)) = Some bi -> + basic_to_instruction bi = OK i -> + is_nth_inst bb n i + | is_nth_ctlflow cfi: + (exit bb) = Some cfi -> + n = size bb - 1 -> + i = control_to_instruction cfi -> + is_nth_inst bb n i. + +(* Asmblock and Asm share the same definition of state *) +Definition match_states (s1 s2 : state) := s1 = s2. + +Inductive match_internal: forall n, state -> state -> Prop := + | match_internal_intro n rs1 m1 rs2 m2 + (MEM: m1 = m2) + (AG: forall r, r <> PC -> rs1 r = rs2 r) + (AGPC: Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC) + : match_internal n (State rs1 m1) (State rs2 m2). + +Lemma match_internal_set_parallel: + forall n rs1 m1 rs2 m2 r val, + match_internal n (State rs1 m1) (State rs2 m2) -> + r <> PC -> + match_internal n (State (rs1#r <- val) m1) (State (rs2#r <- val ) m2). +Proof. + intros n rs1 m1 rs2 m2 r v MI. + inversion MI; constructor; auto. + - intros r' NOTPC. + unfold Pregmap.set; rewrite AG. reflexivity. assumption. + - unfold Pregmap.set; destruct (PregEq.eq PC r); congruence. +Qed. + +Lemma agree_match_states: + forall rs1 m1 rs2 m2, + match_states (State rs1 m1) (State rs2 m2) -> + forall r : preg, rs1#r = rs2#r. +Proof. + intros. + unfold match_states in *. + assert (rs1 = rs2) as EQ. { congruence. } + rewrite EQ. reflexivity. +Qed. + +Lemma match_states_set_parallel: + forall rs1 m1 rs2 m2 r v, + match_states (State rs1 m1) (State rs2 m2) -> + match_states (State (rs1#r <- v) m1) (State (rs2#r <- v) m2). +Proof. + intros; unfold match_states in *. + assert (rs1 = rs2) as RSEQ. { congruence. } + assert (m1 = m2) as MEQ. { congruence. } + rewrite RSEQ in *; rewrite MEQ in *; unfold Pregmap.set; reflexivity. +Qed. + +(* match_internal from match_states *) +Lemma mi_from_ms: + forall rs1 m1 rs2 m2 b ofs, + match_states (State rs1 m1) (State rs2 m2) -> + rs1#PC = Vptr b ofs -> + match_internal 0 (State rs1 m1) (State rs2 m2). +Proof. + intros rs1 m1 rs2 m2 b ofs MS PCVAL. + inv MS; constructor; auto; unfold Val.offset_ptr; + rewrite PCVAL; rewrite Ptrofs.add_zero; reflexivity. +Qed. + +Lemma transf_initial_states: + forall s1, Asmblock.initial_state prog s1 -> + exists s2, Asm.initial_state tprog s2 /\ match_states s1 s2. +Proof. + intros ? INIT_s1. + inversion INIT_s1 as (m, ?, ge0, rs). unfold ge0 in *. + econstructor; split. + - econstructor. + eapply (Genv.init_mem_transf_partial TRANSF); eauto. + - rewrite (match_program_main TRANSF); rewrite symbol_addresses_preserved. + unfold rs; reflexivity. +Qed. + +Lemma transf_final_states: + forall s1 s2 r, + match_states s1 s2 -> Asmblock.final_state s1 r -> Asm.final_state s2 r. +Proof. + intros s1 s2 r MATCH FINAL_s1. + inv FINAL_s1; inv MATCH; constructor; assumption. +Qed. + +Definition max_pos (f : Asm.function) := list_length_z f.(Asm.fn_code). + +Lemma functions_bound_max_pos: forall fb f tf, Genv.find_funct_ptr ge fb = Some (Internal f) -> transf_function f = OK tf -> - Genv.find_funct_ptr tge fb = Some (Internal tf). + max_pos tf <= Ptrofs.max_unsigned. +Proof. + intros fb f tf FINDf TRANSf. + unfold transf_function in TRANSf. + apply bind_inversion in TRANSf. + destruct TRANSf as (c & TRANSf). + destruct TRANSf as (_ & TRANSf). + destruct (zlt _ _). + - inversion TRANSf. + - unfold max_pos. + assert (Asm.fn_code tf = c) as H. { inversion TRANSf as (H'); auto. } + rewrite H; lia. +Qed. + +Lemma one_le_max_unsigned: + 1 <= Ptrofs.max_unsigned. +Proof. + unfold Ptrofs.max_unsigned; simpl; unfold Ptrofs.wordsize; + unfold Wordsize_Ptrofs.wordsize; destruct Archi.ptr64; simpl; lia. +Qed. + +(* NB: does not seem useful anymore, with the [exec_header_simulation] proof below +Lemma match_internal_exec_label: + forall n rs1 m1 rs2 m2 l fb f tf, + Genv.find_funct_ptr ge fb = Some (Internal f) -> + transf_function f = OK tf -> + match_internal n (State rs1 m1) (State rs2 m2) -> + n >= 0 -> + (* There is no step if n is already max_pos *) + n < (max_pos tf) -> + exists rs2' m2', Asm.exec_instr tge tf (Asm.Plabel l) rs2 m2 = Next rs2' m2' + /\ match_internal (n+1) (State rs1 m1) (State rs2' m2'). +Proof. + intros. (* XXX auto generated names *) + unfold Asm.exec_instr. + eexists; eexists; split; eauto. + inversion H1; constructor; auto. + - intros; unfold Asm.nextinstr; unfold Pregmap.set; + destruct (PregEq.eq r PC); auto; contradiction. + - unfold Asm.nextinstr; rewrite Pregmap.gss; unfold Ptrofs.one. + rewrite <- AGPC; rewrite Val.offset_ptr_assoc; unfold Ptrofs.add; + rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; trivial. + + split. + * apply Z.le_0_1. + * apply one_le_max_unsigned. + + split. + * apply Z.ge_le; assumption. + * rewrite <- functions_bound_max_pos; eauto; lia. +Qed. +*) + +Lemma incrPC_agree_but_pc: + forall rs r ofs, + r <> PC -> + (incrPC ofs rs)#r = rs#r. +Proof. + intros rs r ofs NOTPC. + unfold incrPC; unfold Pregmap.set; destruct (PregEq.eq r PC). + - contradiction. + - reflexivity. +Qed. + +Lemma bblock_non_empty bb: body bb <> nil \/ exit bb <> None. +Proof. + destruct bb. simpl. + unfold non_empty_bblockb in correct. + unfold non_empty_body, non_empty_exit, Is_true in correct. + destruct body, exit. + - right. discriminate. + - contradiction. + - right. discriminate. + - left. discriminate. +Qed. + +Lemma list_length_z_aux_increase A (l: list A): forall acc, + list_length_z_aux l acc >= acc. +Proof. + induction l; simpl; intros. + - lia. + - generalize (IHl (Z.succ acc)). lia. +Qed. + +Lemma bblock_size_aux_pos bb: list_length_z (body bb) + Z.of_nat (length_opt (exit bb)) >= 1. +Proof. + destruct (bblock_non_empty bb), (body bb) as [|hd tl], (exit bb); simpl; + try (congruence || lia); + unfold list_length_z; simpl; + generalize (list_length_z_aux_increase _ tl 1); lia. +Qed. + + +Lemma list_length_add_acc A (l : list A) acc: + list_length_z_aux l acc = (list_length_z l) + acc. +Proof. + unfold list_length_z, list_length_z_aux. simpl. + fold list_length_z_aux. + rewrite (list_length_z_aux_shift l acc 0). + lia. +Qed. + +Lemma list_length_z_cons A hd (tl : list A): + list_length_z (hd :: tl) = list_length_z tl + 1. Proof. - intros. exploit functions_translated; eauto. intros [tf' [A B]]. - monadInv B. rewrite H0 in EQ; inv EQ; auto. + unfold list_length_z; simpl; rewrite list_length_add_acc; reflexivity. Qed. -(** * Properties of control flow *) +Lemma bblock_size_aux bb: size bb = list_length_z (header bb) + list_length_z (body bb) + Z.of_nat (length_opt (exit bb)). +Proof. + unfold size. + repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). reflexivity. +Qed. + +Lemma header_size_lt_block_size bb: + list_length_z (header bb) < size bb. +Proof. + rewrite bblock_size_aux. + generalize (bblock_non_empty bb); intros NEMPTY; destruct NEMPTY as [HDR|EXIT]. + - destruct (body bb); try contradiction; rewrite list_length_z_cons; + repeat rewrite list_length_z_nat; lia. + - destruct (exit bb); try contradiction; simpl; repeat rewrite list_length_z_nat; lia. +Qed. -Lemma transf_function_no_overflow: - forall f tf, - transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned. +Lemma body_size_le_block_size bb: + list_length_z (body bb) <= size bb. Proof. - intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. + rewrite bblock_size_aux; repeat rewrite list_length_z_nat; lia. +Qed. + + +Lemma bblock_size_pos bb: size bb >= 1. +Proof. + rewrite (bblock_size_aux bb). + generalize (bblock_size_aux_pos bb). + generalize (list_length_z_pos (header bb)). lia. Qed. -Lemma exec_straight_exec: - forall fb f c ep tf tc c' rs m rs' m', - transl_code_at_pc ge (rs PC) fb f c ep tf tc -> - exec_straight tge tf tc rs m c' rs' m' -> - plus step tge (State rs m) E0 (State rs' m'). -Proof. - intros. inv H. - eapply exec_straight_steps_1; eauto. - eapply transf_function_no_overflow; eauto. - eapply functions_transl; eauto. -Qed. - -Lemma exec_straight_at: - forall fb f c ep tf tc c' ep' tc' rs m rs' m', - transl_code_at_pc ge (rs PC) fb f c ep tf tc -> - transl_code f c' ep' = OK tc' -> - exec_straight tge tf tc rs m tc' rs' m' -> - transl_code_at_pc ge (rs' PC) fb f c' ep' tf tc'. -Proof. - intros. inv H. - exploit exec_straight_steps_2; eauto. - eapply transf_function_no_overflow; eauto. - eapply functions_transl; eauto. - intros [ofs' [PC' CT']]. - rewrite PC'. constructor; auto. -Qed. - -(** The following lemmas show that the translation from Mach to Asm - preserves labels, in the sense that the following diagram commutes: -<< - translation - Mach code ------------------------ Asm instr sequence - | | - | Mach.find_label lbl find_label lbl | - | | - v v - Mach code tail ------------------- Asm instr seq tail - translation ->> - The proof demands many boring lemmas showing that Asm constructor - functions do not introduce new labels. -*) +Lemma unfold_car_cdr bb bbs tc: + unfold (bb :: bbs) = OK tc -> + exists tbb tc', unfold_bblock bb = OK tbb + /\ unfold bbs = OK tc' + /\ unfold (bb :: bbs) = OK (tbb ++ tc'). +Proof. + intros UNFOLD. + assert (UF := UNFOLD). + unfold unfold in UNFOLD. + apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UBB). destruct UBB as (UBB & REST). + apply bind_inversion in REST. destruct REST as (? & UNFOLD'). + fold unfold in UNFOLD'. destruct UNFOLD' as (UNFOLD' & UNFOLD). + rewrite <- UNFOLD in UF. + eauto. +Qed. -Section TRANSL_LABEL. +Lemma unfold_cdr bb bbs tc: + unfold (bb :: bbs) = OK tc -> + exists tc', unfold bbs = OK tc'. +Proof. + intros; exploit unfold_car_cdr; eauto. intros (_ & ? & _ & ? & _). + eexists; eauto. +Qed. -Remark loadimm_z_label: forall sz rd l k, tail_nolabel k (loadimm_z sz rd l k). -Proof. - intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel. - induction l as [ | [n p] l]; simpl; TailNoLabel. +Lemma unfold_car bb bbs tc: + unfold (bb :: bbs) = OK tc -> + exists tbb, unfold_bblock bb = OK tbb. +Proof. + intros; exploit unfold_car_cdr; eauto. intros (? & _ & ? & _ & _). + eexists; eauto. Qed. -Remark loadimm_n_label: forall sz rd l k, tail_nolabel k (loadimm_n sz rd l k). -Proof. - intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel. - induction l as [ | [n p] l]; simpl; TailNoLabel. +Lemma all_blocks_translated: + forall bbs tc, + unfold bbs = OK tc -> + forall bb, In bb bbs -> + exists c, unfold_bblock bb = OK c. +Proof. + induction bbs as [| bb bbs IHbbs]. + - contradiction. + - intros ? UNFOLD ? IN. + (* unfold proceeds by unfolding the basic block at the head of the list and + * then recurring *) + exploit unfold_car_cdr; eauto. intros (? & ? & ? & ? & _). + (* basic block is either in head or tail *) + inversion IN as [EQ | NEQ]. + + rewrite <- EQ; eexists; eauto. + + eapply IHbbs; eauto. Qed. -Remark loadimm_label: forall sz rd n k, tail_nolabel k (loadimm sz rd n k). +Lemma entire_body_translated: + forall lbi tc, + unfold_body lbi = OK tc -> + forall bi, In bi lbi -> + exists bi', basic_to_instruction bi = OK bi'. Proof. - unfold loadimm; intros. destruct Nat.leb; [apply loadimm_z_label|apply loadimm_n_label]. + induction lbi as [| a lbi IHlbi]. + - intros. contradiction. + - intros tc UNFOLD_BODY bi IN. + unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY. + destruct UNFOLD_BODY as (? & TRANSbi & REST). + apply bind_inversion in REST. destruct REST as (? & UNFOLD_BODY' & ?). + fold unfold_body in UNFOLD_BODY'. + + inversion IN as [EQ | NEQ]. + + rewrite <- EQ; eauto. + + eapply IHlbi; eauto. Qed. -Hint Resolve loadimm_label: labels. -Remark loadimm32_label: forall r n k, tail_nolabel k (loadimm32 r n k). +Lemma bblock_in_bblocks bbs bb: forall + tc pos + (UNFOLD: unfold bbs = OK tc) + (FINDBB: find_bblock pos bbs = Some bb), + In bb bbs. Proof. - unfold loadimm32; intros. destruct (is_logical_imm32 n); TailNoLabel. + induction bbs as [| b bbs IH]. + - intros. inversion FINDBB. + - destruct pos. + + intros. inversion FINDBB as (EQ). rewrite <- EQ. apply in_eq. + + intros. + exploit unfold_cdr; eauto. intros (tc' & UNFOLD'). + unfold find_bblock in FINDBB. simpl in FINDBB. + fold find_bblock in FINDBB. + apply in_cons. eapply IH; eauto. + + intros. inversion FINDBB. Qed. -Hint Resolve loadimm32_label: labels. -Remark loadimm64_label: forall r n k, tail_nolabel k (loadimm64 r n k). +Lemma blocks_translated tc pos bbs bb: forall + (UNFOLD: unfold bbs = OK tc) + (FINDBB: find_bblock pos bbs = Some bb), + exists tbb, unfold_bblock bb = OK tbb. Proof. - unfold loadimm64; intros. destruct (is_logical_imm64 n); TailNoLabel. + intros; exploit bblock_in_bblocks; eauto; intros; + eapply all_blocks_translated; eauto. Qed. -Hint Resolve loadimm64_label: labels. -Remark addimm_aux: forall insn rd r1 n k, - (forall rd r1 n, nolabel (insn rd r1 n)) -> - tail_nolabel k (addimm_aux insn rd r1 n k). +Lemma size_header b pos f bb: forall + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock pos (fn_blocks f) = Some bb), + list_length_z (header bb) <= 1. Proof. - unfold addimm_aux; intros. - destruct Z.eqb. TailNoLabel. destruct Z.eqb; TailNoLabel. + intros. + exploit internal_functions_unfold; eauto. + intros (tc & FINDtf & TRANStf & ?). + exploit blocks_translated; eauto. intros TBB. + + unfold unfold_bblock in TBB. + destruct (zle (list_length_z (header bb)) 1). + - assumption. + - destruct TBB as (? & TBB). discriminate TBB. Qed. -Remark addimm32_label: forall rd r1 n k, tail_nolabel k (addimm32 rd r1 n k). +Lemma list_nth_z_neg A (l: list A): forall n, + n < 0 -> list_nth_z l n = None. Proof. - unfold addimm32; intros. - destruct Int.eq. apply addimm_aux; intros; red; auto. - destruct Int.eq. apply addimm_aux; intros; red; auto. - destruct Int.lt; eapply tail_nolabel_trans; TailNoLabel. + induction l; simpl; auto. + intros n H; destruct (zeq _ _); (try eapply IHl); lia. Qed. -Hint Resolve addimm32_label: labels. -Remark addimm64_label: forall rd r1 n k, tail_nolabel k (addimm64 rd r1 n k). +Lemma find_bblock_neg bbs: forall pos, + pos < 0 -> find_bblock pos bbs = None. Proof. - unfold addimm64; intros. - destruct Int64.eq. apply addimm_aux; intros; red; auto. - destruct Int64.eq. apply addimm_aux; intros; red; auto. - destruct Int64.lt; eapply tail_nolabel_trans; TailNoLabel. + induction bbs; simpl; auto. + intros. destruct (zlt pos 0). { reflexivity. } + destruct (zeq pos 0); contradiction. Qed. -Hint Resolve addimm64_label: labels. -Remark logicalimm32_label: forall insn1 insn2 rd r1 n k, - (forall rd r1 n, nolabel (insn1 rd r1 n)) -> - (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) -> - tail_nolabel k (logicalimm32 insn1 insn2 rd r1 n k). +Lemma equal_header_size bb: + length (header bb) = length (unfold_label (header bb)). Proof. - unfold logicalimm32; intros. - destruct (is_logical_imm32 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. + induction (header bb); auto. + simpl. rewrite IHl. auto. Qed. -Remark logicalimm64_label: forall insn1 insn2 rd r1 n k, - (forall rd r1 n, nolabel (insn1 rd r1 n)) -> - (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) -> - tail_nolabel k (logicalimm64 insn1 insn2 rd r1 n k). +Lemma equal_body_size: + forall bb tb, + unfold_body (body bb) = OK tb -> + length (body bb) = length tb. Proof. - unfold logicalimm64; intros. - destruct (is_logical_imm64 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. + intros bb. induction (body bb). + - simpl. intros ? H. inversion H. auto. + - intros tb H. simpl in H. apply bind_inversion in H. destruct H as (? & BI & TAIL). + apply bind_inversion in TAIL. destruct TAIL as (tb' & BODY' & CONS). inv CONS. + simpl. specialize (IHl tb' BODY'). rewrite IHl. reflexivity. Qed. -Remark move_extended_label: forall rd r1 ex a k, tail_nolabel k (move_extended rd r1 ex a k). +Lemma equal_exit_size bb: + length_opt (exit bb) = length (unfold_exit (exit bb)). Proof. - unfold move_extended, move_extended_base; intros. destruct Int.eq, ex; TailNoLabel. + destruct (exit bb); trivial. Qed. -Hint Resolve move_extended_label: labels. -Remark arith_extended_label: forall insnX insnS rd r1 r2 ex a k, - (forall rd r1 r2 x, nolabel (insnX rd r1 r2 x)) -> - (forall rd r1 r2 s, nolabel (insnS rd r1 r2 s)) -> - tail_nolabel k (arith_extended insnX insnS rd r1 r2 ex a k). +Lemma bblock_size_preserved bb tb: + unfold_bblock bb = OK tb -> + size bb = list_length_z tb. Proof. - unfold arith_extended; intros. destruct Int.ltu. - TailNoLabel. - destruct ex; simpl; TailNoLabel. + unfold unfold_bblock. intros UNFOLD_BBLOCK. + destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. } + apply bind_inversion in UNFOLD_BBLOCK. destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & CONS). + inversion CONS. + unfold size. + rewrite equal_header_size, equal_exit_size. + erewrite equal_body_size; eauto. + rewrite list_length_z_nat. + repeat (rewrite app_length). + rewrite plus_assoc. auto. Qed. -Remark loadsymbol_label: forall r id ofs k, tail_nolabel k (loadsymbol r id ofs k). +Lemma size_of_blocks_max_pos_aux: + forall bbs tbbs pos bb, + find_bblock pos bbs = Some bb -> + unfold bbs = OK tbbs -> + pos + size bb <= list_length_z tbbs. Proof. - intros; unfold loadsymbol. - destruct (SelectOp.symbol_is_relocatable id); TailNoLabel. destruct Ptrofs.eq; TailNoLabel. -Qed. -Hint Resolve loadsymbol_label: labels. + induction bbs as [| bb ? IHbbs]. + - intros tbbs ? ? FINDBB; inversion FINDBB. + - simpl; intros tbbs pos bb' FINDBB UNFOLD. + apply bind_inversion in UNFOLD; destruct UNFOLD as (tbb & UNFOLD_BBLOCK & H). + apply bind_inversion in H; destruct H as (tbbs' & UNFOLD & CONS). + inv CONS. + destruct (zlt pos 0). { discriminate FINDBB. } + destruct (zeq pos 0). + + inv FINDBB. + exploit bblock_size_preserved; eauto; intros SIZE; rewrite SIZE. + repeat (rewrite list_length_z_nat). rewrite app_length, Nat2Z.inj_add. + lia. + + generalize (IHbbs tbbs' (pos - size bb) bb' FINDBB UNFOLD). intros IH. + exploit bblock_size_preserved; eauto; intros SIZE. + repeat (rewrite list_length_z_nat); rewrite app_length. + rewrite Nat2Z.inj_add; repeat (rewrite <- list_length_z_nat). + lia. +Qed. -Remark transl_cond_label: forall cond args k c, - transl_cond cond args k = OK c -> tail_nolabel k c. +Lemma size_of_blocks_max_pos pos f tf bi: + find_bblock pos (fn_blocks f) = Some bi -> + transf_function f = OK tf -> + pos + size bi <= max_pos tf. Proof. - unfold transl_cond; intros; destruct cond; TailNoLabel. -- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. -- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. + unfold transf_function, max_pos. + intros FINDBB UNFOLD. + apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UNFOLD & H). + destruct (zlt Ptrofs.max_unsigned (list_length_z x)). { discriminate H. } + inv H. simpl. + eapply size_of_blocks_max_pos_aux; eauto. Qed. -Remark transl_cond_branch_default_label: forall cond args lbl k c, - transl_cond_branch_default cond args lbl k = OK c -> tail_nolabel k c. +Lemma unfold_bblock_not_nil bb: + unfold_bblock bb = OK nil -> False. Proof. - unfold transl_cond_branch_default; intros. - eapply tail_nolabel_trans; [eapply transl_cond_label;eauto|TailNoLabel]. + intros. + exploit bblock_size_preserved; eauto. unfold list_length_z; simpl. intros SIZE. + generalize (bblock_size_pos bb). intros SIZE'. lia. Qed. -Hint Resolve transl_cond_branch_default_label: labels. -Remark transl_cond_branch_label: forall cond args lbl k c, - transl_cond_branch cond args lbl k = OK c -> tail_nolabel k c. +(* same proof as list_nth_z_range (Coqlib) *) +Lemma find_instr_range: + forall c n i, + Asm.find_instr n c = Some i -> 0 <= n < list_length_z c. Proof. - unfold transl_cond_branch; intros; destruct args; TailNoLabel; destruct cond; TailNoLabel. -- destruct c0; TailNoLabel. -- destruct c0; TailNoLabel. -- destruct (Int.is_power2 n); TailNoLabel. -- destruct (Int.is_power2 n); TailNoLabel. -- destruct c0; TailNoLabel. -- destruct c0; TailNoLabel. -- destruct (Int64.is_power2' n); TailNoLabel. -- destruct (Int64.is_power2' n); TailNoLabel. + induction c; simpl; intros. + discriminate. + rewrite list_length_z_cons. destruct (zeq n 0). + generalize (list_length_z_pos c); lia. + exploit IHc; eauto. lia. Qed. -Remark transl_op_label: - forall op args r k c, - transl_op op args r k = OK c -> tail_nolabel k c. +Lemma find_instr_tail: + forall tbb pos c i, + Asm.find_instr pos c = Some i -> + Asm.find_instr (pos + list_length_z tbb) (tbb ++ c) = Some i. Proof. - unfold transl_op; intros; destruct op; TailNoLabel. -- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel. -- destruct (Float.eq_dec n Float.zero); TailNoLabel. -- destruct (Float32.eq_dec n Float32.zero); TailNoLabel. -- apply logicalimm32_label; unfold nolabel; auto. -- apply logicalimm32_label; unfold nolabel; auto. -- apply logicalimm32_label; unfold nolabel; auto. -- unfold shrx32. destruct Int.eq; TailNoLabel. -- apply arith_extended_label; unfold nolabel; auto. -- apply arith_extended_label; unfold nolabel; auto. -- apply logicalimm64_label; unfold nolabel; auto. -- apply logicalimm64_label; unfold nolabel; auto. -- apply logicalimm64_label; unfold nolabel; auto. -- unfold shrx64. destruct Int.eq; TailNoLabel. -- eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. -- destruct (preg_of r); try discriminate; TailNoLabel; - (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]). + induction tbb as [| ? ? IHtbb]. + - intros. unfold list_length_z; simpl. rewrite Z.add_0_r. assumption. + - intros. rewrite list_length_z_cons. simpl. + destruct (zeq (pos + (list_length_z tbb + 1)) 0). + + exploit find_instr_range; eauto. intros POS_RANGE. + generalize (list_length_z_pos tbb). lia. + + replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia. + eapply IHtbb; eauto. Qed. -Remark transl_addressing_label: - forall sz addr args insn k c, - transl_addressing sz addr args insn k = OK c -> - (forall ad, nolabel (insn ad)) -> - tail_nolabel k c. +Lemma size_of_blocks_bounds fb pos f bi: + Genv.find_funct_ptr ge fb = Some (Internal f) -> + find_bblock pos (fn_blocks f) = Some bi -> + pos + size bi <= Ptrofs.max_unsigned. Proof. - unfold transl_addressing; intros; destruct addr; TailNoLabel; - eapply tail_nolabel_trans; TailNoLabel. - eapply tail_nolabel_trans. apply arith_extended_label; unfold nolabel; auto. TailNoLabel. + intros; exploit internal_functions_translated; eauto. + intros (tf & _ & TRANSf). + assert (pos + size bi <= max_pos tf). { eapply size_of_blocks_max_pos; eauto. } + assert (max_pos tf <= Ptrofs.max_unsigned). { eapply functions_bound_max_pos; eauto. } + lia. Qed. -Remark transl_load_label: - forall chunk addr args dst k c, - transl_load chunk addr args dst k = OK c -> tail_nolabel k c. +Lemma find_instr_bblock_tail: + forall tbb bb pos c i, + Asm.find_instr pos c = Some i -> + unfold_bblock bb = OK tbb -> + Asm.find_instr (pos + size bb ) (tbb ++ c) = Some i. Proof. - unfold transl_load; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto. + induction tbb. + - intros. exploit unfold_bblock_not_nil; eauto. intros. contradiction. + - intros. simpl. + destruct (zeq (pos + size bb) 0). + + (* absurd *) + exploit find_instr_range; eauto. intros POS_RANGE. + generalize (bblock_size_pos bb). intros SIZE. lia. + + erewrite bblock_size_preserved; eauto. + rewrite list_length_z_cons. + replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia. + apply find_instr_tail; auto. Qed. -Remark transl_store_label: - forall chunk addr args src k c, - transl_store chunk addr args src k = OK c -> tail_nolabel k c. +Lemma list_nth_z_find_label: + forall (ll : list label) il n l, + list_nth_z ll n = Some l -> + Asm.find_instr n ((unfold_label ll) ++ il) = Some (Asm.Plabel l). Proof. - unfold transl_store; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto. + induction ll. + - intros. inversion H. + - intros. simpl. + destruct (zeq n 0) as [Z | NZ]. + + inversion H as (H'). rewrite Z in H'. simpl in H'. inv H'. reflexivity. + + simpl in H. destruct (zeq n 0). { contradiction. } + apply IHll; auto. Qed. -Remark indexed_memory_access_label: - forall insn sz base ofs k, - (forall ad, nolabel (insn ad)) -> - tail_nolabel k (indexed_memory_access insn sz base ofs k). +Lemma list_nth_z_find_bi: + forall lbi bi tlbi n bi' exit, + list_nth_z lbi n = Some bi -> + unfold_body lbi = OK tlbi -> + basic_to_instruction bi = OK bi' -> + Asm.find_instr n (tlbi ++ exit) = Some bi'. Proof. - unfold indexed_memory_access; intros. destruct offset_representable. - TailNoLabel. - eapply tail_nolabel_trans; TailNoLabel. + induction lbi. + - intros. inversion H. + - simpl. intros. + apply bind_inversion in H0. destruct H0 as (? & ? & ?). + apply bind_inversion in H2. destruct H2 as (? & ? & ?). + destruct (zeq n 0) as [Z | NZ]. + + destruct n. + * inversion H as (BI). rewrite BI in *. + inversion H3. simpl. congruence. + * (* absurd *) congruence. + * (* absurd *) congruence. + + inv H3. simpl. destruct (zeq n 0). { contradiction. } + eapply IHlbi; eauto. Qed. -Remark loadind_label: - forall base ofs ty dst k c, - loadind base ofs ty dst k = OK c -> tail_nolabel k c. +Lemma list_nth_z_find_bi_with_header: + forall ll lbi bi tlbi n bi' (rest : list Asm.instruction), + list_nth_z lbi (n - list_length_z ll) = Some bi -> + unfold_body lbi = OK tlbi -> + basic_to_instruction bi = OK bi' -> + Asm.find_instr n ((unfold_label ll) ++ (tlbi) ++ (rest)) = Some bi'. Proof. - unfold loadind; intros. - destruct ty, (preg_of dst); inv H; apply indexed_memory_access_label; intros; exact I. + induction ll. + - unfold list_length_z. simpl. intros. + replace (n - 0) with n in H by lia. eapply list_nth_z_find_bi; eauto. + - intros. simpl. destruct (zeq n 0). + + rewrite list_length_z_cons in H. rewrite e in H. + replace (0 - (list_length_z ll + 1)) with (-1 - (list_length_z ll)) in H by lia. + generalize (list_length_z_pos ll). intros. + rewrite list_nth_z_neg in H; try lia. inversion H. + + rewrite list_length_z_cons in H. + replace (n - (list_length_z ll + 1)) with (n -1 - (list_length_z ll)) in H by lia. + eapply IHll; eauto. Qed. -Remark storeind_label: - forall src base ofs ty k c, - storeind src base ofs ty k = OK c -> tail_nolabel k c. +(* XXX unused *) +Lemma range_list_nth_z: + forall (A: Type) (l: list A) n, + 0 <= n < list_length_z l -> + exists x, list_nth_z l n = Some x. Proof. - unfold storeind; intros. - destruct ty, (preg_of src); inv H; apply indexed_memory_access_label; intros; exact I. + induction l. + - intros. unfold list_length_z in H. simpl in H. lia. + - intros n. destruct (zeq n 0). + + intros. simpl. destruct (zeq n 0). { eauto. } contradiction. + + intros H. rewrite list_length_z_cons in H. + simpl. destruct (zeq n 0). { contradiction. } + replace (Z.pred n) with (n - 1) by lia. + eapply IHl; lia. Qed. -Remark loadptr_label: - forall base ofs dst k, tail_nolabel k (loadptr base ofs dst k). +Lemma list_nth_z_n_too_big: + forall (A: Type) (l: list A) n, + 0 <= n -> + list_nth_z l n = None -> + n >= list_length_z l. Proof. - intros. apply indexed_memory_access_label. unfold nolabel; auto. + induction l. + - intros. unfold list_length_z. simpl. lia. + - intros. rewrite list_length_z_cons. + simpl in H0. + destruct (zeq n 0) as [N | N]. + + inversion H0. + + (* XXX there must be a more elegant way to prove this simple fact *) + assert (n > 0). { lia. } + assert (0 <= n - 1). { lia. } + generalize (IHl (n - 1)). intros IH. + assert (n - 1 >= list_length_z l). { auto. } + assert (n > list_length_z l); lia. Qed. -Remark storeptr_label: - forall src base ofs k, tail_nolabel k (storeptr src base ofs k). +Lemma find_instr_past_header: + forall labels n rest, + list_nth_z labels n = None -> + Asm.find_instr n (unfold_label labels ++ rest) = + Asm.find_instr (n - list_length_z labels) rest. Proof. - intros. apply indexed_memory_access_label. unfold nolabel; auto. + induction labels as [| label labels' IH]. + - unfold list_length_z; simpl; intros; rewrite Z.sub_0_r; reflexivity. + - intros. simpl. destruct (zeq n 0) as [N | N]. + + rewrite N in H. inversion H. + + rewrite list_length_z_cons. + replace (n - (list_length_z labels' + 1)) with (n - 1 - list_length_z labels') by lia. + simpl in H. destruct (zeq n 0). { contradiction. } + replace (Z.pred n) with (n - 1) in H by lia. + apply IH; auto. Qed. -Remark make_epilogue_label: - forall f k, tail_nolabel k (make_epilogue f k). +(* very similar to find_instr_past_header *) +Lemma find_instr_past_body: + forall lbi n tlbi rest, + list_nth_z lbi n = None -> + unfold_body lbi = OK tlbi -> + Asm.find_instr n (tlbi ++ rest) = + Asm.find_instr (n - list_length_z lbi) rest. Proof. - unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadptr_label. TailNoLabel. + induction lbi. + - unfold list_length_z; simpl; intros ? ? ? ? H. inv H; rewrite Z.sub_0_r; reflexivity. + - intros n tlib ? NTH UNFOLD_BODY. + unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY. + destruct UNFOLD_BODY as (? & BI & H). + apply bind_inversion in H. destruct H as (? & UNFOLD_BODY' & CONS). + fold unfold_body in UNFOLD_BODY'. inv CONS. + simpl; destruct (zeq n 0) as [N|N]. + + rewrite N in NTH; inversion NTH. + + rewrite list_length_z_cons. + replace (n - (list_length_z lbi + 1)) with (n - 1 - list_length_z lbi) by lia. + simpl in NTH. destruct (zeq n 0). { contradiction. } + replace (Z.pred n) with (n - 1) in NTH by lia. + apply IHlbi; auto. Qed. -Lemma transl_instr_label: - forall f i ep k c, - transl_instr f i ep k = OK c -> - match i with Mlabel lbl => c = Plabel lbl :: k | _ => tail_nolabel k c end. +Lemma n_beyond_body: + forall bb n, + 0 <= n < size bb -> + list_nth_z (header bb) n = None -> + list_nth_z (body bb) (n - list_length_z (header bb)) = None -> + n >= Z.of_nat (length (header bb) + length (body bb)). Proof. - unfold transl_instr; intros; destruct i; TailNoLabel. -- eapply loadind_label; eauto. -- eapply storeind_label; eauto. -- destruct ep. eapply loadind_label; eauto. - eapply tail_nolabel_trans. apply loadptr_label. eapply loadind_label; eauto. -- eapply transl_op_label; eauto. -- eapply transl_load_label; eauto. -- eapply transl_store_label; eauto. -- destruct s0; monadInv H; TailNoLabel. -- destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]). -- eapply transl_cond_branch_label; eauto. -- eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]. + intros. + assert (0 <= n). { lia. } + generalize (list_nth_z_n_too_big label (header bb) n H2 H0). intros. + generalize (list_nth_z_n_too_big _ (body bb) (n - list_length_z (header bb))). intros. + unfold size in H. + + assert (0 <= n - list_length_z (header bb)). { lia. } + assert (n - list_length_z (header bb) >= list_length_z (body bb)). { apply H4; auto. } + + assert (n >= list_length_z (header bb) + list_length_z (body bb)). { lia. } + rewrite Nat2Z.inj_add. + repeat (rewrite <- list_length_z_nat). assumption. Qed. -Lemma transl_instr_label': - forall lbl f i ep k c, - transl_instr f i ep k = OK c -> - find_label lbl c = if Mach.is_label lbl i then Some k else find_label lbl k. +Lemma exec_arith_instr_dont_move_PC ai rs rs': forall + (BASIC: exec_arith_instr lk ai rs = rs'), + rs PC = rs' PC. Proof. - intros. exploit transl_instr_label; eauto. - destruct i; try (intros [A B]; apply B). - intros. subst c. simpl. auto. + destruct ai; simpl; intros; + try (rewrite <- BASIC; rewrite Pregmap.gso; auto; discriminate). + - destruct i; simpl in BASIC; + try destruct (negb _); rewrite <- BASIC; + repeat rewrite Pregmap.gso; try discriminate; reflexivity. + - destruct i; simpl in BASIC. + 1,2: rewrite <- BASIC; repeat rewrite Pregmap.gso; try discriminate; reflexivity. + destruct sz; + try (unfold compare_single in BASIC || unfold compare_float in BASIC); + destruct (rs r1), (rs r2); + try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)). + - destruct i; simpl in BASIC; + destruct is; + try (unfold compare_int in BASIC || unfold compare_long in BASIC); + try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)). + - destruct i; simpl in BASIC; destruct sz; + try (unfold compare_single in BASIC || unfold compare_float in BASIC); + destruct (rs r1); + try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)). + - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity). + - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity). Qed. -Lemma transl_code_label: - forall lbl f c ep tc, - transl_code f c ep = OK tc -> - match Mach.find_label lbl c with - | None => find_label lbl tc = None - | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_code f c' false = OK tc' - end. +Lemma exec_basic_dont_move_PC bi rs m rs' m': forall + (BASIC: exec_basic lk ge bi rs m = Next rs' m'), + rs PC = rs' PC. Proof. - induction c; simpl; intros. - inv H. auto. - monadInv H. rewrite (transl_instr_label' lbl _ _ _ _ _ EQ0). - generalize (Mach.is_label_correct lbl a). - destruct (Mach.is_label lbl a); intros. - subst a. simpl in EQ. exists x; auto. - eapply IHc; eauto. + destruct bi; simpl; intros. + - inv BASIC. exploit exec_arith_instr_dont_move_PC; eauto. + - unfold exec_load in BASIC. + destruct ld. + + unfold exec_load_rd_a in BASIC. + destruct Mem.loadv. 2: { discriminate BASIC. } + inv BASIC. rewrite Pregmap.gso; try discriminate; auto. + + unfold exec_load_double, is_pair_addressing_mode_correct in BASIC. + destruct a; try discriminate BASIC. + do 2 (destruct Mem.loadv; try discriminate BASIC). + inv BASIC. rewrite Pregmap.gso; try discriminate; auto. + - unfold exec_store in BASIC. + destruct st. + + unfold exec_store_rs_a in BASIC. + destruct Mem.storev. 2: { discriminate BASIC. } + inv BASIC; reflexivity. + + unfold exec_store_double in BASIC. + destruct a; try discriminate BASIC. + do 2 (destruct Mem.storev; try discriminate BASIC). + inv BASIC; reflexivity. + - destruct Mem.alloc, Mem.store. 2: { discriminate BASIC. } + inv BASIC. repeat (rewrite Pregmap.gso; try discriminate). reflexivity. + - destruct Mem.loadv. 2: { discriminate BASIC. } + destruct rs, Mem.free; try discriminate BASIC. + inv BASIC; rewrite Pregmap.gso; try discriminate; auto. + - inv BASIC; rewrite Pregmap.gso; try discriminate; auto. + - inv BASIC; rewrite Pregmap.gso; try discriminate; auto. + - inv BASIC; rewrite Pregmap.gso; try discriminate; auto. + - inv BASIC; rewrite Pregmap.gso; try discriminate; auto. + - inv BASIC; auto. Qed. -Lemma transl_find_label: - forall lbl f tf, - transf_function f = OK tf -> - match Mach.find_label lbl f.(Mach.fn_code) with - | None => find_label lbl tf.(fn_code) = None - | Some c => exists tc, find_label lbl tf.(fn_code) = Some tc /\ transl_code f c false = OK tc +Lemma exec_body_dont_move_PC_aux: + forall bis rs m rs' m' + (BODY: exec_body lk ge bis rs m = Next rs' m'), + rs PC = rs' PC. +Proof. + induction bis. + - intros; inv BODY; reflexivity. + - simpl; intros. + remember (exec_basic lk ge a rs m) as bi eqn:BI; destruct bi. 2: { discriminate BODY. } + symmetry in BI; destruct s in BODY, BI; simpl in BODY, BI. + exploit exec_basic_dont_move_PC; eauto; intros AGPC; rewrite AGPC. + eapply IHbis; eauto. +Qed. + +Lemma exec_body_dont_move_PC bb rs m rs' m': forall + (BODY: exec_body lk ge (body bb) rs m = Next rs' m'), + rs PC = rs' PC. +Proof. apply exec_body_dont_move_PC_aux. Qed. + +Lemma find_instr_bblock: + forall n lb pos bb tlb + (FINDBB: find_bblock pos lb = Some bb) + (UNFOLD: unfold lb = OK tlb) + (SIZE: 0 <= n < size bb), + exists i, is_nth_inst bb n i /\ Asm.find_instr (pos+n) tlb = Some i. +Proof. + induction lb as [| b lb IHlb]. + - intros. inversion FINDBB. + - intros pos bb tlb FINDBB UNFOLD SIZE. + destruct pos. + + inv FINDBB. simpl. + exploit unfold_car_cdr; eauto. intros (tbb & tlb' & UNFOLD_BBLOCK & UNFOLD' & UNFOLD_cons). + rewrite UNFOLD in UNFOLD_cons. inversion UNFOLD_cons. + unfold unfold_bblock in UNFOLD_BBLOCK. + destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. } + apply bind_inversion in UNFOLD_BBLOCK. + destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & H). + inversion H as (UNFOLD_BBLOCK). + remember (list_nth_z (header bb) n) as label_opt eqn:LBL. destruct label_opt. + * (* nth instruction is a label *) + eexists; split. { eapply is_nth_label; eauto. } + inversion UNFOLD_cons. + symmetry in LBL. + rewrite <- app_assoc. + apply list_nth_z_find_label; auto. + * remember (list_nth_z (body bb) (n - list_length_z (header bb))) as bi_opt eqn:BI. + destruct bi_opt. + -- (* nth instruction is a basic instruction *) + exploit list_nth_z_in; eauto. intros INBB. + exploit entire_body_translated; eauto. intros BI'. + destruct BI'. + eexists; split. + ++ eapply is_nth_basic; eauto. + ++ repeat (rewrite <- app_assoc). eapply list_nth_z_find_bi_with_header; eauto. + -- (* nth instruction is the exit instruction *) + generalize n_beyond_body. intros TEMP. + assert (n >= Z.of_nat (Datatypes.length (header bb) + + Datatypes.length (body bb))) as NGE. { auto. } clear TEMP. + remember (exit bb) as exit_opt eqn:EXIT. destruct exit_opt. + ++ rewrite <- app_assoc. rewrite find_instr_past_header; auto. + rewrite <- app_assoc. erewrite find_instr_past_body; eauto. + assert (SIZE' := SIZE). + unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE. + destruct SIZE as (LOWER & UPPER). + repeat (rewrite Nat2Z.inj_add in UPPER). + repeat (rewrite <- list_length_z_nat in UPPER). repeat (rewrite Nat2Z.inj_add in NGE). + repeat (rewrite <- list_length_z_nat in NGE). simpl in UPPER. + assert (n = list_length_z (header bb) + list_length_z (body bb)). { lia. } + assert (n = size bb - 1). { + unfold size. rewrite <- EXIT. simpl. + repeat (rewrite Nat2Z.inj_add). repeat (rewrite <- list_length_z_nat). simpl. lia. + } + symmetry in EXIT. + eexists; split. + ** eapply is_nth_ctlflow; eauto. + ** simpl. + destruct (zeq (n - list_length_z (header bb) - list_length_z (body bb)) 0). { reflexivity. } + (* absurd *) lia. + ++ (* absurd *) + unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE. + destruct SIZE as (? & SIZE'). rewrite Nat.add_0_r in SIZE'. lia. + + unfold find_bblock in FINDBB; simpl in FINDBB; fold find_bblock in FINDBB. + inversion UNFOLD as (UNFOLD'). + apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD_BBLOCK' & UNFOLD')). + apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD' & TLB)). + inversion TLB. + generalize (IHlb _ _ _ FINDBB UNFOLD'). intros IH. + destruct IH as (? & (IH_is_nth & IH_find_instr)); eauto. + eexists; split. + * apply IH_is_nth. + * replace (Z.pos p + n) with (Z.pos p + n - size b + size b) by lia. + eapply find_instr_bblock_tail; try assumption. + replace (Z.pos p + n - size b) with (Z.pos p - size b + n) by lia. + apply IH_find_instr. + + (* absurd *) + generalize (Pos2Z.neg_is_neg p). intros. exploit (find_bblock_neg (b :: lb)); eauto. + rewrite FINDBB. intros CONTRA. inversion CONTRA. +Qed. + +Lemma exec_header_simulation b ofs f bb rs m: forall + (ATPC: rs PC = Vptr b ofs) + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb), + exists s', star Asm.step tge (State rs m) E0 s' + /\ match_internal (list_length_z (header bb)) (State rs m) s'. +Proof. + intros. + exploit internal_functions_unfold; eauto. + intros (tc & FINDtf & TRANStf & _). + assert (BNDhead: list_length_z (header bb) <= 1). { eapply size_header; eauto. } + destruct (header bb) as [|l[|]] eqn: EQhead. + + (* header nil *) + eexists; split. + - eapply star_refl. + - split; eauto. + unfold list_length_z; rewrite !ATPC; simpl. + rewrite Ptrofs.add_zero; auto. + + (* header one *) + assert (Lhead: list_length_z (header bb) = 1). { rewrite EQhead; unfold list_length_z; simpl. auto. } + exploit (find_instr_bblock 0); eauto. + { generalize (bblock_size_pos bb). lia. } + intros (i & NTH & FIND_INSTR). + inv NTH. + * rewrite EQhead in H; simpl in H. inv H. + replace (Ptrofs.unsigned ofs + 0) with (Ptrofs.unsigned ofs) in FIND_INSTR by lia. + eexists. split. + - eapply star_one. + eapply Asm.exec_step_internal; eauto. + simpl; eauto. + - unfold list_length_z; simpl. split; eauto. + intros r; destruct r; simpl; congruence || auto. + * (* absurd case *) + erewrite list_nth_z_neg in * |-; [ congruence | rewrite Lhead; lia]. + * (* absurd case *) + rewrite bblock_size_aux, Lhead in *. generalize (bblock_size_aux_pos bb). lia. + + (* absurd case *) + unfold list_length_z in BNDhead. simpl in *. + generalize (list_length_z_aux_increase _ l1 2); lia. +Qed. + +Lemma eval_addressing_preserved a rs1 rs2: + (forall r : preg, r <> PC -> rs1 r = rs2 r) -> + eval_addressing lk a rs1 = Asm.eval_addressing tge a rs2. +Proof. + intros EQ. + destruct a; simpl; try (rewrite !EQ; congruence). + auto. +Qed. + +Ltac next_stuck_cong := try (unfold Next, Stuck in *; congruence). + +Ltac inv_ok_eq := + repeat match goal with + | [EQ: OK ?x = OK ?y |- _ ] + => inversion EQ; clear EQ; subst end. + +Ltac reg_rwrt := + match goal with + | [e: DR _ = DR _ |- _ ] + => rewrite e in * + end. + +Ltac destruct_reg_inv := + repeat match goal with + | [ H : match ?reg with _ => _ end = _ |- _ ] + => simpl in *; destruct reg; try congruence; try inv_ok_eq; try reg_rwrt + end. + +Ltac destruct_ireg_inv := + repeat match goal with + | [ H : match ?reg with _ => _ end = _ |- _ ] + => destruct reg as [[r|]|]; try congruence; try inv_ok_eq; subst + end. + +Ltac destruct_reg_size := + simpl in *; + match goal with + | [ |- context [ match ?reg with _ => _ end ] ] + => destruct reg; try congruence + end. + +Ltac find_rwrt_ag := + simpl in *; + match goal with + | [ AG: forall r, r <> ?PC -> _ r = _ r |- _ ] + => repeat rewrite <- AG; try congruence + end. + +Ltac inv_matchi := + match goal with + | [ MATCHI : match_internal _ _ _ |- _ ] + => inversion MATCHI; subst; find_rwrt_ag + end. + +Ltac destruct_ir0_reg := + match goal with + | [ |- context [ ir0 _ _ ?r ] ] + => unfold ir0 in *; destruct r; find_rwrt_ag; eauto + end. + +Ltac pc_not_sp := + match goal with + | [ |- ?PC <> ?SP ] + => destruct (PregEq.eq SP PC); repeat congruence; discriminate + end. + +Ltac update_x_access_x := + subst; rewrite !Pregmap.gss; auto. + +Ltac update_x_access_r := + rewrite !Pregmap.gso; auto. + +Lemma nextinstr_agree_but_pc rs1 rs2: forall + (AG: forall r, r <> PC -> rs1 r = rs2 r), + forall r, r <> PC -> rs1 r = Asm.nextinstr rs2 r. Proof. - intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. - monadInv EQ. rewrite transl_code'_transl_code in EQ0. unfold fn_code. - simpl. destruct (storeptr_label X30 XSP (fn_retaddr_ofs f) x) as [A B]; rewrite B. - eapply transl_code_label; eauto. + intros; unfold Asm.nextinstr in *; rewrite Pregmap.gso in *; eauto. Qed. -End TRANSL_LABEL. +Lemma ptrofs_nextinstr_agree rs1 rs2 n: forall + (BOUNDED : 0 <= n <= Ptrofs.max_unsigned) + (AGPC : Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC), + Val.offset_ptr (rs1 PC) (Ptrofs.repr (n + 1)) = Asm.nextinstr rs2 PC. +Proof. + intros; unfold Asm.nextinstr; rewrite Pregmap.gss. + rewrite <- Ptrofs.unsigned_one; rewrite <- (Ptrofs.unsigned_repr n); eauto; + rewrite <- Ptrofs.add_unsigned; rewrite <- Val.offset_ptr_assoc; rewrite AGPC; eauto. +Qed. -(** A valid branch in a piece of Mach code translates to a valid ``go to'' - transition in the generated Asm code. *) +Lemma load_rd_a_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk f a: forall + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (HLOAD: exec_load_rd_a lk chk f a rd rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_load tge chk f a rd rs2 m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + unfold exec_load_rd_a, Asm.exec_load in *. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + rewrite <- (eval_addressing_preserved a rs1 rs2); auto. + destruct (Mem.loadv _ _ _). + + inversion HLOAD; auto. repeat (econstructor; eauto). + * eapply nextinstr_agree_but_pc; intros. + destruct (PregEq.eq r rd); try update_x_access_x; try update_x_access_r. + * eapply ptrofs_nextinstr_agree; eauto. + + next_stuck_cong. +Qed. + +Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk1 chk2 f a: forall + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (HLOAD: exec_load_double lk chk1 chk2 f a rd1 rd2 rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk1 chk2 f a rd1 rd2 rs2 m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + unfold exec_load_double, Asm.exec_load_double in *. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + erewrite <- !eval_addressing_preserved; eauto. + destruct (is_pair_addressing_mode_correct a); try discriminate. + destruct (Mem.loadv _ _ _); + destruct (Mem.loadv chk2 m2 + (eval_addressing lk + (get_offset_addr a match chk1 with + | Mint32 | Mfloat32| Many32 => 4 + | _ => 8 + end) rs1)); + inversion HLOAD; auto. + repeat (econstructor; eauto). + * eapply nextinstr_agree_but_pc; intros. + destruct (PregEq.eq r rd2); destruct (PregEq.eq r rd1). + - try update_x_access_x. + - try update_x_access_x. + - subst; repeat rewrite Pregmap.gso, Pregmap.gss; auto. + - try update_x_access_r. + * eapply ptrofs_nextinstr_agree; eauto. +Qed. -Lemma find_label_goto_label: - forall f tf lbl rs m c' b ofs, - Genv.find_funct_ptr ge b = Some (Internal f) -> - transf_function f = OK tf -> - rs PC = Vptr b ofs -> - Mach.find_label lbl f.(Mach.fn_code) = Some c' -> - exists tc', exists rs', - goto_label tf lbl rs m = Next rs' m - /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc' - /\ forall r, r <> PC -> rs'#r = rs#r. -Proof. - intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2. - intros [tc [A B]]. - exploit label_pos_code_tail; eauto. instantiate (1 := 0). - intros [pos' [P [Q R]]]. - exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))). - split. unfold goto_label. rewrite P. rewrite H1. auto. - split. rewrite Pregmap.gss. constructor; auto. - rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q. - auto. lia. - generalize (transf_function_no_overflow _ _ H0). lia. - intros. apply Pregmap.gso; auto. -Qed. - -(** Existence of return addresses *) +Lemma store_rs_a_preserved n rs1 m1 rs1' m1' rs2 m2 v chk a: forall + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (HSTORE: exec_store_rs_a lk chk a v rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_store tge chk a v rs2 m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + unfold exec_store_rs_a, Asm.exec_store in *. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + rewrite <- (eval_addressing_preserved a rs1 rs2); auto. + destruct (Mem.storev _ _ _ _). + + inversion HSTORE; auto. repeat (econstructor; eauto). + * eapply nextinstr_agree_but_pc; intros. + subst. apply EQR. auto. + * eapply ptrofs_nextinstr_agree; subst; eauto. + + next_stuck_cong. +Qed. -Lemma return_address_exists: - forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) -> - exists ra, return_address_offset f c ra. +Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (HSTORE: exec_store_double lk chk1 chk2 a v1 v2 rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + unfold exec_store_double, Asm.exec_store_double in *. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + erewrite <- !eval_addressing_preserved; eauto. + destruct (is_pair_addressing_mode_correct a); try discriminate. + destruct (Mem.storev _ _ _ _); + try destruct (Mem.storev chk2 m + (eval_addressing lk + (get_offset_addr a + match chk1 with + | Mint32 | Mfloat32 | Many32 => 4 + | _ => 8 + end) rs1) v2); + inversion HSTORE; auto. + repeat (econstructor; eauto). + * eapply nextinstr_agree_but_pc; intros. + subst. apply EQR. auto. + * eapply ptrofs_nextinstr_agree; subst; eauto. +Qed. + +Lemma next_inst_preserved n rs1 m1 rs1' m1' rs2 m2 (x: dreg) v: forall + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (NEXTI: Next rs1 # x <- v m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), + Next (Asm.nextinstr rs2 # x <- v) m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + inversion NEXTI. repeat (econstructor; eauto). + * eapply nextinstr_agree_but_pc; intros. + destruct (PregEq.eq r x); try update_x_access_x; try update_x_access_r. + * eapply ptrofs_nextinstr_agree; eauto. +Qed. + +Lemma match_internal_nextinstr_switch: + forall n s rs2 m2 r v, + r <> PC -> + match_internal n s (State ((Asm.nextinstr rs2)#r <- v) m2) -> + match_internal n s (State (Asm.nextinstr (rs2#r <- v)) m2). Proof. - intros. eapply Asmgenproof0.return_address_exists; eauto. -- intros. exploit transl_instr_label; eauto. - destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor. -- intros. monadInv H0. - destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. monadInv EQ. - rewrite transl_code'_transl_code in EQ0. - exists x; exists true; split; auto. unfold fn_code. - constructor. apply (storeptr_label X30 XSP (fn_retaddr_ofs f0) x). -- exact transf_function_no_overflow. -Qed. - -(** * Proof of semantic preservation *) - -(** Semantic preservation is proved using simulation diagrams - of the following form. -<< - st1 --------------- st2 - | | - t| *|t - | | - v v - st1'--------------- st2' ->> - The invariant is the [match_states] predicate below, which includes: -- The Asm code pointed by the PC register is the translation of - the current Mach code sequence. -- Mach register values and Asm register values agree. + unfold Asm.nextinstr; intros n s rs2 m2 r v NOTPC1 MI. + inversion MI; subst; constructor; auto. + - eapply nextinstr_agree_but_pc; intros. + rewrite AG; try congruence. + destruct (PregEq.eq r r0); try update_x_access_x; try update_x_access_r. + - rewrite !Pregmap.gss, !Pregmap.gso; try congruence. + rewrite AGPC. + rewrite Pregmap.gso, Pregmap.gss; try congruence. +Qed. + +Lemma match_internal_nextinstr_set_parallel: + forall n rs1 m1 rs2 m2 r v1 v2, + r <> PC -> + match_internal n (State rs1 m1) (State (Asm.nextinstr rs2) m2) -> + v1 = v2 -> + match_internal n (State (rs1#r <- v1) m1) (State (Asm.nextinstr (rs2#r <- v2)) m2). +Proof. + intros; subst; eapply match_internal_nextinstr_switch; eauto. + intros; eapply match_internal_set_parallel; eauto. +Qed. + +Lemma exec_basic_simulation: + forall tf n rs1 m1 rs1' m1' rs2 m2 bi tbi + (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) + (BASIC: exec_basic lk ge bi rs1 m1 = Next rs1' m1') + (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) + (TRANSBI: basic_to_instruction bi = OK tbi), + exists rs2' m2', Asm.exec_instr tge tf tbi + rs2 m2 = Next rs2' m2' + /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). +Proof. + intros. + destruct bi. + { (* PArith *) + simpl in *; destruct i. + 1: { + destruct i. + 1,2,3: + try (destruct sumbool_rec; try congruence); + try (monadInv TRANSBI); + try (destruct_reg_inv); + try (inv_matchi); + try (exploit next_inst_preserved; eauto); + try (repeat destruct_reg_size); + try (destruct_ir0_reg). + 1,2: (* Special case for Pfmovimmd / Pfmovimms *) + try (monadInv TRANSBI); + try (destruct_reg_inv); + try (inv_matchi); + inversion BASIC; clear BASIC; subst; + try (destruct (is_immediate_float64 _)); + try (destruct (is_immediate_float32 _)); + eexists; eexists; split; eauto; + repeat (eapply match_internal_nextinstr_set_parallel; try congruence); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). + } + 1,2,3,4,5: (* PArithP, PArithPP, PArithPPP, PArithRR0R, PArithRR0, PArithARRRR0 *) + destruct i; + try (destruct sumbool_rec; try congruence); + try (monadInv TRANSBI); + try (destruct_reg_inv); + try (inv_matchi); + try (exploit next_inst_preserved; eauto); + try (repeat destruct_reg_size); + try (destruct_ir0_reg). + { (* PArithComparisonPP *) + destruct i; + try (monadInv TRANSBI); + try (inv_matchi); + try (destruct_reg_inv); + simpl in *. + 1,2: (* compare_long *) + inversion BASIC; clear BASIC; subst; + eexists; eexists; split; eauto; + unfold compare_long; + repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). + + destruct sz. + - (* compare_single *) + unfold compare_single in BASIC. + destruct (rs1 x), (rs1 x0); + inversion BASIC; + eexists; eexists; split; eauto; + repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). + - (* compare_float *) + unfold compare_float in BASIC. + destruct (rs1 x), (rs1 x0); + inversion BASIC; + eexists; eexists; split; eauto; + repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). } + 1,2: (* PArithComparisonR0R, PArithComparisonP *) + destruct i; + try (monadInv TRANSBI); + try (inv_matchi); + try (destruct_reg_inv); + try (destruct_reg_size); + simpl in *; + inversion BASIC; clear BASIC; subst; + eexists; eexists; split; eauto; + unfold compare_long, compare_int, compare_float, compare_single; + try (destruct_reg_size); + repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]); + try (econstructor; eauto); + try (destruct_ir0_reg); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). + { (* Pcset *) + try (monadInv TRANSBI); + try (inv_matchi). + try (exploit next_inst_preserved; eauto); + try (simpl in *; intros; + unfold if_opt_bool_val in *; unfold eval_testcond in *; + rewrite <- !AG; try congruence; eauto). } + { (* Pfmovi *) + try (monadInv TRANSBI); + try (inv_matchi); + try (destruct_reg_size); + try (destruct_ir0_reg); + try (exploit next_inst_preserved; eauto). } + { (* Pcsel *) + try (destruct_reg_inv); + try (monadInv TRANSBI); + try (destruct_reg_inv); + try (inv_matchi); + try (exploit next_inst_preserved; eauto); + simpl in *; intros; + unfold if_opt_bool_val in *; unfold eval_testcond in *; + rewrite <- !AG; try congruence; eauto. } + { (* Pfnmul *) + try (monadInv TRANSBI); + try (inv_matchi); + try (destruct_reg_size); + try (exploit next_inst_preserved; eauto); + try (find_rwrt_ag). } } + { (* PLoad *) + destruct ld. + - destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_rd_a_preserved; eauto; + intros; simpl in *; destruct sz; eauto. + - destruct ld; monadInv TRANSBI; destruct rd1 as [[rd1'|]|]; destruct rd2 as [[rd2'|]|]; + inv EQ; inv EQ1; exploit load_double_preserved; eauto. } + { (* PStore *) + destruct st. + - destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_rs_a_preserved; eauto; + simpl in *; inv_matchi; find_rwrt_ag. + - destruct st; monadInv TRANSBI; destruct rs0 as [[rs0'|]|]; destruct rs3 as [[rs3'|]|]; + inv EQ; inv EQ1; exploit store_double_preserved; eauto; + simpl in *; inv_matchi; find_rwrt_ag. } + { (* Pallocframe *) + monadInv TRANSBI; + inv_matchi; try pc_not_sp; + destruct sz eqn:EQSZ; + destruct Mem.alloc eqn:EQALLOC; + destruct Mem.store eqn:EQSTORE; inversion BASIC; try pc_not_sp; + eexists; eexists; split; eauto; + repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). } + { (* Pfreeframe *) + monadInv TRANSBI; + inv_matchi; try pc_not_sp; + destruct sz eqn:EQSZ; + destruct Mem.loadv eqn:EQLOAD; + destruct (rs1 SP) eqn:EQRS1SP; + try (destruct Mem.free eqn:EQFREE); + inversion BASIC; try pc_not_sp; + eexists; eexists; split; eauto; + repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]); + try (econstructor; eauto); + try (eapply nextinstr_agree_but_pc; eauto); + try (eapply ptrofs_nextinstr_agree; eauto). } + 1,2,3,4: (* Ploadsymbol, Pcvtsw2x, Pcvtuw2x, Pcvtx2w *) + try (monadInv TRANSBI); + try (inv_matchi); + try (exploit next_inst_preserved; eauto); + rewrite symbol_addresses_preserved; eauto; + try (find_rwrt_ag). + { (* Pnop *) + monadInv TRANSBI; inv_matchi. + inversion BASIC. + repeat (econstructor; eauto). + eapply nextinstr_agree_but_pc; intros; + try rewrite <- H0, AG; auto. + try eapply ptrofs_nextinstr_agree; auto; rewrite <- H0; + assumption. } +Qed. + +Lemma find_basic_instructions b ofs f bb tc: forall + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (UNFOLD: unfold (fn_blocks f) = OK tc), + forall n, + (n < length (body bb))%nat -> + exists (i : Asm.instruction) (bi : basic), + list_nth_z (body bb) (Z.of_nat n) = Some bi + /\ basic_to_instruction bi = OK i + /\ Asm.find_instr (Ptrofs.unsigned ofs + + (list_length_z (header bb)) + + Z.of_nat n) tc + = Some i. +Proof. + intros until n; intros NLT. + exploit internal_functions_unfold; eauto. + intros (tc' & FINDtf & TRANStf & _). + assert (tc' = tc) by congruence; subst. + exploit (find_instr_bblock (list_length_z (header bb) + Z.of_nat n)); eauto. + { unfold size; split. + - rewrite list_length_z_nat; lia. + - repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). lia. } + intros (i & NTH & FIND_INSTR). + exists i; intros. + inv NTH. + - (* absurd *) apply list_nth_z_range in H; lia. + - exists bi; + rewrite Z.add_simpl_l in H; + rewrite Z.add_assoc in FIND_INSTR; + intuition. + - (* absurd *) rewrite bblock_size_aux in H0; + rewrite H in H0; simpl in H0; repeat rewrite list_length_z_nat in H0; lia. +Qed. + +(* TODO: remplacer find_basic_instructions directement par ce lemme ? *) +Lemma find_basic_instructions_alt b ofs f bb tc n: forall + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (UNFOLD: unfold (fn_blocks f) = OK tc) + (BOUND: 0 <= n < list_length_z (body bb)), + exists (i : Asm.instruction) (bi : basic), + list_nth_z (body bb) n = Some bi + /\ basic_to_instruction bi = OK i + /\ Asm.find_instr (Ptrofs.unsigned ofs + + (list_length_z (header bb)) + + n) tc + = Some i. +Proof. + intros; assert ((Z.to_nat n) < length (body bb))%nat. + { rewrite Nat2Z.inj_lt, <- list_length_z_nat, Z2Nat.id; try lia. } + exploit find_basic_instructions; eauto. + rewrite Z2Nat.id; try lia. intros (i & bi & X). + eexists; eexists; intuition eauto. +Qed. + +Lemma header_body_tail_bound: forall (a: basic) (li: list basic) bb ofs + (BOUNDBB : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned) + (BDYLENPOS : 0 <= list_length_z (body bb) - list_length_z (a :: li) < + list_length_z (body bb)), +0 <= list_length_z (header bb) + list_length_z (body bb) - list_length_z (a :: li) <= +Ptrofs.max_unsigned. +Proof. + intros. + assert (HBBPOS: list_length_z (header bb) >= 0) by eapply list_length_z_pos. + assert (HBBSIZE: list_length_z (header bb) < size bb) by eapply header_size_lt_block_size. + assert (OFSBOUND: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2. + assert (BBSIZE: size bb <= Ptrofs.max_unsigned) by lia. + unfold size in BBSIZE. + rewrite !Nat2Z.inj_add in BBSIZE. + rewrite <- !list_length_z_nat in BBSIZE. + lia. +Qed. + +(* A more general version of the exec_body_simulation_plus lemma below. + This generalization is necessary for the induction proof inside the body. *) +Lemma exec_body_simulation_plus_gen li: forall b ofs f bb rs m s2 rs' m' + (BLI: is_tail li (body bb)) + (ATPC: rs PC = Vptr b ofs) + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (NEMPTY_BODY: li <> nil) + (MATCHI: match_internal ((list_length_z (header bb)) + (list_length_z (body bb)) - (list_length_z li)) (State rs m) s2) + (BODY: exec_body lk ge li rs m = Next rs' m'), + exists s2', plus Asm.step tge s2 E0 s2' + /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'. +Proof. + induction li as [|a li]; simpl; try congruence. + intros. + assert (BDYLENPOS: 0 <= (list_length_z (body bb) - list_length_z (a::li)) < list_length_z (body bb)). { + assert (Z.of_nat O < list_length_z (a::li) <= list_length_z (body bb)); try lia. + rewrite !list_length_z_nat; split. + - rewrite <- Nat2Z.inj_lt. simpl. lia. + - rewrite <- Nat2Z.inj_le; eapply is_tail_bound; eauto. + } + exploit internal_functions_unfold; eauto. + intros (tc & FINDtf & TRANStf & _). + exploit find_basic_instructions_alt; eauto. + intros (tbi & (bi & (NTHBI & TRANSBI & FIND_INSTR))). + exploit is_tail_list_nth_z; eauto. + rewrite NTHBI; simpl. + intros X; inversion X; subst; clear X NTHBI. + destruct (exec_basic _ _ _ _ _) eqn:EXEC_BASIC; next_stuck_cong. + destruct s as (rs1 & m1); simpl in *. + destruct s2 as (rs2 & m2); simpl in *. + assert (BOUNDBBMAX: Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned) + by (eapply size_of_blocks_bounds; eauto). + exploit header_body_tail_bound; eauto. intros BDYTAIL. + exploit exec_basic_simulation; eauto. + intros (rs_next' & m_next' & EXEC_INSTR & MI_NEXT). + exploit exec_basic_dont_move_PC; eauto. intros AGPC. + inversion MI_NEXT as [A B C D E M_NEXT_AGREE RS_NEXT_AGREE ATPC_NEXT PC_OFS_NEXT RS RS']. + subst A. subst B. subst C. subst D. subst E. + rewrite ATPC in AGPC. symmetry in AGPC, ATPC_NEXT. + + inv MATCHI. symmetry in AGPC0. + rewrite ATPC in AGPC0. + unfold Val.offset_ptr in AGPC0. + + simpl in FIND_INSTR. + (* Execute internal step. *) + exploit (Asm.exec_step_internal tge b); eauto. + { + rewrite Ptrofs.add_unsigned. + repeat (rewrite Ptrofs.unsigned_repr); try lia. + 2: { + assert (BOUNDOFS: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2. + assert (list_length_z (body bb) <= size bb) by eapply body_size_le_block_size. + assert (list_length_z (header bb) <= 1). { eapply size_header; eauto. } + lia. } + try rewrite list_length_z_nat; try split; + simpl; rewrite <- !list_length_z_nat; + replace (Ptrofs.unsigned ofs + (list_length_z (header bb) + list_length_z (body bb) - + list_length_z (a :: li))) with (Ptrofs.unsigned ofs + list_length_z (header bb) + + (list_length_z (body bb) - list_length_z (a :: li))) by lia; + try assumption; try lia. } + + (* This is our STEP hypothesis. *) + intros STEP_NEXT. + destruct li as [|a' li]; simpl in *. + - (* case of a single instruction in li: this our base case in the induction *) + inversion BODY; subst. + eexists; split. + + apply plus_one. eauto. + + constructor; auto. + rewrite ATPC_NEXT. + apply f_equal. + apply f_equal. + rewrite bblock_size_aux, list_length_z_cons; simpl. + lia. + - exploit (IHli b ofs f bb rs1 m_next' (State rs_next' m_next')); congruence || eauto. + + exploit is_tail_app_def; eauto. + intros (l3 & EQ); rewrite EQ. + exploit (is_tail_app_right (l3 ++ a::nil)). + rewrite <- app_assoc; simpl; eauto. + + constructor; auto. + rewrite ATPC_NEXT. + apply f_equal. + apply f_equal. + rewrite! list_length_z_cons; simpl. + lia. + + intros (s2' & LAST_STEPS & LAST_MATCHS). + eexists. split; eauto. + eapply plus_left'; eauto. +Qed. -Inductive match_states: Mach.state -> Asm.state -> Prop := - | match_states_intro: - forall s fb sp c ep ms m m' rs f tf tc - (STACKS: match_stack ge s) - (FIND: Genv.find_funct_ptr ge fb = Some (Internal f)) - (MEXT: Mem.extends m m') - (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) - (AG: agree ms sp rs) - (DXP: ep = true -> rs#X29 = parent_sp s), - match_states (Mach.State s fb sp c ms m) - (Asm.State rs m') - | match_states_call: - forall s fb ms m m' rs - (STACKS: match_stack ge s) - (MEXT: Mem.extends m m') - (AG: agree ms (parent_sp s) rs) - (ATPC: rs PC = Vptr fb Ptrofs.zero) - (ATLR: rs RA = parent_ra s), - match_states (Mach.Callstate s fb ms m) - (Asm.State rs m') - | match_states_return: - forall s ms m m' rs - (STACKS: match_stack ge s) - (MEXT: Mem.extends m m') - (AG: agree ms (parent_sp s) rs) - (ATPC: rs PC = parent_ra s), - match_states (Mach.Returnstate s ms m) - (Asm.State rs m'). - -Lemma exec_straight_steps: - forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2, - match_stack ge s -> - Mem.extends m2 m2' -> - Genv.find_funct_ptr ge fb = Some (Internal f) -> - transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc -> - (forall k c (TR: transl_instr f i ep k = OK c), - exists rs2, - exec_straight tge tf c rs1 m1' k rs2 m2' - /\ agree ms2 sp rs2 - /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)) -> - exists st', - plus step tge (State rs1 m1') E0 st' /\ - match_states (Mach.State s fb sp c ms2 m2) st'. -Proof. - intros. inversion H2. subst. monadInv H7. - exploit H3; eauto. intros [rs2 [A [B C]]]. - exists (State rs2 m2'); split. - eapply exec_straight_exec; eauto. - econstructor; eauto. eapply exec_straight_at; eauto. -Qed. - -Lemma exec_straight_steps_goto: - forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c', - match_stack ge s -> - Mem.extends m2 m2' -> - Genv.find_funct_ptr ge fb = Some (Internal f) -> - Mach.find_label lbl f.(Mach.fn_code) = Some c' -> - transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc -> - it1_is_parent ep i = false -> - (forall k c (TR: transl_instr f i ep k = OK c), - exists jmp, exists k', exists rs2, - exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2' - /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> - exists st', - plus step tge (State rs1 m1') E0 st' /\ - match_states (Mach.State s fb sp c' ms2 m2) st'. -Proof. - intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. - generalize (functions_transl _ _ _ H7 H8); intro FN. - generalize (transf_function_no_overflow _ _ H8); intro NOOV. - exploit exec_straight_steps_2; eauto. - intros [ofs' [PC2 CT2]]. - exploit find_label_goto_label; eauto. - intros [tc' [rs3 [GOTO [AT' OTH]]]]. - exists (State rs3 m2'); split. - eapply plus_right'. - eapply exec_straight_steps_1; eauto. - econstructor; eauto. - eapply find_instr_tail. eauto. - rewrite C. eexact GOTO. - traceEq. - econstructor; eauto. - apply agree_exten with rs2; auto with asmgen. - congruence. -Qed. - -Lemma exec_straight_opt_steps_goto: - forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c', - match_stack ge s -> - Mem.extends m2 m2' -> - Genv.find_funct_ptr ge fb = Some (Internal f) -> - Mach.find_label lbl f.(Mach.fn_code) = Some c' -> - transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc -> - it1_is_parent ep i = false -> - (forall k c (TR: transl_instr f i ep k = OK c), - exists jmp, exists k', exists rs2, - exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2' - /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> - exists st', - plus step tge (State rs1 m1') E0 st' /\ - match_states (Mach.State s fb sp c' ms2 m2) st'. -Proof. - intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. - generalize (functions_transl _ _ _ H7 H8); intro FN. - generalize (transf_function_no_overflow _ _ H8); intro NOOV. - inv A. -- exploit find_label_goto_label; eauto. - intros [tc' [rs3 [GOTO [AT' OTH]]]]. - exists (State rs3 m2'); split. - apply plus_one. econstructor; eauto. - eapply find_instr_tail. eauto. - rewrite C. eexact GOTO. - econstructor; eauto. - apply agree_exten with rs2; auto with asmgen. - congruence. -- exploit exec_straight_steps_2; eauto. - intros [ofs' [PC2 CT2]]. - exploit find_label_goto_label; eauto. - intros [tc' [rs3 [GOTO [AT' OTH]]]]. - exists (State rs3 m2'); split. - eapply plus_right'. - eapply exec_straight_steps_1; eauto. - econstructor; eauto. - eapply find_instr_tail. eauto. - rewrite C. eexact GOTO. - traceEq. - econstructor; eauto. - apply agree_exten with rs2; auto with asmgen. - congruence. -Qed. - -(** We need to show that, in the simulation diagram, we cannot - take infinitely many Mach transitions that correspond to zero - transitions on the Asm side. Actually, all Mach transitions - correspond to at least one Asm transition, except the - transition from [Machsem.Returnstate] to [Machsem.State]. - So, the following integer measure will suffice to rule out - the unwanted behaviour. *) - -Definition measure (s: Mach.state) : nat := - match s with - | Mach.State _ _ _ _ _ _ => 0%nat - | Mach.Callstate _ _ _ _ => 0%nat - | Mach.Returnstate _ _ _ => 1%nat +Lemma exec_body_simulation_plus b ofs f bb rs m s2 rs' m': forall + (ATPC: rs PC = Vptr b ofs) + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (NEMPTY_BODY: body bb <> nil) + (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2) + (BODY: exec_body lk ge (body bb) rs m = Next rs' m'), + exists s2', plus Asm.step tge s2 E0 s2' + /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'. +Proof. + intros. + exploit exec_body_simulation_plus_gen; eauto. + - constructor. + - replace (list_length_z (header bb) + list_length_z (body bb) - list_length_z (body bb)) with (list_length_z (header bb)); auto. + lia. +Qed. + +Lemma exec_body_simulation_star b ofs f bb rs m s2 rs' m': forall + (ATPC: rs PC = Vptr b ofs) + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2) + (BODY: exec_body lk ge (body bb) rs m = Next rs' m'), + exists s2', star Asm.step tge s2 E0 s2' + /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'. +Proof. + intros. + destruct (body bb) eqn: Hbb. + - simpl in BODY. inv BODY. + eexists. split. + eapply star_refl; eauto. + assert (EQ: (size bb - Z.of_nat (length_opt (exit bb))) = list_length_z (header bb)). + { rewrite bblock_size_aux. rewrite Hbb; unfold list_length_z; simpl. lia. } + rewrite EQ; eauto. + - exploit exec_body_simulation_plus; congruence || eauto. + { rewrite Hbb; eauto. } + intros (s2' & PLUS & MATCHI'). + eexists; split; eauto. + eapply plus_star; eauto. +Qed. + +Lemma list_nth_z_range_exceeded A (l : list A) n: + n >= list_length_z l -> + list_nth_z l n = None. +Proof. + intros N. + remember (list_nth_z l n) as opt eqn:H. symmetry in H. + destruct opt; auto. + exploit list_nth_z_range; eauto. lia. +Qed. + +Lemma label_in_header_list lbl a: + is_label lbl a = true -> list_length_z (header a) <= 1 -> header a = lbl :: nil. +Proof. + intros. + eapply is_label_correct_true in H. + destruct (header a). + - eapply in_nil in H. contradiction. + - rewrite list_length_z_cons in H0. + assert (list_length_z l0 >= 0) by eapply list_length_z_pos. + assert (list_length_z l0 = 0) by lia. + rewrite list_length_z_nat in H2. + assert (Datatypes.length l0 = 0%nat) by lia. + eapply length_zero_iff_nil in H3. subst. + unfold In in H. destruct H. + + subst; eauto. + + destruct H. +Qed. + +Lemma no_label_in_basic_inst: forall a lbl x, + basic_to_instruction a = OK x -> Asm.is_label lbl x = false. +Proof. + intros. + destruct a; simpl in *; + repeat destruct i; + repeat destruct ld; repeat destruct st; + simpl in *; + try (try destruct_reg_inv; monadInv H; simpl in *; reflexivity). +Qed. + +Lemma label_pos_body bdy: forall c1 c2 z ex lbl + (HUNF : unfold_body bdy = OK c2), + Asm.label_pos lbl (z + Z.of_nat ((Datatypes.length bdy) + length_opt ex)) c1 = Asm.label_pos lbl (z) ((c2 ++ unfold_exit ex) ++ c1). +Proof. + induction bdy. + - intros. inversion HUNF. simpl in *. + destruct ex eqn:EQEX. + + simpl in *. unfold Asm.is_label. destruct c; simpl; try congruence. + destruct i; simpl; try congruence. + + simpl in *. ring_simplify (z + 0). auto. + - intros. inversion HUNF; clear HUNF. monadInv H0. simpl in *. + erewrite no_label_in_basic_inst; eauto. rewrite <- IHbdy; eauto. + erewrite Zpos_P_of_succ_nat. + apply f_equal2; auto. lia. +Qed. + +Lemma asm_label_pos_header: forall z a x0 x1 lbl + (HUNF: unfold_body (body a) = OK x1), + Asm.label_pos lbl (z + size a) x0 = + Asm.label_pos lbl (z + list_length_z (header a)) ((x1 ++ unfold_exit (exit a)) ++ x0). +Proof. + intros. + unfold size. + rewrite <- plus_assoc. rewrite Nat2Z.inj_add. + rewrite list_length_z_nat. + replace (z + (Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a)))) with (z + Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a))) by lia. + eapply (label_pos_body (body a) x0 x1 (z + Z.of_nat (Datatypes.length (header a))) (exit a) lbl). auto. +Qed. + +Lemma header_size_cons_nil: forall (l0: label) (l1: list label) + (HSIZE: list_length_z (l0 :: l1) <= 1), + l1 = nil. +Proof. + intros. + destruct l1; try congruence. rewrite !list_length_z_cons in HSIZE. + assert (list_length_z l1 >= 0) by eapply list_length_z_pos. + assert (list_length_z l1 + 1 + 1 >= 2) by lia. + assert (2 <= 1) by lia. contradiction H1. lia. +Qed. + +Lemma label_pos_preserved_gen bbs: forall lbl c z + (HUNF: unfold bbs = OK c), + label_pos lbl z bbs = Asm.label_pos lbl z c. +Proof. + induction bbs. + - intros. simpl in *. inversion HUNF. simpl. reflexivity. + - intros. simpl in *. monadInv HUNF. unfold unfold_bblock in EQ. + destruct (zle _ _); try congruence. monadInv EQ. + destruct (is_label _ _) eqn:EQLBL. + + erewrite label_in_header_list; eauto. + simpl in *. destruct (peq lbl lbl); try congruence. + + erewrite IHbbs; eauto. + rewrite (asm_label_pos_header z a x0 x1 lbl); auto. + unfold is_label in *. + destruct (header a). + * replace (z + list_length_z (@nil label)) with (z); eauto. + unfold list_length_z. simpl. lia. + * eapply header_size_cons_nil in l as HL1. + subst. simpl in *. destruct (in_dec _ _); try congruence. + simpl in *. + destruct (peq _ _); try intuition congruence. +Qed. + +Lemma label_pos_preserved f lbl z tf: forall + (FINDF: transf_function f = OK tf), + label_pos lbl z (fn_blocks f) = Asm.label_pos lbl z (Asm.fn_code tf). +Proof. + intros. + eapply label_pos_preserved_gen. + unfold transf_function in FINDF. monadInv FINDF. + destruct zlt; try congruence. inversion EQ0. eauto. +Qed. + +Lemma goto_label_preserved bb rs1 m1 rs1' m1' rs2 m2 lbl f tf v: forall + (FINDF: transf_function f = OK tf) + (BOUNDED: size bb <= Ptrofs.max_unsigned) + (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)) + (HGOTO: goto_label f lbl (incrPC v rs1) m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.goto_label tf lbl rs2 m2 = Next rs2' m2' + /\ match_states (State rs1' m1') (State rs2' m2'). +Proof. + intros. + unfold goto_label, Asm.goto_label in *. + rewrite <- (label_pos_preserved f); auto. + inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. + destruct label_pos; next_stuck_cong. + destruct (incrPC v rs1 PC) eqn:INCRPC; next_stuck_cong. + inversion HGOTO; auto. repeat (econstructor; eauto). + rewrite <- EQPC. + unfold incrPC in *. + rewrite !Pregmap.gss in *. + destruct (rs1 PC) eqn:EQRS1; simpl in *; try congruence. + replace (rs2 # PC <- (Vptr b0 (Ptrofs.repr z))) with ((rs1 # PC <- (Vptr b0 (Ptrofs.add i0 v))) # PC <- (Vptr b (Ptrofs.repr z))); auto. + eapply functional_extensionality. intros. + destruct (PregEq.eq x PC); subst. + rewrite !Pregmap.gss. congruence. + rewrite !Pregmap.gso; auto. +Qed. + +Lemma next_inst_incr_pc_preserved bb rs1 m1 rs1' m1' rs2 m2 f tf: forall + (FINDF: transf_function f = OK tf) + (BOUNDED: size bb <= Ptrofs.max_unsigned) + (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)) + (NEXT: Next (incrPC (Ptrofs.repr (size bb)) rs1) m2 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), + Next (Asm.nextinstr rs2) m2 = Next rs2' m2' + /\ match_states (State rs1' m1') (State rs2' m2'). +Proof. + intros; simpl in *; unfold incrPC in NEXT; + inv_matchi; + assert (size bb >= 1) by eapply bblock_size_pos; + assert (0 <= size bb - 1 <= Ptrofs.max_unsigned) by lia; + inversion NEXT; subst; + eexists; eexists; split; eauto. + assert (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))) = Asm.nextinstr rs2). { + unfold Pregmap.set. apply functional_extensionality. + intros x. destruct (PregEq.eq x PC). + -- unfold Asm.nextinstr. rewrite <- AGPC. + rewrite Val.offset_ptr_assoc. rewrite Ptrofs.add_unsigned. + rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia. + rewrite Ptrofs.unsigned_one. + replace (size bb - 1 + 1) with (size bb) by lia. + rewrite e. rewrite Pregmap.gss. + reflexivity. + -- eapply nextinstr_agree_but_pc; eauto. } + rewrite H1. econstructor. +Qed. + +Lemma pc_reg_overwrite: forall (r: ireg) rs1 m1 rs2 m2 bb + (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)), + rs2 # PC <- (rs2 r) = + (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) # PC <- + (rs1 r). +Proof. + intros. + unfold Pregmap.set; apply functional_extensionality. + intros x; destruct (PregEq.eq x PC) as [X | X]; try discriminate; inv_matchi. +Qed. + +Lemma exec_cfi_simulation: + forall bb f tf rs1 m1 rs1' m1' rs2 m2 cfi + (SIZE: size bb <= Ptrofs.max_unsigned) + (FINDF: transf_function f = OK tf) + (* Warning: Asmblock's PC is assumed to be already pointing on the next instruction ! *) + (CFI: exec_cfi ge f cfi (incrPC (Ptrofs.repr (size bb)) rs1) m1 = Next rs1' m1') + (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)), + exists rs2' m2', Asm.exec_instr tge tf (cf_instruction_to_instruction cfi) + rs2 m2 = Next rs2' m2' + /\ match_states (State rs1' m1') (State rs2' m2'). +Proof. + intros. + assert (BBPOS: size bb >= 1) by eapply bblock_size_pos. + destruct cfi; inv CFI; simpl. + - (* Pb *) + exploit goto_label_preserved; eauto. + - (* Pbc *) + inv_matchi. + unfold eval_testcond in *. destruct c; + erewrite !incrPC_agree_but_pc in H0; try rewrite <- !AG; try congruence. + all: + destruct_reg_size; + try destruct b eqn:EQB. + 1,4,7,10,13,16,19,22,25,28,31,34: + exploit goto_label_preserved; eauto. + 1,3,5,7,9,11,13,15,17,19,21,23: + exploit next_inst_incr_pc_preserved; eauto. + all: repeat (econstructor; eauto). + - (* Pbl *) + eexists; eexists; split; eauto. + assert ( ((incrPC (Ptrofs.repr (size bb)) rs1) # X30 <- (incrPC (Ptrofs.repr (size bb)) rs1 PC)) + # PC <- (Genv.symbol_address ge id Ptrofs.zero) + = (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one)) + # PC <- (Genv.symbol_address tge id Ptrofs.zero) + ) as EQRS. { + unfold incrPC. unfold Pregmap.set. simpl. apply functional_extensionality. + intros x. destruct (PregEq.eq x PC). + * rewrite symbol_addresses_preserved. reflexivity. + * destruct (PregEq.eq x X30). + -- inv MATCHI. rewrite <- AGPC. rewrite Val.offset_ptr_assoc. + unfold Ptrofs.add, Ptrofs.one. repeat (rewrite Ptrofs.unsigned_repr); try lia. + replace (size bb - 1 + 1) with (size bb) by lia. reflexivity. + -- inv MATCHI; rewrite AG; try assumption; reflexivity. + } rewrite EQRS; inv MATCHI; reflexivity. + - (* Pbs *) + eexists; eexists; split; eauto. + assert ( (incrPC (Ptrofs.repr (size bb)) rs1) # PC <- + (Genv.symbol_address ge id Ptrofs.zero) + = rs2 # PC <- (Genv.symbol_address tge id Ptrofs.zero) + ) as EQRS. { + unfold incrPC, Pregmap.set. rewrite symbol_addresses_preserved. inv MATCHI. + apply functional_extensionality. intros x. destruct (PregEq.eq x PC); auto. + } rewrite EQRS; inv MATCHI; reflexivity. + - (* Pblr *) + eexists; eexists; split; eauto. + unfold incrPC. rewrite Pregmap.gss. rewrite Pregmap.gso; try discriminate. + assert ( (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one)) # PC <- (rs2 r) + = ((rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) + # X30 <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) + # PC <- (rs1 r) + ) as EQRS. { + unfold Pregmap.set. apply functional_extensionality. + intros x; destruct (PregEq.eq x PC) as [X | X]. + - inv_matchi; rewrite AG; auto. + - destruct (PregEq.eq x X30) as [X' | X']. + + inversion MATCHI; subst. rewrite <- AGPC. + rewrite Val.offset_ptr_assoc. unfold Ptrofs.one. + rewrite Ptrofs.add_unsigned. rewrite Ptrofs.unsigned_repr; try lia. rewrite Ptrofs.unsigned_repr; try lia. + rewrite Z.sub_add; reflexivity. + + inv_matchi. + } rewrite EQRS. inv_matchi. + - (* Pbr *) + eexists; eexists; split; eauto. + unfold incrPC. rewrite Pregmap.gso; try discriminate. + rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto. + inv_matchi. + - (* Pret *) + eexists; eexists; split; eauto. + unfold incrPC. rewrite Pregmap.gso; try discriminate. + rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto. + inv_matchi. + - (* Pcbnz *) + inv_matchi. + unfold eval_neg_branch in *. + erewrite incrPC_agree_but_pc in H0; try congruence. + destruct eval_testzero; next_stuck_cong. + destruct b. + * exploit next_inst_incr_pc_preserved; eauto. + * exploit goto_label_preserved; eauto. + - (* Pcbz *) + inv_matchi. + unfold eval_branch in *. + erewrite incrPC_agree_but_pc in H0; try congruence. + destruct eval_testzero; next_stuck_cong. + destruct b. + * exploit goto_label_preserved; eauto. + * exploit next_inst_incr_pc_preserved; eauto. + - (* Ptbnbz *) + inv_matchi. + unfold eval_branch in *. + erewrite incrPC_agree_but_pc in H0; try congruence. + destruct eval_testbit; next_stuck_cong. + destruct b. + * exploit goto_label_preserved; eauto. + * exploit next_inst_incr_pc_preserved; eauto. + - (* Ptbz *) + inv_matchi. + unfold eval_neg_branch in *. + erewrite incrPC_agree_but_pc in H0; try congruence. + destruct eval_testbit; next_stuck_cong. + destruct b. + * exploit next_inst_incr_pc_preserved; eauto. + * exploit goto_label_preserved; eauto. + - (* Pbtbl *) + assert (rs2 # X16 <- Vundef r1 = (incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef r1) + as EQUNDEFX16. { + unfold incrPC, Pregmap.set. + destruct (PregEq.eq r1 X16) as [X16 | X16]; auto. + destruct (PregEq.eq r1 PC) as [PC' | PC']; try discriminate. + inv MATCHI; rewrite AG; auto. + } rewrite <- EQUNDEFX16 in H0. + destruct_reg_inv; next_stuck_cong. + unfold goto_label, Asm.goto_label in *. + rewrite <- (label_pos_preserved f); auto. + inversion MATCHI; subst. + destruct label_pos; next_stuck_cong. + destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef PC) eqn:INCRPC; next_stuck_cong. + inversion H0; auto. repeat (econstructor; eauto). + rewrite !Pregmap.gso; try congruence. + rewrite <- AGPC. + unfold incrPC in *. + destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate. + replace ((rs2 # X16 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with + (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <- + Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto. + eapply functional_extensionality; intros x. + destruct (PregEq.eq x PC); subst. + + rewrite Pregmap.gso in INCRPC; try congruence. + rewrite Pregmap.gss in INCRPC. + rewrite !Pregmap.gss in *; congruence. + + rewrite Pregmap.gso; auto. + rewrite (Pregmap.gso (i := x) (j := PC)); auto. + destruct (PregEq.eq x X16); subst. + * rewrite !Pregmap.gss; auto. + * rewrite !Pregmap.gso; auto. +Qed. + +Lemma last_instruction_cannot_be_label bb: + list_nth_z (header bb) (size bb - 1) = None. +Proof. + assert (list_length_z (header bb) <= size bb - 1). { + rewrite bblock_size_aux. generalize (bblock_size_aux_pos bb). lia. + } + remember (list_nth_z (header bb) (size bb - 1)) as label_opt; destruct label_opt; auto; + exploit list_nth_z_range; eauto; lia. +Qed. + +Lemma pc_ptr_exec_step: forall ofs bb b rs m _rs _m + (ATPC : rs PC = Vptr b ofs) + (MATCHI : match_internal (size bb - 1) + {| _rs := rs; _m := m |} + {| _rs := _rs; _m := _m |}), + _rs PC = Vptr b (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))). +Proof. + intros; inv MATCHI. rewrite <- AGPC; rewrite ATPC; unfold Val.offset_ptr; eauto. +Qed. + +Lemma find_instr_ofs_somei: forall ofs bb f tc asmi rs m _rs _m + (BOUNDOFS : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned) + (FIND_INSTR : Asm.find_instr (Ptrofs.unsigned ofs + (size bb - 1)) tc = + Some (asmi)) + (MATCHI : match_internal (size bb - 1) + {| _rs := rs; _m := m |} + {| _rs := _rs; _m := _m |}), + Asm.find_instr (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1)))) + (Asm.fn_code {| Asm.fn_sig := fn_sig f; Asm.fn_code := tc |}) = + Some (asmi). +Proof. + intros; simpl. + replace (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1)))) + with (Ptrofs.unsigned ofs + (size bb - 1)); try assumption. + generalize (bblock_size_pos bb); generalize (Ptrofs.unsigned_range_2 ofs); intros. + unfold Ptrofs.add. + rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; try lia. + rewrite Ptrofs.unsigned_repr; lia. +Qed. + +Lemma eval_builtin_arg_match: forall rs _m _rs a1 b1 + (AG : forall r : preg, r <> PC -> rs r = _rs r) + (EVAL : eval_builtin_arg tge (fun r : dreg => rs r) (rs SP) _m a1 b1), + eval_builtin_arg tge _rs (_rs SP) _m (map_builtin_arg DR a1) b1. +Proof. + intros; induction EVAL; simpl in *; try rewrite AG; try rewrite AG in EVAL; try discriminate; try congruence; eauto with barg. + econstructor. rewrite <- AG; try discriminate; auto. +Qed. + +Lemma eval_builtin_args_match: forall bb rs m _rs _m args vargs + (MATCHI : match_internal (size bb - 1) + {| _rs := rs; _m := m |} + {| _rs := _rs; _m := _m |}) + (EVAL : eval_builtin_args tge (fun r : dreg => rs r) (rs SP) m args vargs), + eval_builtin_args tge _rs (_rs SP) _m (map (map_builtin_arg DR) args) vargs. +Proof. + intros; inv MATCHI. + induction EVAL; subst. + - econstructor. + - econstructor. + + eapply eval_builtin_arg_match; eauto. + + eauto. +Qed. + +Lemma pc_both_sides: forall (rs _rs: regset) v + (AG : forall r : preg, r <> PC -> rs r = _rs r), + rs # PC <- v = _rs # PC <- v. +Proof. + intros; unfold Pregmap.set; apply functional_extensionality; intros y. + destruct (PregEq.eq y PC); try rewrite AG; eauto. +Qed. + +Lemma set_buitin_res_sym res: forall vres rs _rs r + (NPC: r <> PC) + (AG : forall r : preg, r <> PC -> rs r = _rs r), + set_res res vres rs r = set_res res vres _rs r. +Proof. + induction res; simpl; intros; unfold Pregmap.set; try rewrite AG; eauto. +Qed. + +Lemma set_builtin_res_dont_move_pc_gen res: forall vres rs _rs v1 v2 + (HV: v1 = v2) + (AG : forall r : preg, r <> PC -> rs r = _rs r), + (set_res res vres rs) # PC <- v1 = + (set_res res vres _rs) # PC <- v2. +Proof. + intros. rewrite HV. generalize res vres rs _rs AG v2. + clear res vres rs _rs AG v1 v2 HV. + induction res. + - simpl; intros. apply pc_both_sides; intros. + unfold Pregmap.set; try rewrite AG; eauto. + - simpl; intros; apply pc_both_sides; eauto. + - simpl; intros. + erewrite IHres2; eauto; intros. + eapply set_buitin_res_sym; eauto. +Qed. + +Lemma set_builtin_map_not_pc (res: builtin_res dreg): forall vres rs, + set_res (map_builtin_res DR res) vres rs PC = rs PC. +Proof. + induction res. + - intros; simpl. unfold Pregmap.set. destruct (PregEq.eq PC x); try congruence. + - intros; simpl; congruence. + - intros; simpl in *. rewrite IHres2. rewrite IHres1. reflexivity. +Qed. + +Lemma undef_reg_preserved (rl: list mreg): forall rs _rs r + (NPC: r <> PC) + (AG : forall r : preg, r <> PC -> rs r = _rs r), + undef_regs (map preg_of rl) rs r = undef_regs (map preg_of rl) _rs r. +Proof. + induction rl. + - simpl; auto. + - simpl; intros. erewrite IHrl; eauto. + intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of a)); try rewrite AG; eauto. +Qed. + +Lemma undef_regs_other: + forall r rl rs, + (forall r', In r' rl -> r <> r') -> + undef_regs rl rs r = rs r. +Proof. + induction rl; simpl; intros. auto. + rewrite IHrl by auto. rewrite Pregmap.gso; auto. +Qed. + +Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop := + match rl with + | nil => True + | r1 :: nil => r <> preg_of r1 + | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl end. -Remark preg_of_not_X29: forall r, negb (mreg_eq r R29) = true -> IR X29 <> preg_of r. -Proof. - intros. change (IR X29) with (preg_of R29). red; intros. - exploit preg_of_injective; eauto. intros; subst r; discriminate. -Qed. - -Lemma sp_val': forall ms sp rs, agree ms sp rs -> sp = rs XSP. -Proof. - intros. eapply sp_val; eauto. -Qed. - -(** This is the simulation diagram. We prove it by case analysis on the Mach transition. *) - -Theorem step_simulation: - forall S1 t S2, Mach.step return_address_offset ge S1 t S2 -> - forall S1' (MS: match_states S1 S1'), - (exists S2', plus step tge S1' t S2' /\ match_states S2 S2') - \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. -Proof. - induction 1; intros; inv MS. - -- (* Mlabel *) - left; eapply exec_straight_steps; eauto; intros. - monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split. apply agree_nextinstr; auto. simpl; congruence. - -- (* Mgetstack *) - unfold load_stack in H. - exploit Mem.loadv_extends; eauto. intros [v' [A B]]. - rewrite (sp_val _ _ _ AG) in A. - left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]]. - exists rs'; split. eauto. - split. eapply agree_set_mreg; eauto with asmgen. congruence. - simpl; congruence. - -- (* Msetstack *) - unfold store_stack in H. - assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto). - exploit Mem.storev_extends; eauto. intros [m2' [A B]]. - left; eapply exec_straight_steps; eauto. - rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. - exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. - exists rs'; split. eauto. - split. eapply agree_undef_regs; eauto with asmgen. - simpl; intros. rewrite Q; auto with asmgen. - -- (* Mgetparam *) - assert (f0 = f) by congruence; subst f0. - unfold load_stack in *. - exploit Mem.loadv_extends. eauto. eexact H0. auto. - intros [parent' [A B]]. rewrite (sp_val' _ _ _ AG) in A. - exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'. - exploit Mem.loadv_extends. eauto. eexact H1. auto. - intros [v' [C D]]. -Opaque loadind. - left; eapply exec_straight_steps; eauto; intros. monadInv TR. - destruct ep. -(* X30 contains parent *) - exploit loadind_correct. eexact EQ. - instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence. - intros [rs1 [P [Q R]]]. - exists rs1; split. eauto. - split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. - simpl; intros. rewrite R; auto with asmgen. - apply preg_of_not_X29; auto. -(* X30 does not contain parent *) - exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]]. - exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence. - intros [rs2 [S [T U]]]. - exists rs2; split. eapply exec_straight_trans; eauto. - split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. - instantiate (1 := rs1#X29 <- (rs2#X29)). intros. - rewrite Pregmap.gso; auto with asmgen. - congruence. - intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen. - simpl; intros. rewrite U; auto with asmgen. - apply preg_of_not_X29; auto. - -- (* Mop *) - assert (eval_operation tge sp op (map rs args) m = Some v). - { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. } - exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0. - intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. - left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. - exists rs2; split. eauto. split. - apply agree_set_undef_mreg with rs0; auto. - apply Val.lessdef_trans with v'; auto. - simpl; intros. InvBooleans. - rewrite R; auto. apply preg_of_not_X29; auto. -Local Transparent destroyed_by_op. - destruct op; try exact I; simpl; congruence. - -- (* Mload *) - assert (Op.eval_addressing tge sp addr (map rs args) = Some a). - { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. } - exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. - intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. - exploit Mem.loadv_extends; eauto. intros [v' [C D]]. - left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]]. - exists rs2; split. eauto. - split. eapply agree_set_undef_mreg; eauto. congruence. - simpl; congruence. - -- (* Mstore *) - assert (Op.eval_addressing tge sp addr (map rs args) = Some a). - { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. } - exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. - intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. - assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto). - exploit Mem.storev_extends; eauto. intros [m2' [C D]]. - left; eapply exec_straight_steps; eauto. - intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. - exists rs2; split. eauto. - split. eapply agree_undef_regs; eauto with asmgen. - simpl; congruence. - -- (* Mcall *) - assert (f0 = f) by congruence. subst f0. - inv AT. - assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned). - { eapply transf_function_no_overflow; eauto. } - destruct ros as [rf|fid]; simpl in H; monadInv H5. -+ (* Indirect call *) - assert (rs rf = Vptr f' Ptrofs.zero). - { destruct (rs rf); try discriminate. - revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } - assert (rs0 x0 = Vptr f' Ptrofs.zero). - { exploit ireg_val; eauto. rewrite H5; intros LD; inv LD; auto. } - generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1. - assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x). - { econstructor; eauto. } - exploit return_address_offset_correct; eauto. intros; subst ra. - left; econstructor; split. - apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto. - eapply functions_transl; eauto. eapply find_instr_tail; eauto. - simpl. eauto. - econstructor; eauto. - econstructor; eauto. - eapply agree_sp_def; eauto. - simpl. eapply agree_exten; eauto. intros. Simpl. - Simpl. rewrite <- H2. auto. -+ (* Direct call *) - generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1. - assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x). - econstructor; eauto. - exploit return_address_offset_correct; eauto. intros; subst ra. - left; econstructor; split. - apply plus_one. eapply exec_step_internal. eauto. - eapply functions_transl; eauto. eapply find_instr_tail; eauto. - simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. eauto. - econstructor; eauto. - econstructor; eauto. - eapply agree_sp_def; eauto. - simpl. eapply agree_exten; eauto. intros. Simpl. - Simpl. rewrite <- H2. auto. - -- (* Mtailcall *) - assert (f0 = f) by congruence. subst f0. - inversion AT; subst. - assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned). - { eapply transf_function_no_overflow; eauto. } - exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]]. - destruct ros as [rf|fid]; simpl in H; monadInv H7. -+ (* Indirect call *) - assert (rs rf = Vptr f' Ptrofs.zero). - { destruct (rs rf); try discriminate. - revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. } - assert (rs0 x0 = Vptr f' Ptrofs.zero). - { exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto. } - exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). - exploit exec_straight_steps_2; eauto using functions_transl. - intros (ofs' & P & Q). - left; econstructor; split. - (* execution *) - eapply plus_right'. eapply exec_straight_exec; eauto. - econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q. - simpl. reflexivity. - traceEq. - (* match states *) - econstructor; eauto. - apply agree_set_other; auto with asmgen. - Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption. -+ (* Direct call *) - exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). - exploit exec_straight_steps_2; eauto using functions_transl. - intros (ofs' & P & Q). - left; econstructor; split. - (* execution *) - eapply plus_right'. eapply exec_straight_exec; eauto. - econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q. - simpl. reflexivity. - traceEq. - (* match states *) - econstructor; eauto. - apply agree_set_other; auto with asmgen. - Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto. - -- (* Mbuiltin *) - inv AT. monadInv H4. - exploit functions_transl; eauto. intro FN. - generalize (transf_function_no_overflow _ _ H3); intro NOOV. - exploit builtin_args_match; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_extends; eauto. - intros [vres' [m2' [A [B [C D]]]]]. - left. econstructor; split. apply plus_one. - eapply exec_step_builtin. eauto. eauto. - eapply find_instr_tail; eauto. - erewrite <- sp_val by eauto. - eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - eauto. - econstructor; eauto. - instantiate (2 := tf); instantiate (1 := x). - unfold nextinstr. rewrite Pregmap.gss. - rewrite set_res_other. rewrite undef_regs_other. - rewrite <- H1. simpl. econstructor; eauto. - eapply code_tail_next_int; eauto. - simpl; intros. destruct H4. congruence. destruct H4. congruence. - exploit list_in_map_inv; eauto. intros (mr & U & V). subst. - auto with asmgen. - auto with asmgen. - apply agree_nextinstr. eapply agree_set_res; auto. - eapply agree_undef_regs; eauto. intros. - simpl. rewrite undef_regs_other_2; auto. Simpl. - congruence. - -- (* Mgoto *) - assert (f0 = f) by congruence. subst f0. - inv AT. monadInv H4. - exploit find_label_goto_label; eauto. intros [tc' [rs' [GOTO [AT2 INV]]]]. - left; exists (State rs' m'); split. - apply plus_one. econstructor; eauto. - eapply functions_transl; eauto. - eapply find_instr_tail; eauto. - simpl; eauto. - econstructor; eauto. - eapply agree_exten; eauto with asmgen. - congruence. - -- (* Mcond true *) - assert (f0 = f) by congruence. subst f0. - exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. - left; eapply exec_straight_opt_steps_goto; eauto. - intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). - exists jmp; exists k; exists rs'. - split. eexact A. - split. apply agree_exten with rs0; auto with asmgen. - exact B. - -- (* Mcond false *) - exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. - left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). - econstructor; split. - eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto. - split. apply agree_exten with rs0; auto. intros. Simpl. - simpl; congruence. - -- (* Mjumptable *) - assert (f0 = f) by congruence. subst f0. - inv AT. monadInv H6. - exploit functions_transl; eauto. intro FN. - generalize (transf_function_no_overflow _ _ H5); intro NOOV. - exploit find_label_goto_label. eauto. eauto. - instantiate (2 := rs0#X16 <- Vundef). - Simpl. eauto. - eauto. - intros [tc' [rs' [A [B C]]]]. - exploit ireg_val; eauto. rewrite H. intros LD; inv LD. - left; econstructor; split. - apply plus_one. econstructor; eauto. - eapply find_instr_tail; eauto. - simpl. Simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A. - econstructor; eauto. - eapply agree_undef_regs; eauto. - simpl. intros. rewrite C; auto with asmgen. Simpl. - congruence. - -- (* Mreturn *) - assert (f0 = f) by congruence. subst f0. - inversion AT; subst. simpl in H6; monadInv H6. - assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned). - eapply transf_function_no_overflow; eauto. - exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z). - exploit exec_straight_steps_2; eauto using functions_transl. - intros (ofs' & P & Q). - left; econstructor; split. - (* execution *) - eapply plus_right'. eapply exec_straight_exec; eauto. - econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q. - simpl. reflexivity. - traceEq. - (* match states *) - econstructor; eauto. - apply agree_set_other; auto with asmgen. - -- (* internal function *) - - exploit functions_translated; eauto. intros [tf [A B]]. monadInv B. - generalize EQ; intros EQ'. monadInv EQ'. - destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0. - unfold store_stack in *. - exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. - intros [m1' [C D]]. - exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. - intros [m2' [F G]]. - simpl chunk_of_type in F. - exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. - intros [m3' [P Q]]. - change (chunk_of_type Tptr) with Mint64 in *. - (* Execution of function prologue *) - monadInv EQ0. rewrite transl_code'_transl_code in EQ1. - set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: - storeptr RA XSP (fn_retaddr_ofs f) x0) in *. - set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. - set (rs2 := nextinstr (rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef)). - exploit (storeptr_correct tge tf XSP (fn_retaddr_ofs f) RA x0 m2' m3' rs2). - simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. - change (rs2 X2) with sp. eexact P. - simpl; congruence. congruence. - intros (rs3 & U & V). - assert (EXEC_PROLOGUE: - exec_straight tge tf - tf.(fn_code) rs0 m' - x0 rs3 m3'). - { change (fn_code tf) with tfbody; unfold tfbody. - apply exec_straight_step with rs2 m2'. - unfold exec_instr. rewrite C. fold sp. - rewrite <- (sp_val _ _ _ AG). rewrite F. reflexivity. - reflexivity. - eexact U. } - exploit exec_straight_steps_2; eauto using functions_transl. lia. constructor. - intros (ofs' & X & Y). - left; exists (State rs3 m3'); split. - eapply exec_straight_steps_1; eauto. lia. constructor. - econstructor; eauto. - rewrite X; econstructor; eauto. - apply agree_exten with rs2; eauto with asmgen. - unfold rs2. - apply agree_nextinstr. apply agree_set_other; auto with asmgen. - apply agree_change_sp with (parent_sp s). - apply agree_undef_regs with rs0. auto. -Local Transparent destroyed_at_function_entry. simpl. - simpl; intros; Simpl. - unfold sp; congruence. - intros. rewrite V by auto with asmgen. reflexivity. - -- (* external function *) - exploit functions_translated; eauto. - intros [tf [A B]]. simpl in B. inv B. - exploit extcall_arguments_match; eauto. - intros [args' [C D]]. - exploit external_call_mem_extends; eauto. - intros [res' [m2' [P [Q [R S]]]]]. - left; econstructor; split. - apply plus_one. eapply exec_step_external; eauto. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - econstructor; eauto. - unfold loc_external_result. apply agree_set_other; auto. apply agree_set_pair; auto. - apply agree_undef_caller_save_regs; auto. - -- (* return *) - inv STACKS. simpl in *. - right. split. lia. split. auto. - rewrite <- ATPC in H5. - econstructor; eauto. congruence. +Remark preg_notin_charact: + forall r rl, + preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr). +Proof. + induction rl; simpl; intros. + tauto. + destruct rl. + simpl. split. intros. intuition congruence. auto. + rewrite IHrl. split. + intros [A B]. intros. destruct H. congruence. auto. + auto. Qed. -Lemma transf_initial_states: - forall st1, Mach.initial_state prog st1 -> - exists st2, Asm.initial_state tprog st2 /\ match_states st1 st2. +Lemma undef_regs_other_2: + forall r rl rs, + preg_notin r rl -> + undef_regs (map preg_of rl) rs r = rs r. Proof. - intros. inversion H. unfold ge0 in *. - econstructor; split. - econstructor. - eapply (Genv.init_mem_transf_partial TRANSF); eauto. - replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero) - with (Vptr fb Ptrofs.zero). - econstructor; eauto. - constructor. - apply Mem.extends_refl. - split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence. - intros. rewrite Regmap.gi. auto. - unfold Genv.symbol_address. - rewrite (match_program_main TRANSF). - rewrite symbols_preserved. - unfold ge; rewrite H1. auto. + intros. apply undef_regs_other. intros. + exploit list_in_map_inv; eauto. intros [mr [A B]]. subst. + rewrite preg_notin_charact in H. auto. Qed. -Lemma transf_final_states: - forall st1 st2 r, - match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r. +Lemma exec_exit_simulation_plus b ofs f bb s2 t rs m rs' m': forall + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (NEMPTY_EXIT: exit bb <> None) + (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2) + (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m') + (ATPC: rs PC = Vptr b ofs), + plus Asm.step tge s2 t (State rs' m'). +Proof. + intros. + exploit internal_functions_unfold; eauto. + intros (tc & FINDtf & TRANStf & _). + + exploit (find_instr_bblock (size bb - 1)); eauto. + { generalize (bblock_size_pos bb). lia. } + intros (i' & NTH & FIND_INSTR). + + inv NTH. + + rewrite last_instruction_cannot_be_label in *. discriminate. + + destruct (exit bb) as [ctrl |] eqn:NEMPTY_EXIT'. 2: { contradiction. } + rewrite bblock_size_aux in *. rewrite NEMPTY_EXIT' in *. simpl in *. + (* XXX: Is there a better way to simplify this expression i.e. automatically? *) + replace (list_length_z (header bb) + list_length_z (body bb) + 1 - 1 - + list_length_z (header bb)) with (list_length_z (body bb)) in H by lia. + rewrite list_nth_z_range_exceeded in H; try lia. discriminate. + + assert (Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned). { + eapply size_of_blocks_bounds; eauto. + } + assert (size bb <= Ptrofs.max_unsigned). { generalize (Ptrofs.unsigned_range_2 ofs); lia. } + destruct cfi. + * (* control flow instruction *) + destruct s2. + rewrite H in EXIT. (* exit bb is a cfi *) + inv EXIT. + rewrite H in MATCHI. simpl in MATCHI. + exploit internal_functions_translated; eauto. + rewrite FINDtf. + intros (tf & FINDtf' & TRANSf). inversion FINDtf'; subst; clear FINDtf'. + exploit exec_cfi_simulation; eauto. + (* extract exec_cfi_simulation's conclusion as separate hypotheses *) + intros (rs2' & m2' & EXECI & MATCHS); rewrite MATCHS. + apply plus_one. + eapply Asm.exec_step_internal; eauto. + - eapply pc_ptr_exec_step; eauto. + - eapply find_instr_ofs_somei; eauto. + * (* builtin *) + destruct s2. + rewrite H in EXIT. + rewrite H in MATCHI. simpl in MATCHI. + simpl in FIND_INSTR. + inversion EXIT. + apply plus_one. + eapply external_call_symbols_preserved in H10; try (apply senv_preserved). + eapply eval_builtin_args_preserved in H6; try (apply symbols_preserved). + eapply Asm.exec_step_builtin; eauto. + - eapply pc_ptr_exec_step; eauto. + - eapply find_instr_ofs_somei; eauto. + - eapply eval_builtin_args_match; eauto. + - inv MATCHI; eauto. + - inv MATCHI. + unfold Asm.nextinstr, incrPC. + assert (HPC: Val.offset_ptr (rs PC) (Ptrofs.repr (size bb)) + = Val.offset_ptr (_rs PC) Ptrofs.one). + { rewrite <- AGPC. rewrite ATPC. unfold Val.offset_ptr. + rewrite Ptrofs.add_assoc. unfold Ptrofs.add. + assert (BBPOS: size bb >= 1) by eapply bblock_size_pos. + rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia. + rewrite Ptrofs.unsigned_one. + replace (size bb - 1 + 1) with (size bb) by lia. + reflexivity. } + apply set_builtin_res_dont_move_pc_gen. + -- erewrite !set_builtin_map_not_pc. + erewrite !undef_regs_other. + rewrite HPC; auto. + all: intros; simpl in *; destruct H3 as [HX16 | [HX30 | HDES]]; subst; try discriminate; + exploit list_in_map_inv; eauto; intros [mr [A B]]; subst; discriminate. + -- intros. eapply undef_reg_preserved; eauto. + intros. destruct (PregEq.eq X16 r0); destruct (PregEq.eq X30 r0); subst. + rewrite Pregmap.gso, Pregmap.gss; try congruence. + do 2 (rewrite Pregmap.gso, Pregmap.gss; try discriminate; auto). + rewrite 2Pregmap.gss; auto. + rewrite !Pregmap.gso; auto. +Qed. + +Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2) + (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m') + (ATPC: rs PC = Vptr b ofs), + star Asm.step tge s2 t (State rs' m'). +Proof. + intros. + destruct (exit bb) eqn: Hex. + - eapply plus_star. + eapply exec_exit_simulation_plus; try rewrite Hex; congruence || eauto. + - inv MATCHI. + inv EXIT. + assert (X: rs2 = incrPC (Ptrofs.repr (size bb)) rs). { + unfold incrPC. unfold Pregmap.set. + apply functional_extensionality. intros x. + destruct (PregEq.eq x PC) as [X|]. + - rewrite X. rewrite <- AGPC. simpl. + replace (size bb - 0) with (size bb) by lia. reflexivity. + - rewrite AG; try assumption. reflexivity. + } + destruct X. + subst; eapply star_refl; eauto. +Qed. + +Lemma exec_bblock_simulation b ofs f bb t rs m rs' m': forall + (ATPC: rs PC = Vptr b ofs) + (FINDF: Genv.find_funct_ptr ge b = Some (Internal f)) + (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb) + (EXECBB: exec_bblock lk ge f bb rs m t rs' m'), + plus Asm.step tge (State rs m) t (State rs' m'). Proof. - intros. inv H0. inv H. constructor. assumption. - compute in H1. inv H1. - generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto. + intros; destruct EXECBB as (rs1 & m1 & BODY & CTL). + exploit exec_header_simulation; eauto. + intros (s0 & STAR & MATCH0). + eapply star_plus_trans; traceEq || eauto. + destruct (bblock_non_empty bb). + - (* body bb <> nil *) + exploit exec_body_simulation_plus; eauto. + intros (s1 & PLUS & MATCH1). + eapply plus_star_trans; traceEq || eauto. + eapply exec_exit_simulation_star; eauto. + erewrite <- exec_body_dont_move_PC; eauto. + - (* exit bb <> None *) + exploit exec_body_simulation_star; eauto. + intros (s1 & STAR1 & MATCH1). + eapply star_plus_trans; traceEq || eauto. + eapply exec_exit_simulation_plus; eauto. + erewrite <- exec_body_dont_move_PC; eauto. Qed. +Lemma step_simulation s t s': + Asmblock.step lk ge s t s' -> plus Asm.step tge s t s'. +Proof. + intros STEP. + inv STEP; simpl; exploit functions_translated; eauto; + intros (tf0 & FINDtf & TRANSf); + monadInv TRANSf. + - (* internal step *) eapply exec_bblock_simulation; eauto. + - (* external step *) + apply plus_one. + exploit external_call_symbols_preserved; eauto. apply senv_preserved. + intros ?. + eapply Asm.exec_step_external; eauto. +Qed. + +Lemma transf_program_correct: + forward_simulation (Asmblock.semantics lk prog) (Asm.semantics tprog). +Proof. + eapply forward_simulation_plus. + - apply senv_preserved. + - eexact transf_initial_states. + - eexact transf_final_states. + - unfold match_states. + simpl; intros; subst; eexists; split; eauto. + eapply step_simulation; eauto. +Qed. + +End PRESERVATION. + +End Asmblock_PRESERVATION. + + +Local Open Scope linking_scope. + +Definition block_passes := + mkpass Machblockgenproof.match_prog + ::: mkpass Asmblockgenproof.match_prog + ::: mkpass PostpassSchedulingproof.match_prog + ::: mkpass Asmblock_PRESERVATION.match_prog + ::: pass_nil _. + +Definition match_prog := pass_match (compose_passes block_passes). + +Lemma transf_program_match: + forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp. +Proof. + intros p tp H. + unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H. + inversion_clear H. apply bind_inversion in H1. destruct H1. + inversion_clear H. + unfold Compopts.time in *. remember (Machblockgen.transf_program p) as mbp. + unfold match_prog; simpl. + exists mbp; split. apply Machblockgenproof.transf_program_match; auto. + exists x; split. apply Asmblockgenproof.transf_program_match; auto. + exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto. + exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto. +Qed. + +(** Return Address Offset *) + +Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop := + Machblockgenproof.Mach_return_address_offset (Asmblockgenproof.return_address_offset). + +Lemma return_address_exists: + forall f sg ros c, is_tail (Mach.Mcall sg ros :: c) f.(Mach.fn_code) -> + exists ra, return_address_offset f c ra. +Proof. + intros; unfold return_address_offset; eapply Machblockgenproof.Mach_return_address_exists; eauto. + intros; eapply Asmblockgenproof.return_address_exists; eauto. +Qed. + +Section PRESERVATION. + +Variable prog: Mach.program. +Variable tprog: Asm.program. +Hypothesis TRANSF: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + Theorem transf_program_correct: forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog). Proof. - eapply forward_simulation_star with (measure := measure). - apply senv_preserved. - eexact transf_initial_states. - eexact transf_final_states. - exact step_simulation. + unfold match_prog in TRANSF. simpl in TRANSF. + inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H. + eapply compose_forward_simulations. + { exploit Machblockgenproof.transf_program_correct; eauto. } + + eapply compose_forward_simulations. + + apply Asmblockgenproof.transf_program_correct; eauto. + { intros. + unfold Genv.symbol_address. + erewrite <- PostpassSchedulingproof.symbols_preserved; eauto. + erewrite Asmblock_PRESERVATION.symbol_high_low; eauto. + reflexivity. + } + + eapply compose_forward_simulations. + - apply PostpassSchedulingproof.transf_program_correct; eauto. + - apply Asmblock_PRESERVATION.transf_program_correct; eauto. Qed. End PRESERVATION. + +Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes). + +(*******************************************) +(* Stub actually needed by driver/Compiler *) + +Module Asmgenproof0. + +Definition return_address_offset := return_address_offset. + +End Asmgenproof0. diff --git a/aarch64/BTL_SEsimplify.v b/aarch64/BTL_SEsimplify.v new file mode 100644 index 00000000..3e930439 --- /dev/null +++ b/aarch64/BTL_SEsimplify.v @@ -0,0 +1,41 @@ +Require Import Coqlib Floats Values Memory. +Require Import Integers. +Require Import Op Registers. +Require Import BTL_SEtheory. +Require Import BTL_SEsimuref. + +(** Target op simplifications using "fake" values *) + +Definition target_op_simplify (op: operation) (lr: list reg) (hrs: ristate): option sval := + None. + +Definition target_cbranch_expanse (prev: ristate) (cond: condition) (args: list reg) : option (condition * list_sval) := + None. + +(* Main proof of simplification *) + +Lemma target_op_simplify_correct ctx op lr hrs fsv st args: forall + (H: target_op_simplify op lr hrs = Some fsv) + (REF: ris_refines ctx hrs st) + (OK0: ris_ok ctx hrs) + (OK1: eval_list_sval ctx (list_sval_inj (map (si_sreg st) lr)) = Some args), + eval_sval ctx fsv = eval_operation (cge ctx) (csp ctx) op args (cm0 ctx). +Proof. + unfold target_op_simplify; simpl. + intros H ? ? ?. + congruence. +Qed. + +Lemma target_cbranch_expanse_correct hrs c l ctx st c' l': forall + (TARGET: target_cbranch_expanse hrs c l = Some (c', l')) + (REF: ris_refines ctx hrs st) + (OK: ris_ok ctx hrs), + eval_scondition ctx c' l' = + eval_scondition ctx c (list_sval_inj (map (si_sreg st) l)). +Proof. + unfold target_cbranch_expanse; simpl. + intros H ? ?. + congruence. +Qed. +Global Opaque target_op_simplify. +Global Opaque target_cbranch_expanse. diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v new file mode 100644 index 00000000..d5c7ee0f --- /dev/null +++ b/aarch64/CSE2deps.v @@ -0,0 +1,35 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk) + else true + | (Ainstack ofs), (Ainstack ofs'), _, _ => + negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + | _, _, _, _ => true + end. diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v new file mode 100644 index 00000000..653c88f4 --- /dev/null +++ b/aarch64/CSE2depsproof.v @@ -0,0 +1,209 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = 64%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = 18446744073709551616. +Proof. + reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : int64. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr + \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: unfold Ptrofs.of_int64. + + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW]; + rewrite OFSW). + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia). + + all: try rewrite ptrofs_modulus in *. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. + + +Section STACK_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : ptrofs. + Hypothesis ADDRW : eval_addressing genv sp + (Ainstack ofsw) nil = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Ainstack ofsr) nil = Some addrr. + + Lemma stack_load_store_away1 : + forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr + \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + + destruct sp; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW]; + rewrite OFSW). + + all: try rewrite ptrofs_modulus in *. + + all: intuition lia. + Qed. + + Theorem stack_load_store_away : + can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply stack_load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End STACK_WRITE. +End SOUNDNESS. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + - (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. +- (* Ainstack / Ainstack *) + destruct args. 2: discriminate. + destruct args'. 2: discriminate. + cbn in OVERLAP. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP. + 2: discriminate. + cbn in *. + eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. +Qed. + +End SOUNDNESS. diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v index 7f5f1e06..24498aa4 100644 --- a/aarch64/ConstpropOpproof.v +++ b/aarch64/ConstpropOpproof.v @@ -335,40 +335,63 @@ Qed. Lemma make_divimm_correct: forall n r1 r2 v, - Val.divs e#r1 e#r2 = Some v -> + Val.maketotal (Val.divs e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - destruct (Int.ltu i (Int.repr 31)) eqn:?. - exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence. - exists v; auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divs_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 31)) eqn:iLT31. + { + cbn. + exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto. + destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto. + rewrite Val.divs_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. Qed. + Lemma make_divuimm_correct: forall n r1 r2 v, - Val.divu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divu e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divuimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divuimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - econstructor; split. simpl; eauto. - rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). - rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divu_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + cbn. + rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). + exists (Val.shru e # r1 (Vint i)); split; auto. + destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto. + rewrite Val.divu_pow2 with (y:=v) (n:=n). + all: auto. + } + exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence. Qed. Lemma make_andimm_correct: @@ -503,34 +526,60 @@ Qed. Lemma make_divlimm_correct: forall n r1 r2 v, - Val.divls e#r1 e#r2 = Some v -> + Val.maketotal (Val.divls e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divlimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divlimm. - destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. - rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto. - exists v; auto. - exists v; auto. + destruct (Int64.is_power2' n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 63)) eqn:iLT63. + { + cbn. + exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto. + rewrite H0 in H. + destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto. + { + subst v0. + rewrite Val.divls_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + subst. auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. Qed. + Lemma make_divluimm_correct: forall n r1 r2 v, - Val.divlu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divlu e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divluimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divluimm. destruct (Int64.is_power2' n) eqn:?. + { econstructor; split. simpl; eauto. - rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). - rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. - simpl. - erewrite Int64.is_power2'_range by eauto. - erewrite Int64.divu_pow2' by eauto. auto. - exists v; auto. + rewrite H0 in H. destruct (e#r1); inv H. + all: cbn; auto. + { + rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). + destruct (Int64.eq n Int64.zero); cbn; auto. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.divu_pow2' by eauto. auto. + } + } + exists v; split; auto. + cbn. + rewrite H. + reflexivity. Qed. Lemma make_andlimm_correct: @@ -679,10 +728,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. - (* divs *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divimm_correct; auto. + apply make_divimm_correct; auto. congruence. - (* divu *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divuimm_correct; auto. + apply make_divuimm_correct; auto. congruence. - (* and 1 *) rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. - (* and 2 *) @@ -745,10 +794,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. - (* divl *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divlimm_correct; auto. + apply make_divlimm_correct; auto. congruence. - (* divlu *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divluimm_correct; auto. + apply make_divluimm_correct; auto. congruence. - (* andl 1 *) rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. - (* andl 2 *) diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml new file mode 100644 index 00000000..3a3b87fc --- /dev/null +++ b/aarch64/DuplicateOpcodeHeuristic.ml @@ -0,0 +1,41 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None + diff --git a/aarch64/ExpansionOracle.ml b/aarch64/ExpansionOracle.ml new file mode 100644 index 00000000..afcb29c2 --- /dev/null +++ b/aarch64/ExpansionOracle.ml @@ -0,0 +1,15 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +let expanse n ibf btl = btl + +let find_last_reg c = () diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v index 704c392e..454a7cbe 100644 --- a/aarch64/Machregs.v +++ b/aarch64/Machregs.v @@ -158,6 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg := match ef with | EF_memcpy sz al => R15 :: R17 :: R29 :: nil | EF_inline_asm txt sg clob => destroyed_by_clobber clob + | EF_profiling _ _ => R15 :: R17 :: R29 :: nil | _ => nil end. diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 5f3c1a8d..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -14,3 +14,11 @@ let is_scratch_register s = s = "X16" || s = "x16" || s = "X30" || s = "x30" + +let class_of_type = function + | AST.Tint | AST.Tlong -> 0 + | AST.Tfloat | AST.Tsingle -> 1 + | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli new file mode 100644 index 00000000..23ac1c9a --- /dev/null +++ b/aarch64/Machregsaux.mli @@ -0,0 +1,20 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Auxiliary functions on machine registers *) + +val is_scratch_register: string -> bool + +val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/aarch64/Op.v b/aarch64/Op.v index f8d2510e..4c0dfb72 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -386,8 +386,8 @@ Definition eval_operation | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3)) | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3)) - | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 - | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 + | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2)) + | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2)) | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a)) | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n)) @@ -408,7 +408,7 @@ Definition eval_operation | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2) | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2) | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2) - | Oshrximm n, v1::nil => Val.shrx v1 (Vint n) + | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n))) | Ozext s, v1 :: nil => Some (Val.zero_ext s v1) | Osext s, v1 :: nil => Some (Val.sign_ext s v1) | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a)) @@ -435,8 +435,8 @@ Definition eval_operation | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3)) | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) - | Odivl, v1 :: v2 :: nil => Val.divls v1 v2 - | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2 + | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2)) + | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2)) | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2) | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a)) | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n)) @@ -457,7 +457,7 @@ Definition eval_operation | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2) | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2) | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2) - | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n) + | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n))) | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1) | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1) | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a)) @@ -481,22 +481,22 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1) | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1) - | Ointoffloat, v1::nil => Val.intoffloat v1 - | Ointuoffloat, v1::nil => Val.intuoffloat v1 - | Ofloatofint, v1::nil => Val.floatofint v1 - | Ofloatofintu, v1::nil => Val.floatofintu v1 - | Ointofsingle, v1::nil => Val.intofsingle v1 - | Ointuofsingle, v1::nil => Val.intuofsingle v1 - | Osingleofint, v1::nil => Val.singleofint v1 - | Osingleofintu, v1::nil => Val.singleofintu v1 - | Olongoffloat, v1::nil => Val.longoffloat v1 - | Olonguoffloat, v1::nil => Val.longuoffloat v1 - | Ofloatoflong, v1::nil => Val.floatoflong v1 - | Ofloatoflongu, v1::nil => Val.floatoflongu v1 - | Olongofsingle, v1::nil => Val.longofsingle v1 - | Olonguofsingle, v1::nil => Val.longuofsingle v1 - | Osingleoflong, v1::nil => Val.singleoflong v1 - | Osingleoflongu, v1::nil => Val.singleoflongu v1 + | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1)) + | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1)) + | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1)) + | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1)) + | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1)) + | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1)) + | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1)) + | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1)) + | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1)) + | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1)) + | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1)) + | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1)) + | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1)) + | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) + | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) + | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty) @@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... destruct v1... - apply type_add. - apply type_sub. - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero); inv H2... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (_ || _); trivial... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (Int.eq i0 Int.zero); constructor. - destruct v0... destruct v1... - destruct v0... destruct (eval_shift s v1 a)... - destruct v0... @@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... @@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - apply type_subl. - destruct v0... destruct v1... - destruct v0... destruct v1... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero); inv H2... + - destruct v0; destruct v1; cbn; trivial. + destruct (_ || _); cbn; trivial. + - destruct v0; destruct v1; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. - destruct v0... destruct v1... - destruct v0... destruct (eval_shiftl s v1 a)... - destruct v0... @@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... @@ -893,34 +895,60 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... - destruct v0... (* intoffloat, intuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial. (* floatofint, floatofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* intofsingle, intuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial. (* singleofint, singleofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longoffloat, longuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial. (* floatoflong, floatoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longofsingle, longuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial. (* singleoflong, singleoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m) as [[]|]... - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I. Qed. + +Definition is_trapping_op (op : operation) := + match op with + | Omove => false + | _ => false + end. + + +Definition args_of_operation op := + if eq_operation op Omove + then 1%nat + else List.length (fst (type_of_operation op)). + +Lemma is_trapping_op_sound: + forall op vl sp m, + is_trapping_op op = false -> + (List.length vl) = args_of_operation op -> + eval_operation genv sp op vl m <> None. +Proof. + unfold args_of_operation. + destruct op; destruct eq_operation; intros; simpl in *; try congruence. + all: try (destruct vl as [ | vh1 vl1]; try discriminate). + all: try (destruct vl1 as [ | vh2 vl2]; try discriminate). + all: try (destruct vl2 as [ | vh3 vl3]; try discriminate). + all: try (destruct vl3 as [ | vh4 vl4]; try discriminate). +Qed. End SOUNDNESS. (** * Manipulating and transforming operations *) @@ -1174,6 +1202,28 @@ Proof. rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto. Qed. +Lemma cond_valid_pointer_eq: + forall cond args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_condition cond args m1 = eval_condition cond args m2. +Proof. + intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence. + all: repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + +Lemma op_valid_pointer_eq: + forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_operation ge sp op args m1 = eval_operation ge sp op args m2. +Proof. + intros until m2. intro MEM. destruct op eqn:OP; simpl; try congruence. + - f_equal; f_equal; auto using cond_valid_pointer_eq. + - destruct cond; simpl; try congruence; + repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + (** Global variables mentioned in an operation or addressing mode *) Definition globals_addressing (addr: addressing) : list ident := @@ -1374,12 +1424,12 @@ Proof. - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto. - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto. (* div, divu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + - inv H4; inv H2; trivial. cbn. + destruct (_ || _); cbn; + constructor. + - inv H4; inv H2; trivial. cbn. + destruct (Int.eq i0 Int.zero); cbn; + constructor. (* and*) - inv H4; inv H2; simpl; auto. - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1411,8 +1461,8 @@ Proof. (* shru *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto. (* shrx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1447,12 +1497,10 @@ Proof. - inv H4; inv H2; simpl; auto. - inv H4; inv H2; simpl; auto. (* divl, divlu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero - || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + - inv H4; inv H2; cbn; trivial. + destruct (_ || _); cbn; trivial. + - inv H4; inv H2; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. (* andl *) - inv H4; inv H2; simpl; auto. - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1484,8 +1532,8 @@ Proof. (* shrlu *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. (* shrlx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1516,37 +1564,29 @@ Proof. - inv H4; simpl; auto. - inv H4; simpl; auto. (* intoffloat, intuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial. (* floatofint, floatofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* intofsingle, intuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial. (* singleofint, singleofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longoffloat, longuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial. (* floatoflong, floatoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longofsingle, longuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial. (* singleoflong, singleoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* cmp, sel *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. @@ -1576,6 +1616,21 @@ Proof. - apply Val.offset_ptr_inject; auto. Qed. + +Lemma eval_addressing_inj_none: + forall addr sp1 vl1 sp2 vl2, + (forall id ofs, + In id (globals_addressing addr) -> + Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) -> + Val.inject f sp1 sp2 -> + Val.inject_list f vl1 vl2 -> + eval_addressing ge1 sp1 addr vl1 = None -> + eval_addressing ge2 sp2 addr vl2 = None. +Proof. + intros until vl2. intros Hglobal Hinjsp Hinjvl. + destruct addr; simpl in *; + inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate. +Qed. End EVAL_COMPAT. (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *) @@ -1682,6 +1737,18 @@ Proof. destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto. Qed. +Lemma eval_addressing_lessdef_none: + forall sp addr vl1 vl2, + Val.lessdef_list vl1 vl2 -> + eval_addressing genv sp addr vl1 = None -> + eval_addressing genv sp addr vl2 = None. +Proof. + intros. rewrite val_inject_list_lessdef in H. + eapply eval_addressing_inj_none with (sp1 := sp). + intros. rewrite <- val_inject_lessdef; auto. + rewrite <- val_inject_lessdef; auto. + eauto. auto. +Qed. End EVAL_LESSDEF. (** Compatibility of the evaluation functions with memory injections. *) @@ -1734,6 +1801,19 @@ Proof. econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. Qed. +Lemma eval_addressing_inject_none: + forall addr vl1 vl2, + Val.inject_list f vl1 vl2 -> + eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None -> + eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None. +Proof. + intros. + rewrite eval_shift_stack_addressing. + eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto. + intros. apply symbol_address_inject. + econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. +Qed. + Lemma eval_operation_inject: forall op vl1 vl2 v1 m1 m2, Val.inject_list f vl1 vl2 -> diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml new file mode 100644 index 00000000..5cdd002c --- /dev/null +++ b/aarch64/OpWeights.ml @@ -0,0 +1,353 @@ +open Op;; +open PrepassSchedulingOracleDeps;; + +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ -> 1 + | Oshift _ -> 2 + | Oadd -> 1 + | Oaddshift _ -> 2 + | Oaddimm _ + | Oneg -> 1 + | Onegshift _ -> 2 + | Osub -> 1 + | Osubshift _ -> 2 + | Omul + | Omuladd + | Omulsub -> 4 + | Odiv + | Odivu -> 29 + | Oand -> 1 + | Oandshift _ -> 2 + | Oandimm _ -> 1 + | Oor -> 1 + | Oorshift _ -> 2 + | Oorimm _ -> 1 + | Oxor -> 1 + | Oxorshift _ -> 2 + | Oxorimm _ -> 1 + | Onot -> 1 + | Onotshift _ -> 2 + | Obic -> 1 + | Obicshift _ -> 2 + | Oorn -> 1 + | Oornshift _ -> 2 + | Oeqv -> 1 + | Oeqvshift _ -> 2 + | Oshl + | Oshr + | Oshru -> 2 + | Oshrximm _ -> 6 + | Ozext _ + | Osext _ -> 1 + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ -> 2 + + (* 64-bit integer arithmetic *) + | Oshiftl _ -> 2 + | Oextend _ -> 1 + | Omakelong + | Olowlong + | Ohighlong + | Oaddl -> 1 + | Oaddlshift _ + | Oaddlext _ -> 2 + | Oaddlimm _ + | Onegl -> 1 + | Oneglshift _ -> 2 + | Osubl -> 1 + | Osublshift _ + | Osublext _ -> 2 + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> 4 + | Odivl -> 50 + | Odivlu -> 50 + | Oandl -> 1 + | Oandlshift _ -> 2 + | Oandlimm _ + | Oorl -> 1 + | Oorlshift _ -> 2 + | Oorlimm _ + | Oxorl -> 1 + | Oxorlshift _ -> 2 + | Oxorlimm _ + | Onotl -> 1 + | Onotlshift _ -> 2 + | Obicl -> 1 + | Obiclshift _ -> 2 + | Oornl -> 1 + | Oornlshift _ -> 2 + | Oeqvl -> 1 + | Oeqvlshift _ -> 2 + | Oshll + | Oshrl + | Oshrlu -> 2 + | Oshrlximm _ -> 6 + | Ozextl _ + | Osextl _ -> 1 + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> 2 + + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) +(* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> 6 + | Odivf -> 50 (* r [rd = r1 / r2] *) + | Odivfs -> 20 + (* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 + | _ -> 1);; + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml new file mode 100644 index 00000000..44c6f16b --- /dev/null +++ b/aarch64/OpWeightsAsm.ml @@ -0,0 +1,165 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Asmblock + +(*type called_function = (Registers.reg, AST.ident) Datatypes.sum*) + +type instruction = PBasic of Asmblock.basic | PControl of Asmblock.control + +type opweights = { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : instruction -> int -> int; + resources_of_op : instruction -> int -> int array; + (*non_pipelined_resources_of_op : Op.operation -> int -> int array;*) + (*latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;*) + (*resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;*) + (*resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;*) + (*resources_of_cond : Op.condition -> int -> int array;*) + (*latency_of_call : AST.signature -> called_function -> int;*) + (*resources_of_call : AST.signature -> called_function -> int array;*) + (*resources_of_builtin : AST.external_function -> int array*) +} + +module Cortex_A53 = struct + let resource_bounds = [| 2; 2; 1; 1 |] + + (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1 + + let latency_of_op (i : instruction) (nargs : int) = + match i with + | PBasic (PArith (PArithP (_, _))) -> 1 + | PBasic (PArith (PArithPP (i', _, _))) -> ( + match i' with + | Psbfiz (_, _, _) | Psbfx (_, _, _) | Pubfiz (_, _, _) | Pubfx (_, _, _) + -> + 2 + | Pfcvtds | Pfcvtsd + | Pfcvtzs (_, _) + | Pfcvtzu (_, _) + | Pfabs _ | Pfneg _ + | Pscvtf (_, _) + | Pucvtf (_, _) -> + 6 + | _ -> 1) + | PBasic (PArith (PArithPPP (i', _, _, _))) -> ( + match i' with + | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> 2 + | Psmulh | Pumulh -> 4 + | Pfdiv _ | Psdiv _ | Pudiv _ -> 50 + | _ -> 6) + | PBasic (PArith (PArithRR0R (_, _, _, _))) -> 2 + | PBasic (PArith (PArithRR0 (_, _, _))) -> 1 + | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> 4 + | PBasic (PArith (PArithComparisonPP (_, _, _))) -> 6 + | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> 1 + | PBasic (PArith (PArithComparisonP (i', _))) -> ( + match i' with Pfcmp0 _ -> 6 | _ -> 1) + | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> 6 + | PBasic (PArith _) -> 1 + | PBasic (PLoad _) -> 3 + | PBasic (PStore _) -> 3 + | PBasic (Pallocframe (_, _)) -> 3 + | PBasic (Pfreeframe (_, _)) -> 1 + | PBasic (Ploadsymbol (_, _)) -> 1 + | PBasic (Pcvtsw2x (_, _)) -> 2 + | PBasic (Pcvtuw2x (_, _)) -> 2 + | PBasic (Pcvtx2w _) -> 1 + | PBasic (Pnop) -> 0 + | PControl _ -> 6 + + let resources_of_op (i : instruction) (nargs : int) = + match i with + | PBasic (PArith (PArithP (i', _))) -> ( + match i' with + | Pfmovimmd _ | Pfmovimms _ -> [| 1; 1; 0; 1 |] + | _ -> [| 1; 1; 0; 0 |]) + | PBasic (PArith (PArithPP (i', _, _))) -> ( + match i' with + | Pfcvtds | Pfcvtsd + | Pfcvtzs (_, _) + | Pfcvtzu (_, _) + | Pfabs _ | Pfneg _ + | Pscvtf (_, _) + | Pucvtf (_, _) -> + [| 1 ; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |]) + | PBasic (PArith (PArithPPP (i', _, _, _))) -> ( + match i' with + | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> [| 1; 1; 0; 0 |] + | Pfdiv _ | Psdiv _ | Pudiv _ -> [| 1; 0; 0; 0 |] + | _ -> [| 1; 1; 1; 0 |]) + | PBasic (PArith (PArithRR0R (_, _, _, _))) -> [| 1; 1; 0; 0 |] + | PBasic (PArith (PArithRR0 (_, _, _))) -> [| 1; 1; 0; 0 |] + | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> [| 1; 1; 1; 0 |] + | PBasic (PArith (PArithComparisonPP (_, _, _))) -> [| 1; 1; 1; 0 |] + | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> [| 1; 1; 0; 0 |] + | PBasic (PArith (PArithComparisonP (i', _))) -> ( + match i' with Pfcmp0 _ -> [| 1; 1; 1; 0 |] | _ -> [| 1; 1; 0; 0 |]) + | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> [| 1; 1; 1; 0 |] + | PBasic (PArith _) -> [| 1; 1; 0; 0 |] + | PBasic (PLoad _) -> [| 1; 0; 0; 1 |] + | PBasic (PStore _) -> [| 1; 0; 0; 1 |] + | PBasic (Pallocframe (_, _)) -> [| 1; 0; 0; 1 |] + | PBasic (Pfreeframe (_, _)) -> [| 1; 1; 0; 0 |] + | PBasic (Ploadsymbol (_, _)) -> [| 1; 1; 0; 0 |] + | PBasic (Pcvtsw2x (_, _)) -> [| 1; 1; 0; 0 |] + | PBasic (Pcvtuw2x (_, _)) -> [| 1; 1; 0; 0 |] + | PBasic (Pcvtx2w _) -> [| 1; 1; 0; 0 |] + | PBasic (Pnop) -> [| 0; 0; 0; 0 |] + | PControl _ -> resource_bounds + + (*let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] )*) + +end + +let get_opweights () : opweights = + (*match !Clflags.option_mtune with*) + (*| "cortex-a53" | "cortex-a35" | "" ->*) + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = + Cortex_A53.resources_of_op + (*non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;*) + (*latency_of_load = Cortex_A53.latency_of_load;*) + (*resources_of_load = Cortex_A53.resources_of_load;*) + (*resources_of_store = Cortex_A53.resources_of_store;*) + (*resources_of_cond = Cortex_A53.resources_of_cond;*) + (*latency_of_call = Cortex_A53.latency_of_call;*) + (*resources_of_call = Cortex_A53.resources_of_call;*) + (*resources_of_builtin = Cortex_A53.resources_of_builtin*); + } + +(*| xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;*) diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml new file mode 100644 index 00000000..1d890f2c --- /dev/null +++ b/aarch64/PeepholeOracle.ml @@ -0,0 +1,624 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Camlcoq +open Asmblock +open Asm +open Int64 +open Printf + +(* If true, the oracle will print a msg for each applied peephole *) +let debug = false + +(* Functions to verify the immediate offset range for ldp/stp *) +let is_valid_immofs_32 z = + if z <= 252 && z >= -256 && z mod 4 = 0 then true else false + +let is_valid_immofs_64 z = + if z <= 504 && z >= -512 && z mod 8 = 0 then true else false + +(* Functions to check if a ldp/stp replacement is valid according to args *) +let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (dreg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (dreg_eq rd1 (IR b2))) + && (z2 = z1 + 4 || z2 = z1 - 4) + && is_valid_immofs_32 z1 + then true + else false + +let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (dreg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (dreg_eq rd1 (IR b2))) + && (z2 = z1 + 8 || z2 = z1 - 8) + && is_valid_immofs_64 z1 + then true + else false + +let is_valid_str32 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true + else false + +let is_valid_str64 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true + else false + +let dreg_of_ireg r = IR (RR1 r) + +let dreg_of_freg r = FR r + +(* Return true if an intermediate + * affectation eliminates the potential + * candidate *) +let verify_load_affect reg rd b rev = + let b = IR b in + if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd + +(* Return true if an intermediate + * read eliminates the potential + * candidate *) +let verify_load_read reg rd b rev = dreg_eq reg rd + +(* Return true if an intermediate + * affectation eliminates the potential + * candidate *) +let verify_store_affect reg rs b rev = + let b = IR b in + dreg_eq reg b || dreg_eq reg rs + +type ph_type = P32 | P32f | P64 | P64f + +type inst_type = Ldr of ph_type | Str of ph_type + +let ph_ty_to_string = function + | Ldr P32 -> "ldr32" + | Ldr P32f -> "ldr32f" + | Ldr P64 -> "ldr64" + | Ldr P64f -> "ldr64f" + | Str P32 -> "str32" + | Str P32f -> "str32f" + | Str P64 -> "str64" + | Str P64f -> "str64f" + +let print_ph_ty chan v = output_string chan (ph_ty_to_string v) + +let symb_mem = Hashtbl.create 9 + +(* Affect a symbolic memory list of potential replacements + * for a given write in reg *) +let rec affect_symb_mem reg insta pot_rep stype rev = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ -> + if verify_load_affect reg rd b rev then + affect_symb_mem reg insta t0 stype rev + else h0 :: affect_symb_mem reg insta t0 stype rev + | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ -> + if verify_store_affect reg rs b rev then + affect_symb_mem reg insta t0 stype rev + else h0 :: affect_symb_mem reg insta t0 stype rev + | _, _ -> + failwith "affect_symb_mem: Found an inconsistent inst in pot_rep") + +(* Affect a symbolic memory list of potential replacements + * for a given read in reg *) +let rec read_symb_mem reg insta pot_rep stype rev = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ -> + if verify_load_read reg rd b rev then + read_symb_mem reg insta t0 stype rev + else h0 :: read_symb_mem reg insta t0 stype rev + | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ -> + h0 :: read_symb_mem reg insta t0 stype rev + | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep") + +(* Update a symbolic memory list of potential replacements + * for any addressing *) +let update_pot_rep_addressing a insta pot_rep stype rev = + match a with + | ADimm (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + | ADreg (base, r) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADlsl (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADsxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADuxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADadr (base, _, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + | ADpostincr (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + +(* Update a symbolic memory list of potential replacements + * for any basic instruction *) +let update_pot_rep_basic inst insta stype rev = + let pot_rep = Hashtbl.find symb_mem stype in + (match inst with + | PArith i -> ( + match i with + | PArithP (_, rd) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev + | PArithPP (_, rd, rs) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs insta !pot_rep stype rev + | PArithPPP (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | PArithRR0R (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + (match rs1 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev + | PArithRR0 (_, rd, rs) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + match rs with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()) + | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev; + match rs3 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()) + | PArithComparisonPP (_, rs1, rs2) -> + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | PArithComparisonR0R (_, rs1, rs2) -> + (match rs1 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev + | PArithComparisonP (_, rs1) -> + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev + | Pcset (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pfmovi (_, rd, rs) -> ( + pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev; + match rs with + | RR0 rs -> + pot_rep := + read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | _ -> ()) + | Pcsel (rd, rs1, rs2, _) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | Pfnmul (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev) + | PLoad i -> ( + (* Here, we consider a different behavior for load and store potential candidates: + * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *) + match stype with + | Ldr _ -> ( + match i with + | PLd_rd_a (_, rd, a) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + update_pot_rep_addressing a insta pot_rep stype rev + | Pldp (_, rd1, rd2, _, _, a) -> + pot_rep := affect_symb_mem rd1 insta !pot_rep stype rev; + pot_rep := affect_symb_mem rd2 insta !pot_rep stype rev; + update_pot_rep_addressing a insta pot_rep stype rev) + | _ -> pot_rep := []) + | PStore _ -> ( + (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones : + * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole. + * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *) + match stype with Ldr _ -> pot_rep := [] | _ -> ()) + | Pallocframe (_, _) -> pot_rep := [] + | Pfreeframe (_, _) -> pot_rep := [] + | Ploadsymbol (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pcvtsw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | Pcvtuw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | Pcvtx2w rd -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pnop -> ()); + Hashtbl.replace symb_mem stype pot_rep + +(* This is useful to manage the case were the immofs + * of the first ldr/str is greater than the second one *) +let min_is_rev n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if z1 < z2 then true else false + +(* Below functions were added to merge pattern matching cases in peephole, + * thanks to this, we can make the chunk difference (int/any) compatible. *) +let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a = + match ldi with + | Pldrw | Pldrw_a -> Pldpw + | Pldrx | Pldrx_a -> Pldpx + | Pldrs -> Pldps + | Pldrd | Pldrd_a -> Pldpd + | _ -> failwith "trans_ldi: Found a non compatible load to translate" + +let trans_sti (sti : store_rs_a) : store_rs1_rs2_a = + match sti with + | Pstrw | Pstrw_a -> Pstpw + | Pstrx | Pstrx_a -> Pstpx + | Pstrs -> Pstps + | Pstrd | Pstrd_a -> Pstpd + | _ -> failwith "trans_sti: Found a non compatible store to translate" + +let is_compat_load (ldi : load_rd_a) = + match ldi with + | Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a -> true + | _ -> false + +let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) = + match ldi1 with + | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false) + | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false) + | Pldrs -> ( match ldi2 with Pldrs -> true | _ -> false) + | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false) + | _ -> false + +let is_compat_store (sti : store_rs_a) = + match sti with + | Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true + | _ -> false + +let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) = + match sti1 with + | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false) + | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false) + | Pstrs -> ( match sti2 with Pstrs -> true | _ -> false) + | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false) + | _ -> false + +let get_load_pht (ldi : load_rd_a) = + match ldi with + | Pldrw | Pldrw_a -> Ldr P32 + | Pldrs -> Ldr P32f + | Pldrx | Pldrx_a -> Ldr P64 + | Pldrd | Pldrd_a -> Ldr P64f + | _ -> failwith "get_load_string: Found a non compatible load to translate" + +let get_store_pht (sti : store_rs_a) = + match sti with + | Pstrw | Pstrw_a -> Str P32 + | Pstrs -> Str P32f + | Pstrx | Pstrx_a -> Str P64 + | Pstrd | Pstrd_a -> Str P64f + | _ -> failwith "get_store_string: Found a non compatible store to translate" + +let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype = + match stype with + | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 + | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 + +let is_valid_str b1 b2 n1 n2 stype = + match stype with + | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2 + | _ -> is_valid_str64 b1 b2 n1 n2 + +(* Try to find the index of the first previous compatible + * replacement in a given symbolic memory *) +let rec search_compat_rep r2 b2 n2 insta pot_rep stype = + match pot_rep with + | [] -> None + | h0 :: t0 -> ( + match insta.(h0) with + | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) -> + if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then + Some (h0, chunk_load ld1, rd1, b1, n1) + else search_compat_rep r2 b2 n2 insta t0 stype + | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) -> + if is_valid_str b1 b2 n1 n2 stype then + Some (h0, chunk_store st1, rs1, b1, n1) + else search_compat_rep r2 b2 n2 insta t0 stype + | _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep" + ) + +(* Try to find the index of the first previous compatible + * replacement in a given symbolic memory (when iterating in the reversed list) *) +let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype = + match pot_rep with + | [] -> None + | h0 :: t0 -> ( + match insta.(h0) with + | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) -> + if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then + Some (h0, chunk_load ld1, rd1, b1, n1) + else search_compat_rep_inv r2 b2 n2 insta t0 stype + | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) -> + if is_valid_str b2 b1 n2 n1 stype then + Some (h0, chunk_store st1, rs1, b1, n1) + else search_compat_rep_inv r2 b2 n2 insta t0 stype + | _ -> + failwith + "search_compat_rep_ldst_inv: Found an inconsistent inst in pot_rep") + +let init_symb_mem () = + Hashtbl.clear symb_mem; + Hashtbl.add symb_mem (Ldr P32) (ref []); + Hashtbl.add symb_mem (Ldr P64) (ref []); + Hashtbl.add symb_mem (Ldr P32f) (ref []); + Hashtbl.add symb_mem (Ldr P64f) (ref []); + Hashtbl.add symb_mem (Str P32) (ref []); + Hashtbl.add symb_mem (Str P64) (ref []); + Hashtbl.add symb_mem (Str P32f) (ref []); + Hashtbl.add symb_mem (Str P64f) (ref []) + +let reset_str_symb_mem () = + Hashtbl.replace symb_mem (Str P32) (ref []); + Hashtbl.replace symb_mem (Str P64) (ref []); + Hashtbl.replace symb_mem (Str P32f) (ref []); + Hashtbl.replace symb_mem (Str P64f) (ref []) + +(* Main peephole function in backward style *) +let pair_rep_inv insta = + (* Each list below is a symbolic mem representation + * for one type of inst. Lists contains integers which + * are the indices of insts in the main array "insta". *) + init_symb_mem (); + for i = Array.length insta - 1 downto 0 do + let h0 = insta.(i) in + (* Here we need to update every symbolic memory according to the matched inst *) + update_pot_rep_basic h0 insta (Ldr P32) true; + update_pot_rep_basic h0 insta (Ldr P64) true; + update_pot_rep_basic h0 insta (Ldr P32f) true; + update_pot_rep_basic h0 insta (Ldr P64f) true; + update_pot_rep_basic h0 insta (Str P32) true; + update_pot_rep_basic h0 insta (Str P64) true; + update_pot_rep_basic h0 insta (Str P32f) true; + update_pot_rep_basic h0 insta (Str P64f) true; + match h0 with + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))) -> + if is_compat_load ldi then ( + (* Search a previous compatible load *) + let ld_t = get_load_pht ldi in + let pot_rep = Hashtbl.find symb_mem ld_t in + (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then + eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then + eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))); + Hashtbl.replace symb_mem ld_t pot_rep) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))) -> + if is_compat_store sti then ( + (* Search a previous compatible store *) + let st_t = get_store_pht sti in + let pot_rep = Hashtbl.find symb_mem st_t in + (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t with + (* If we can't find a candidate, clean and add the current store as a potential future one *) + | None -> + reset_str_symb_mem (); + pot_rep := [ i ] + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then + eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp + (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1)))); + Hashtbl.replace symb_mem st_t pot_rep + (* Any other inst *)) + else reset_str_symb_mem () + | i -> ( + (* Clear list of candidates if there is a non supported store *) + match i with PStore _ -> reset_str_symb_mem () | _ -> ()) + done + +(* Main peephole function in forward style *) +let pair_rep insta = + (* Each list below is a symbolic mem representation + * for one type of inst. Lists contains integers which + * are the indices of insts in the main array "insta". *) + init_symb_mem (); + for i = 0 to Array.length insta - 2 do + let h0 = insta.(i) in + let h1 = insta.(i + 1) in + (* Here we need to update every symbolic memory according to the matched inst *) + update_pot_rep_basic h0 insta (Ldr P32) false; + update_pot_rep_basic h0 insta (Ldr P64) false; + update_pot_rep_basic h0 insta (Ldr P32f) false; + update_pot_rep_basic h0 insta (Ldr P64f) false; + update_pot_rep_basic h0 insta (Str P32) false; + update_pot_rep_basic h0 insta (Str P64) false; + update_pot_rep_basic h0 insta (Str P32f) false; + update_pot_rep_basic h0 insta (Str P64f) false; + match (h0, h1) with + (* Consecutive ldr *) + | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))), + PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) -> + if are_compat_load ldi1 ldi2 then + let ld_t = get_load_pht ldi1 in + if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then ( + if min_is_rev n1 n2 then ( + if debug then + eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd1, + rd2, + chunk_load ldi1, + chunk_load ldi2, + ADimm (b1, n1) ))) + else ( + if debug then + eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd2, + rd1, + chunk_load ldi2, + chunk_load ldi1, + ADimm (b1, n2) ))); + insta.(i + 1) <- Pnop) + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> + if is_compat_load ldi then ( + (* Search a previous compatible load *) + let ld_t = get_load_pht ldi in + let pot_rep = Hashtbl.find symb_mem ld_t in + (match search_compat_rep rd1 b1 n1 insta !pot_rep ld_t with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then + eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then + eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))); + Hashtbl.replace symb_mem ld_t pot_rep) + (* Consecutive str *) + | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))), + PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) -> + (* Regardless of whether we can perform the peephole or not, + * we have to clean the potential candidates for stp now as we are + * looking at two new store instructions. *) + reset_str_symb_mem (); + if are_compat_store sti1 sti2 then + let st_t = get_store_pht sti1 in + if is_valid_str b1 b2 n1 n2 st_t then ( + if debug then + eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp + ( trans_sti sti1, + rd1, + rd2, + chunk_store sti1, + chunk_store sti2, + ADimm (b1, n1) )); + insta.(i + 1) <- Pnop) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> + if is_compat_store sti then ( + (* Search a previous compatible store *) + let st_t = get_store_pht sti in + let pot_rep = Hashtbl.find symb_mem st_t in + (match search_compat_rep rd1 b1 n1 insta !pot_rep st_t with + (* If we can't find a candidate, clean and add the current store as a potential future one *) + | None -> + reset_str_symb_mem (); + pot_rep := [ i ] + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then + eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n)))); + Hashtbl.replace symb_mem st_t pot_rep) + else reset_str_symb_mem () + (* Any other inst *) + | i, _ -> ( + (* Clear list of candidates if there is a non supported store *) + match i with PStore _ -> reset_str_symb_mem () | _ -> ()) + done + +(* Calling peephole if flag is set *) +let optimize_bdy (bdy : basic list) : basic list = + if !Clflags.option_fcoalesce_mem then ( + let insta = Array.of_list bdy in + pair_rep insta; + pair_rep_inv insta; + Array.to_list insta) + else bdy + +(* Called peephole function from Coq *) +let peephole_opt bdy = + Timing.time_coq + [ + 'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e'; + ] + optimize_bdy bdy diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v new file mode 100644 index 00000000..f826632b --- /dev/null +++ b/aarch64/PostpassScheduling.v @@ -0,0 +1,146 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(** Implementation (and basic properties) of the verified postpass scheduler *) + +Require Import Coqlib Errors AST Integers. +Require Import Asmblock Axioms Memory Globalenvs. +Require Import Asmblockdeps Asmblockprops. +Require Import IterList. + +Local Open Scope error_monad_scope. + +(** * Oracle taking as input a basic block, + returns a scheduled basic block *) +Axiom schedule: bblock -> (list basic) * option control. + +Axiom peephole_opt: (list basic) -> list basic. + +Extract Constant schedule => "PostpassSchedulingOracle.schedule". + +Extract Constant peephole_opt => "PeepholeOracle.peephole_opt". + +Section verify_schedule. + +Variable lk: aarch64_linker. + +Definition verify_schedule (bb bb' : bblock) : res unit := + match bblock_simub bb bb' with + | true => OK tt + | false => Error (msg "PostpassScheduling.verify_schedule") + end. + +Definition verify_size bb bb' := if (Z.eqb (size bb) (size bb')) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size"). + +Lemma verify_size_size: + forall bb bb', verify_size bb bb' = OK tt -> size bb = size bb'. +Proof. + intros. unfold verify_size in H. destruct (size bb =? size bb') eqn:SIZE; try discriminate. + apply Z.eqb_eq. assumption. +Qed. + +Program Definition make_bblock_from_basics lb := + match lb with + | nil => Error (msg "PostpassScheduling.make_bblock_from_basics") + | b :: lb => OK {| header := nil; body := b::lb; exit := None |} + end. + +Program Definition schedule_to_bblock (lb: list basic) (oc: option control) : res bblock := + match oc with + | None => make_bblock_from_basics lb + | Some c => OK {| header := nil; body := lb; exit := Some c |} + end. +Next Obligation. + unfold Is_true, non_empty_bblockb. + unfold non_empty_exit. rewrite orb_true_r. reflexivity. +Qed. + +Definition do_schedule (bb: bblock) : res bblock := + if (Z.eqb (size bb) 1) then OK (bb) + else match (schedule bb) with (lb, oc) => schedule_to_bblock lb oc end. + +(*Definition do_peephole (bb: bblock) : bblock :=*) + (*let res := Peephole.optimize_bblock bb in*) + (*if (size res =? size bb) then res else bb.*) + +Program Definition do_peephole (bb : bblock) := + let optimized := peephole_opt (body bb) in + let wf_ok := non_empty_bblockb optimized (exit bb) in + {| header := header bb; + body := if wf_ok then optimized else (body bb); + exit := exit bb |}. +Next Obligation. + destruct (non_empty_bblockb (peephole_opt (body bb))) eqn:Rwf. + - rewrite Rwf. cbn. trivial. + - exact (correct bb). +Qed. + +Definition verified_schedule (bb : bblock) : res bblock := + let nhbb := no_header bb in + let phbb := do_peephole nhbb in + do schbb <- do_schedule phbb; + let bb' := stick_header (header bb) schbb in + do sizecheck <- verify_size bb bb'; + do schedcheck <- verify_schedule bb bb'; + OK (bb'). + +Lemma verified_schedule_size: + forall bb bb', verified_schedule bb = OK bb' -> size bb = size bb'. +Proof. + intros. unfold verified_schedule in H. + monadInv H. + unfold verify_size in EQ1. + destruct (size _ =? size _) eqn:ESIZE_H in EQ1; try discriminate. + rewrite Z.eqb_eq in ESIZE_H; rewrite ESIZE_H; reflexivity. +Qed. + +Theorem verified_schedule_correct: + forall ge f bb bb', + verified_schedule bb = OK bb' -> + bblock_simu lk ge f bb bb'. +Proof. + intros. + monadInv H. + eapply bblock_simub_correct. + unfold verify_schedule in EQ0. + destruct (bblock_simub _ _) in *; try discriminate; auto. +Qed. + +End verify_schedule. + +Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) := + match lbb with + | nil => OK nil + | bb :: lbb => + do tlbb <- transf_blocks lbb; + do tbb <- verified_schedule bb; + OK (tbb :: tlbb) + end. + +Definition transl_function (f: function) : res function := + do lb <- transf_blocks (fn_blocks f); + OK (mkfunction (fn_sig f) lb). + +Definition transf_function (f: function) : res function := + do tf <- transl_function f; + if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks)) + then Error (msg "code size exceeded") + else OK tf. + +Definition transf_fundef (f: fundef) : res fundef := + transf_partial_fundef transf_function f. + +Definition transf_program (p: program) : res program := + transform_partial_program transf_fundef p. diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml new file mode 100644 index 00000000..6f784238 --- /dev/null +++ b/aarch64/PostpassSchedulingOracle.ml @@ -0,0 +1,674 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Asmblock +open OpWeightsAsm +open InstructionScheduler + +let debug = false + +let stats = false + +(** + * Extracting infos from Asm instructions + *) + +type location = Reg of Asm.preg | Mem | IREG0_XZR + +type ab_inst_rec = { + inst : instruction; + write_locs : location list; + read_locs : location list; + is_control : bool; +} + +(** Asm constructor to real instructions *) + +exception OpaqueInstruction + +let is_XZR = function IREG0_XZR -> true | _ -> false + +let reg_of_pc = Reg Asm.PC + +let reg_of_dreg r = Reg (Asm.DR r) + +let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r))) + +let reg_of_iregsp r = Reg (Asm.DR (Asm.IR r)) + +let reg_of_ireg0 r = + match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR + +let reg_of_freg r = Reg (Asm.DR (Asm.FR r)) + +let reg_of_cr r = Reg (Asm.CR r) + +let regXSP = Reg (Asm.DR (Asm.IR Asm.XSP)) + +let flags_wlocs = + [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CC; reg_of_cr Asm.CV ] + +let get_arith_p_wlocs = function + | Pfmovimms _ -> [ reg_of_ireg Asm.X16 ] + | Pfmovimmd _ -> [ reg_of_ireg Asm.X16 ] + | _ -> [] + +let arith_p_rec i i' rd = + { + inst = i; + write_locs = [ rd ] @ get_arith_p_wlocs i'; + read_locs = []; + is_control = false; + } + +let arith_pp_rec i rd rs = + { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false } + +let arith_ppp_rec i rd r1 r2 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } + +let arith_rr0r_rec i rd r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_rr0_rec i rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_arrrr0_rec i rd r1 r2 r3 = + let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_comparison_pp_rec i r1 r2 = + { + inst = i; + write_locs = flags_wlocs; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let arith_comparison_r0r_rec i r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false } + +let arith_comparison_p_rec i r1 = + { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false } + +let get_eval_addressing_rlocs a = + match a with + | Asm.ADimm (base, _) -> [ reg_of_iregsp base ] + | Asm.ADreg (base, r) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADlsl (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ] + | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ] + +let load_rd_a_rec ld rd a = + { + inst = ld; + write_locs = [ rd ]; + read_locs = [ Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let load_rd1_rd2_a_rec ld rd1 rd2 a = + { + inst = ld; + write_locs = [ rd1; rd2 ]; + read_locs = [ Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let load_rec ldi = + match ldi with + | PLd_rd_a (ld, rd, a) -> + load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a + | Pldp (ld, rd1, rd2, _, _, a) -> + load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1) + (reg_of_dreg rd2) a + +let store_rs_a_rec st rs a = + { + inst = st; + write_locs = [ Mem ]; + read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let store_rs1_rs2_a_rec st rs1 rs2 a = + { + inst = st; + write_locs = [ Mem ]; + read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let store_rec sti = + match sti with + | PSt_rs_a (st, rs, a) -> + store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a + | Pstp (st, rs1, rs2, _, _, a) -> + store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1) + (reg_of_dreg rs2) a + +let loadsymbol_rec i rd id = + { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false } + +let cvtsw2x_rec i rd r1 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } + +let cvtuw2x_rec i rd r1 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } + +let cvtx2w_rec i rd = + { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false } + +let get_testcond_rlocs c = + match c with + | Asm.TCeq -> [ reg_of_cr Asm.CZ ] + | Asm.TCne -> [ reg_of_cr Asm.CZ ] + | Asm.TChs -> [ reg_of_cr Asm.CC ] + | Asm.TClo -> [ reg_of_cr Asm.CC ] + | Asm.TCmi -> [ reg_of_cr Asm.CN ] + | Asm.TCpl -> [ reg_of_cr Asm.CN ] + | Asm.TChi -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] + | Asm.TCls -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] + | Asm.TCge -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] + | Asm.TClt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] + | Asm.TCgt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] + | Asm.TCle -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] + +let cset_rec i rd c = + { + inst = i; + write_locs = [ rd ]; + read_locs = get_testcond_rlocs c; + is_control = false; + } + +let csel_rec i rd r1 r2 c = + { + inst = i; + write_locs = [ rd ]; + read_locs = [ r1; r2 ] @ get_testcond_rlocs c; + is_control = false; + } + +let fmovi_rec i fsz rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let fnmul_rec i fsz rd r1 r2 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } + +let allocframe_rec i sz linkofs = + { + inst = i; + write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16; reg_of_ireg Asm.X29 ]; + read_locs = [ regXSP; Mem ]; + is_control = false; + } + +let freeframe_rec i sz linkofs = + { + inst = i; + write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16 ]; + read_locs = [ regXSP; Mem ]; + is_control = false; + } + +let nop_rec i = + { inst = i; write_locs = []; read_locs = []; is_control = false } + +let arith_rec i = + match i with + | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd) + | PArithPP (i', rd, rs) -> + arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs) + | PArithPPP (i', rd, r1, r2) -> + arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) + | PArithRR0R (i', rd, r1, r2) -> + arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) + (reg_of_ireg r2) + | PArithRR0 (i', rd, r1) -> + arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) + | PArithARRRR0 (i', rd, r1, r2, r3) -> + arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) + (reg_of_ireg r2) (reg_of_ireg0 r3) + | PArithComparisonPP (i', r1, r2) -> + arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) + (reg_of_dreg r2) + | PArithComparisonR0R (i', r1, r2) -> + arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) + (reg_of_ireg r2) + | PArithComparisonP (i', r1) -> + arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1) + | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c + | Pfmovi (fsz, rd, r1) -> + fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1) + | Pcsel (rd, r1, r2, c) -> + csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) c + | Pfnmul (fsz, rd, r1, r2) -> + fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) + (reg_of_freg r2) + +let basic_rec i = + match i with + | PArith i' -> arith_rec i' + | PLoad ld -> load_rec ld + | PStore st -> store_rec st + | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs + | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs + | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id + | Pcvtsw2x (rd, r1) -> + cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtuw2x (rd, r1) -> + cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd) + | Pnop -> nop_rec (PBasic i) + +let builtin_rec i ef args res = + { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true } + +let ctl_flow_rec i = + match i with + | Pb lbl -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbc (c, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbl (id, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbs (id, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = []; + is_control = true; + } + | Pblr (r, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pbr (r, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pret r -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pcbnz (sz, r, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Pcbz (sz, r, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Ptbnz (sz, r, n, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Ptbz (sz, r, n, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Pbtbl (r1, tbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ]; + read_locs = [ reg_of_ireg r1; reg_of_pc ]; + is_control = true; + } + +let control_rec i = + match i with + | Pbuiltin (ef, args, res) -> builtin_rec (PControl i) ef args res + | PCtlFlow i' -> ctl_flow_rec i' + +let rec basic_recs body = + match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body + +let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ] + +let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit + +(** + * Providing informations relative to the real instructions + *) + +type inst_info = { + write_locs : location list; + read_locs : location list; + is_control : bool; + (* resources consumed by the instruction *) + usage : int array; + latency : int; +} + +(** Abstraction providing all the necessary informations for solving the scheduling problem *) + +let rec_to_info (r : ab_inst_rec) : inst_info = + let opweights = OpWeightsAsm.get_opweights () in + let usage = opweights.resources_of_op r.inst 0 + and latency = opweights.latency_of_op r.inst 0 in + { + write_locs = r.write_locs; + read_locs = r.read_locs; + usage; + latency; + is_control = r.is_control; + } + +let instruction_infos bb = List.map rec_to_info (instruction_recs bb) + +let instruction_usages bb = + let usages = List.map (fun info -> info.usage) (instruction_infos bb) in + Array.of_list usages + +(** + * Latency constraints building + *) + +module LocHash = Hashtbl + +(* Hash table : location => list of instruction ids *) + +let rec intlist n = + if n < 0 then failwith "intlist: n < 0" + else if n = 0 then [] + else (n - 1) :: intlist (n - 1) + +let find_in_hash hashloc loc = + match LocHash.find_opt hashloc loc with Some idl -> idl | None -> [] + +(* Returns a list of instruction ids *) +let rec get_accesses hashloc (ll : location list) = + match ll with + | [] -> [] + | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs + +let compute_latency (ifrom : inst_info) = ifrom.latency + +let latency_constraints bb = + let written = LocHash.create 70 + and read = LocHash.create 70 + and count = ref 0 + and constraints = ref [] + and instr_infos = instruction_infos bb in + let step (i : inst_info) = + let raw = get_accesses written i.read_locs + and waw = get_accesses written i.write_locs + and war = get_accesses read i.write_locs in + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + raw; + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + waw; + List.iter + (fun i -> + constraints := + { instr_from = i; instr_to = !count; latency = 0 } :: !constraints) + war; + if i.is_control then + List.iter + (fun n -> + constraints := + { instr_from = n; instr_to = !count; latency = 0 } :: !constraints) + (intlist !count); + (* Updating "read" and "written" hashmaps *) + List.iter + (fun loc -> + LocHash.replace written loc [ !count ]; + LocHash.replace read loc [] + (* Clearing all the entries of "read" hashmap when a register is written *)) + i.write_locs; + List.iter + (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc)) + i.read_locs; + count := !count + 1 + in + List.iter step instr_infos; + !constraints + +(** + * Using the InstructionScheduler + *) + +let opweights = OpWeightsAsm.get_opweights () + +let build_problem bb = + { + max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + } + +let get_from_indexes indexes l = List.map (List.nth l) indexes + +(*let is_basic = function PBasic _ -> true | _ -> false*) +let is_control = function PControl _ -> true | _ -> false + +let to_control = function + | PControl i -> i + | _ -> failwith "to_control: basic instruction found" + +let rec body_to_instrs bdy = + match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l' + +let rec instrs_to_bdy instrs = + match instrs with + | [] -> [] + | PBasic i :: l' -> i :: instrs_to_bdy l' + | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found" + +let repack li hd = + let last = List.nth li (List.length li - 1) in + if is_control last then + let cut_li = + Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) + in + { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) } + else { header = hd; body = instrs_to_bdy li; exit = None } + +module TimeHash = Hashtbl + +(*Hash table : time => list of instruction ids *) + +(* Flattening the minpack result *) +let hashtbl2flatarray h maxint = + let rec f i = + match TimeHash.find_opt h i with + | None -> if i > maxint then [] else f (i + 1) + | Some bund -> bund @ f (i + 1) + in + f 0 + +let find_max l = + let rec f = function + | [] -> None + | e :: l -> ( + match f l with + | None -> Some e + | Some m -> if e > m then Some e else Some m) + in + match f l with None -> raise Not_found | Some m -> m + +(* We still use the minpack algorithm even without bundles, but the result is then flattened *) +(*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*) +let minpack_list (l : int list) = + let timehash = TimeHash.create (List.length l) in + let rec f i = function + | [] -> () + | t :: l -> + (match TimeHash.find_opt timehash t with + | None -> TimeHash.add timehash t [ i ] + | Some bund -> TimeHash.replace timehash t (bund @ [ i ])); + f (i + 1) l + in + f 0 l; + hashtbl2flatarray timehash (find_max l) + +let bb_to_instrs bb = + body_to_instrs bb.body + @ match bb.exit with None -> [] | Some e -> [ PControl e ] + +let build_solution bb sol = + (* Remove last element - the total *) + let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in + let pack = minpack_list tmp and instrs = bb_to_instrs bb in + repack (get_from_indexes pack instrs) bb.header + +let print_schedule sched = + print_string "[ "; + Array.iter (fun x -> Printf.printf "%d; " x) sched; + print_endline "]" + +let do_schedule bb = + let problem = build_problem bb in + if debug then print_problem stdout problem; + let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in + match solution with + | None -> failwith "Could not find a valid schedule" + | Some sol -> + if debug then print_schedule sol; + build_solution bb sol + +(** + * Dumb schedule if the above doesn't work + *) + +(* Pack result *) +let pack_result (bb : bblock) = (bb.body, bb.exit) + +let smart_schedule bb = + let bb' = + try do_schedule bb with + | OpaqueInstruction -> + if debug then + Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n"; + bb (* Identity in case of failure *) + | e -> + let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in + Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg + stack; + failwith "Invalid schedule" + in + pack_result bb' + +let bblock_schedule bb = + let identity_mode = not !Clflags.option_fpostpass in + if debug && not identity_mode then ( + Printf.eprintf "###############################\n"; + Printf.eprintf "SCHEDULING\n"); + if stats then ( + let oc = + open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv" + in + Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb)); + close_out oc); + if identity_mode then pack_result bb else smart_schedule bb + +(** Called schedule function from Coq *) + +(*let schedule_notime bb = let toto = bblock_schedule in toto*) +let schedule bb = + Timing.time_coq + [ + 'P'; + 'o'; + 's'; + 't'; + 'p'; + 'a'; + 's'; + 's'; + 'S'; + 'c'; + 'h'; + 'e'; + 'd'; + 'u'; + 'l'; + 'i'; + 'n'; + 'g'; + ' '; + 'o'; + 'r'; + 'a'; + 'c'; + 'l'; + 'e'; + ] + bblock_schedule bb diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v new file mode 100644 index 00000000..a5084b5f --- /dev/null +++ b/aarch64/PostpassSchedulingproof.v @@ -0,0 +1,482 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Errors. +Require Import Integers Floats AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations Machblock Conventions Asmblock. +Require Import Asmblockprops. +Require Import PostpassScheduling. +Require Import Asmblockgenproof. +Require Import Axioms. +Require Import Lia. + +Local Open Scope error_monad_scope. + +Definition match_prog (p tp: program) := + match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall p tp, transf_program p = OK tp -> match_prog p tp. +Proof. + intros. eapply match_transform_partial_program; eauto. +Qed. + +Section PRESERVATION_ASMBLOCK. + +Variables prog tprog: program. +Variable lk: aarch64_linker. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall id, + Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof (Genv.find_symbol_match TRANSL). + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof (Genv.senv_match TRANSL). + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_ptr_transf_partial TRANSL). + +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s1 s2, s1 = s2 -> match_states s1 s2. + +Lemma prog_main_preserved: + prog_main tprog = prog_main prog. +Proof (match_program_main TRANSL). + +Lemma prog_main_address_preserved: + (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) = + (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero). +Proof. + unfold Genv.symbol_address. rewrite symbols_preserved. + rewrite prog_main_preserved. auto. +Qed. + +Lemma transf_initial_states: + forall st1, initial_state prog st1 -> + exists st2, initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inv H. + econstructor; split. + - eapply initial_state_intro. + eapply (Genv.init_mem_transf_partial TRANSL); eauto. + - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> final_state st1 r -> final_state st2 r. +Proof. + intros. inv H0. inv H. econstructor; eauto. +Qed. + +Lemma transf_find_bblock: + forall ofs f bb tf, + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb -> + transf_function f = OK tf -> + exists tbb, + verified_schedule bb = OK tbb + /\ find_bblock (Ptrofs.unsigned ofs) (fn_blocks tf) = Some tbb. +Proof. + intros. + monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0. + monadInv EQ. simpl in *. + generalize (Ptrofs.unsigned ofs) H x EQ0; clear ofs H x g EQ0. + induction (fn_blocks f). + - intros. simpl in *. discriminate. + - intros. simpl in *. + monadInv EQ0. simpl. + destruct (zlt z 0); try discriminate. + destruct (zeq z 0). + + inv H. eauto. + + monadInv EQ0. + exploit IHb; eauto. + intros (tbb & SCH & FIND). + eexists; split; eauto. + inv FIND. + unfold verify_size in EQ0. + destruct (size a =? size (stick_header (header a) x)) eqn:EQSIZE; try discriminate. + rewrite Z.eqb_eq in EQSIZE; rewrite EQSIZE. + reflexivity. +Qed. + +Lemma stick_header_neutral: forall a, + a = (stick_header (header a) (no_header a)). +Proof. + intros. + unfold stick_header. unfold Asmblock.stick_header_obligation_1. simpl. destruct a. + simpl. reflexivity. +Qed. + +Lemma symbol_address_preserved: + forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs. +Proof. + intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity. +Qed. + +Lemma verified_schedule_label: + forall bb tbb l, + verified_schedule bb = OK (tbb) -> + is_label l bb = is_label l tbb. +Proof. + intros. + unfold is_label. + monadInv H. simpl. auto. +Qed. + +Remark label_pos_pvar_none_add: + forall tc l p p' k, + label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None. +Proof. + induction tc. + - intros. simpl. auto. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + discriminate. + + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar_none: + forall tc l p p', + label_pos l p tc = None -> label_pos l p' tc = None. +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_none_add; eauto. +Qed. + +Remark label_pos_pvar_some_add_add: + forall tc l p p' k k', + label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k). +Proof. + induction tc. + - intros. simpl in H. discriminate. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + inv H. assert (k = k') by lia. subst. reflexivity. + + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar_some_add: + forall tc l p p' k, + label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k). +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_some_add_add; eauto. +Qed. + +Remark label_pos_pvar_add: + forall c tc l p p' k, + label_pos l (p+k) c = label_pos l p tc -> + label_pos l (p'+k) c = label_pos l p' tc. +Proof. + induction c. + - intros. simpl in *. + exploit label_pos_pvar_none; eauto. + - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL. + + exploit label_pos_pvar_some_add; eauto. + + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto. +Qed. + +Lemma label_pos_pvar: + forall c tc l p p', + label_pos l p c = label_pos l p tc -> + label_pos l p' c = label_pos l p' tc. +Proof. + intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1. + eapply label_pos_pvar_add; eauto. +Qed. + +Lemma label_pos_head_cons: + forall c bb tbb l tc p, + verified_schedule bb = OK tbb -> + label_pos l p c = label_pos l p tc -> + label_pos l p (bb :: c) = label_pos l p (tbb :: tc). +Proof. + intros. simpl. + exploit verified_schedule_label; eauto. intros ISLBL. + rewrite ISLBL. + destruct (is_label l tbb) eqn:ISLBL'; simpl; auto. + eapply label_pos_pvar in H0. erewrite H0. + erewrite verified_schedule_size; eauto. +Qed. + +Lemma label_pos_preserved: + forall c tc l, + transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc. +Proof. + induction c. + - intros. simpl in *. inv H. reflexivity. + - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H. + eapply IHc in EQ. eapply label_pos_head_cons; eauto. +Qed. + +Lemma label_pos_preserved_blocks: + forall l f tf, + transf_function f = OK tf -> + label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf). +Proof. + intros. monadInv H. monadInv EQ. + destruct (zlt Ptrofs.max_unsigned _); try discriminate. + monadInv EQ0. simpl. eapply label_pos_preserved; eauto. +Qed. + +Lemma transf_exec_basic: + forall i rs m, exec_basic lk ge i rs m = exec_basic lk tge i rs m. +Proof. + intros. pose symbol_address_preserved. + unfold exec_basic. + destruct i; simpl; auto; try congruence. +Qed. + +Lemma transf_exec_body: + forall bdy rs m, exec_body lk ge bdy rs m = exec_body lk tge bdy rs m. +Proof. + induction bdy; intros. + - simpl. reflexivity. + - simpl. rewrite transf_exec_basic. + destruct (exec_basic _ _ _); auto. +Qed. + +Lemma transf_exec_cfi: forall f tf cfi rs m, + transf_function f = OK tf -> + exec_cfi ge f cfi rs m = exec_cfi tge tf cfi rs m. +Proof. + intros. destruct cfi; simpl; auto; + assert (ge = Genv.globalenv prog); auto; + assert (tge = Genv.globalenv tprog); auto; + pose symbol_address_preserved. + all: try unfold eval_branch; try unfold eval_neg_branch; try unfold goto_label; + try erewrite label_pos_preserved_blocks; try rewrite e; eauto. + destruct (rs # X16 <- Vundef r1); auto. + destruct (list_nth_z tbl (Int.unsigned i)); auto. + erewrite label_pos_preserved_blocks; eauto. +Qed. + +Lemma transf_exec_exit: + forall f tf sz ex t rs m rs' m', + transf_function f = OK tf -> + exec_exit ge f sz rs m ex t rs' m' -> + exec_exit tge tf sz rs m ex t rs' m'. +Proof. + intros. induction H0. + - econstructor. + - econstructor. erewrite <- transf_exec_cfi; eauto. + - econstructor; eauto. + eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. +Qed. + +Lemma transf_exec_bblock: + forall f tf bb t rs m rs' m', + transf_function f = OK tf -> + exec_bblock lk ge f bb rs m t rs' m' -> + exec_bblock lk tge tf bb rs m t rs' m'. +Proof. + intros. + destruct H0 as [rs1[m1[BDY EXIT]]]. + unfold exec_bblock. + eexists; eexists; split. + rewrite <- transf_exec_body; eauto. + eapply transf_exec_exit; eauto. +Qed. + +Theorem transf_step_correct: + forall s1 t s2, step lk ge s1 t s2 -> + forall s1' (MS: match_states s1 s1'), + (exists s2', step lk tge s1' t s2' /\ match_states s2 s2'). +Proof. + induction 1; intros; inv MS. + - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF. + exploit transf_find_bblock; eauto. intros (tbb & VES & FIND). + exploit verified_schedule_correct; eauto. intros EBB. + eapply transf_exec_bblock in EBB; eauto. + exists (State rs' m'). + split; try (econstructor; eauto). + - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF. + remember (State _ m') as s'. exists s'. split; try constructor; auto. + subst s'. eapply exec_step_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. +Qed. + +Theorem transf_program_correct_Asmblock: + forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog). +Proof. + eapply forward_simulation_step. + - apply senv_preserved. + - apply transf_initial_states. + - apply transf_final_states. + - apply transf_step_correct. +Qed. + +End PRESERVATION_ASMBLOCK. + +(* +Require Import Asm. + +Lemma verified_par_checks_alls_bundles lb x: forall bundle, + verify_par lb = OK x -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + induction lb; simpl; try tauto. + intros bundle H; monadInv H. + destruct 1; subst; eauto. + destruct x0; auto. +Qed. + +Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle: + verified_schedule_nob bb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + unfold verified_schedule_nob. intros H; + monadInv H. destruct x4. + intros; eapply verified_par_checks_alls_bundles; eauto. +Qed. + +Lemma verify_par_bblock_PExpand bb i: + exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt. +Proof. + destruct bb as [h bdy ext H]; simpl. + intros; subst. destruct i. + generalize H. + rewrite <- wf_bblock_refl in H. + destruct H as [H H0]. + unfold builtin_alone in H0. erewrite H0; eauto. +Qed. + +Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core. + +Lemma verified_schedule_checks_alls_bundles bb lb bundle: + verified_schedule bb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + unfold verified_schedule. remember (exit bb) as exb. + destruct exb as [c|]; eauto. + destruct c as [i|]; eauto. + destruct i; intros H. inversion_clear H; simpl. + intuition subst. + intros; eapply verify_par_bblock_PExpand; eauto. +Qed. + +Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle, + transf_blocks lbb = OK lb -> + List.In bundle lb -> verify_par_bblock bundle = OK tt. +Proof. + induction lbb; simpl. + - intros lb bundle H; inversion_clear H. simpl; try tauto. + - intros lb bundle H0. + monadInv H0. + rewrite in_app. destruct 1; eauto. + eapply verified_schedule_checks_alls_bundles; eauto. +Qed. + +Lemma find_bblock_Some_in lb: + forall ofs b, find_bblock ofs lb = Some b -> List.In b lb. +Proof. + induction lb; simpl; try congruence. + intros ofs b. + destruct (zlt ofs 0); try congruence. + destruct (zeq ofs 0); eauto. + intros X; inversion X; eauto. +Qed.*) + +(* +Section PRESERVATION_ASMVLIW. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma all_bundles_are_checked b ofs f bundle: + Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) -> + find_bblock ofs (fn_blocks f) = Some bundle -> + verify_par_bblock bundle = OK tt. +Proof. + unfold match_prog, match_program in TRANSL. + unfold Genv.find_funct_ptr; simpl; intros X. + destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence. + destruct y as [tf0|]; try congruence. + inversion X as [H1]. subst. clear X. + remember (@Gfun fundef unit (Internal f)) as f2. + destruct H as [ctx' f1 f2 H0|]; try congruence. + inversion Heqf2 as [H2]. subst; clear Heqf2. + unfold transf_fundef, transf_partial_fundef in H. + destruct f1 as [f1|f1]; try congruence. + unfold transf_function, transl_function in H. + monadInv H. monadInv EQ. + destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence. + injection EQ1; intros; subst. + monadInv EQ0. simpl in * |-. + intros; exploit transf_blocks_checks_all_bundles; eauto. + intros; eapply find_bblock_Some_in; eauto. +Qed. + +Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m': + exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' -> + verify_par_bblock bundle = OK tt -> + det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'. +Proof. + intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0. + simpl in H. + eapply Asmblockdeps.bblock_para_check_correct; eauto. +Qed. + +Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m': + Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) -> + find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle -> + exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' -> + det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'. +Proof. + intros; eapply checked_bundles_are_parexec_equiv; eauto. + eapply all_bundles_are_checked; eauto. +Qed. + +Theorem transf_program_correct_Asmvliw: + forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog). +Proof. + eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto. + - intros; subst; auto. + - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto. + + eapply exec_step_internal; eauto. + intros; eapply seqexec_parexec_equiv; eauto. + + eapply exec_step_builtin; eauto. + + eapply exec_step_external; eauto. +Qed. + +End PRESERVATION_ASMVLIW.*) + +Section PRESERVATION. + +Variable lk: aarch64_linker. +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Theorem transf_program_correct: + forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog). +Proof. + eapply transf_program_correct_Asmblock; eauto. +Qed. + +End PRESERVATION. diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..d7e80cd9 --- /dev/null +++ b/aarch64/PrepassSchedulingOracle.ml @@ -0,0 +1,297 @@ +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + +open AST +open BTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps +open PrintBTL +open DebugPrint + +let use_alias_analysis () = false + +let build_constraints_and_resources (opweights : opweights) seqa btl = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int * int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = + Array.make opweights.nr_non_pipelined_units (-1) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to then + if latency = 0 then () + else + failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop" + else + latency_constraints := + { instr_from; instr_to; latency } :: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads with Some l -> l | None -> [] + in + let add_input_mem i = + if not (use_alias_analysis ()) then ( + (* Read after write *) + (match !last_mem_write with None -> () | Some j -> add_constraint j i 1); + last_mem_reads := i :: !last_mem_reads) + and add_output_mem i = + if not (use_alias_analysis ()) then ( + (* Write after write *) + (match !last_mem_write with None -> () | Some j -> add_constraint j i 1); + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := []) + and add_input_reg i reg = + (* Read after write *) + (match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency); + last_reg_reads := PTree.set reg (i :: get_last_reads reg) !last_reg_reads + and add_output_reg i latency reg = + (* Write after write *) + (match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1); + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg); + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> + add_builtin_res i hi; + add_builtin_res i lo + in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack (_, _) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal (_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong (hi, lo) -> + add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr (a1, a2) -> + add_builtin_arg i a1; + add_builtin_arg i a2 + and irreversible_action i = + match !last_branch with None -> () | Some j -> add_constraint j i 1 + in + let set_branch i = + irreversible_action i; + last_branch := Some i + and add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri + (fun rsc latency -> if latency >= 0 then last_non_pipelined_op.(rsc) <- i) + resources + in + Array.iteri + (fun i (inst, other_uses) -> + List.iter (fun use -> add_input_reg i use) (Regset.elements other_uses); + match inst with + | Bnop _ -> () + | Bop (op, lr, rd, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length lr)); + if Op.is_trapping_op op then irreversible_action i; + add_input_regs i lr; + add_output_reg i (opweights.latency_of_op op (List.length lr)) rd + | Bload (trap, chk, addr, lr, rd, _) -> + if trap = TRAP then irreversible_action i; + add_input_mem i; + add_input_regs i lr; + add_output_reg i + (opweights.latency_of_load trap chk addr (List.length lr)) + rd + | Bstore (chk, addr, lr, src, _) -> + irreversible_action i; + add_input_regs i lr; + add_input_reg i src; + add_output_mem i + | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) -> + set_branch i; + add_input_mem i; + add_input_regs i lr + | Bcond (_, _, _, _, _) -> + failwith "build_constraints_and_resources: invalid Bcond" + | BF (Bcall (signature, ef, lr, rd, _), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + add_output_reg i (opweights.latency_of_call signature ef) rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bcall" + | BF (Btailcall (signature, ef, lr), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + failwith "build_constraints_and_resources: invalid Btailcall" + | BF (Bbuiltin (ef, lr, rd, _), _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) lr; + add_builtin_res i rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bbuiltin" + | BF (Bjumptable (lr, _), _) -> + set_branch i; + add_input_reg i lr; + failwith "build_constraints_and_resources: invalid Bjumptable" + | BF (Breturn (Some r), _) -> + set_branch i; + add_input_reg i r; + failwith "build_constraints_and_resources: invalid Breturn Some" + | BF (Breturn None, _) -> + set_branch i; + failwith "build_constraints_and_resources: invalid Breturn None" + | BF (Bgoto _, _) -> + failwith "build_constraints_and_resources: invalid Bgoto" + | Bseq (_, _) -> failwith "build_constraints_and_resources: Bseq") + seqa; + !latency_constraints + +let resources_of_instruction (opweights : opweights) = function + | Bnop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Bop (op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Bload (trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Bstore (chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | BF (Bcall (signature, ef, inputs, output, _), _) -> + opweights.resources_of_call signature ef + | BF (Bbuiltin (ef, builtin_inputs, builtin_output, _), _) -> + opweights.resources_of_builtin ef + | Bcond (cond, args, _, _, _) -> + opweights.resources_of_cond cond (List.length args) + | BF (Btailcall _, _) | BF (Bjumptable _, _) | BF (Breturn _, _) -> + opweights.pipelined_resource_bounds + | BF (Bgoto _, _) | Bseq (_, _) -> + failwith "resources_of_instruction: invalid btl instruction" + +let print_sequence pp seqa = + Array.iteri + (fun i (inst, other_uses) -> + debug "i=%d\n inst = " i; + print_btl_inst pp inst; + debug "\n other_uses="; + print_regset other_uses; + debug "\n") + seqa + +let length_of_chunk = function + | Mint8signed | Mint8unsigned -> 1 + | Mint16signed | Mint16unsigned -> 2 + | Mint32 | Mfloat32 | Many32 -> 4 + | Mint64 | Mfloat64 | Many64 -> 8 + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa btl = + let simple_deps = build_constraints_and_resources opweights seqa btl in + { + max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing; + reference_counting = Some reference_counting; + instruction_usages = + Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = simple_deps; + } + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert (nr_instructions = Array.length early_ones); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.(Array.length fwd_schedule - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri + (fun i is_early -> + if is_early then + constraints' := + { + instr_from = i; + instr_to = nr_instructions; + latency = fwd_makespan - fwd_schedule.(i); + } + :: !constraints') + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None + +let prepass_scheduler_by_name name problem seqa = + match name with + | "zigzag" -> + let early_ones = + Array.map + (fun (inst, _) -> + match inst with Bcond (_, _, _, _, _) -> true | _ -> false) + seqa + in + zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence seqa btl (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) reference = + let opweights = OpWeights.get_opweights () in + try + if Array.length seqa <= 1 then None + else + let nr_instructions = Array.length seqa in + if !Clflags.option_debug_compcert > 6 then + Printf.printf "prepass scheduling length = %d\n" nr_instructions; + let problem = + define_problem opweights live_regs_entry typing reference seqa btl + in + if !Clflags.option_debug_compcert > 7 then ( + print_sequence stdout seqa; + print_problem stdout problem); + match + prepass_scheduler_by_name !Clflags.option_fprepass_sched problem seqa + with + | None -> + Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort + (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 else if si > sj then 1 else i - j) + positions; + Some positions + with Failure s -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/aarch64/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index aee09b12..0984943c 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -16,6 +16,7 @@ Require Import Coqlib Zbits. Require Import AST Integers Floats Values Memory Globalenvs. Require Import Cminor Op CminorSel. Require Import SelectOp SelectLong SelectOpproof. +Require Import OpHelpers OpHelpersproof. Local Open Scope cminorsel_scope. Local Transparent Archi.ptr64. @@ -23,8 +24,10 @@ Local Transparent Archi.ptr64. (** * Correctness of the smart constructors *) Section CMCONSTR. - -Variable ge: genv. +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. Variable sp: val. Variable e: env. Variable m: mem. @@ -556,25 +559,29 @@ Qed. Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. Proof. red; intros; unfold divls_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. Proof. red; intros; unfold modls_base, modl_aux. exploit Val.modls_divls; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. Proof. red; intros; unfold divlu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. Proof. red; intros; unfold modlu_base, modl_aux. exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrxlimm: @@ -589,7 +596,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0. rewrite Int64.shrx'_zero. auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -723,42 +730,42 @@ Qed. Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. End CMCONSTR. diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp index b5a03989..7f73d592 100644 --- a/aarch64/SelectOp.vp +++ b/aarch64/SelectOp.vp @@ -56,9 +56,13 @@ Nondetfunction add (e1: expr) (e2: expr) := | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s => Eop (Oaddshift s a) (t1 ::: t2 ::: Enil) | Eop Omul (t1:::t2:::Enil), t3 => - Eop Omuladd (t3:::t1:::t2:::Enil) + if Compopts.optim_madd tt + then Eop Omuladd (t3:::t1:::t2:::Enil) + else Eop Oadd (e1:::e2:::Enil) | t1, Eop Omul (t2:::t3:::Enil) => - Eop Omuladd (t1:::t2:::t3:::Enil) + if Compopts.optim_madd tt + then Eop Omuladd (t1:::t2:::t3:::Enil) + else Eop Oadd (e1:::e2:::Enil) | _, _ => Eop Oadd (e1:::e2:::Enil) end. @@ -555,6 +559,13 @@ Nondetfunction addressing (chunk: memory_chunk) (e: expr) := | _ => (Aindexed Int64.zero, e:::Enil) end. +(* floats *) +Definition divf_base (e1: expr) (e2: expr) := + Eop Odivf (e1 ::: e2 ::: Enil). + +Definition divfs_base (e1: expr) (e2: expr) := + Eop Odivfs (e1 ::: e2 ::: Enil). + (** ** Arguments of builtins *) Nondetfunction builtin_arg (e: expr) := diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index ccc4c0f1..dfa4c598 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -16,6 +16,7 @@ Require Import Coqlib Zbits. Require Import AST Integers Floats Values Memory Builtins Globalenvs. Require Import Cminor Op CminorSel. Require Import SelectOp. +Require Import OpHelpers OpHelpersproof. Local Open Scope cminorsel_scope. Local Transparent Archi.ptr64. @@ -74,8 +75,10 @@ Ltac TrivialExists := (** * Correctness of the smart constructors *) Section CMCONSTR. - -Variable ge: genv. +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. Variable sp: val. Variable e: env. Variable m: mem. @@ -158,8 +161,10 @@ Proof. - rewrite <- Val.add_assoc. apply eval_addimm. EvalOp. - rewrite Val.add_commut. TrivialExists. - TrivialExists. -- rewrite Val.add_commut. TrivialExists. -- TrivialExists. +- destruct (Compopts.optim_madd tt). + + rewrite Val.add_commut. TrivialExists. + + TrivialExists. +- destruct (Compopts.optim_madd tt); TrivialExists. - TrivialExists. Qed. @@ -661,7 +666,8 @@ Theorem eval_divs_base: Val.divs x y = Some z -> exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold divs_base; TrivialExists. + intros; unfold divs_base; TrivialExists; cbn. + rewrite H1. reflexivity. Qed. Theorem eval_mods_base: @@ -673,7 +679,8 @@ Theorem eval_mods_base: Proof. intros; unfold mods_base, mod_aux. exploit Val.mods_divs; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + cbn. rewrite A. reflexivity. Qed. Theorem eval_divu_base: @@ -684,6 +691,7 @@ Theorem eval_divu_base: exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. intros; unfold divu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modu_base: @@ -695,7 +703,8 @@ Theorem eval_modu_base: Proof. intros; unfold modu_base, mod_aux. exploit Val.modu_divu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrximm: @@ -710,7 +719,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0. rewrite Int.shrx_zero by (compute; auto). auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -923,7 +932,7 @@ Theorem eval_intoffloat: Val.intoffloat x = Some y -> exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofint: @@ -934,7 +943,7 @@ Theorem eval_floatofint: Proof. intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuoffloat: @@ -943,7 +952,7 @@ Theorem eval_intuoffloat: Val.intuoffloat x = Some y -> exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofintu: @@ -954,7 +963,7 @@ Theorem eval_floatofintu: Proof. intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intofsingle: @@ -963,7 +972,7 @@ Theorem eval_intofsingle: Val.intofsingle x = Some y -> exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofint: @@ -974,7 +983,7 @@ Theorem eval_singleofint: Proof. intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuofsingle: @@ -983,7 +992,7 @@ Theorem eval_intuofsingle: Val.intuofsingle x = Some y -> exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofintu: @@ -994,7 +1003,7 @@ Theorem eval_singleofintu: Proof. intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** Selection *) @@ -1061,6 +1070,26 @@ Proof. - constructor; auto. Qed. +Theorem eval_divf_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v. +Proof. + intros; unfold divf_base. + TrivialExists. +Qed. + +Theorem eval_divfs_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v. +Proof. + intros; unfold divfs_base. + TrivialExists. +Qed. + (** Platform-specific known builtins *) Theorem eval_platform_builtin: diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index a9d47bdd..1ca3be16 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -21,23 +21,6 @@ open AisAnnot open PrintAsmaux open Fileinfo -(* Recognition of FP numbers that are supported by the fmov #imm instructions: - "a normalized binary floating point encoding with 1 sign bit, - 4 bits of fraction and a 3-bit exponent" -*) - -let is_immediate_float64 bits = - let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in - let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in - exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant - -let is_immediate_float32 bits = - let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in - let mant = Int32.logand bits 0x7F_FFFFl in - exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant - -(* Naming and printing registers *) - let intsz oc (sz, n) = match sz with X -> coqint64 oc n | W -> coqint oc n @@ -107,14 +90,14 @@ let freg oc (sz, r) = output_string oc (match sz with D -> dreg_name r | S -> sreg_name r) let preg_asm oc ty = function - | IR r -> if ty = Tint then wreg oc r else xreg oc r - | FR r -> if ty = Tsingle then sreg oc r else dreg oc r - | _ -> assert false + | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r + | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r +| _ -> assert false let preg_annot = function - | IR r -> xreg_name r - | FR r -> dreg_name r - | _ -> assert false + | DR (IR (RR1 r)) -> xreg_name r + | DR (FR r) -> dreg_name r + | _ -> assert false (* Base-2 log of a Caml integer *) let rec log2 n = @@ -156,12 +139,16 @@ module ELF_System : SYSTEM = fprintf oc "#:lo12:%a" elf_label lbl let load_symbol_address oc rd id = - fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; - fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id + fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; + fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id + + (* Names of sections *) let name_of_section = function | Section_text -> ".text" - | Section_data i | Section_small_data i -> + | Section_data(i, true) -> + failwith "_Thread_local unsupported on this platform" + | Section_data(i, false) | Section_small_data i -> variable_section ~sec:".data" ~bss:".bss" i | Section_const i | Section_small_const i -> variable_section ~sec:".section .rodata" i @@ -223,7 +210,9 @@ module MacOS_System : SYSTEM = let name_of_section = function | Section_text -> ".text" - | Section_data i | Section_small_data i -> + | Section_data(i, true) -> + failwith "_Thread_local unsupported on this platform" + | Section_data(i, false) | Section_small_data i -> variable_section ~sec:".data" i | Section_const i | Section_small_const i -> variable_section ~sec:".const" ~reloc:".const_data" i @@ -343,6 +332,28 @@ module Target(System: SYSTEM): TARGET = | (r, EOuxtw n) -> fprintf oc "%a, uxtw #%a" wreg r coqint n | (r, EOuxtx n) -> fprintf oc "%a, uxtx #%a" xreg r coqint n + let next_profiling_label = + let atomic_incr_counter = ref 0 in + fun () -> + let r = sprintf ".Lcompcert_atomic_incr%d" !atomic_incr_counter in + incr atomic_incr_counter; r;; + + let print_profiling_logger oc id kind = + assert (kind >= 0); + assert (kind <= 1); + fprintf oc "%s begin profiling %a %d: atomic increment\n" comment + Profilingaux.pp_id id kind; + let ofs = profiling_offset id kind and lbl = next_profiling_label () in + fprintf oc " adrp x15, %s+%d\n" profiling_counter_table_name ofs; + fprintf oc " add x15, x15, :lo12:(%s+%d)\n" profiling_counter_table_name ofs; + fprintf oc "%s:\n" lbl; + fprintf oc " ldaxr x17, [x15]\n"; + fprintf oc " add x17, x17, 1\n"; + fprintf oc " stlxr w29, x17, [x15]\n"; + fprintf oc " cbnz w29, %s\n" lbl; + fprintf oc "%s end profiling %a %d\n" comment + Profilingaux.pp_id id kind;; + (* Printing of instructions *) let print_instruction oc = function (* Branches *) @@ -386,7 +397,9 @@ module Target(System: SYSTEM): TARGET = (* the upper 32 bits of Xrd are set to 0, performing zero-extension *) | Pldrsw(rd, a) -> fprintf oc " ldrsw %a, %a\n" xreg rd addressing a - | Pldp(rd1, rd2, a) -> + | Pldpw(rd1, rd2, _, _, a) -> + fprintf oc " ldp %a, %a, %a\n" wreg rd1 wreg rd2 addressing a + | Pldpx(rd1, rd2, _, _, a) -> fprintf oc " ldp %a, %a, %a\n" xreg rd1 xreg rd2 addressing a | Pstrw(rs, a) | Pstrw_a(rs, a) -> fprintf oc " str %a, %a\n" wreg rs addressing a @@ -396,7 +409,9 @@ module Target(System: SYSTEM): TARGET = fprintf oc " strb %a, %a\n" wreg rs addressing a | Pstrh(rs, a) -> fprintf oc " strh %a, %a\n" wreg rs addressing a - | Pstp(rs1, rs2, a) -> + | Pstpw(rs1, rs2, _, _, a) -> + fprintf oc " stp %a, %a, %a\n" wreg rs1 wreg rs2 addressing a + | Pstpx(rs1, rs2, _, _, a) -> fprintf oc " stp %a, %a, %a\n" xreg rs1 xreg rs2 addressing a (* Integer arithmetic, immediate *) | Paddimm(sz, rd, r1, n) -> @@ -520,12 +535,20 @@ module Target(System: SYSTEM): TARGET = fprintf oc " str %a, %a\n" sreg rd addressing a | Pstrd(rd, a) | Pstrd_a(rd, a) -> fprintf oc " str %a, %a\n" dreg rd addressing a + | Pldps(rd1, rd2, _, _, a) -> + fprintf oc " ldp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a + | Pldpd(rd1, rd2, _, _, a) -> + fprintf oc " ldp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a + | Pstps(rd1, rd2, _, _, a) -> + fprintf oc " stp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a + | Pstpd(rd1, rd2, _, _, a) -> + fprintf oc " stp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a (* Floating-point move *) | Pfmov(rd, r1) -> fprintf oc " fmov %a, %a\n" dreg rd dreg r1 | Pfmovimmd(rd, f) -> let d = camlint64_of_coqint (Floats.Float.to_bits f) in - if is_immediate_float64 d then + if is_immediate_float64 f then fprintf oc " fmov %a, #%.7f\n" dreg rd (Int64.float_of_bits d) else begin let lbl = label_literal64 d in @@ -534,7 +557,7 @@ module Target(System: SYSTEM): TARGET = end | Pfmovimms(rd, f) -> let d = camlint_of_coqint (Floats.Float32.to_bits f) in - if is_immediate_float32 d then + if is_immediate_float32 f then fprintf oc " fmov %a, #%.7f\n" sreg rd (Int32.float_of_bits d) else begin let lbl = label_literal32 d in @@ -596,8 +619,8 @@ module Target(System: SYSTEM): TARGET = | Pfsel(rd, r1, r2, c) -> fprintf oc " fcsel %a, %a, %a, %s\n" dreg rd dreg r1 dreg r2 (condition_name c) (* No-op *) - | Pnop -> - fprintf oc " nop\n" + | Pnop -> () + (*fprintf oc " nop\n"*) (* Pseudo-instructions expanded in Asmexpand *) | Pallocframe(sz, linkofs) -> assert false | Pfreeframe(sz, linkofs) -> assert false @@ -640,6 +663,8 @@ module Target(System: SYSTEM): TARGET = fprintf oc "%s begin inline assembly\n\t" comment; print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res; fprintf oc "%s end inline assembly\n" comment + | EF_profiling (id, coq_kind) -> + print_profiling_logger oc id (Z.to_int coq_kind) | _ -> assert false end @@ -689,7 +714,24 @@ module Target(System: SYSTEM): TARGET = section oc Section_text; end + let aarch64_profiling_stub oc nr_items + profiling_id_table_name + profiling_counter_table_name = + fprintf oc " adrp x2, %s\n" profiling_counter_table_name; + fprintf oc " adrp x1, %s\n" profiling_id_table_name; + fprintf oc " add x2, x2, :lo12:%s\n" profiling_counter_table_name; + fprintf oc " add x1, x1, :lo12:%s\n" profiling_id_table_name; + fprintf oc " mov w0, %d\n" nr_items; + fprintf oc " b %s\n" profiling_write_table_helper ;; + + let print_atexit oc to_be_called = + fprintf oc " adrp x0, %s\n" to_be_called; + fprintf oc " add x0, x0, :lo12:%s\n" to_be_called; + fprintf oc " b atexit\n";; + + let print_epilogue oc = + print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) aarch64_profiling_stub oc; if !Clflags.option_g then begin Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f)); section oc Section_text; diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v index e0d98c85..e6a60d4e 100644 --- a/aarch64/ValueAOp.v +++ b/aarch64/ValueAOp.v @@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omul, v1::v2::nil => mul v1 v2 | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3) | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3) - | Odiv, v1::v2::nil => divs v1 v2 - | Odivu, v1::v2::nil => divu v1 v2 + | Odiv, v1::v2::nil => divs_total v1 v2 + | Odivu, v1::v2::nil => divu_total v1 v2 | Oand, v1::v2::nil => and v1 v2 | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a) | Oandimm n, v1::nil => and v1 (I n) @@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3) | Omullhs, v1::v2::nil => mullhs v1 v2 | Omullhu, v1::v2::nil => mullhu v1 v2 - | Odivl, v1::v2::nil => divls v1 v2 - | Odivlu, v1::v2::nil => divlu v1 v2 + | Odivl, v1::v2::nil => divls_total v1 v2 + | Odivlu, v1::v2::nil => divlu_total v1 v2 | Oandl, v1::v2::nil => andl v1 v2 | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a) | Oandlimm n, v1::nil => andl v1 (L n) @@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Osingleoffloat, v1::nil => singleoffloat v1 | Ofloatofsingle, v1::nil => floatofsingle v1 - | Ointoffloat, v1::nil => intoffloat v1 - | Ointuoffloat, v1::nil => intuoffloat v1 + | Ointoffloat, v1::nil => intoffloat_total v1 + | Ointuoffloat, v1::nil => intuoffloat_total v1 | Ofloatofint, v1::nil => floatofint v1 | Ofloatofintu, v1::nil => floatofintu v1 - | Ointofsingle, v1::nil => intofsingle v1 - | Ointuofsingle, v1::nil => intuofsingle v1 + | Ointofsingle, v1::nil => intofsingle_total v1 + | Ointuofsingle, v1::nil => intuofsingle_total v1 | Osingleofint, v1::nil => singleofint v1 | Osingleofintu, v1::nil => singleofintu v1 - | Olongoffloat, v1::nil => longoffloat v1 - | Olonguoffloat, v1::nil => longuoffloat v1 + | Olongoffloat, v1::nil => longoffloat_total v1 + | Olonguoffloat, v1::nil => longuoffloat_total v1 | Ofloatoflong, v1::nil => floatoflong v1 | Ofloatoflongu, v1::nil => floatoflongu v1 - | Olongofsingle, v1::nil => longofsingle v1 - | Olonguofsingle, v1::nil => longuofsingle v1 + | Olongofsingle, v1::nil => longofsingle_total v1 + | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v index ae0006bc..0401d0fa 100644 --- a/aarch64/extractionMachdep.v +++ b/aarch64/extractionMachdep.v @@ -20,6 +20,9 @@ Require Archi Asm Asmgen SelectOp. (* Archi *) + +Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *) + Extract Constant Archi.abi => "match Configuration.abi with | ""apple"" -> Apple @@ -36,7 +39,4 @@ Extract Constant SelectOp.symbol_is_relocatable => Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false". Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false". - -(* Asmgen *) - -Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned". +Extract Constant Asmblockgen.symbol_is_aligned => "C2C.atom_is_aligned". |