aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Archi.v4
-rw-r--r--aarch64/Asm.v589
-rw-r--r--aarch64/Asmblock.v1049
-rw-r--r--aarch64/Asmblockdeps.v2688
-rw-r--r--aarch64/Asmblockgen.v1252
-rw-r--r--aarch64/Asmblockgenproof.v1547
-rw-r--r--aarch64/Asmblockgenproof0.v885
-rw-r--r--aarch64/Asmblockgenproof1.v1955
-rw-r--r--aarch64/Asmblockprops.v119
-rw-r--r--aarch64/Asmexpand.ml126
-rw-r--r--aarch64/Asmgen.v1451
-rw-r--r--aarch64/Asmgenproof.v3033
-rw-r--r--aarch64/BTL_SEsimplify.v41
-rw-r--r--aarch64/CSE2deps.v35
-rw-r--r--aarch64/CSE2depsproof.v209
-rw-r--r--aarch64/ConstpropOpproof.v121
-rw-r--r--aarch64/DuplicateOpcodeHeuristic.ml41
-rw-r--r--aarch64/ExpansionOracle.ml15
-rw-r--r--aarch64/Machregs.v1
-rw-r--r--aarch64/Machregsaux.ml8
-rw-r--r--aarch64/Machregsaux.mli20
-rw-r--r--aarch64/Op.v256
-rw-r--r--aarch64/OpWeights.ml353
-rw-r--r--aarch64/OpWeightsAsm.ml165
-rw-r--r--aarch64/PeepholeOracle.ml624
-rw-r--r--aarch64/PostpassScheduling.v146
-rw-r--r--aarch64/PostpassSchedulingOracle.ml674
-rw-r--r--aarch64/PostpassSchedulingproof.v482
-rw-r--r--aarch64/PrepassSchedulingOracle.ml297
-rw-r--r--aarch64/PrepassSchedulingOracleDeps.ml17
-rw-r--r--aarch64/SelectLongproof.v33
-rw-r--r--aarch64/SelectOp.vp15
-rw-r--r--aarch64/SelectOpproof.v61
-rw-r--r--aarch64/TargetPrinter.ml108
-rw-r--r--aarch64/ValueAOp.v24
-rw-r--r--aarch64/extractionMachdep.v8
36 files changed, 16001 insertions, 2451 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
index 4911db73..378ca0d1 100644
--- a/aarch64/Archi.v
+++ b/aarch64/Archi.v
@@ -88,6 +88,10 @@ Global Opaque ptr64 big_endian splitlong
(** Which ABI to implement *)
+Parameter pic_code: unit -> bool.
+
+Definition has_notrap_loads := false.
+
Inductive abi_kind: Type :=
| AAPCS64 (**r ARM's standard as used in Linux and other ELF platforms *)
| Apple. (**r the variant used in macOS and iOS *)
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index b5f4c838..e5111220 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -10,13 +10,20 @@
(* *)
(* *********************************************************************)
-(** Abstract syntax and semantics for AArch64 assembly language *)
+(** Abstract syntax and semantics for AArch64 assembly language
+
+W.r.t Asmblock, this is a "flat" syntax and semantics of "aarch64" assembly:
+ - without the basic block structure
+ - without the hierarchy of instructions.
+
+*)
Require Import Coqlib Zbits Maps.
Require Import AST Integers Floats.
Require Import Values Memory Events Globalenvs Smallstep.
Require Import Locations Conventions.
Require Stacklayout.
+Require Import OptionMonad.
(** * Abstract syntax *)
@@ -43,6 +50,9 @@ Coercion RR1: ireg >-> iregsp.
Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
Proof. decide equality. Defined.
+Lemma iregsp_eq: forall (x y: iregsp), {x=y} + {x<>y}.
+Proof. decide equality; apply ireg_eq. Defined.
+
(** In assembly files, [Dn] denotes the low 64-bit of a vector register,
and [Sn] the low 32 bits. *)
@@ -66,21 +76,29 @@ Inductive crbit: Type :=
Lemma crbit_eq: forall (x y: crbit), {x=y} + {x<>y}.
Proof. decide equality. Defined.
+Inductive dreg : Type :=
+ | IR: iregsp -> dreg (**r 64- or 32-bit integer registers *)
+ | FR: freg -> dreg. (**r double- or single-precision float registers *)
+
(** We model the following registers of the ARM architecture. *)
Inductive preg: Type :=
- | IR: ireg -> preg (**r 64- or 32-bit integer registers *)
- | FR: freg -> preg (**r double- or single-precision float registers *)
- | CR: crbit -> preg (**r bits in the condition register *)
- | SP: preg (**r register X31 used as stack pointer *)
- | PC: preg. (**r program counter *)
-
-Coercion IR: ireg >-> preg.
-Coercion FR: freg >-> preg.
+ | DR: dreg -> preg (** see dreg **)
+ | CR: crbit -> preg (**r bits in the condition register *)
+ | PC: preg. (**r program counter *)
+
+(* XXX: ireg no longer coerces to preg *)
+Coercion IR: iregsp >-> dreg.
+Coercion FR: freg >-> dreg.
+Coercion DR: dreg >-> preg.
+Definition SP:preg := XSP.
Coercion CR: crbit >-> preg.
+Lemma dreg_eq: forall (x y: dreg), {x=y} + {x<>y}.
+Proof. decide equality. apply iregsp_eq. apply freg_eq. Defined.
+
Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}.
-Proof. decide equality. apply ireg_eq. apply freg_eq. apply crbit_eq. Defined.
+Proof. decide equality. apply dreg_eq. apply crbit_eq. Defined.
Module PregEq.
Definition t := preg.
@@ -92,8 +110,6 @@ Module Pregmap := EMap(PregEq).
Definition preg_of_iregsp (r: iregsp) : preg :=
match r with RR1 r => IR r | XSP => SP end.
-Coercion preg_of_iregsp: iregsp >-> preg.
-
(** Conventional name for return address ([RA]) *)
Notation "'RA'" := X30 (only parsing) : asm.
@@ -177,14 +193,16 @@ Inductive instruction: Type :=
| Pldrsh (sz: isize) (rd: ireg) (a: addressing) (**r load int16, sign-extend *)
| Pldrzw (rd: ireg) (a: addressing) (**r load int32, zero-extend to int64 *)
| Pldrsw (rd: ireg) (a: addressing) (**r load int32, sign-extend to int64 *)
- | Pldp (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
+ | Pldpw (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int32 *)
+ | Pldpx (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
| Pstrw (rs: ireg) (a: addressing) (**r store int32 *)
| Pstrw_a (rs: ireg) (a: addressing) (**r store int32 as any32 *)
| Pstrx (rs: ireg) (a: addressing) (**r store int64 *)
| Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *)
| Pstrb (rs: ireg) (a: addressing) (**r store int8 *)
| Pstrh (rs: ireg) (a: addressing) (**r store int16 *)
- | Pstp (rs1 rs2: ireg) (a: addressing) (**r store two int64 *)
+ | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int32 *)
+ | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
(** Integer arithmetic, immediate *)
| Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *)
| Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *)
@@ -255,9 +273,13 @@ Inductive instruction: Type :=
| Pldrs (rd: freg) (a: addressing) (**r load float32 (single precision) *)
| Pldrd (rd: freg) (a: addressing) (**r load float64 (double precision) *)
| Pldrd_a (rd: freg) (a: addressing) (**r load float64 as any64 *)
+ | Pldps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float32 *)
+ | Pldpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float64 *)
| Pstrs (rs: freg) (a: addressing) (**r store float32 *)
| Pstrd (rs: freg) (a: addressing) (**r store float64 *)
| Pstrd_a (rs: freg) (a: addressing) (**r store float64 as any64 *)
+ | Pstps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float32 *)
+ | Pstpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float64 *)
(** Floating-point move *)
| Pfmov (rd r1: freg)
| Pfmovimms (rd: freg) (f: float32) (**r load float32 constant *)
@@ -310,9 +332,30 @@ Definition code := list instruction.
Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
Definition fundef := AST.fundef function.
Definition program := AST.program fundef unit.
+Definition genv := Genv.t fundef unit.
+
+(** The two functions below axiomatize how the linker processes
+ symbolic references [symbol + offset]. It computes the
+ difference between the address and the PC, and splits it into:
+ - 12 low bits usable as an offset in an addressing mode;
+ - 21 high bits usable as argument to the ADRP instruction.
+
+ In CompCert's model, we cannot really describe PC-relative addressing,
+ but we can claim that the address of [symbol + offset] decomposes
+ as the sum of
+ - a low part, usable as an offset in an addressing mode;
+ - a high part, usable as argument to the ADRP instruction. *)
+
+Parameter symbol_low: genv -> ident -> ptrofs -> val.
+Parameter symbol_high: genv -> ident -> ptrofs -> val.
+
+Axiom symbol_high_low:
+ forall (ge: genv) (id: ident) (ofs: ptrofs),
+ Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs.
(** * Operational semantics *)
+
(** The semantics operates over a single mapping from registers
(type [preg]) to values. We maintain (but do not enforce)
the convention that integer registers are mapped to values of
@@ -320,22 +363,19 @@ Definition program := AST.program fundef unit.
and condition bits to either [Vzero] or [Vone]. *)
Definition regset := Pregmap.t val.
-Definition genv := Genv.t fundef unit.
(** The value of an [ireg0] is either the value of the integer register,
or 0. *)
-Definition ir0w (rs: regset) (r: ireg0) : val :=
- match r with RR0 r => rs (IR r) | XZR => Vint Int.zero end.
-Definition ir0x (rs: regset) (r: ireg0) : val :=
- match r with RR0 r => rs (IR r) | XZR => Vlong Int64.zero end.
+Definition ir0 (is:isize) (rs: regset) (r: ireg0) : val :=
+ match r with RR0 r => rs (IR r) | XZR => if is (* is W *) then Vint Int.zero else Vlong Int64.zero end.
(** Concise notations for accessing and updating the values of registers. *)
Notation "a # b" := (a b) (at level 1, only parsing) : asm.
Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level) : asm.
-Notation "a ## b" := (ir0w a b) (at level 1, only parsing) : asm.
-Notation "a ### b" := (ir0x a b) (at level 1, only parsing) : asm.
+Notation "a ## b" := (ir0 W a b) (at level 1, only parsing) : asm.
+Notation "a ### b" := (ir0 X a b) (at level 1, only parsing) : asm.
Open Scope asm.
@@ -369,84 +409,16 @@ Fixpoint set_res (res: builtin_res preg) (v: val) (rs: regset) : regset :=
| BR_splitlong hi lo => set_res lo (Val.loword v) (set_res hi (Val.hiword v) rs)
end.
-(** The two functions below axiomatize how the linker processes
- symbolic references [symbol + offset]. It computes the
- difference between the address and the PC, and splits it into:
- - 12 low bits usable as an offset in an addressing mode;
- - 21 high bits usable as argument to the ADRP instruction.
-
- In CompCert's model, we cannot really describe PC-relative addressing,
- but we can claim that the address of [symbol + offset] decomposes
- as the sum of
- - a low part, usable as an offset in an addressing mode;
- - a high part, usable as argument to the ADRP instruction. *)
-
-Parameter symbol_low: genv -> ident -> ptrofs -> val.
-Parameter symbol_high: genv -> ident -> ptrofs -> val.
-
-Axiom symbol_high_low:
- forall (ge: genv) (id: ident) (ofs: ptrofs),
- Val.addl (symbol_high ge id ofs) (symbol_low ge id ofs) = Genv.symbol_address ge id ofs.
-
-Section RELSEM.
-
-Variable ge: genv.
-
-(** Looking up instructions in a code sequence by position. *)
-
-Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction :=
- match c with
- | nil => None
- | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il
- end.
-
-(** Position corresponding to a label *)
-
-Definition is_label (lbl: label) (instr: instruction) : bool :=
- match instr with
- | Plabel lbl' => if peq lbl lbl' then true else false
- | _ => false
- end.
-
-Lemma is_label_correct:
- forall lbl instr,
- if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
-Proof.
- intros. destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence.
-Qed.
-
-Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
- match c with
- | nil => None
- | instr :: c' =>
- if is_label lbl instr then Some (pos + 1) else label_pos lbl (pos + 1) c'
- end.
-
(** The semantics is purely small-step and defined as a function
from the current state (a register set + a memory state)
to either [Next rs' m'] where [rs'] and [m'] are the updated register
set and memory state after execution of the instruction at [rs#PC],
or [Stuck] if the processor is stuck. *)
-Inductive outcome: Type :=
- | Next: regset -> mem -> outcome
- | Stuck: outcome.
-
-(** Manipulations over the [PC] register: continuing with the next
- instruction ([nextinstr]) or branching to a label ([goto_label]). *)
-
-Definition nextinstr (rs: regset) :=
- rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
-
-Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
- match label_pos lbl 0 (fn_code f) with
- | None => Stuck
- | Some pos =>
- match rs#PC with
- | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
- | _ => Stuck
- end
- end.
+Record state: Type := State { _rs: regset; _m: mem }.
+Definition outcome := option state.
+Definition Next rs m: outcome := Some (State rs m).
+Definition Stuck: outcome := None.
(** Testing a condition *)
@@ -516,10 +488,10 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
(** Integer "is zero?" test *)
-Definition eval_testzero (sz: isize) (v: val) (m: mem): option bool :=
+Definition eval_testzero (sz: isize) (v: val): option bool :=
match sz with
- | W => Val.cmpu_bool (Mem.valid_pointer m) Ceq v (Vint Int.zero)
- | X => Val.cmplu_bool (Mem.valid_pointer m) Ceq v (Vlong Int64.zero)
+ | W => Val.mxcmpu_bool Ceq v (Vint Int.zero)
+ | X => Val.mxcmplu_bool Ceq v (Vlong Int64.zero)
end.
(** Integer "bit is set?" test *)
@@ -530,48 +502,19 @@ Definition eval_testbit (sz: isize) (v: val) (n: int): option bool :=
| X => Val.cmpl_bool Cne (Val.andl v (Vlong (Int64.shl' Int64.one n))) (Vlong Int64.zero)
end.
-(** Evaluating an addressing mode *)
-
-Definition eval_addressing (a: addressing) (rs: regset): val :=
- match a with
- | ADimm base n => Val.addl rs#base (Vlong n)
- | ADreg base r => Val.addl rs#base rs#r
- | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
- | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
- | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
- | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
- | ADpostincr base n => Vundef (* not modeled yet *)
- end.
-
-(** Auxiliaries for memory accesses *)
-
-Definition exec_load (chunk: memory_chunk) (transf: val -> val)
- (a: addressing) (r: preg) (rs: regset) (m: mem) :=
- match Mem.loadv chunk m (eval_addressing a rs) with
- | None => Stuck
- | Some v => Next (nextinstr (rs#r <- (transf v))) m
- end.
-
-Definition exec_store (chunk: memory_chunk)
- (a: addressing) (v: val)
- (rs: regset) (m: mem) :=
- match Mem.storev chunk m (eval_addressing a rs) v with
- | None => Stuck
- | Some m' => Next (nextinstr rs) m'
- end.
(** Comparisons *)
-Definition compare_int (rs: regset) (v1 v2: val) (m: mem) :=
+Definition compare_int (rs: regset) (v1 v2: val) :=
rs#CN <- (Val.negative (Val.sub v1 v2))
- #CZ <- (Val.cmpu (Mem.valid_pointer m) Ceq v1 v2)
- #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
+ #CZ <- (Val.mxcmpu Ceq v1 v2)
+ #CC <- (Val.mxcmpu Cge v1 v2)
#CV <- (Val.sub_overflow v1 v2).
-Definition compare_long (rs: regset) (v1 v2: val) (m: mem) :=
+Definition compare_long (rs: regset) (v1 v2: val) :=
rs#CN <- (Val.negativel (Val.subl v1 v2))
- #CZ <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Ceq v1 v2))
- #CC <- (Val.maketotal (Val.cmplu (Mem.valid_pointer m) Cge v1 v2))
+ #CZ <- (Val.mxcmplu Ceq v1 v2)
+ #CC <- (Val.mxcmplu Cge v1 v2)
#CV <- (Val.subl_overflow v1 v2).
(** Semantics of [fcmp] instructions:
@@ -672,19 +615,230 @@ Definition float64_of_bits (v: val): val :=
| _ => Vundef
end.
-(** Execution of a single instruction [i] in initial state
- [rs] and [m]. Return updated state. For instructions
- that correspond to actual AArch64 instructions, the cases are
- straightforward transliterations of the informal descriptions
- given in the ARMv8 reference manuals. For pseudo-instructions,
- refer to the informal descriptions given above.
-
- Note that we set to [Vundef] the registers used as temporaries by
- the expansions of the pseudo-instructions, so that the code we
- generate cannot use those registers to hold values that must
- survive the execution of the pseudo-instruction.
+(* Recognition of FP numbers that are supported by the fmov #imm instructions:
+ "a normalized binary floating point encoding with 1 sign bit,
+ 4 bits of fraction and a 3-bit exponent"
*)
+Definition is_immediate_float64 (f: float): bool :=
+ let bits := Float.to_bits f in
+ let exp :=
+ Int64.sub
+ (Int64.and (Int64.shr' bits (Int.repr 52))
+ (Int64.repr 2047)) (Int64.repr 1023) in
+ let mant :=
+ Int64.and bits (Int64.repr 4503599627370495) in
+ andb (Int64.cmp Cge exp (Int64.repr (-3)))
+ (andb (Int64.cmp Cle exp (Int64.repr 4))
+ (Int64.eq
+ (Int64.and mant
+ (Int64.repr 4222124650659840)) mant)).
+
+Definition is_immediate_float32 (f: float32): bool :=
+ let bits := Float32.to_bits f in
+ let exp :=
+ Int.sub
+ (Int.and (Int.shr bits (Int.repr 23))
+ (Int.repr 255)) (Int.repr 127) in
+ let mant :=
+ Int.and bits (Int.repr 8388607) in
+ andb (Int.cmp Cge exp (Int.repr (-3)))
+ (andb (Int.cmp Cle exp (Int.repr 4))
+ (Int.eq
+ (Int.and mant
+ (Int.repr 7864320)) mant)).
+
+(** Translation of the LTL/Linear/Mach view of machine registers
+ to the AArch64 view. Note that no LTL register maps to [X16],
+ [X18], nor [X30].
+ [X18] is reserved as the platform register and never used by the
+ code generated by CompCert.
+ [X30] is used for the return address, and can also be used as temporary.
+ [X16] can be used as temporary. *)
+
+Definition dreg_of (r: mreg) : dreg :=
+ match r with
+ | R0 => X0 | R1 => X1 | R2 => X2 | R3 => X3
+ | R4 => X4 | R5 => X5 | R6 => X6 | R7 => X7
+ | R8 => X8 | R9 => X9 | R10 => X10 | R11 => X11
+ | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
+ | R17 => X17 | R19 => X19
+ | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
+ | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
+ | R28 => X28 | R29 => X29
+ | F0 => D0 | F1 => D1 | F2 => D2 | F3 => D3
+ | F4 => D4 | F5 => D5 | F6 => D6 | F7 => D7
+ | F8 => D8 | F9 => D9 | F10 => D10 | F11 => D11
+ | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15
+ | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19
+ | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23
+ | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27
+ | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31
+ end.
+
+Definition preg_of (r: mreg) : preg :=
+ dreg_of r.
+
+(** Undefine all registers except SP and callee-save registers *)
+
+Definition undef_caller_save_regs (rs: regset) : regset :=
+ fun r =>
+ if preg_eq r SP
+ || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
+ then rs r
+ else Vundef.
+
+(** Extract the values of the arguments of an external call.
+ We exploit the calling conventions from module [Conventions], except that
+ we use AArch64 registers instead of locations. *)
+
+Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
+ | extcall_arg_reg: forall r,
+ extcall_arg rs m (R r) (rs (preg_of r))
+ | extcall_arg_stack: forall ofs ty bofs v,
+ bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
+ Mem.loadv (chunk_of_type ty) m
+ (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
+ extcall_arg rs m (Locations.S Outgoing ofs ty) v.
+
+Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
+ | extcall_arg_one: forall l v,
+ extcall_arg rs m l v ->
+ extcall_arg_pair rs m (One l) v
+ | extcall_arg_twolong: forall hi lo vhi vlo,
+ extcall_arg rs m hi vhi ->
+ extcall_arg rs m lo vlo ->
+ extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
+
+Definition extcall_arguments
+ (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
+ list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
+
+Definition loc_external_result (sg: signature) : rpair preg :=
+ map_rpair preg_of (loc_result sg).
+
+(** Looking up instructions in a code sequence by position. *)
+
+Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction :=
+ match c with
+ | nil => None
+ | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il
+ end.
+
+(** Position corresponding to a label *)
+
+Definition is_label (lbl: label) (instr: instruction) : bool :=
+ match instr with
+ | Plabel lbl' => if peq lbl lbl' then true else false
+ | _ => false
+ end.
+
+Lemma is_label_correct:
+ forall lbl instr,
+ if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl.
+Proof.
+ intros. destruct instr; simpl; try discriminate. destruct (peq lbl lbl0); congruence.
+Qed.
+
+Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z :=
+ match c with
+ | nil => None
+ | instr :: c' =>
+ if is_label lbl instr then Some pos else label_pos lbl (pos + 1) c'
+ end.
+
+Definition nextinstr (rs: regset) :=
+ rs#PC <- (Val.offset_ptr rs#PC Ptrofs.one).
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
+ match label_pos lbl 0 (fn_code f) with
+ | None => Stuck
+ | Some pos =>
+ match rs#PC with
+ | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
+ | _ => Stuck
+ end
+ end.
+
+Section RELSEM.
+
+Variable ge: genv.
+
+(** Evaluating an addressing mode *)
+
+Definition eval_addressing (a: addressing) (rs: regset): val :=
+ match a with
+ | ADimm base n => Val.addl rs#base (Vlong n)
+ | ADreg base r => Val.addl rs#base rs#r
+ | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
+ | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
+ | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
+ | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
+ | ADpostincr base n => Vundef
+ end.
+
+Definition is_pair_addressing_mode_correct (a: addressing): bool :=
+ match a with
+ | ADimm _ _ => true
+ | _ => false
+ end.
+
+Definition get_offset_addr (a: addressing) (ofs: Z) : addressing :=
+ match a with
+ | ADimm base n => (ADimm base (Int64.add n (Int64.repr ofs)))
+ | _ => a
+ end.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load (chunk: memory_chunk) (transf: val -> val)
+ (a: addressing) (r: preg) (rs: regset) (m: mem) :=
+ match Mem.loadv chunk m (eval_addressing a rs) with
+ | None => Stuck
+ | Some v => Next (nextinstr (rs#r <- (transf v))) m
+ end.
+
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
+ (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
+ if is_pair_addressing_mode_correct a then
+ let addr := (eval_addressing a rs) in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+ let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+ match Mem.loadv chk1 m addr with
+ | None => Stuck
+ | Some v1 =>
+ match Mem.loadv chk2 m addr' with
+ | None => Stuck
+ | Some v2 =>
+ Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m
+ end
+ end
+ else Stuck.
+
+Definition exec_store (chunk: memory_chunk)
+ (a: addressing) (v: val)
+ (rs: regset) (m: mem) :=
+ match Mem.storev chunk m (eval_addressing a rs) v with
+ | None => Stuck
+ | Some m' => Next (nextinstr rs) m'
+ end.
+
+Definition exec_store_double (chk1 chk2: memory_chunk)
+ (a: addressing) (v1 v2: val)
+ (rs: regset) (m: mem) :=
+ if is_pair_addressing_mode_correct a then
+ let addr := (eval_addressing a rs) in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+ let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+ match Mem.storev chk1 m addr v1 with
+ | None => Stuck
+ | Some m' => match Mem.storev chk2 m' addr' v2 with
+ | None => Stuck
+ | Some m'' => Next (nextinstr rs) m''
+ end
+ end
+ else Stuck.
+
Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome :=
match i with
(** Branches *)
@@ -707,13 +861,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pret r =>
Next (rs#PC <- (rs#r)) m
| Pcbnz sz r lbl =>
- match eval_testzero sz rs#r m with
+ match eval_testzero sz rs#r with
| Some true => Next (nextinstr rs) m
| Some false => goto_label f lbl rs m
| None => Stuck
end
| Pcbz sz r lbl =>
- match eval_testzero sz rs#r m with
+ match eval_testzero sz rs#r with
| Some true => goto_label f lbl rs m
| Some false => Next (nextinstr rs) m
| None => Stuck
@@ -781,13 +935,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Psubimm X rd r1 n =>
Next (nextinstr (rs#rd <- (Val.subl rs#r1 (Vlong (Int64.repr n))))) m
| Pcmpimm W r1 n =>
- Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)) m)) m
+ Next (nextinstr (compare_int rs rs#r1 (Vint (Int.repr n)))) m
| Pcmpimm X r1 n =>
- Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)) m)) m
+ Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.repr n)))) m
| Pcmnimm W r1 n =>
- Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))) m)) m
+ Next (nextinstr (compare_int rs rs#r1 (Vint (Int.neg (Int.repr n))))) m
| Pcmnimm X r1 n =>
- Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))) m)) m
+ Next (nextinstr (compare_long rs rs#r1 (Vlong (Int64.neg (Int64.repr n))))) m
(** Move integer register *)
| Pmov rd r1 =>
Next (nextinstr (rs#rd <- (rs#r1))) m
@@ -805,9 +959,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Porrimm X rd r1 n =>
Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Vlong (Int64.repr n))))) m
| Ptstimm W r1 n =>
- Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero) m)) m
+ Next (nextinstr (compare_int rs (Val.and rs#r1 (Vint (Int.repr n))) (Vint Int.zero))) m
| Ptstimm X r1 n =>
- Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero) m)) m
+ Next (nextinstr (compare_long rs (Val.andl rs#r1 (Vlong (Int64.repr n))) (Vlong Int64.zero))) m
(** Move wide immediate *)
| Pmovz W rd n pos =>
Next (nextinstr (rs#rd <- (Vint (Int.repr (Z.shiftl n pos))))) m
@@ -853,22 +1007,22 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Psub X rd r1 r2 s =>
Next (nextinstr (rs#rd <- (Val.subl rs###r1 (eval_shift_op_long rs#r2 s)))) m
| Pcmp W r1 r2 s =>
- Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s) m)) m
+ Next (nextinstr (compare_int rs rs##r1 (eval_shift_op_int rs#r2 s))) m
| Pcmp X r1 r2 s =>
- Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s) m)) m
+ Next (nextinstr (compare_long rs rs###r1 (eval_shift_op_long rs#r2 s))) m
| Pcmn W r1 r2 s =>
- Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)) m)) m
+ Next (nextinstr (compare_int rs rs##r1 (Val.neg (eval_shift_op_int rs#r2 s)))) m
| Pcmn X r1 r2 s =>
- Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)) m)) m
+ Next (nextinstr (compare_long rs rs###r1 (Val.negl (eval_shift_op_long rs#r2 s)))) m
(** Integer arithmetic, extending register *)
| Paddext rd r1 r2 x =>
Next (nextinstr (rs#rd <- (Val.addl rs#r1 (eval_extend rs#r2 x)))) m
| Psubext rd r1 r2 x =>
Next (nextinstr (rs#rd <- (Val.subl rs#r1 (eval_extend rs#r2 x)))) m
| Pcmpext r1 r2 x =>
- Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x) m)) m
+ Next (nextinstr (compare_long rs rs#r1 (eval_extend rs#r2 x))) m
| Pcmnext r1 r2 x =>
- Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)) m)) m
+ Next (nextinstr (compare_long rs rs#r1 (Val.negl (eval_extend rs#r2 x)))) m
(** Logical, shifted register *)
| Pand W rd r1 r2 s =>
Next (nextinstr (rs#rd <- (Val.and rs##r1 (eval_shift_op_int rs#r2 s)))) m
@@ -895,9 +1049,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Porn X rd r1 r2 s =>
Next (nextinstr (rs#rd <- (Val.orl rs###r1 (Val.notl (eval_shift_op_long rs#r2 s))))) m
| Ptst W r1 r2 s =>
- Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero) m)) m
+ Next (nextinstr (compare_int rs (Val.and rs##r1 (eval_shift_op_int rs#r2 s)) (Vint Int.zero))) m
| Ptst X r1 r2 s =>
- Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero) m)) m
+ Next (nextinstr (compare_long rs (Val.andl rs###r1 (eval_shift_op_long rs#r2 s)) (Vlong Int64.zero))) m
(** Variable shifts *)
| Pasrv W rd r1 r2 =>
Next (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2))) m
@@ -969,10 +1123,16 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
(** Floating-point move *)
| Pfmov rd r1 =>
Next (nextinstr (rs#rd <- (rs#r1))) m
- | Pfmovimms rd f =>
- Next (nextinstr (rs#X16 <- Vundef #rd <- (Vsingle f))) m
+ | Pfmovimms rd f =>
+ if is_immediate_float32 f then
+ Next (nextinstr (rs#rd <- (Vsingle f))) m
+ else
+ Next (nextinstr ((rs#rd <- (Vsingle f))#X16 <- Vundef)) m
| Pfmovimmd rd f =>
- Next (nextinstr (rs#X16 <- Vundef #rd <- (Vfloat f))) m
+ if is_immediate_float64 f then
+ Next (nextinstr (rs#rd <- (Vfloat f))) m
+ else
+ Next (nextinstr ((rs#rd <- (Vfloat f))#X16 <- Vundef)) m
| Pfmovi S rd r1 =>
Next (nextinstr (rs#rd <- (float32_of_bits rs##r1))) m
| Pfmovi D rd r1 =>
@@ -1102,10 +1262,27 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| _ => Stuck
end
| Pbuiltin ef args res => Stuck (**r treated specially below *)
+ (** loads and stores pairs int/int64 *)
+ | Pldpw rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pldpx rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pstpw rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pstpx rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ (** loads and stores pairs floating-point *)
+ | Pldps rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pldpd rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pstps rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pstpd rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pnop => Next (nextinstr rs) m
(** The following instructions and directives are not generated directly
by Asmgen, so we do not model them. *)
- | Pldp _ _ _
- | Pstp _ _ _
| Pcls _ _ _
| Pclz _ _ _
| Prev _ _ _
@@ -1118,86 +1295,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pfnmsub _ _ _ _ _
| Pfmax _ _ _ _
| Pfmin _ _ _ _
- | Pnop
| Pcfi_adjust _
| Pcfi_rel_offset _ =>
Stuck
end.
-(** Translation of the LTL/Linear/Mach view of machine registers
- to the AArch64 view. Note that no LTL register maps to [X16],
- [X18], nor [X30].
- [X18] is reserved as the platform register and never used by the
- code generated by CompCert.
- [X30] is used for the return address, and can also be used as temporary.
- [X16] can be used as temporary. *)
-
-Definition preg_of (r: mreg) : preg :=
- match r with
- | R0 => X0 | R1 => X1 | R2 => X2 | R3 => X3
- | R4 => X4 | R5 => X5 | R6 => X6 | R7 => X7
- | R8 => X8 | R9 => X9 | R10 => X10 | R11 => X11
- | R12 => X12 | R13 => X13 | R14 => X14 | R15 => X15
- | R17 => X17 | R19 => X19
- | R20 => X20 | R21 => X21 | R22 => X22 | R23 => X23
- | R24 => X24 | R25 => X25 | R26 => X26 | R27 => X27
- | R28 => X28 | R29 => X29
- | F0 => D0 | F1 => D1 | F2 => D2 | F3 => D3
- | F4 => D4 | F5 => D5 | F6 => D6 | F7 => D7
- | F8 => D8 | F9 => D9 | F10 => D10 | F11 => D11
- | F12 => D12 | F13 => D13 | F14 => D14 | F15 => D15
- | F16 => D16 | F17 => D17 | F18 => D18 | F19 => D19
- | F20 => D20 | F21 => D21 | F22 => D22 | F23 => D23
- | F24 => D24 | F25 => D25 | F26 => D26 | F27 => D27
- | F28 => D28 | F29 => D29 | F30 => D30 | F31 => D31
- end.
-
-(** Undefine all registers except SP and callee-save registers *)
-
-Definition undef_caller_save_regs (rs: regset) : regset :=
- fun r =>
- if preg_eq r SP
- || In_dec preg_eq r (List.map preg_of (List.filter is_callee_save all_mregs))
- then rs r
- else Vundef.
-
-(** Extract the values of the arguments of an external call.
- We exploit the calling conventions from module [Conventions], except that
- we use AArch64 registers instead of locations. *)
-
-Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
- | extcall_arg_reg: forall r,
- extcall_arg rs m (R r) (rs (preg_of r))
- | extcall_arg_stack: forall ofs ty bofs v,
- bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
- Mem.loadv (chunk_of_type ty) m
- (Val.offset_ptr rs#SP (Ptrofs.repr bofs)) = Some v ->
- extcall_arg rs m (Locations.S Outgoing ofs ty) v.
-
-Inductive extcall_arg_pair (rs: regset) (m: mem): rpair loc -> val -> Prop :=
- | extcall_arg_one: forall l v,
- extcall_arg rs m l v ->
- extcall_arg_pair rs m (One l) v
- | extcall_arg_twolong: forall hi lo vhi vlo,
- extcall_arg rs m hi vhi ->
- extcall_arg rs m lo vlo ->
- extcall_arg_pair rs m (Twolong hi lo) (Val.longofwords vhi vlo).
-
-Definition extcall_arguments
- (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
- list_forall2 (extcall_arg_pair rs m) (loc_arguments sg) args.
-
-Definition loc_external_result (sg: signature) : rpair preg :=
- map_rpair preg_of (loc_result sg).
-
-(** Execution of the instruction at [rs#PC]. *)
-
-Inductive state: Type :=
- | State: regset -> mem -> state.
-
Inductive step: state -> trace -> state -> Prop :=
| exec_step_internal:
- forall b ofs f i rs m rs' m',
+ forall b ofs (f:function) i rs m rs' m',
rs PC = Vptr b ofs ->
Genv.find_funct_ptr ge b = Some (Internal f) ->
find_instr (Ptrofs.unsigned ofs) f.(fn_code) = Some i ->
@@ -1212,7 +1317,7 @@ Inductive step: state -> trace -> state -> Prop :=
external_call ef ge vargs m t vres m' ->
rs' = nextinstr
(set_res res vres
- (undef_regs (IR X16 :: IR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
+ (undef_regs (DR X16 :: DR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
step (State rs m) t (State rs' m')
| exec_step_external:
forall b ef args res rs m t rs' m',
@@ -1308,11 +1413,11 @@ Qed.
Definition data_preg (r: preg) : bool :=
match r with
- | IR X16 => false
- | IR X30 => false
- | IR _ => true
- | FR _ => true
+ | DR (IR X16) => false
+ | DR (IR X30) => false
+ | DR (IR _) => true
+ | DR (FR _) => true
| CR _ => false
- | SP => true
+ (* | SP => true; subsumed by IR (iregsp) *)
| PC => false
end.
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
new file mode 100644
index 00000000..073f1f4c
--- /dev/null
+++ b/aarch64/Asmblock.v
@@ -0,0 +1,1049 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Justus Fasse UGA, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+
+(* Asmblock language for aarch64
+
+WORK IN PROGRESS: we want to define an Asmblock syntax, with an Asmblock semantics
+(e.g. we don't need the parallel semantics of Asmvliw)
+
+
+NOTE: this file is inspired from
+ - aarch64/Asm.v
+ - kvx/Asmvliw.v (only the Asmblock syntax)
+ - kvx/Asmblock.v
+*)
+
+
+(** Abstract syntax and semantics for AArch64 assembly language *)
+
+Require Import Coqlib Zbits Maps.
+Require Import AST Integers Floats.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Locations Conventions.
+Require Stacklayout.
+Require Import OptionMonad Asm.
+Require Import Lia.
+Require Export Asm.
+
+Local Open Scope option_monad_scope.
+
+Notation regset := Asm.regset.
+
+(** * Abstract syntax *)
+
+(* First task: splitting the big [instruction] type below into CFI and basic instructions.
+ Actually a finer splitting in order to regroup "similar" instructions could be much better for automation of the scheduler proof!
+ e.g. "similar" means identical "interface" w.r.t. pseudo-registers when translated to AbstractBB,
+ or with a "similar" semantics.
+
+ see example of loads below.
+*)
+
+(** Control Flow instructions
+
+*)
+Inductive cf_instruction : Type :=
+ | Pb (lbl: label) (**r branch *)
+ | Pbc (c: testcond) (lbl: label) (**r conditional branch *)
+ | Pbl (id: ident) (sg: signature) (**r jump to function and link *)
+ | Pbs (id: ident) (sg: signature) (**r jump to function *)
+ | Pblr (r: ireg) (sg: signature) (**r indirect jump and link *)
+ | Pbr (r: ireg) (sg: signature) (**r indirect jump *)
+ | Pret (r: ireg) (**r return *)
+ | Pcbnz (sz: isize) (r: ireg) (lbl: label) (**r branch if not zero *)
+ | Pcbz (sz: isize) (r: ireg) (lbl: label) (**r branch if zero *)
+ | Ptbnz (sz: isize) (r: ireg) (n: int) (lbl: label) (**r branch if bit n is not zero *)
+ | Ptbz (sz: isize) (r: ireg) (n: int) (lbl: label) (**r branch if bit n is zero *)
+ (** Pseudo-instructions *)
+ | Pbtbl (r1: ireg) (tbl: list label) (**r N-way branch through a jump table *)
+ .
+
+(*
+A builtin is considered as a control-flow instruction, because it could emit a trace (cf. Machblock semantics).
+Here, we do not need to have builtins alone in basic-blocks (on the contrary to KVX bundles).
+*)
+
+Inductive control: Type :=
+ | PCtlFlow (i: cf_instruction)
+ (** Pseudo-instructions *)
+ | Pbuiltin (ef: external_function)
+ (args: list (builtin_arg dreg)) (res: builtin_res dreg) (**r built-in function (pseudo) *)
+ .
+
+(** Basic instructions *)
+
+(* Loads waiting for (rd: dreg) (a: addressing)
+ * XXX Use dreg because exec_load is defined in terms of it, thus allowing us to
+ * treat integer and floating point loads the same. *)
+Inductive load_rd_a: Type :=
+ (* Integer load *)
+ | Pldrw (**r load int32 *)
+ | Pldrw_a (**r load int32 as any32 *)
+ | Pldrx (**r load int64 *)
+ | Pldrx_a (**r load int64 as any64 *)
+ | Pldrb (sz: isize) (**r load int8, zero-extend *)
+ | Pldrsb (sz: isize) (**r load int8, sign-extend *)
+ | Pldrh (sz: isize) (**r load int16, zero-extend *)
+ | Pldrsh (sz: isize) (**r load int16, sign-extend *)
+ | Pldrzw (**r load int32, zero-extend to int64 *)
+ | Pldrsw (**r load int32, sign-extend to int64 *)
+ (* Floating-point load *)
+ | Pldrs (**r load float32 (single precision) *)
+ | Pldrd (**r load float64 (double precision) *)
+ | Pldrd_a (**r load float64 as any64 *)
+ .
+
+Inductive load_rd1_rd2_a: Type :=
+ | Pldpw
+ | Pldpx
+ | Pldps
+ | Pldpd
+ .
+
+Inductive ld_instruction: Type :=
+ | PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
+ | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
+ .
+
+Inductive store_rs_a : Type :=
+ (* Integer store *)
+ | Pstrw (**r store int32 *)
+ | Pstrw_a (**r store int32 as any32 *)
+ | Pstrx (**r store int64 *)
+ | Pstrx_a (**r store int64 as any64 *)
+ | Pstrb (**r store int8 *)
+ | Pstrh (**r store int16 *)
+ (* Floating-point store *)
+ | Pstrs (**r store float32 *)
+ | Pstrd (**r store float64 *)
+ | Pstrd_a (**r store float64 as any64 *)
+ .
+
+Inductive store_rs1_rs2_a : Type :=
+ | Pstpw
+ | Pstpx
+ | Pstps
+ | Pstpd
+ .
+
+Inductive st_instruction : Type :=
+ | PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing)
+ | Pstp (st: store_rs1_rs2_a) (rs1 rs2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ .
+
+Inductive arith_p : Type :=
+ (** PC-relative addressing *)
+ | Padrp (id: ident) (ofs: ptrofs) (**r set [rd] to high address of [id + ofs] *)
+ (** Move wide immediate *)
+ | Pmovz (sz: isize) (n: Z) (pos: Z) (**r move [n << pos] to [rd] *)
+ | Pmovn (sz: isize) (n: Z) (pos: Z) (**r move [NOT(n << pos)] to [rd] *)
+ (** Floating-point move *)
+ | Pfmovimms (f: float32) (**r load float32 constant *)
+ | Pfmovimmd (f: float) (**r load float64 constant *)
+.
+
+Inductive arith_comparison_p : Type :=
+ (** Floating-point comparison *)
+ | Pfcmp0 (sz: fsize) (**r compare [r1] and [+0.0] *)
+ (** Integer arithmetic, immediate *)
+ | Pcmpimm (sz: isize) (n: Z) (**r compare *)
+ | Pcmnimm (sz: isize) (n: Z) (**r compare negative *)
+ (** Logical, immediate *)
+ | Ptstimm (sz: isize) (n: Z) (**r and, then set flags *)
+.
+
+Inductive arith_pp : Type :=
+ (** Move integer register *)
+ | Pmov
+ (** Move wide immediate *)
+ (* XXX: has to have the same register supplied both times *)
+ | Pmovk (sz: isize) (n: Z) (pos: Z) (**r insert 16 bits of [n] at [pos] in rd *)
+ (** PC-relative addressing *)
+ | Paddadr (id: ident) (ofs: ptrofs) (**r add the low address of [id + ofs] *)
+ (** Bit-field operations *)
+ | Psbfiz (sz: isize) (r: int) (s: Z) (**r sign extend and shift left *)
+ | Psbfx (sz: isize) (r: int) (s: Z) (**r shift right and sign extend *)
+ | Pubfiz (sz: isize) (r: int) (s: Z) (**r zero extend and shift left *)
+ | Pubfx (sz: isize) (r: int) (s: Z) (**r shift right and zero extend *)
+(* Bit operations are not used in the aarch64/asm semantics
+ (** Bit operations *)
+ | Pcls (sz: isize) (**r count leading sign bits *)
+ | Pclz (sz: isize) (**r count leading zero bits *)
+ | Prev (sz: isize) (**r reverse bytes *)
+ | Prev16 (sz: isize) (**r reverse bytes in each 16-bit word *)
+*)
+ (** Floating-point move *)
+ | Pfmov
+ (** Floating-point conversions *)
+ | Pfcvtds (**r convert float32 to float64 *)
+ | Pfcvtsd (**r convert float64 to float32 *)
+ (** Floating-point arithmetic *)
+ | Pfabs (sz: fsize) (**r absolute value *)
+ | Pfneg (sz: fsize) (**r negation *)
+ (* Pfsqrt is not used in the semantics of aarch64/asm
+ | Pfsqrt (sz: fsize) (**r square root *) *)
+ (** Floating-point conversions *)
+ | Pscvtf (fsz: fsize) (isz: isize) (**r convert signed int to float *)
+ | Pucvtf (fsz: fsize) (isz: isize) (**r convert unsigned int to float *)
+ | Pfcvtzs (isz: isize) (fsz: fsize) (**r convert float to signed int *)
+ | Pfcvtzu (isz: isize) (fsz: fsize) (**r convert float to unsigned int *)
+ (** Integer arithmetic, immediate *)
+ | Paddimm (sz: isize) (n: Z) (**r addition *)
+ | Psubimm (sz: isize) (n: Z) (**r subtraction *)
+.
+
+Inductive arith_comparison_r0r : Type :=
+ (** Integer arithmetic, shifted register *)
+ | Pcmp (is:isize) (s: shift_op) (**r compare *)
+ | Pcmn (is:isize) (s: shift_op) (**r compare negative *)
+ (** Logical, shifted register *)
+ | Ptst (is:isize) (s: shift_op) (**r and, then set flags *)
+.
+
+Inductive arith_comparison_pp : Type :=
+ (** Integer arithmetic, extending register *)
+ | Pcmpext (x: extend_op) (**r int64-int32 cmp *)
+ | Pcmnext (x: extend_op) (**r int64-int32 cmn *)
+ (** Floating-point comparison *)
+ | Pfcmp (sz: fsize) (**r compare [r1] and [r2] *)
+.
+
+Inductive arith_ppp : Type :=
+ (** Variable shifts *)
+ | Pasrv (sz: isize) (**r arithmetic right shift *)
+ | Plslv (sz: isize) (**r left shift *)
+ | Plsrv (sz: isize) (**r logical right shift *)
+ | Prorv (sz: isize) (**r rotate right *)
+ (** Integer multiply/divide *)
+ | Psmulh (**r signed multiply high *)
+ | Pumulh (**r unsigned multiply high *)
+ | Psdiv (sz: isize) (**r signed division *)
+ | Pudiv (sz: isize) (**r unsigned division *)
+ (** Integer arithmetic, extending register *)
+ | Paddext (x: extend_op) (**r int64-int32 add *)
+ | Psubext (x: extend_op) (**r int64-int32 sub *)
+ (** Floating-point arithmetic *)
+ | Pfadd (sz: fsize) (**r addition *)
+ | Pfdiv (sz: fsize) (**r division *)
+ | Pfmul (sz: fsize) (**r multiplication *)
+ | Pfsub (sz: fsize) (**r subtraction *)
+.
+
+Inductive arith_rr0r : Type :=
+ (** Integer arithmetic, shifted register *)
+ | Padd (sz:isize) (s: shift_op) (**r addition *)
+ | Psub (sz:isize) (s: shift_op) (**r subtraction *)
+ (** Logical, shifted register *)
+ | Pand (sz:isize) (s: shift_op) (**r and *)
+ | Pbic (sz:isize) (s: shift_op) (**r and-not *)
+ | Peon (sz:isize) (s: shift_op) (**r xor-not *)
+ | Peor (sz:isize) (s: shift_op) (**r xor *)
+ | Porr (sz:isize) (s: shift_op) (**r or *)
+ | Porn (sz:isize) (s: shift_op) (**r or-not *)
+.
+
+
+Inductive arith_rr0 : Type :=
+ (** Logical, immediate *)
+ | Pandimm (sz: isize) (n: Z) (**r and *)
+ | Peorimm (sz: isize) (n: Z) (**r xor *)
+ | Porrimm (sz: isize) (n: Z) (**r or *)
+.
+
+Inductive arith_arrrr0 : Type :=
+ (** Integer multiply/divide *)
+ | Pmadd (sz: isize) (**r multiply-add *)
+ | Pmsub (sz: isize) (**r multiply-sub *)
+.
+
+(* Currently not used by the semantics of aarch64/Asm
+ * Inductive arith_apppp : Type :=
+ * (** Floating-point arithmetic *)
+ * | Pfmadd (sz: fsize) (**r [rd = r3 + r1 * r2] *)
+ * | Pfmsub (sz: fsize) (**r [rd = r3 - r1 * r2] *)
+ * .
+
+ * Inductive arith_aapppp : Type :=
+ * (** Floating-point arithmetic *)
+ * | Pfnmadd (sz: fsize) (**r [rd = - r3 - r1 * r2] *)
+ * | Pfnmsub (sz: fsize) (**r [rd = - r3 + r1 * r2] *)
+ * . *)
+
+(* Notes on the naming scheme used here:
+ * R: ireg
+ * R0: ireg0
+ * Rsp: iregsp
+ * F: freg
+ * W/X: Occur in conjunction with R0, says whether an ireg0 should be evaluated
+ * as W regsiter (32 bit) or X register (64 bit)
+ * S/D: Used for completeness sake. Only used for copying an integer register
+ * to a floating point register. Could be removed.
+ * A: These instructions perform an additional arithmetic operation
+ XXX Does this interpretation match the use in kvx/Asmvliw?
+ * Comparison: For these instructions the first register is not the target.
+ * Instead, the condition register is mutated.
+ *)
+Inductive ar_instruction : Type :=
+ | PArithP (i : arith_p) (rd : dreg)
+ | PArithPP (i : arith_pp) (rd rs : dreg)
+ | PArithPPP (i : arith_ppp) (rd r1 r2 : dreg)
+ | PArithRR0R (i : arith_rr0r) (rd : ireg) (r1 : ireg0) (r2 : ireg)
+ | PArithRR0 (i : arith_rr0) (rd : ireg) (r1 : ireg0)
+ | PArithARRRR0 (i : arith_arrrr0) (rd r1 r2 : ireg) (r3 : ireg0)
+ (* Pfmadd and Pfmsub are currently not used by the semantics of aarch64/Asm
+ | PArithAPPPP (i : arith_apppp) (rd r1 r2 r3 : preg) *)
+ (* Pfnmadd and Pfnmsub are currently not used by the semantics of aarch64/Asm
+ | PArithAAPPPP (i : arith_aapppp) (rd r1 r2 r3 : preg) *)
+ | PArithComparisonPP (i : arith_comparison_pp) (r1 r2 : dreg)
+ | PArithComparisonR0R (i : arith_comparison_r0r) (r1 : ireg0) (r2 : ireg)
+ | PArithComparisonP (i : arith_comparison_p) (r1 : dreg)
+ (* Instructions without indirection sine they would be the only ones *)
+ (* PArithCP: Pcsetm is commented out by aarch64/Asm, so Pcset is alone *)
+ | Pcset (rd : ireg) (c : testcond) (**r set to 1/0 if cond is true/false *)
+ (* PArithFR0 *)
+ | Pfmovi (fsz : fsize) (rd : freg) (r1 : ireg0) (**r copy int reg to FP reg *)
+ (* PArithCPPP *)
+ | Pcsel (rd r1 r2 : dreg) (c : testcond) (**r int/float conditional move *)
+ (* PArithAFFF *)
+ | Pfnmul (fsz : fsize) (rd r1 r2 : freg) (**r multiply-negate *)
+.
+
+Inductive basic : Type :=
+ | PArith (i: ar_instruction)
+ | PLoad (ld: ld_instruction)
+ | PStore (st: st_instruction)
+ | Pallocframe (sz: Z) (linkofs: ptrofs) (**r allocate new stack frame *)
+ | Pfreeframe (sz: Z) (linkofs: ptrofs) (**r deallocate stack frame and restore previous frame *)
+ | Ploadsymbol (rd: ireg) (id: ident) (**r load the address of [id] *)
+ | Pcvtsw2x (rd: ireg) (r1: ireg) (**r sign-extend 32-bit int to 64-bit *)
+ | Pcvtuw2x (rd: ireg) (r1: ireg) (**r zero-extend 32-bit int to 64-bit *)
+ | Pcvtx2w (rd: ireg) (**r retype a 64-bit int as a 32-bit int *)
+ | Pnop (**r no operation *)
+(* NOT USED IN THE SEMANTICS !
+ | Pcfi_adjust (ofs: int) (**r .cfi_adjust debug directive *)
+ | Pcfi_rel_offset (ofs: int) (**r .cfi_rel_offset debug directive *)
+*)
+.
+
+Coercion PCtlFlow: cf_instruction >-> control.
+Coercion PLoad: ld_instruction >-> basic.
+Coercion PStore : st_instruction >-> basic.
+Coercion PArithP: arith_p >-> Funclass.
+Coercion PArithPP: arith_pp >-> Funclass.
+Coercion PArithPPP: arith_ppp >-> Funclass.
+Coercion PArithRR0: arith_rr0 >-> Funclass.
+Coercion PArithRR0R: arith_rr0r >-> Funclass.
+Coercion PArithARRRR0: arith_arrrr0 >-> Funclass.
+Coercion PArithComparisonP: arith_comparison_p >-> Funclass.
+Coercion PArithComparisonPP: arith_comparison_pp >-> Funclass.
+Coercion PArithComparisonR0R: arith_comparison_r0r >-> Funclass.
+Coercion PArith: ar_instruction >-> basic.
+
+
+(* Not used in Coq, declared in ocaml directly in PostpassSchedulingOracle.ml
+Inductive instruction : Type :=
+ | PBasic (i: basic)
+ | PControl (i: control).
+
+Coercion PBasic: basic >-> instruction.
+Coercion PControl: control >-> instruction. *)
+
+(** * Definition of a bblock
+
+A bblock must contain at least one instruction.
+
+This choice simplifies the definition of [find_bblock] below:
+indeed, each address of a code block identifies at most one bblock.
+*)
+
+Definition non_empty_body (body: list basic): bool :=
+ match body with
+ | nil => false
+ | _ => true
+ end.
+
+Definition non_empty_exit (exit: option control): bool :=
+ match exit with
+ | None => false
+ | _ => true
+ end.
+
+Definition non_empty_bblockb (body: list basic) (exit: option control): bool := non_empty_body body || non_empty_exit exit.
+
+(** A bblock is well-formed if he contains at least one instruction. *)
+
+Record bblock := mk_bblock {
+ header: list label;
+ body: list basic;
+ exit: option control;
+ correct: Is_true (non_empty_bblockb body exit)
+}.
+
+(* FIXME? redundant with definition in Machblock *)
+Definition length_opt {A} (o: option A) : nat :=
+ match o with
+ | Some o => 1
+ | None => 0
+ end.
+
+Program Definition no_header (bb : bblock) := {| header := nil; body := body bb; exit := exit bb |}.
+Next Obligation.
+ destruct bb; cbn. assumption.
+Defined.
+
+Program Definition stick_header (h : list label) (bb : bblock) := {| header := h; body := body bb; exit := exit bb |}.
+Next Obligation.
+ destruct bb; cbn. assumption.
+Defined.
+
+(* This notion of size induces the notion of "valid" code address given by [find_bblock]
+
+ The result is in Z to be compatible with operations on PC.
+*)
+Definition size (b:bblock): Z := Z.of_nat (length (header b) + length (body b) + length_opt (exit b)).
+
+Definition bblocks := list bblock.
+
+Fixpoint size_blocks (l: bblocks): Z :=
+ match l with
+ | nil => 0
+ | b :: l =>
+ (size b) + (size_blocks l)
+ end
+ .
+
+Lemma to_nat_pos : forall z:Z, (Z.to_nat z > 0)%nat -> z > 0.
+Proof.
+ intros. destruct z; auto.
+ - contradict H. cbn. apply gt_irrefl.
+ - apply Zgt_pos_0.
+ - contradict H. cbn. apply gt_irrefl.
+Qed.
+
+Lemma size_positive (b:bblock): size b > 0.
+Proof.
+ unfold size. destruct b as [hd bdy ex cor]. cbn.
+ destruct ex; destruct bdy; try (apply to_nat_pos; rewrite Nat2Z.id; cbn; lia);
+ unfold non_empty_bblockb in cor; simpl in cor.
+ inversion cor.
+Qed.
+
+Record function : Type := mkfunction { fn_sig: signature; fn_blocks: bblocks }.
+Definition fundef := AST.fundef function.
+Definition program := AST.program fundef unit.
+
+(** * Operational semantics *)
+
+(** See "Parameters" of the same names in Asm.v *)
+Record aarch64_linker: Type := {
+ symbol_low: ident -> ptrofs -> val;
+ symbol_high: ident -> ptrofs -> val
+}.
+
+Definition genv := Genv.t fundef unit.
+
+Section RELSEM.
+
+Variable lk: aarch64_linker.
+Variable ge: genv.
+
+(** Evaluating an addressing mode *)
+
+Definition eval_addressing (a: addressing) (rs: regset): val :=
+ match a with
+ | ADimm base n => Val.addl rs#base (Vlong n)
+ | ADreg base r => Val.addl rs#base rs#r
+ | ADlsl base r n => Val.addl rs#base (Val.shll rs#r (Vint n))
+ | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
+ | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
+ | ADadr base id ofs => Val.addl rs#base (symbol_low lk id ofs)
+ | ADpostincr base n => Vundef
+ end.
+
+(** Auxiliaries for memory accesses *)
+
+Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val)
+ (a: addressing) (r: dreg) (rs: regset) (m: mem) :=
+ SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN
+ Next (rs#r <- (transf v)) m.
+
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
+ (a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) :=
+ if is_pair_addressing_mode_correct a then
+ let addr := (eval_addressing a rs) in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+ let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+ match Mem.loadv chk1 m addr with
+ | None => Stuck
+ | Some v1 =>
+ match Mem.loadv chk2 m addr' with
+ | None => Stuck
+ | Some v2 =>
+ Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m
+ end
+ end
+ else Stuck.
+
+Definition exec_store_rs_a (chunk: memory_chunk)
+ (a: addressing) (v: val)
+ (rs: regset) (m: mem) :=
+ SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN
+ Next rs m'.
+
+Definition exec_store_double (chk1 chk2: memory_chunk)
+ (a: addressing) (v1 v2: val)
+ (rs: regset) (m: mem) :=
+ if is_pair_addressing_mode_correct a then
+ let addr := (eval_addressing a rs) in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
+ let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
+ match Mem.storev chk1 m addr v1 with
+ | None => Stuck
+ | Some m' => match Mem.storev chk2 m' addr' v2 with
+ | None => Stuck
+ | Some m'' => Next rs m''
+ end
+ end
+ else Stuck.
+
+(** execution of loads
+*)
+
+Definition chunk_load (ld: load_rd_a): memory_chunk :=
+ match ld with
+ | Pldrw => Mint32
+ | Pldrw_a => Many32
+ | Pldrx => Mint64
+ | Pldrx_a => Many64
+ | Pldrb _ => Mint8unsigned
+ | Pldrsb _ => Mint8signed
+ | Pldrh _ => Mint16unsigned
+ | Pldrsh _ => Mint16signed
+ | Pldrzw => Mint32
+ | Pldrsw => Mint32
+ | Pldrs => Mfloat32
+ | Pldrd => Mfloat64
+ | Pldrd_a => Many64
+ end.
+
+Definition chunk_store (st: store_rs_a) : memory_chunk :=
+ match st with
+ | Pstrw => Mint32
+ | Pstrw_a => Many32
+ | Pstrx => Mint64
+ | Pstrx_a => Many64
+ | Pstrb => Mint8unsigned
+ | Pstrh => Mint16unsigned
+ | Pstrs => Mfloat32
+ | Pstrd => Mfloat64
+ | Pstrd_a => Many64
+ end.
+
+Definition interp_load (ld: load_rd_a): val -> val :=
+ match ld with
+ | Pldrb X => Val.longofintu
+ | Pldrsb X => Val.longofint
+ | Pldrh X => Val.longofintu
+ | Pldrsh X => Val.longofint
+ | Pldrzw => Val.longofintu
+ | Pldrsw => Val.longofint
+ (* Changed to exhaustive list because I tend to forgot all the places I need
+ * to check when changing things. *)
+ | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W
+ | Pldrw | Pldrw_a | Pldrx
+ | Pldrx_a | Pldrs | Pldrd
+ | Pldrd_a => fun v => v
+ end.
+
+Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) :=
+ match ldi with
+ | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ld) (interp_load ld) a rd rs m
+ | Pldp ld rd1 rd2 chk1 chk2 a => exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ end.
+
+Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) :=
+ match sti with
+ | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store st) a rs#rsr rs m
+ | Pstp st rs1 rs2 chk1 chk2 a => exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ end.
+
+(** TODO: redundant w.r.t Machblock ?? *)
+Lemma in_dec (lbl: label) (l: list label): { List.In lbl l } + { ~(List.In lbl l) }.
+Proof.
+ apply List.in_dec.
+ apply Pos.eq_dec.
+Qed.
+
+(** Note: copy-paste from Machblock *)
+Definition is_label (lbl: label) (bb: bblock) : bool :=
+ if in_dec lbl (header bb) then true else false.
+
+Lemma is_label_correct_true lbl bb:
+ List.In lbl (header bb) <-> is_label lbl bb = true.
+Proof.
+ unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+Qed.
+
+Lemma is_label_correct_false lbl bb:
+ ~(List.In lbl (header bb)) <-> is_label lbl bb = false.
+Proof.
+ unfold is_label; destruct (in_dec lbl (header bb)); simpl; intuition.
+Qed.
+
+(** convert a label into a position in the code *)
+Fixpoint label_pos (lbl: label) (pos: Z) (lb: bblocks) {struct lb} : option Z :=
+ match lb with
+ | nil => None
+ | b :: lb' => if is_label lbl b then Some pos else label_pos lbl (pos + (size b)) lb'
+ end.
+
+Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
+ SOME pos <- label_pos lbl 0 (fn_blocks f) IN
+ match rs#PC with
+ | Vptr b ofs => Next (rs#PC <- (Vptr b (Ptrofs.repr pos))) m
+ | _ => Stuck
+ end.
+
+(** Evaluating a branch
+
+Warning: PC is assumed to be already pointing on the next instruction !
+
+*)
+
+Definition eval_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) :=
+ SOME res <- ores IN
+ if res then goto_label f lbl rs m else Next rs m.
+
+Definition eval_neg_branch (f: function) (lbl: label) (rs: regset) (m: mem) (ores: option bool) :=
+ SOME res <- ores IN
+ if res then Next rs m else goto_label f lbl rs m.
+
+Definition exec_cfi (f: function) (cfi: cf_instruction) (rs: regset) (m: mem) : outcome :=
+ match cfi with
+ (** Branches *)
+ | Pb lbl =>
+ goto_label f lbl rs m
+ | Pbc cond lbl =>
+ eval_branch f lbl rs m (eval_testcond cond rs)
+ | Pbl id sg =>
+ Next (rs#RA <- (rs#PC) #PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+ | Pbs id sg =>
+ Next (rs#PC <- (Genv.symbol_address ge id Ptrofs.zero)) m
+ | Pblr r sg =>
+ Next (rs#RA <- (rs#PC) #PC <- (rs#r)) m
+ | Pbr r sg =>
+ Next (rs#PC <- (rs#r)) m
+ | Pret r =>
+ Next (rs#PC <- (rs#r)) m
+ | Pcbnz sz r lbl =>
+ eval_neg_branch f lbl rs m (eval_testzero sz rs#r)
+ | Pcbz sz r lbl =>
+ eval_branch f lbl rs m (eval_testzero sz rs#r)
+ | Ptbnz sz r n lbl =>
+ eval_branch f lbl rs m (eval_testbit sz rs#r n)
+ | Ptbz sz r n lbl =>
+ eval_neg_branch f lbl rs m (eval_testbit sz rs#r n)
+ (** Pseudo-instructions *)
+ | Pbtbl r tbl =>
+ match (rs#X16 <- Vundef)#r with
+ | Vint n =>
+ SOME lbl <- list_nth_z tbl (Int.unsigned n) IN
+ goto_label f lbl (rs#X16 <- Vundef) m
+ | _ => Stuck
+ end
+ end.
+
+Definition arith_eval_p (i : arith_p) : val :=
+ match i with
+ | Padrp id ofs => symbol_high lk id ofs
+ (** Move wide immediate *)
+ | Pmovz W n pos => Vint (Int.repr (Z.shiftl n pos))
+ | Pmovz X n pos => Vlong (Int64.repr (Z.shiftl n pos))
+ | Pmovn W n pos => Vint (Int.repr (Z.lnot (Z.shiftl n pos)))
+ | Pmovn X n pos => Vlong (Int64.repr (Z.lnot (Z.shiftl n pos)))
+ (** Floating-point move *)
+ | Pfmovimms f => Vsingle f
+ | Pfmovimmd f => Vfloat f
+ end.
+
+Definition destroy_X16 (i : arith_p) : bool :=
+ match i with
+ | Pfmovimms d => negb (is_immediate_float32 d)
+ | Pfmovimmd d => negb (is_immediate_float64 d)
+ | _ => false
+ end.
+
+Definition if_opt_bool_val (c: option bool) v1 v2: val :=
+ match c with
+ | Some true => v1
+ | Some false => v2
+ | None => Vundef
+ end.
+
+Definition arith_eval_pp i v :=
+ match i with
+ | Pmov => v
+ | Pmovk W n pos => insert_in_int v n pos 16
+ | Pmovk X n pos => insert_in_long v n pos 16
+ | Paddadr id ofs => Val.addl v (symbol_low lk id ofs)
+ | Psbfiz W r s => Val.shl (Val.sign_ext s v) (Vint r)
+ | Psbfiz X r s => Val.shll (Val.sign_ext_l s v) (Vint r)
+ | Psbfx W r s => Val.sign_ext s (Val.shr v (Vint r))
+ | Psbfx X r s => Val.sign_ext_l s (Val.shrl v (Vint r))
+ | Pubfiz W r s => Val.shl (Val.zero_ext s v) (Vint r)
+ | Pubfiz X r s => Val.shll (Val.zero_ext_l s v) (Vint r)
+ | Pubfx W r s => Val.zero_ext s (Val.shru v (Vint r))
+ | Pubfx X r s => Val.zero_ext_l s (Val.shrlu v (Vint r))
+ | Pfmov => v
+ | Pfcvtds => Val.floatofsingle v
+ | Pfcvtsd => Val.singleoffloat v
+ | Pfabs S => Val.absfs v
+ | Pfabs D => Val.absf v
+ | Pfneg S => Val.negfs v
+ | Pfneg D => Val.negf v
+ | Pfcvtzs W S => Val.maketotal (Val.intofsingle v)
+ | Pfcvtzs W D => Val.maketotal (Val.intoffloat v)
+ | Pfcvtzs X S => Val.maketotal (Val.longofsingle v)
+ | Pfcvtzs X D => Val.maketotal (Val.longoffloat v)
+ | Pfcvtzu W S => Val.maketotal (Val.intuofsingle v)
+ | Pfcvtzu W D => Val.maketotal (Val.intuoffloat v)
+ | Pfcvtzu X S => Val.maketotal (Val.longuofsingle v)
+ | Pfcvtzu X D => Val.maketotal (Val.longuoffloat v)
+ | Paddimm W n => Val.add v (Vint (Int.repr n))
+ | Paddimm X n => Val.addl v (Vlong (Int64.repr n))
+ | Psubimm W n => Val.sub v (Vint (Int.repr n))
+ | Psubimm X n => Val.subl v (Vlong (Int64.repr n))
+ | Pscvtf S W => Val.maketotal (Val.singleofint v)
+ | Pscvtf D W => Val.maketotal (Val.floatofint v)
+ | Pscvtf S X => Val.maketotal (Val.singleoflong v)
+ | Pscvtf D X => Val.maketotal (Val.floatoflong v)
+ | Pucvtf S W => Val.maketotal (Val.singleofintu v)
+ | Pucvtf D W => Val.maketotal (Val.floatofintu v)
+ | Pucvtf S X => Val.maketotal (Val.singleoflongu v)
+ | Pucvtf D X => Val.maketotal (Val.floatoflongu v)
+ end.
+
+Definition arith_eval_ppp i v1 v2 :=
+ match i with
+ | Pasrv W => Val.shr v1 v2
+ | Pasrv X => Val.shrl v1 v2
+ | Plslv W => Val.shl v1 v2
+ | Plslv X => Val.shll v1 v2
+ | Plsrv W => Val.shru v1 v2
+ | Plsrv X => Val.shrlu v1 v2
+ | Prorv W => Val.ror v1 v2
+ | Prorv X => Val.rorl v1 v2
+ | Psmulh => Val.mullhs v1 v2
+ | Pumulh => Val.mullhu v1 v2
+ | Psdiv W => Val.maketotal (Val.divs v1 v2)
+ | Psdiv X => Val.maketotal (Val.divls v1 v2)
+ | Pudiv W => Val.maketotal (Val.divu v1 v2)
+ | Pudiv X => Val.maketotal (Val.divlu v1 v2)
+ | Paddext x => Val.addl v1 (eval_extend v2 x)
+ | Psubext x => Val.subl v1 (eval_extend v2 x)
+ | Pfadd S => Val.addfs v1 v2
+ | Pfadd D => Val.addf v1 v2
+ | Pfdiv S => Val.divfs v1 v2
+ | Pfdiv D => Val.divf v1 v2
+ | Pfmul S => Val.mulfs v1 v2
+ | Pfmul D => Val.mulf v1 v2
+ | Pfsub S => Val.subfs v1 v2
+ | Pfsub D => Val.subf v1 v2
+ end.
+
+Definition arith_rr0r_isize (i: arith_rr0r) :=
+ match i with
+ | Padd is _ => is
+ | Psub is _ => is
+ | Pand is _ => is
+ | Pbic is _ => is
+ | Peon is _ => is
+ | Peor is _ => is
+ | Porr is _ => is
+ | Porn is _ => is
+ end.
+
+(* obtain v1 by [ir0 (arith_rr0r_isize i) rs s1] *)
+Definition arith_eval_rr0r i v1 v2 :=
+ match i with
+ | Padd W s => Val.add v1 (eval_shift_op_int v2 s)
+ | Padd X s => Val.addl v1 (eval_shift_op_long v2 s)
+ | Psub W s => Val.sub v1 (eval_shift_op_int v2 s)
+ | Psub X s => Val.subl v1 (eval_shift_op_long v2 s)
+ | Pand W s => Val.and v1 (eval_shift_op_int v2 s)
+ | Pand X s => Val.andl v1 (eval_shift_op_long v2 s)
+ | Pbic W s => Val.and v1 (Val.notint (eval_shift_op_int v2 s))
+ | Pbic X s => Val.andl v1 (Val.notl (eval_shift_op_long v2 s))
+ | Peon W s => Val.xor v1 (Val.notint (eval_shift_op_int v2 s))
+ | Peon X s => Val.xorl v1 (Val.notl (eval_shift_op_long v2 s))
+ | Peor W s => Val.xor v1 (eval_shift_op_int v2 s)
+ | Peor X s => Val.xorl v1 (eval_shift_op_long v2 s)
+ | Porr W s => Val.or v1 (eval_shift_op_int v2 s)
+ | Porr X s => Val.orl v1 (eval_shift_op_long v2 s)
+ | Porn W s => Val.or v1 (Val.notint (eval_shift_op_int v2 s))
+ | Porn X s => Val.orl v1 (Val.notl (eval_shift_op_long v2 s))
+ end.
+
+Definition arith_rr0_isize (i : arith_rr0) :=
+ match i with
+ | Pandimm is _ | Peorimm is _ | Porrimm is _ => is
+ end.
+
+(* obtain v by [ir0 (arith_rr0_isize i) rs s] *)
+Definition arith_eval_rr0 i v :=
+ match i with
+ | Pandimm W n => Val.and v (Vint (Int.repr n))
+ | Pandimm X n => Val.andl v (Vlong (Int64.repr n))
+ | Peorimm W n => Val.xor v (Vint (Int.repr n))
+ | Peorimm X n => Val.xorl v (Vlong (Int64.repr n))
+ | Porrimm W n => Val.or v (Vint (Int.repr n))
+ | Porrimm X n => Val.orl v (Vlong (Int64.repr n))
+ end.
+
+Definition arith_arrrr0_isize (i : arith_arrrr0) :=
+ match i with
+ | Pmadd is | Pmsub is => is
+ end.
+
+(* obtain v3 by [ir0 (arith_arrrr0_isize i) rs s3] *)
+Definition arith_eval_arrrr0 i v1 v2 v3 :=
+ match i with
+ | Pmadd W => Val.add v3 (Val.mul v1 v2)
+ | Pmadd X => Val.addl v3 (Val.mull v1 v2)
+ | Pmsub W => Val.sub v3 (Val.mul v1 v2)
+ | Pmsub X => Val.subl v3 (Val.mull v1 v2)
+ end.
+
+Definition arith_prepare_comparison_pp i (v1 v2 : val) :=
+ match i with
+ | Pcmpext x => (v1, (eval_extend v2 x))
+ | Pcmnext x => (v1, (Val.negl (eval_extend v2 x)))
+ | Pfcmp _ => (v1, v2)
+ end.
+
+Definition arith_comparison_r0r_isize i :=
+ match i with
+ | Pcmp is _ => is
+ | Pcmn is _ => is
+ | Ptst is _ => is
+ end.
+
+Definition arith_prepare_comparison_r0r i v1 v2 :=
+ match i with
+ | Pcmp W s => (v1, (eval_shift_op_int v2 s))
+ | Pcmp X s => (v1, (eval_shift_op_long v2 s))
+ | Pcmn W s => (v1, (Val.neg (eval_shift_op_int v2 s)))
+ | Pcmn X s => (v1, (Val.negl (eval_shift_op_long v2 s)))
+ | Ptst W s => ((Val.and v1 (eval_shift_op_int v2 s)), (Vint Int.zero))
+ | Ptst X s => ((Val.andl v1 (eval_shift_op_long v2 s)), (Vlong Int64.zero))
+ end.
+
+Definition arith_prepare_comparison_p i v :=
+ match i with
+ | Pcmpimm W n => (v, (Vint (Int.repr n)))
+ | Pcmpimm X n => (v, (Vlong (Int64.repr n)))
+ | Pcmnimm W n => (v, (Vint (Int.neg (Int.repr n))))
+ | Pcmnimm X n => (v, (Vlong (Int64.neg (Int64.repr n))))
+ | Ptstimm W n => ((Val.and v (Vint (Int.repr n))), (Vint Int.zero))
+ | Ptstimm X n => ((Val.andl v (Vlong (Int64.repr n))), (Vlong Int64.zero))
+ | Pfcmp0 S => (v, (Vsingle Float32.zero))
+ | Pfcmp0 D => (v, (Vfloat Float.zero))
+ end.
+
+Definition arith_comparison_pp_compare i :=
+ match i with
+ | Pcmpext _ | Pcmnext _ => compare_long
+ | Pfcmp S => compare_single
+ | Pfcmp D => compare_float
+ end.
+
+Definition arith_comparison_p_compare i :=
+ match i with
+ | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => compare_int
+ | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => compare_long
+ | Pfcmp0 S => compare_single
+ | Pfcmp0 D => compare_float
+ end.
+
+Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset :=
+ match ai with
+ | PArithP i d =>
+ let rs' := rs#d <- (arith_eval_p i) in
+ if destroy_X16 i then rs'#X16 <- Vundef else rs'
+ | PArithPP i d s => rs#d <- (arith_eval_pp i rs#s)
+ | PArithPPP i d s1 s2 => rs#d <- (arith_eval_ppp i rs#s1 rs#s2)
+
+ | PArithRR0R i d s1 s2 => rs#d <- (arith_eval_rr0r i (ir0 (arith_rr0r_isize i) rs s1) rs#s2)
+
+ | PArithRR0 i d s => rs#d <- (arith_eval_rr0 i (ir0 (arith_rr0_isize i) rs s))
+
+ | PArithARRRR0 i d s1 s2 s3 =>
+ rs#d <- (arith_eval_arrrr0 i rs#s1 rs#s2 (ir0 (arith_arrrr0_isize i) rs s3))
+
+ | PArithComparisonPP i s1 s2 =>
+ let (v1, v2) := arith_prepare_comparison_pp i rs#s1 rs#s2 in
+ arith_comparison_pp_compare i rs v1 v2
+ | PArithComparisonR0R i s1 s2 =>
+ let is := arith_comparison_r0r_isize i in
+ let (v1, v2) := arith_prepare_comparison_r0r i (ir0 is rs s1) rs#s2 in
+ (if is (* is W *) then compare_int else compare_long) rs v1 v2
+ | PArithComparisonP i s =>
+ let (v1, v2) := arith_prepare_comparison_p i rs#s in
+ arith_comparison_p_compare i rs v1 v2
+ | Pcset d c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (Vint Int.one) (Vint Int.zero))
+ | Pfmovi S d s => rs#d <- (float32_of_bits rs##s)
+ | Pfmovi D d s => rs#d <- (float64_of_bits rs###s)
+ | Pcsel d s1 s2 c => rs#d <- (if_opt_bool_val (eval_testcond c rs) (rs#s1) (rs#s2))
+ | Pfnmul S d s1 s2 => rs#d <- (Val.negfs (Val.mulfs rs#s1 rs#s2))
+ | Pfnmul D d s1 s2 => rs#d <- (Val.negf (Val.mulf rs#s1 rs#s2))
+ end.
+
+(* basic exec *)
+Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome :=
+ match b with
+ | PArith ai => Next (exec_arith_instr ai rs) m
+ | PLoad ldi => exec_load ldi rs m
+ | PStore sti => exec_store sti rs m
+ | Pallocframe sz pos =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ SOME m2 <- Mem.storev Mint64 m1 (Val.offset_ptr sp pos) rs#SP IN
+ Next (rs #X29 <- (rs#SP) #SP <- sp #X16 <- Vundef) m2
+ | Pfreeframe sz pos =>
+ SOME v <- Mem.loadv Mint64 m (Val.offset_ptr rs#SP pos) IN
+ match rs#SP with
+ | Vptr stk ofs =>
+ SOME m' <- Mem.free m stk 0 sz IN
+ Next (rs#SP <- v #X16 <- Vundef) m'
+ | _ => Stuck
+ end
+ | Ploadsymbol rd id =>
+ Next (rs#rd <- (Genv.symbol_address ge id Ptrofs.zero)) m
+ | Pcvtsw2x rd r1 =>
+ Next (rs#rd <- (Val.longofint rs#r1)) m
+ | Pcvtuw2x rd r1 =>
+ Next (rs#rd <- (Val.longofintu rs#r1)) m
+ | Pcvtx2w rd =>
+ Next (rs#rd <- (Val.loword rs#rd)) m
+ | Pnop => Next rs m
+ end.
+
+(** execution of the body of a bblock *)
+Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
+ match body with
+ | nil => Next rs m
+ | bi::body' =>
+ SOME o <- exec_basic bi rs m IN
+ exec_body body' (_rs o) (_m o)
+ end.
+
+Definition incrPC size_b (rs: regset) :=
+ rs#PC <- (Val.offset_ptr rs#PC size_b).
+
+Definition estep (f: function) oc size_b (rs: regset) (m: mem) :=
+ match oc with
+ | Some (PCtlFlow cfi) => exec_cfi f cfi (incrPC size_b rs) m
+ | Some (Pbuiltin ef args res) => Next (incrPC size_b rs) m
+ | None => Next (incrPC size_b rs) m
+ end.
+
+(** execution of the exit instruction of a bblock *)
+Inductive exec_exit (f: function) size_b (rs: regset) (m: mem): (option control) -> trace -> regset -> mem -> Prop :=
+ | none_step:
+ exec_exit f size_b rs m None E0 (incrPC size_b rs) m
+ | cfi_step (cfi: cf_instruction) rs' m':
+ exec_cfi f cfi (incrPC size_b rs) m = Next rs' m' ->
+ exec_exit f size_b rs m (Some (PCtlFlow cfi)) E0 rs' m'
+ | builtin_step ef args res vargs t vres rs' m':
+ eval_builtin_args ge (fun (r: dreg) => rs r) rs#SP m args vargs ->
+ external_call ef ge vargs m t vres m' ->
+ rs' = incrPC size_b
+ (set_res (map_builtin_res DR res) vres
+ (undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of (destroyed_by_builtin ef)) rs)) ->
+ exec_exit f size_b rs m (Some (Pbuiltin ef args res)) t rs' m'
+ .
+
+(*Definition bbstep f cfi size_b bdy rs m :=*)
+ (*match exec_body bdy rs m with*)
+ (*| Some (State rs' m') => estep f cfi size_b rs' m'*)
+ (*| Stuck => Stuck*)
+ (*end.*)
+
+Definition bbstep f bb rs m :=
+ match exec_body (body bb) rs m with
+ | Some (State rs' m') => estep f (exit bb) (Ptrofs.repr (size bb)) rs' m'
+ | Stuck => Stuck
+ end.
+
+Definition exec_bblock (f: function) (b: bblock) (rs: regset) (m: mem) (t:trace) (rs':regset) (m':mem): Prop
+ := exists rs1 m1, exec_body (body b) rs m = Next rs1 m1 /\ exec_exit f (Ptrofs.repr (size b)) rs1 m1 (exit b) t rs' m'.
+
+Fixpoint find_bblock (pos: Z) (lb: bblocks) {struct lb} : option bblock :=
+ match lb with
+ | nil => None
+ | b :: il =>
+ if zlt pos 0 then None (* NOTE: It is impossible to branch inside a block *)
+ else if zeq pos 0 then Some b
+ else find_bblock (pos - (size b)) il
+ end.
+
+(** Execution of the instruction at [rs PC]. *)
+
+Inductive step: state -> trace -> state -> Prop :=
+ | exec_step_internal:
+ forall b ofs f bi rs m t rs' m',
+ rs PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bi ->
+ exec_bblock f bi rs m t rs' m' ->
+ step (State rs m) t (State rs' m')
+ | exec_step_external:
+ forall b ef args res rs m t rs' m',
+ rs PC = Vptr b Ptrofs.zero ->
+ Genv.find_funct_ptr ge b = Some (External ef) ->
+ external_call ef ge args m t res m' ->
+ extcall_arguments rs m (ef_sig ef) args ->
+ rs' = (set_pair (loc_external_result (ef_sig ef) ) res (undef_caller_save_regs rs))#PC <- (rs RA) ->
+ step (State rs m) t (State rs' m')
+ .
+
+
+End RELSEM.
+
+
+(** Execution of whole programs. *)
+
+Inductive initial_state (p: program): state -> Prop :=
+ | initial_state_intro: forall m0,
+ Genv.init_mem p = Some m0 ->
+ let ge := Genv.globalenv p in
+ let rs0 :=
+ (Pregmap.init Vundef)
+ # PC <- (Genv.symbol_address ge p.(prog_main) Ptrofs.zero)
+ # RA <- Vnullptr
+ # SP <- Vnullptr in
+ initial_state p (State rs0 m0).
+
+Inductive final_state: state -> int -> Prop :=
+ | final_state_intro: forall rs m r,
+ rs#PC = Vnullptr ->
+ rs#X0 = Vint r ->
+ final_state (State rs m) r.
+
+Definition semantics (lk: aarch64_linker) (p: program) :=
+ Semantics (step lk) (initial_state p) final_state (Genv.globalenv p).
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
new file mode 100644
index 00000000..5cd049c5
--- /dev/null
+++ b/aarch64/Asmblockdeps.v
@@ -0,0 +1,2688 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Translation from [Asmblock] to [AbstractBB] *)
+
+(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmblock] into [L].
+ [AbstractBB] will then define a sequential semantics for [L].
+ We prove a bisimulation between the sequential semantics of [L] and [Asmblock].
+ Then, the checker on [Asmblock] is deduced from those of [L].
+ *)
+
+Require Import AST.
+Require Import Asm Asmblock.
+Require Import Asmblockprops.
+Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import ZArith.
+Require Import Coqlib.
+Require Import ImpSimuTest.
+Require Import Axioms.
+Require Import Permutation.
+Require Import Events.
+
+Require Import Lia.
+
+Import ListNotations.
+Local Open Scope list_scope.
+
+Open Scope impure.
+(** auxiliary treatments of builtins *)
+
+Definition is_builtin(ex: option control): bool :=
+ match ex with
+ | Some (Pbuiltin _ _ _) => true
+ | _ => false
+ end.
+
+Definition has_builtin(bb: bblock): bool :=
+ is_builtin (exit bb).
+
+Remark builtin_arg_eq_dreg: forall (a b: builtin_arg dreg), {a=b} + {a<>b}.
+Proof.
+ intros.
+ apply (builtin_arg_eq dreg_eq).
+Qed.
+
+Remark builtin_res_eq_dreg: forall (a b: builtin_res dreg), {a=b} + {a<>b}.
+Proof.
+ intros.
+ apply (builtin_res_eq dreg_eq).
+Qed.
+
+Definition assert_same_builtin (bb1 bb2: bblock): ?? unit :=
+ match exit bb1 with
+ | Some (Pbuiltin ef1 lbar1 brr1) =>
+ match exit bb2 with
+ | Some (Pbuiltin ef2 lbar2 brr2) =>
+ if (external_function_eq ef1 ef2) then
+ if (list_eq_dec builtin_arg_eq_dreg lbar1 lbar2) then
+ if (builtin_res_eq_dreg brr1 brr2) then RET tt
+ else FAILWITH "Different brr in Pbuiltin"
+ else FAILWITH "Different lbar in Pbuiltin"
+ else FAILWITH "Different ef in Pbuiltin"
+ | _ => FAILWITH "Expected a builtin: found something else" (* XXX: on peut raffiner le message d'erreur si nécessaire *)
+ end
+ | _ => match exit bb2 with
+ | Some (Pbuiltin ef2 lbar2 brr2) => FAILWITH "Expected a something else: found a builtin"
+ | _ => RET tt (* ok *)
+ end
+ end.
+
+Lemma assert_same_builtin_correct (bb1 bb2: bblock):
+ WHEN assert_same_builtin bb1 bb2 ~> _ THEN
+ has_builtin bb1 = true \/ has_builtin bb2 = true -> exit bb1 = exit bb2.
+Proof.
+ unfold assert_same_builtin, has_builtin.
+ destruct (exit bb1) as [[]|]; simpl;
+ destruct (exit bb2) as [[]|]; wlp_simplify; try congruence.
+Qed.
+Global Opaque assert_same_builtin.
+Local Hint Resolve assert_same_builtin_correct: wlp.
+
+(** Definition of [L] *)
+
+Module P<: ImpParam.
+Module R := Pos.
+
+Section IMPPARAM.
+
+Definition env := Genv.t fundef unit.
+
+Record genv_wrap := { _genv: env; _fn: function; _lk: aarch64_linker }.
+Definition genv := genv_wrap.
+
+Variable Ge: genv.
+
+Inductive value_wrap :=
+ | Val (v: val)
+ | Memstate (m: mem)
+ | Bool (b: bool)
+.
+
+Definition value := value_wrap.
+
+Record CRflags := { _CN: val; _CZ:val; _CC: val; _CV: val }.
+
+Inductive control_op :=
+ | Ob (l: label)
+ | Obc (c: testcond) (l: label)
+ | Obl (id: ident)
+ | Obs (id: ident)
+ | Ocbnz (sz: isize) (l: label)
+ | Ocbz (sz: isize) (l: label)
+ | Otbnz (sz: isize) (n: int) (l: label)
+ | Otbz (sz: isize) (n: int) (l: label)
+ | Obtbl (l: list label)
+ | OError
+ | OIncremPC (sz: Z)
+.
+
+Inductive arith_op :=
+ | OArithP (n: arith_p)
+ | OArithPP (n: arith_pp)
+ | OArithPPP (n: arith_ppp)
+ | OArithRR0R (n: arith_rr0r)
+ | OArithRR0R_XZR (n: arith_rr0r) (vz: val)
+ | OArithRR0 (n: arith_rr0)
+ | OArithRR0_XZR (n: arith_rr0) (vz: val)
+ | OArithARRRR0 (n: arith_arrrr0)
+ | OArithARRRR0_XZR (n: arith_arrrr0) (vz: val)
+ | OArithComparisonPP_CN (n: arith_comparison_pp)
+ | OArithComparisonPP_CZ (n: arith_comparison_pp)
+ | OArithComparisonPP_CC (n: arith_comparison_pp)
+ | OArithComparisonPP_CV (n: arith_comparison_pp)
+ | OArithComparisonR0R_CN (n: arith_comparison_r0r) (is: isize)
+ | OArithComparisonR0R_CZ (n: arith_comparison_r0r) (is: isize)
+ | OArithComparisonR0R_CC (n: arith_comparison_r0r) (is: isize)
+ | OArithComparisonR0R_CV (n: arith_comparison_r0r) (is: isize)
+ | OArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+ | OArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+ | OArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+ | OArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) (is: isize) (vz: val)
+ | OArithComparisonP_CN (n: arith_comparison_p)
+ | OArithComparisonP_CZ (n: arith_comparison_p)
+ | OArithComparisonP_CC (n: arith_comparison_p)
+ | OArithComparisonP_CV (n: arith_comparison_p)
+ | Ocset (c: testcond)
+ | Ofmovi (fsz: fsize)
+ | Ofmovi_XZR (fsz: fsize)
+ | Ocsel (c: testcond)
+ | Ofnmul (fsz: fsize)
+.
+
+Inductive store_op :=
+ | Ostore1 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+ | Ostore2 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+ | OstoreU (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+.
+
+Inductive load_op :=
+ | Oload1 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+ | Oload2 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+ | OloadU (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+.
+
+Inductive allocf_op :=
+ | OAllocf_SP (sz: Z) (linkofs: ptrofs)
+ | OAllocf_Mem (sz: Z) (linkofs: ptrofs)
+.
+
+Inductive freef_op :=
+ | OFreef_SP (sz: Z) (linkofs: ptrofs)
+ | OFreef_Mem (sz: Z) (linkofs: ptrofs)
+.
+
+Inductive op_wrap :=
+ (* arithmetic operation *)
+ | Arith (op: arith_op)
+ | Load (ld: load_op)
+ | Store (st: store_op)
+ | Allocframe (al: allocf_op)
+ | Freeframe (fr: freef_op)
+ | Loadsymbol (id: ident)
+ | Cvtsw2x
+ | Cvtuw2x
+ | Cvtx2w
+ | Control (co: control_op)
+ | Constant (v: val)
+.
+
+Definition op:=op_wrap.
+
+Coercion Arith: arith_op >-> op_wrap.
+Coercion Control: control_op >-> op_wrap.
+
+Definition v_compare_int (v1 v2: val) : CRflags :=
+ {| _CN := (Val.negative (Val.sub v1 v2));
+ _CZ := (Val.mxcmpu Ceq v1 v2);
+ _CC := (Val.mxcmpu Cge v1 v2);
+ _CV := (Val.sub_overflow v1 v2) |}.
+
+Definition v_compare_long (v1 v2: val) : CRflags :=
+ {| _CN := (Val.negativel (Val.subl v1 v2));
+ _CZ := (Val.mxcmplu Ceq v1 v2);
+ _CC := (Val.mxcmplu Cge v1 v2);
+ _CV := (Val.subl_overflow v1 v2) |}.
+
+Definition v_compare_float (v1 v2: val) : CRflags :=
+ match v1, v2 with
+ | Vfloat f1, Vfloat f2 =>
+ {| _CN := (Val.of_bool (Float.cmp Clt f1 f2));
+ _CZ := (Val.of_bool (Float.cmp Ceq f1 f2));
+ _CC := (Val.of_bool (negb (Float.cmp Clt f1 f2)));
+ _CV := (Val.of_bool (negb (Float.ordered f1 f2))) |}
+ | _, _ =>
+ {| _CN := Vundef;
+ _CZ := Vundef;
+ _CC := Vundef;
+ _CV := Vundef |}
+ end.
+
+Definition v_compare_single (v1 v2: val) : CRflags :=
+ match v1, v2 with
+ | Vsingle f1, Vsingle f2 =>
+ {| _CN := (Val.of_bool (Float32.cmp Clt f1 f2));
+ _CZ := (Val.of_bool (Float32.cmp Ceq f1 f2));
+ _CC := (Val.of_bool (negb (Float32.cmp Clt f1 f2)));
+ _CV := (Val.of_bool (negb (Float32.ordered f1 f2))) |}
+ | _, _ =>
+ {| _CN := Vundef;
+ _CZ := Vundef;
+ _CC := Vundef;
+ _CV := Vundef |}
+ end.
+
+Definition arith_eval_comparison_pp (n: arith_comparison_pp) (v1 v2: val) :=
+ let (v1',v2') := arith_prepare_comparison_pp n v1 v2 in
+ match n with
+ | Pcmpext _ | Pcmnext _ => v_compare_long v1' v2'
+ | Pfcmp S => v_compare_single v1' v2'
+ | Pfcmp D => v_compare_float v1' v2'
+ end.
+
+Definition arith_eval_comparison_p (n: arith_comparison_p) (v: val) :=
+ let (v1',v2') := arith_prepare_comparison_p n v in
+ match n with
+ | Pcmpimm W _ | Pcmnimm W _ | Ptstimm W _ => v_compare_int v1' v2'
+ | Pcmpimm X _ | Pcmnimm X _ | Ptstimm X _ => v_compare_long v1' v2'
+ | Pfcmp0 S => v_compare_single v1' v2'
+ | Pfcmp0 D => v_compare_float v1' v2'
+ end.
+
+Definition arith_eval_comparison_r0r (n: arith_comparison_r0r) (v1 v2: val) (is: isize) :=
+ let (v1',v2') := arith_prepare_comparison_r0r n v1 v2 in
+ if is then v_compare_int v1' v2' else v_compare_long v1' v2'.
+
+Definition flags_testcond_value (c: testcond) (vCN vCZ vCC vCV: val) :=
+ match c with
+ | TCeq => (**r equal *)
+ match vCZ with
+ | Vint n => Some (Int.eq n Int.one)
+ | _ => None
+ end
+ | TCne => (**r not equal *)
+ match vCZ with
+ | Vint n => Some (Int.eq n Int.zero)
+ | _ => None
+ end
+ | TClo => (**r unsigned less than *)
+ match vCC with
+ | Vint n => Some (Int.eq n Int.zero)
+ | _ => None
+ end
+ | TCls => (**r unsigned less or equal *)
+ match vCC, vCZ with
+ | Vint c, Vint z => Some (Int.eq c Int.zero || Int.eq z Int.one)
+ | _, _ => None
+ end
+ | TChs => (**r unsigned greater or equal *)
+ match vCC with
+ | Vint n => Some (Int.eq n Int.one)
+ | _ => None
+ end
+ | TChi => (**r unsigned greater *)
+ match vCC, vCZ with
+ | Vint c, Vint z => Some (Int.eq c Int.one && Int.eq z Int.zero)
+ | _, _ => None
+ end
+ | TClt => (**r signed less than *)
+ match vCV, vCN with
+ | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.one)
+ | _, _ => None
+ end
+ | TCle => (**r signed less or equal *)
+ match vCV, vCN, vCZ with
+ | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.one || Int.eq z Int.one)
+ | _, _, _ => None
+ end
+ | TCge => (**r signed greater or equal *)
+ match vCV, vCN with
+ | Vint o, Vint s => Some (Int.eq (Int.xor o s) Int.zero)
+ | _, _ => None
+ end
+ | TCgt => (**r signed greater *)
+ match vCV, vCN, vCZ with
+ | Vint o, Vint s, Vint z => Some (Int.eq (Int.xor o s) Int.zero && Int.eq z Int.zero)
+ | _, _, _ => None
+ end
+ | TCpl => (**r positive *)
+ match vCN with
+ | Vint n => Some (Int.eq n Int.zero)
+ | _ => None
+ end
+ | TCmi => (**r negative *)
+ match vCN with
+ | Vint n => Some (Int.eq n Int.one)
+ | _ => None
+ end
+ end.
+
+(* The is argument is used to identify the source inst and avoid rewriting some code
+ 0 -> Ocset
+ 1 -> Ocsel
+ 2 -> Obc *)
+Definition cond_eval_is (c: testcond) (v1 v2 vCN vCZ vCC vCV: val) (is: Z) :=
+ let res := flags_testcond_value c vCN vCZ vCC vCV in
+ match is, res with
+ | 0, res => Some (Val (if_opt_bool_val res (Vint Int.one) (Vint Int.zero)))
+ | 1, res => Some (Val (if_opt_bool_val res v1 v2))
+ | 2, Some b => Some (Bool (b))
+ | _, _ => None
+ end.
+
+Definition fmovi_eval (fsz: fsize) (v: val) :=
+ match fsz with
+ | S => float32_of_bits v
+ | D => float64_of_bits v
+ end.
+
+Definition fmovi_eval_xzr (fsz: fsize) :=
+ match fsz with
+ | S => float32_of_bits (Vint Int.zero)
+ | D => float64_of_bits (Vlong Int64.zero)
+ end.
+
+Definition fnmul_eval (fsz: fsize) (v1 v2: val) :=
+ match fsz with
+ | S => Val.negfs (Val.mulfs v1 v2)
+ | D => Val.negf (Val.mulf v1 v2)
+ end.
+
+Definition cflags_eval (c: testcond) (l: list value) (v1 v2: val) (is: Z) :=
+ match c, l with
+ | TCeq, [Val vCZ] => cond_eval_is TCeq v1 v2 Vundef vCZ Vundef Vundef is
+ | TCne, [Val vCZ] => cond_eval_is TCne v1 v2 Vundef vCZ Vundef Vundef is
+ | TChs, [Val vCC] => cond_eval_is TChs v1 v2 Vundef Vundef vCC Vundef is
+ | TClo, [Val vCC] => cond_eval_is TClo v1 v2 Vundef Vundef vCC Vundef is
+ | TCmi, [Val vCN] => cond_eval_is TCmi v1 v2 vCN Vundef Vundef Vundef is
+ | TCpl, [Val vCN] => cond_eval_is TCpl v1 v2 vCN Vundef Vundef Vundef is
+ | TChi, [Val vCZ; Val vCC] => cond_eval_is TChi v1 v2 Vundef vCZ vCC Vundef is
+ | TCls, [Val vCZ; Val vCC] => cond_eval_is TCls v1 v2 Vundef vCZ vCC Vundef is
+ | TCge, [Val vCN; Val vCV] => cond_eval_is TCge v1 v2 vCN Vundef Vundef vCV is
+ | TClt, [Val vCN; Val vCV] => cond_eval_is TClt v1 v2 vCN Vundef Vundef vCV is
+ | TCgt, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCgt v1 v2 vCN vCZ Vundef vCV is
+ | TCle, [Val vCN; Val vCZ; Val vCV] => cond_eval_is TCle v1 v2 vCN vCZ Vundef vCV is
+ | _, _ => None
+ end.
+
+Definition arith_op_eval (op: arith_op) (l: list value) :=
+ match op, l with
+ | OArithP n, [] => Some (Val (arith_eval_p Ge.(_lk) n))
+ | OArithPP n, [Val v] => Some (Val (arith_eval_pp Ge.(_lk) n v))
+ | OArithPPP n, [Val v1; Val v2] => Some (Val (arith_eval_ppp n v1 v2))
+ | OArithRR0R n, [Val v1; Val v2] => Some (Val (arith_eval_rr0r n v1 v2))
+ | OArithRR0R_XZR n vz, [Val v] => Some (Val (arith_eval_rr0r n vz v))
+ | OArithRR0 n, [Val v] => Some (Val (arith_eval_rr0 n v))
+ | OArithRR0_XZR n vz, [] => Some (Val (arith_eval_rr0 n vz))
+ | OArithARRRR0 n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrrr0 n v1 v2 v3))
+ | OArithARRRR0_XZR n vz, [Val v1; Val v2] => Some (Val (arith_eval_arrrr0 n v1 v2 vz))
+ | OArithComparisonPP_CN n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CN)))
+ | OArithComparisonPP_CZ n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CZ)))
+ | OArithComparisonPP_CC n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CC)))
+ | OArithComparisonPP_CV n, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_pp n v1 v2).(_CV)))
+ | OArithComparisonR0R_CN n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CN)))
+ | OArithComparisonR0R_CZ n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CZ)))
+ | OArithComparisonR0R_CC n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CC)))
+ | OArithComparisonR0R_CV n is, [Val v1; Val v2] => Some (Val ((arith_eval_comparison_r0r n v1 v2 is).(_CV)))
+ | OArithComparisonR0R_CN_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CN)))
+ | OArithComparisonR0R_CZ_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CZ)))
+ | OArithComparisonR0R_CC_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CC)))
+ | OArithComparisonR0R_CV_XZR n is vz, [Val v2] => Some (Val ((arith_eval_comparison_r0r n vz v2 is).(_CV)))
+ | OArithComparisonP_CN n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CN)))
+ | OArithComparisonP_CZ n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CZ)))
+ | OArithComparisonP_CC n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CC)))
+ | OArithComparisonP_CV n, [Val v] => Some (Val ((arith_eval_comparison_p n v).(_CV)))
+ | Ocset c, l => cflags_eval c l Vundef Vundef 0
+ | Ofmovi fsz, [Val v] => Some (Val (fmovi_eval fsz v))
+ | Ofmovi_XZR fsz, [] => Some (Val (fmovi_eval_xzr fsz))
+ | Ocsel c, Val v1 :: Val v2 :: l' => cflags_eval c l' v1 v2 1
+ | Ofnmul fsz, [Val v1; Val v2] => Some (Val (fnmul_eval fsz v1 v2))
+ | _, _ => None
+ end.
+
+Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) :=
+ match v with
+ | Some va => match Mem.storev c m va vs with
+ | Some m' => Some (Memstate m')
+ | None => None
+ end
+ | None => None (* should never occurs *)
+ end.
+
+Definition exec_store1 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs: val) :=
+ let v :=
+ match a with
+ | ADimm _ n => Some (Val.addl vs (Vlong n))
+ | ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs))
+ | _ => None
+ end in
+ call_ll_storev chunk m v vr.
+
+Definition exec_store2 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs1 vs2: val) :=
+ let v :=
+ match a with
+ | ADreg _ _ => Some (Val.addl vs1 vs2)
+ | ADlsl _ _ n => Some (Val.addl vs1 (Val.shll vs2 (Vint n)))
+ | ADsxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofint vs2) (Vint n)))
+ | ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n)))
+ | _ => None
+ end in
+ call_ll_storev chunk m v vr.
+
+Definition exec_storeU (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr: val) :=
+ call_ll_storev chunk m None vr.
+
+Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
+ match label_pos lbl 0 (fn_blocks f) with
+ | None => None
+ | Some pos =>
+ match vpc with
+ | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos)))
+ | _ => None
+ end
+ end.
+
+Definition control_eval (o: control_op) (l: list value) :=
+ let (ge, fn, lk) := Ge in
+ match o, l with
+ | Ob lbl, [Val vpc] => goto_label_deps fn lbl vpc
+ | Obc c lbl, Val vpc :: l' => match cflags_eval c l' Vundef Vundef 2 with
+ | Some (Bool true) => goto_label_deps fn lbl vpc
+ | Some (Bool false) => Some (Val vpc)
+ | _ => None
+ end
+ | Obl id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+ | Obs id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+ | Ocbnz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with
+ | Some (true) => Some (Val vpc)
+ | Some (false) => goto_label_deps fn lbl vpc
+ | None => None
+ end
+ | Ocbz sz lbl, [Val v; Val vpc] => match eval_testzero sz v with
+ | Some (true) => goto_label_deps fn lbl vpc
+ | Some (false) => Some (Val vpc)
+ | None => None
+ end
+ | Otbnz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with
+ | Some (true) => goto_label_deps fn lbl vpc
+ | Some (false) => Some (Val vpc)
+ | None => None
+ end
+ | Otbz sz n lbl, [Val v; Val vpc] => match eval_testbit sz v n with
+ | Some (true) => Some (Val vpc)
+ | Some (false) => goto_label_deps fn lbl vpc
+ | None => None
+ end
+ | Obtbl tbl, [Val index; Val vpc] => match index with
+ | Vint n =>
+ match list_nth_z tbl (Int.unsigned n) with
+ | None => None
+ | Some lbl => goto_label_deps fn lbl vpc
+ end
+ | _ => None
+ end
+ | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz)))
+ | OError, _ => None
+ | _, _ => None
+ end.
+
+Definition store_eval (o: store_op) (l: list value) :=
+ match o, l with
+ | Ostore1 st chunk a, [Val vr; Val vs; Memstate m] => exec_store1 st m chunk a vr vs
+ | Ostore2 st chunk a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m chunk a vr vs1 vs2
+ | OstoreU st chunk a, [Val vr; Memstate m] => exec_storeU st m chunk a vr
+ | _, _ => None
+ end.
+
+Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: option val) :=
+ match v with
+ | Some va => match Mem.loadv c m va with
+ | Some v' => Some (Val (transf v'))
+ | None => None
+ end
+ | None => None (* should never occurs *)
+ end.
+
+Definition exec_load1 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl: val) :=
+ let v :=
+ match a with
+ | ADimm _ n => Some (Val.addl vl (Vlong n))
+ | ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs))
+ | _ => None
+ end in
+ call_ll_loadv chunk (interp_load ld) m v.
+
+Definition exec_load2 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl1 vl2: val) :=
+ let v :=
+ match a with
+ | ADreg _ _ => Some (Val.addl vl1 vl2)
+ | ADlsl _ _ n => Some (Val.addl vl1 (Val.shll vl2 (Vint n)))
+ | ADsxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofint vl2) (Vint n)))
+ | ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n)))
+ | _ => None
+ end in
+ call_ll_loadv chunk (interp_load ld) m v.
+
+Definition exec_loadU (n: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) :=
+ call_ll_loadv chunk (interp_load n) m None.
+
+Definition load_eval (o: load_op) (l: list value) :=
+ match o, l with
+ | Oload1 ld chunk a, [Val vs; Memstate m] => exec_load1 ld m chunk a vs
+ | Oload2 ld chunk a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m chunk a vs1 vs2
+ | OloadU st chunk a, [Memstate m] => exec_loadU st m chunk a
+ | _, _ => None
+ end.
+
+Definition eval_allocf (o: allocf_op) (l: list value) :=
+ match o, l with
+ | OAllocf_Mem sz linkofs, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv
+ | OAllocf_SP sz linkofs, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match call_ll_storev Mint64 m1 (Some (Val.offset_ptr sp linkofs)) spv with
+ | None => None
+ | Some ms => Some (Val sp)
+ end
+ | _, _ => None
+ end.
+
+Definition eval_freef (o: freef_op) (l: list value) :=
+ match o, l with
+ | OFreef_Mem sz linkofs, [Val spv; Memstate m] =>
+ match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end
+ | _ => None
+ end
+ end
+ | OFreef_SP sz linkofs, [Val spv; Memstate m] =>
+ match call_ll_loadv Mint64 (fun v => v) m (Some (Val.offset_ptr spv linkofs)) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (v)
+ end
+ | _ => None
+ end
+ end
+ | _, _ => None
+ end.
+
+Definition op_eval (op: op) (l:list value) :=
+ match op, l with
+ | Arith op, l => arith_op_eval op l
+ | Load o, l => load_eval o l
+ | Store o, l => store_eval o l
+ | Allocframe o, l => eval_allocf o l
+ | Freeframe o, l => eval_freef o l
+ | Loadsymbol id, [] => Some (Val (Genv.symbol_address Ge.(_genv) id Ptrofs.zero))
+ | Cvtsw2x, [Val v] => Some (Val (Val.longofint v))
+ | Cvtuw2x, [Val v] => Some (Val (Val.longofintu v))
+ | Cvtx2w, [Val v] => Some (Val (Val.loword v))
+ | Control o, l => control_eval o l
+ | Constant v, [] => Some (Val v)
+ | _, _ => None
+ end.
+
+Definition vz_eq (vz1 vz2: val) : ?? bool :=
+ RET (match vz1 with
+ | Vint i1 => match vz2 with
+ | Vint i2 => Int.eq i1 i2
+ | _ => false
+ end
+ | Vlong l1 => match vz2 with
+ | Vlong l2 => Int64.eq l1 l2
+ | _ => false
+ end
+ | _ => false
+ end).
+
+Lemma vz_eq_correct vz1 vz2:
+ WHEN vz_eq vz1 vz2 ~> b THEN b = true -> vz1 = vz2.
+Proof.
+ wlp_simplify.
+ destruct vz1; destruct vz2; trivial; try discriminate.
+ - eapply f_equal; apply Int.same_if_eq; auto.
+ - eapply f_equal. apply Int64.same_if_eq; auto.
+Qed.
+Hint Resolve vz_eq_correct: wlp.
+
+Definition is_eq (is1 is2: isize) : ?? bool :=
+ RET (match is1 with
+ | W => match is2 with
+ | W => true
+ | _ => false
+ end
+ | X => match is2 with
+ | X => true
+ | _ => false
+ end
+ end).
+
+Lemma is_eq_correct is1 is2:
+ WHEN is_eq is1 is2 ~> b THEN b = true -> is1 = is2.
+Proof.
+ wlp_simplify; destruct is1; destruct is2; trivial; try discriminate.
+Qed.
+Hint Resolve is_eq_correct: wlp.
+
+Definition arith_op_eq (o1 o2: arith_op): ?? bool :=
+ match o1 with
+ | OArithP n1 =>
+ match o2 with OArithP n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithPP n1 =>
+ match o2 with OArithPP n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithPPP n1 =>
+ match o2 with OArithPPP n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRR0R n1 =>
+ match o2 with OArithRR0R n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRR0R_XZR n1 vz1 =>
+ match o2 with OArithRR0R_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+ | OArithRR0 n1 =>
+ match o2 with OArithRR0 n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRR0_XZR n1 vz1 =>
+ match o2 with OArithRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+ | OArithARRRR0 n1 =>
+ match o2 with OArithARRRR0 n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithARRRR0_XZR n1 vz1 =>
+ match o2 with OArithARRRR0_XZR n2 vz2 => iandb (phys_eq n1 n2) (vz_eq vz1 vz2) | _ => RET false end
+ | OArithComparisonPP_CN n1 =>
+ match o2 with OArithComparisonPP_CN n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonPP_CZ n1 =>
+ match o2 with OArithComparisonPP_CZ n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonPP_CC n1 =>
+ match o2 with OArithComparisonPP_CC n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonPP_CV n1 =>
+ match o2 with OArithComparisonPP_CV n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonR0R_CN n1 is1 =>
+ match o2 with OArithComparisonR0R_CN n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+ | OArithComparisonR0R_CZ n1 is1 =>
+ match o2 with OArithComparisonR0R_CZ n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+ | OArithComparisonR0R_CC n1 is1 =>
+ match o2 with OArithComparisonR0R_CC n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+ | OArithComparisonR0R_CV n1 is1 =>
+ match o2 with OArithComparisonR0R_CV n2 is2 => iandb (phys_eq n1 n2) (is_eq is1 is2) | _ => RET false end
+ | OArithComparisonR0R_CN_XZR n1 is1 vz1 =>
+ match o2 with OArithComparisonR0R_CN_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+ | OArithComparisonR0R_CZ_XZR n1 is1 vz1 =>
+ match o2 with OArithComparisonR0R_CZ_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+ | OArithComparisonR0R_CC_XZR n1 is1 vz1 =>
+ match o2 with OArithComparisonR0R_CC_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+ | OArithComparisonR0R_CV_XZR n1 is1 vz1 =>
+ match o2 with OArithComparisonR0R_CV_XZR n2 is2 vz2 => iandb (vz_eq vz1 vz2) (iandb (phys_eq n1 n2) (is_eq is1 is2)) | _ => RET false end
+ | OArithComparisonP_CN n1 =>
+ match o2 with OArithComparisonP_CN n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonP_CZ n1 =>
+ match o2 with OArithComparisonP_CZ n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonP_CC n1 =>
+ match o2 with OArithComparisonP_CC n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithComparisonP_CV n1 =>
+ match o2 with OArithComparisonP_CV n2 => phys_eq n1 n2 | _ => RET false end
+ | Ocset c1 =>
+ match o2 with Ocset c2 => struct_eq c1 c2 | _ => RET false end
+ | Ofmovi fsz1 =>
+ match o2 with Ofmovi fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+ | Ofmovi_XZR fsz1 =>
+ match o2 with Ofmovi_XZR fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+ | Ocsel c1 =>
+ match o2 with Ocsel c2 => struct_eq c1 c2 | _ => RET false end
+ | Ofnmul fsz1 =>
+ match o2 with Ofnmul fsz2 => phys_eq fsz1 fsz2 | _ => RET false end
+ end.
+
+Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp).
+
+Lemma arith_op_eq_correct o1 o2:
+ WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; my_wlp_simplify; try congruence;
+ try (destruct vz; destruct vz0); try (destruct is; destruct is0);
+ repeat apply f_equal; try congruence;
+ try apply Int.same_if_eq; try apply Int64.same_if_eq; try auto.
+Qed.
+Hint Resolve arith_op_eq_correct: wlp.
+Opaque arith_op_eq_correct.
+
+Definition control_op_eq (c1 c2: control_op): ?? bool :=
+ match c1 with
+ | Ob lbl1 =>
+ match c2 with Ob lbl2 => phys_eq lbl1 lbl2 | _ => RET false end
+ | Obc c1 lbl1 =>
+ match c2 with Obc c2 lbl2 => iandb (struct_eq c1 c2) (phys_eq lbl1 lbl2) | _ => RET false end
+ | Obl id1 =>
+ match c2 with Obl id2 => phys_eq id1 id2 | _ => RET false end
+ | Obs id1 =>
+ match c2 with Obs id2 => phys_eq id1 id2 | _ => RET false end
+ | Ocbnz sz1 lbl1 =>
+ match c2 with Ocbnz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end
+ | Ocbz sz1 lbl1 =>
+ match c2 with Ocbz sz2 lbl2 => iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2) | _ => RET false end
+ | Otbnz sz1 n1 lbl1 =>
+ match c2 with Otbnz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end
+ | Otbz sz1 n1 lbl1 =>
+ match c2 with Otbz sz2 n2 lbl2 => iandb (RET (Int.eq n1 n2)) (iandb (phys_eq sz1 sz2) (phys_eq lbl1 lbl2)) | _ => RET false end
+ | Obtbl tbl1 =>
+ match c2 with Obtbl tbl2 => (phys_eq tbl1 tbl2) | _ => RET false end
+ | OIncremPC sz1 =>
+ match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end
+ | OError =>
+ match c2 with OError => RET true | _ => RET false end
+ end.
+
+Lemma control_op_eq_correct c1 c2:
+ WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2.
+Proof.
+ destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence;
+ try apply Int.same_if_eq in H; try congruence.
+Qed.
+Hint Resolve control_op_eq_correct: wlp.
+Opaque control_op_eq_correct.
+
+Definition store_op_eq (s1 s2: store_op): ?? bool :=
+ match s1 with
+ | Ostore1 st1 chk1 a1 =>
+ match s2 with Ostore1 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+ | Ostore2 st1 chk1 a1 =>
+ match s2 with Ostore2 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+ | OstoreU st1 chk1 a1 =>
+ match s2 with OstoreU st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+ end.
+
+Lemma store_op_eq_correct s1 s2:
+ WHEN store_op_eq s1 s2 ~> b THEN b = true -> s1 = s2.
+Proof.
+ destruct s1, s2; wlp_simplify; try congruence.
+ all: rewrite H1 in H0; rewrite H0, H; reflexivity.
+Qed.
+Hint Resolve store_op_eq_correct: wlp.
+Opaque store_op_eq_correct.
+
+Definition load_op_eq (l1 l2: load_op): ?? bool :=
+ match l1 with
+ | Oload1 ld1 chk1 a1 =>
+ match l2 with Oload1 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+ | Oload2 ld1 chk1 a1 =>
+ match l2 with Oload2 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+ | OloadU ld1 chk1 a1 =>
+ match l2 with OloadU ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+ end.
+
+Lemma load_op_eq_correct l1 l2:
+ WHEN load_op_eq l1 l2 ~> b THEN b = true -> l1 = l2.
+Proof.
+ destruct l1, l2; wlp_simplify; try congruence.
+ all: rewrite H1 in H0; rewrite H, H0; reflexivity.
+Qed.
+Hint Resolve load_op_eq_correct: wlp.
+Opaque load_op_eq_correct.
+
+Definition allocf_op_eq (al1 al2: allocf_op): ?? bool :=
+ match al1 with
+ | OAllocf_SP sz1 linkofs1 =>
+ match al2 with OAllocf_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+ | OAllocf_Mem sz1 linkofs1 =>
+ match al2 with OAllocf_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+ end.
+
+Lemma allocf_op_eq_correct al1 al2:
+ WHEN allocf_op_eq al1 al2 ~> b THEN b = true -> al1 = al2.
+Proof.
+ destruct al1, al2; wlp_simplify; try congruence.
+ all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity.
+Qed.
+Hint Resolve allocf_op_eq_correct: wlp.
+Opaque allocf_op_eq_correct.
+
+Definition freef_op_eq (fr1 fr2: freef_op): ?? bool :=
+ match fr1 with
+ | OFreef_SP sz1 linkofs1 =>
+ match fr2 with OFreef_SP sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+ | OFreef_Mem sz1 linkofs1 =>
+ match fr2 with OFreef_Mem sz2 linkofs2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq linkofs1 linkofs2) | _ => RET false end
+ end.
+
+Lemma freef_op_eq_correct fr1 fr2:
+ WHEN freef_op_eq fr1 fr2 ~> b THEN b = true -> fr1 = fr2.
+Proof.
+ destruct fr1, fr2; wlp_simplify; try congruence.
+ all: rewrite H2; rewrite Z.eqb_eq in H; rewrite H; reflexivity.
+Qed.
+Hint Resolve freef_op_eq_correct: wlp.
+Opaque freef_op_eq_correct.
+
+Definition op_eq (o1 o2: op): ?? bool :=
+ match o1 with
+ | Arith i1 =>
+ match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end
+ | Control i1 =>
+ match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end
+ | Load i1 =>
+ match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end
+ | Store i1 =>
+ match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end
+ | Allocframe i1 =>
+ match o2 with Allocframe i2 => allocf_op_eq i1 i2 | _ => RET false end
+ | Freeframe i1 =>
+ match o2 with Freeframe i2 => freef_op_eq i1 i2 | _ => RET false end
+ | Loadsymbol id1 =>
+ match o2 with Loadsymbol id2 => phys_eq id1 id2 | _ => RET false end
+ | Cvtsw2x =>
+ match o2 with Cvtsw2x => RET true | _ => RET false end
+ | Cvtuw2x =>
+ match o2 with Cvtuw2x => RET true | _ => RET false end
+ | Cvtx2w =>
+ match o2 with Cvtx2w => RET true | _ => RET false end
+ | Constant c1 =>
+ match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end
+ end.
+
+Lemma op_eq_correct o1 o2:
+ WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; congruence.
+Qed.
+
+End IMPPARAM.
+
+End P.
+
+Module L <: ISeqLanguage with Module LP:=P.
+
+Module LP:=P.
+
+Include MkSeqLanguage P.
+
+End L.
+
+Module IST := ImpSimu L ImpPosDict.
+
+Import L.
+Import P.
+
+(** Compilation from [Asmblock] to [L] *)
+
+Local Open Scope positive_scope.
+
+Definition pmem : R.t := 1.
+
+Definition ireg_to_pos (ir: ireg) : R.t :=
+ match ir with
+ | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11 | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15
+ | X8 => 16 | X9 => 17 | X10 => 18 | X11 => 19 | X12 => 20 | X13 => 21 | X14 => 22 | X15 => 23
+ | X16 => 24 | X17 => 25 | X18 => 26 | X19 => 27 | X20 => 28 | X21 => 29 | X22 => 30 | X23 => 31
+ | X24 => 32 | X25 => 33 | X26 => 34 | X27 => 35 | X28 => 36 | X29 => 37 | X30 => 38
+ end
+.
+
+Definition freg_to_pos (fr: freg) : R.t :=
+ match fr with
+ | D0 => 39 | D1 => 40 | D2 => 41 | D3 => 42 | D4 => 43 | D5 => 44 | D6 => 45 | D7 => 46
+ | D8 => 47 | D9 => 48 | D10 => 49 | D11 => 50 | D12 => 51 | D13 => 52 | D14 => 53 | D15 => 54
+ | D16 => 55 | D17 => 56 | D18 => 57 | D19 => 58 | D20 => 59 | D21 => 60 | D22 => 61 | D23 => 62
+ | D24 => 63 | D25 => 64 | D26 => 65 | D27 => 66 | D28 => 67 | D29 => 68 | D30 => 69 | D31 => 70
+ end
+.
+
+Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'.
+Proof.
+ destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Lemma freg_to_pos_discr: forall r r', r <> r' -> freg_to_pos r <> freg_to_pos r'.
+Proof.
+ destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Definition ppos (r: preg) : R.t :=
+ match r with
+ | CR c => match c with
+ | CN => 2
+ | CZ => 3
+ | CC => 4
+ | CV => 5
+ end
+ | PC => 6
+ | DR d => match d with
+ | IR i => match i with
+ | XSP => 7
+ | RR1 ir => ireg_to_pos ir
+ end
+ | FR fr => freg_to_pos fr
+ end
+ end
+.
+
+Notation "# r" := (ppos r) (at level 100, right associativity).
+
+Lemma not_eq_add:
+ forall k n n', n <> n' -> k + n <> k + n'.
+Proof.
+ intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto.
+Qed.
+
+Lemma ppos_equal: forall r r', r = r' <-> ppos r = ppos r'.
+Proof.
+ destruct r as [dr|cr|]; destruct r' as [dr'|cr'|];
+ try destruct dr as [ir|fr]; try destruct dr' as [ir'|fr'];
+ try destruct ir as [irr|]; try destruct ir' as [irr'|].
+ all: split; intros; try rewrite H; try discriminate; try contradiction; simpl; eauto;
+ try destruct irr; try destruct irr';
+ try destruct fr; try destruct fr';
+ try destruct cr; try destruct cr';
+ simpl; try discriminate; try reflexivity.
+Qed.
+
+Lemma ppos_discr: forall r r', r <> r' <-> ppos r <> ppos r'.
+Proof.
+ split; unfold not; try intros; try apply ppos_equal in H0; try discriminate; try contradiction.
+Qed.
+
+Lemma ppos_pmem_discr: forall r, pmem <> ppos r.
+Proof.
+ intros. destruct r as [dr|cr|].
+ - destruct dr as [ir|fr]; try destruct ir as [irr|]; try destruct irr; try destruct fr;
+ unfold ppos; unfold pmem; discriminate.
+ - unfold ppos; unfold pmem; destruct cr; discriminate.
+ - unfold ppos; unfold pmem; discriminate.
+Qed.
+
+(** Inversion functions, used for debug traces *)
+
+Definition pos_to_ireg (p: R.t) : option ireg :=
+ match p with
+ | 8 => Some (X0) | 9 => Some (X1) | 10 => Some (X2) | 11 => Some (X3) | 12 => Some (X4) | 13 => Some (X5) | 14 => Some (X6) | 15 => Some (X7)
+ | 16 => Some (X8) | 17 => Some (X9) | 18 => Some (X10) | 19 => Some (X11) | 20 => Some (X12) | 21 => Some (X13) | 22 => Some (X14) | 23 => Some (X15)
+ | 24 => Some (X16) | 25 => Some (X17) | 26 => Some (X18) | 27 => Some (X19) | 28 => Some (X20) | 29 => Some (X21) | 30 => Some (X22) | 31 => Some (X23)
+ | 32 => Some (X24) | 33 => Some (X25) | 34 => Some (X26) | 35 => Some (X27) | 36 => Some (X28) | 37 => Some (X29) | 38 => Some (X30) | _ => None
+ end.
+
+Definition pos_to_freg (p: R.t) : option freg :=
+ match p with
+ | 39 => Some(D0) | 40 => Some(D1) | 41 => Some(D2) | 42 => Some(D3) | 43 => Some(D4) | 44 => Some(D5) | 45 => Some(D6) | 46 => Some(D7)
+ | 47 => Some(D8) | 48 => Some(D9) | 49 => Some(D10) | 50 => Some(D11) | 51 => Some(D12) | 52 => Some(D13) | 53 => Some(D14) | 54 => Some(D15)
+ | 55 => Some(D16) | 56 => Some(D17) | 57 => Some(D18) | 58 => Some(D19) | 59 => Some(D20) | 60 => Some(D21) | 61 => Some(D22) | 62 => Some(D23)
+ | 63 => Some(D24) | 64 => Some(D25) | 65 => Some(D26) | 66 => Some(D27) | 67 => Some(D28) | 68 => Some(D29) | 69 => Some(D30) | 70 => Some(D31) | _ => None
+ end.
+
+Definition inv_ppos (p: R.t) : option preg :=
+ match p with
+ | 1 => None
+ | 2 => Some (CR CN)
+ | 3 => Some (CR CZ)
+ | 4 => Some (CR CC)
+ | 5 => Some (CR CV)
+ | 6 => Some (PC)
+ | 7 => Some (DR (IR XSP))
+ | n => match pos_to_ireg n with
+ | None => match pos_to_freg n with
+ | None => None
+ | Some fr => Some (DR (FR fr))
+ end
+ | Some ir => Some (DR (IR ir))
+ end
+ end.
+
+Notation "a @ b" := (Econs a b) (at level 102, right associativity).
+
+(** Translations of instructions *)
+
+Definition get_testcond_rlocs (c: testcond) :=
+ match c with
+ | TCeq => (PReg(#CZ) @ Enil)
+ | TCne => (PReg(#CZ) @ Enil)
+ | TChs => (PReg(#CC) @ Enil)
+ | TClo => (PReg(#CC) @ Enil)
+ | TCmi => (PReg(#CN) @ Enil)
+ | TCpl => (PReg(#CN) @ Enil)
+ | TChi => (PReg(#CZ) @ PReg(#CC) @ Enil)
+ | TCls => (PReg(#CZ) @ PReg(#CC) @ Enil)
+ | TCge => (PReg(#CN) @ PReg(#CV) @ Enil)
+ | TClt => (PReg(#CN) @ PReg(#CV) @ Enil)
+ | TCgt => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil)
+ | TCle => (PReg(#CN) @ PReg(#CZ) @ PReg(#CV) @ Enil)
+ end.
+
+Definition trans_control (ctl: control) : inst :=
+ match ctl with
+ | Pb lbl => [(#PC, Op (Control (Ob lbl)) (PReg(#PC) @ Enil))]
+ | Pbc c lbl =>
+ let lr := get_testcond_rlocs c in
+ [(#PC, Op (Control (Obc c lbl)) (PReg(#PC) @ lr))]
+ | Pbl id sg => [(#RA, PReg(#PC));
+ (#PC, Op (Control (Obl id)) Enil)]
+ | Pbs id sg => [(#PC, Op (Control (Obs id)) Enil)]
+ | Pblr r sg => [(#RA, PReg(#PC));
+ (#PC, Old (PReg(#r)))]
+ | Pbr r sg => [(#PC, PReg(#r))]
+ | Pret r => [(#PC, PReg(#r))]
+ | Pcbnz sz r lbl => [(#PC, Op (Control (Ocbnz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pcbz sz r lbl => [(#PC, Op (Control (Ocbz sz lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Ptbnz sz r n lbl => [(#PC, Op (Control (Otbnz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Ptbz sz r n lbl => [(#PC, Op (Control (Otbz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pbtbl r tbl => [(#X16, Op (Constant Vundef) Enil);
+ (#PC, Op (Control (Obtbl tbl)) (PReg(#r) @ PReg(#PC) @ Enil));
+ (#X16, Op (Constant Vundef) Enil)]
+ | Pbuiltin ef args res => []
+ end.
+
+Definition trans_exit (ex: option control) : L.inst :=
+ match ex with
+ | None => []
+ | Some ctl => trans_control ctl
+ end
+.
+
+Definition trans_arith (ai: ar_instruction) : inst :=
+ match ai with
+ | PArithP n rd =>
+ if destroy_X16 n then [(#rd, Op(Arith (OArithP n)) Enil); (#X16, Op (Constant Vundef) Enil)]
+ else [(#rd, Op(Arith (OArithP n)) Enil)]
+ | PArithPP n rd r1 => [(#rd, Op(Arith (OArithPP n)) (PReg(#r1) @ Enil))]
+ | PArithPPP n rd r1 r2 => [(#rd, Op(Arith (OArithPPP n)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+ | PArithRR0R n rd r1 r2 =>
+ let lr := match r1 with
+ | RR0 r1' => Op(Arith (OArithRR0R n)) (PReg(#r1') @ PReg(#r2) @ Enil)
+ | XZR => let vz := if arith_rr0r_isize n then Vint Int.zero else Vlong Int64.zero in
+ Op(Arith (OArithRR0R_XZR n vz)) (PReg(#r2) @ Enil)
+ end in
+ [(#rd, lr)]
+ | PArithRR0 n rd r1 =>
+ let lr := match r1 with
+ | RR0 r1' => Op(Arith (OArithRR0 n)) (PReg(#r1') @ Enil)
+ | XZR => let vz := if arith_rr0_isize n then Vint Int.zero else Vlong Int64.zero in
+ Op(Arith (OArithRR0_XZR n vz)) (Enil)
+ end in
+ [(#rd, lr)]
+ | PArithARRRR0 n rd r1 r2 r3 =>
+ let lr := match r3 with
+ | RR0 r3' => Op(Arith (OArithARRRR0 n)) (PReg(#r1) @ PReg (#r2) @ PReg(#r3') @ Enil)
+ | XZR => let vz := if arith_arrrr0_isize n then Vint Int.zero else Vlong Int64.zero in
+ Op(Arith (OArithARRRR0_XZR n vz)) (PReg(#r1) @ PReg(#r2) @ Enil)
+ end in
+ [(#rd, lr)]
+ | PArithComparisonPP n r1 r2 =>
+ [(#CN, Op(Arith (OArithComparisonPP_CN n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+ (#CZ, Op(Arith (OArithComparisonPP_CZ n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+ (#CC, Op(Arith (OArithComparisonPP_CC n)) (PReg(#r1) @ PReg(#r2) @ Enil));
+ (#CV, Op(Arith (OArithComparisonPP_CV n)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+ | PArithComparisonR0R n r1 r2 =>
+ let is := arith_comparison_r0r_isize n in
+ match r1 with
+ | RR0 r1' => [(#CN, Op(Arith (OArithComparisonR0R_CN n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+ (#CZ, Op(Arith (OArithComparisonR0R_CZ n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+ (#CC, Op(Arith (OArithComparisonR0R_CC n is)) (PReg(#r1') @ PReg(#r2) @ Enil));
+ (#CV, Op(Arith (OArithComparisonR0R_CV n is)) (PReg(#r1') @ PReg(#r2) @ Enil))]
+ | XZR => let vz := if is then Vint Int.zero else Vlong Int64.zero in
+ [(#CN, Op(Arith (OArithComparisonR0R_CN_XZR n is vz)) (PReg(#r2) @ Enil));
+ (#CZ, Op(Arith (OArithComparisonR0R_CZ_XZR n is vz)) (PReg(#r2) @ Enil));
+ (#CC, Op(Arith (OArithComparisonR0R_CC_XZR n is vz)) (PReg(#r2) @ Enil));
+ (#CV, Op(Arith (OArithComparisonR0R_CV_XZR n is vz)) (PReg(#r2) @ Enil))]
+ end
+ | PArithComparisonP n r1 =>
+ [(#CN, Op(Arith (OArithComparisonP_CN n)) (PReg(#r1) @ Enil));
+ (#CZ, Op(Arith (OArithComparisonP_CZ n)) (PReg(#r1) @ Enil));
+ (#CC, Op(Arith (OArithComparisonP_CC n)) (PReg(#r1) @ Enil));
+ (#CV, Op(Arith (OArithComparisonP_CV n)) (PReg(#r1) @ Enil))]
+ | Pcset rd c =>
+ let lr := get_testcond_rlocs c in
+ [(#rd, Op(Arith (Ocset c)) lr)]
+ | Pfmovi fsz rd r1 =>
+ let lr := match r1 with
+ | RR0 r1' => Op(Arith (Ofmovi fsz)) (PReg(#r1') @ Enil)
+ | XZR => Op(Arith (Ofmovi_XZR fsz)) Enil
+ end in
+ [(#rd, lr)]
+ | Pcsel rd r1 r2 c =>
+ let lr := get_testcond_rlocs c in
+ [(#rd, Op(Arith (Ocsel c)) (PReg(#r1) @ PReg (#r2) @ lr))]
+ | Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))]
+ end.
+
+Definition eval_addressing_rlocs_st (st: store_rs_a) (chunk: memory_chunk) (rs: dreg) (a: addressing) :=
+ match a with
+ | ADimm base n => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+ | ADreg base r => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADlsl base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADsxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADuxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADadr base id ofs => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+ | ADpostincr base n => Op (Store (OstoreU st chunk a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *)
+ end.
+
+Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) :=
+ match a with
+ | ADimm base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ | ADreg base r => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADlsl base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADsxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADuxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADadr base id ofs => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ | ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ end.
+
+Definition trans_ldp_chunk (chunk: memory_chunk) (r: dreg): load_rd_a :=
+ match chunk with
+ | Mint32 => Pldrw
+ | Mint64 => Pldrx
+ | Mfloat32 => Pldrs
+ | Mfloat64 => Pldrd
+ | Many32 => Pldrw_a
+ | _ => (* This case should always correspond to Many64 *)
+ match r with
+ | IR _ => Pldrx_a
+ | FR _ => Pldrd_a
+ end
+ end.
+
+Definition trans_stp_chunk (chunk: memory_chunk) (r: dreg): store_rs_a :=
+ match chunk with
+ | Mint32 => Pstrw
+ | Mint64 => Pstrx
+ | Mfloat32 => Pstrs
+ | Mfloat64 => Pstrd
+ | Many32 => Pstrw_a
+ | _ => (* This case should always correspond to Many64 *)
+ match r with
+ | IR _ => Pstrx_a
+ | FR _ => Pstrd_a
+ end
+ end.
+
+Definition trans_load (ldi: ld_instruction) :=
+ match ldi with
+ | PLd_rd_a ld r a =>
+ let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)]
+ | Pldp ld r1 r2 chk1 chk2 a =>
+ let ldi1 := trans_ldp_chunk chk1 r1 in
+ let ldi2 := trans_ldp_chunk chk2 r1 in
+ let lr := eval_addressing_rlocs_ld ldi1 chk1 a in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4%Z | _ => 8%Z end in
+ match a with
+ | ADimm base n =>
+ let a' := (get_offset_addr a ofs) in
+ [(#r1, lr);
+ (#r2, Op (Load (Oload1 ldi2 chk2 a'))
+ (Old(PReg (#base)) @ PReg (pmem) @ Enil))]
+ | _ => [(#PC, (Op (OError)) Enil)]
+ end
+ end.
+
+Definition trans_store (sti: st_instruction) :=
+ match sti with
+ | PSt_rs_a st r a =>
+ let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)]
+ | Pstp st r1 r2 chk1 chk2 a =>
+ let sti1 := trans_stp_chunk chk1 r1 in
+ let sti2 := trans_stp_chunk chk2 r1 in
+ let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in
+ let ofs := match chk1 with | Mint32 | Mfloat32| Many32 => 4%Z | _ => 8%Z end in
+ match a with
+ | ADimm base n =>
+ let a' := (get_offset_addr a ofs) in
+ [(pmem, lr);
+ (pmem, Op (Store (Ostore1 sti2 chk2 a'))
+ (PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))]
+ | _ => [(#PC, (Op (OError)) Enil)]
+ end
+ end.
+
+Definition trans_basic (b: basic) : inst :=
+ match b with
+ | PArith ai => trans_arith ai
+ | PLoad ld => trans_load ld
+ | PStore st => trans_store st
+ | Pallocframe sz linkofs =>
+ [(#X29, PReg(#SP));
+ (#SP, Op (Allocframe (OAllocf_SP sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil));
+ (#X16, Op (Constant Vundef) Enil);
+ (pmem, Op (Allocframe (OAllocf_Mem sz linkofs)) (Old(PReg(#SP)) @ PReg pmem @ Enil))]
+ | Pfreeframe sz linkofs =>
+ [(pmem, Op (Freeframe (OFreef_Mem sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil));
+ (#SP, Op (Freeframe (OFreef_SP sz linkofs)) (PReg (#SP) @ Old (PReg pmem) @ Enil));
+ (#X16, Op (Constant Vundef) Enil)]
+ | Ploadsymbol rd id => [(#rd, Op (Loadsymbol id) Enil)]
+ | Pcvtsw2x rd r1 => [(#rd, Op (Cvtsw2x) (PReg (#r1) @ Enil))]
+ | Pcvtuw2x rd r1 => [(#rd, Op (Cvtuw2x) (PReg (#r1) @ Enil))]
+ | Pcvtx2w rd => [(#rd, Op (Cvtx2w) (PReg (#rd) @ Enil))]
+ | Pnop => []
+ end.
+
+Fixpoint trans_body (b: list basic) : list L.inst :=
+ match b with
+ | nil => nil
+ | b :: lb => (trans_basic b) :: (trans_body lb)
+ end.
+
+Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k.
+
+Definition trans_block (b: Asmblock.bblock) : L.bblock :=
+ trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil).
+
+(*Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.*)
+(*Proof.*)
+ (*intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*)
+(*Qed.*)
+
+(*Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb.*)
+(*Proof.*)
+ (*intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.*)
+(*Qed.*)
+
+(** Lemmas on the translation *)
+
+Definition state := L.mem.
+Definition exec := L.run.
+
+Definition match_states (s: Asm.state) (s': state) :=
+ let (rs, m) := s in
+ s' pmem = Memstate m
+ /\ forall r, s' (#r) = Val (rs r).
+
+Definition match_outcome (o:outcome) (s: option state) :=
+ match o with
+ | Some n => exists s', s=Some s' /\ match_states n s'
+ | None => s=None
+ end.
+
+Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity).
+
+Definition trans_state (s: Asm.state) : state :=
+ let (rs, m) := s in
+ fun x => if (Pos.eq_dec x pmem) then Memstate m
+ else match (inv_ppos x) with
+ | Some r => Val (rs r)
+ | None => Val Vundef
+ end.
+
+Lemma not_eq_IR:
+ forall r r', r <> r' -> IR r <> IR r'.
+Proof.
+ intros. congruence.
+Qed.
+
+Lemma ireg_pos_ppos: forall r,
+ ireg_to_pos r = # r.
+Proof.
+ intros. simpl. reflexivity.
+Qed.
+
+Lemma freg_pos_ppos: forall r,
+ freg_to_pos r = # r.
+Proof.
+ intros. simpl. reflexivity.
+Qed.
+
+Lemma ireg_not_pc: forall r,
+ (#PC) <> ireg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_pmem: forall r,
+ ireg_to_pos r <> pmem.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CN: forall r,
+ 2 <> ireg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CZ: forall r,
+ 3 <> ireg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CC: forall r,
+ 4 <> ireg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma ireg_not_CV: forall r,
+ 5 <> ireg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_pmem: forall r,
+ freg_to_pos r <> pmem.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CN: forall r,
+ 2 <> freg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CZ: forall r,
+ 3 <> freg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CC: forall r,
+ 4 <> freg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma freg_not_CV: forall r,
+ 5 <> freg_to_pos r.
+Proof.
+ intros; destruct r; discriminate.
+Qed.
+
+Lemma dreg_not_pmem: forall (r: dreg),
+ (# r) <> pmem.
+Proof.
+ intros; destruct r as [i|f].
+ - destruct i. apply ireg_not_pmem. discriminate.
+ - apply freg_not_pmem.
+Qed.
+
+Ltac DPRM pr :=
+ destruct pr as [drDPRF|crDPRF|];
+ [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF |]
+ | destruct crDPRF|].
+
+Ltac DPRF pr :=
+ destruct pr as [drDPRF|crDPRF|];
+ [destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF as [irrDPRF|]; [destruct irrDPRF|]
+ | destruct frDPRF]
+ | destruct crDPRF|].
+
+Lemma preg_not_pmem: forall r,
+ pmem <> # r.
+Proof.
+ intros. DPRF r; simpl; discriminate.
+Qed.
+
+Ltac DIRN1 ir := destruct ir as [irrDIRN1|]; subst; try destruct irrDIRN1; simpl.
+
+Lemma dreg_not_CN: forall (r: dreg),
+ 2 <> (#r).
+Proof.
+ intros; DIRN1 r; [ apply ireg_not_CN | discriminate | apply freg_not_CN].
+Qed.
+
+Lemma dreg_not_CZ: forall (r: dreg),
+ 3 <> (#r).
+Proof.
+ intros; DIRN1 r; [ apply ireg_not_CZ | discriminate | apply freg_not_CZ].
+Qed.
+
+Lemma dreg_not_CC: forall (r: dreg),
+ 4 <> (#r).
+Proof.
+ intros; DIRN1 r; [ apply ireg_not_CC | discriminate | apply freg_not_CC].
+Qed.
+
+Lemma dreg_not_CV: forall (r: dreg),
+ 5 <> (#r).
+Proof.
+ intros; DIRN1 r; [ apply ireg_not_CV | discriminate | apply freg_not_CV].
+Qed.
+
+Lemma sr_update_both: forall sr rsr r1 rr v
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ (sr <[ (#r1) <- Val (v) ]>) (#rr) =
+ Val (rsr # r1 <- v rr).
+Proof.
+ intros. unfold assign.
+ destruct (R.eq_dec (#r1) (#rr)); subst.
+ - apply ppos_equal in e; subst; rewrite Pregmap.gss; reflexivity.
+ - rewrite Pregmap.gso; eapply ppos_discr in n; auto.
+Qed.
+
+Lemma sr_gss: forall sr pos v,
+ (sr <[ pos <- v ]>) pos = v.
+Proof.
+ intros. unfold assign.
+ destruct (R.eq_dec pos pos) eqn:REQ; try reflexivity; try congruence.
+Qed.
+
+Lemma sr_update_overwrite: forall sr pos v1 v2,
+ (sr <[ pos <- v1 ]>) <[ pos <- v2 ]> = (sr <[ pos <- v2 ]>).
+Proof.
+ intros.
+ unfold assign. apply functional_extensionality; intros x.
+ destruct (R.eq_dec pos x); reflexivity.
+Qed.
+
+Ltac sr_val_rwrt :=
+ repeat match goal with
+ | [H: forall r: preg, ?sr (# r) = Val (?rsr r) |- _ ]
+ => rewrite H
+ end.
+
+Ltac sr_memstate_rwrt :=
+ repeat match goal with
+ | [H: ?sr pmem = Memstate ?mr |- _ ]
+ => rewrite <- H
+ end.
+
+Ltac replace_ppos :=
+ try erewrite !ireg_pos_ppos;
+ try erewrite !freg_pos_ppos;
+ try replace (7) with (#XSP) by eauto;
+ try replace (24) with (#X16) by auto.
+
+Ltac DDRM dr :=
+ destruct dr as [irsDDRF|frDDRF];
+ [destruct irsDDRF
+ | idtac ].
+
+(* Ltac DDRF dr :=
+ destruct dr as [irsDDRF|frDDRF];
+ [destruct irsDDRF as [irsDDRF|]; [destruct irsDDRF|]
+ | destruct frDDRF]. *)
+
+(* Ltac DPRI pr :=
+ destruct pr as [drDPRI|crDPRI|];
+ [destruct drDPRI as [irDPRI|frDPRI]; [destruct irDPRI as [irrDPRI|]; [destruct irrDPRI|]|]
+ | idtac
+ | idtac ]. *)
+
+Ltac discriminate_ppos :=
+ try apply ireg_not_pmem;
+ try apply ireg_not_pc;
+ try apply freg_not_pmem;
+ try apply dreg_not_pmem;
+ try apply ireg_not_CN;
+ try apply ireg_not_CZ;
+ try apply ireg_not_CC;
+ try apply ireg_not_CV;
+ try apply freg_not_CN;
+ try apply freg_not_CZ;
+ try apply freg_not_CC;
+ try apply freg_not_CV;
+ try apply dreg_not_CN;
+ try apply dreg_not_CZ;
+ try apply dreg_not_CC;
+ try apply dreg_not_CV;
+ try(simpl; discriminate).
+
+Ltac replace_pc := try replace (6) with (#PC) by eauto.
+
+Ltac replace_regs_pos sr :=
+ try replace (sr 7) with (sr (ppos XSP)) by eauto;
+ try replace (sr 6) with (sr (ppos PC)) by eauto;
+ try replace (sr 2) with (sr (ppos CN)) by eauto;
+ try replace (sr 3) with (sr (ppos CZ)) by eauto;
+ try replace (sr 4) with (sr (ppos CC)) by eauto;
+ try replace (sr 5) with (sr (ppos CV)) by eauto.
+
+Ltac Simpl_exists sr :=
+ replace_ppos;
+ replace_regs_pos sr;
+ try sr_val_rwrt;
+ try (eexists; split; [| split]); eauto;
+ try (sr_memstate_rwrt; rewrite assign_diff;
+ try reflexivity;
+ discriminate_ppos
+ ).
+
+Ltac Simpl_rep sr :=
+ replace_ppos;
+ replace_regs_pos sr;
+ try sr_val_rwrt;
+ try (sr_memstate_rwrt; rewrite assign_diff;
+ try reflexivity;
+ discriminate_ppos
+ ).
+
+Ltac Simpl_update :=
+ try eapply sr_update_both; eauto.
+
+Ltac Simpl sr := Simpl_exists sr; try (intros rr(* ; try rewrite sr_update_overwrite; replace_regs_pos sr; DPRM rr *)); Simpl_update.
+
+Ltac destruct_res_flag rsr := try (rewrite Pregmap.gso; discriminate_ppos); destruct (rsr _); simpl; try reflexivity.
+
+(* Ltac discriminate_preg_flags := rewrite !assign_diff; try rewrite !Pregmap.gso; discriminate_ppos; sr_val_rwrt; reflexivity. *)
+
+(* Ltac destruct_reg_neq r1 r2 :=
+ destruct (PregEq.eq r1 r2); subst;
+ [ rewrite sr_gss; rewrite Pregmap.gss; reflexivity |
+ rewrite assign_diff; try rewrite Pregmap.gso; fold (ppos r1); try apply ppos_discr; auto]. *)
+
+Lemma reg_update_overwrite: forall rsr sr r rd v1 v2
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ ((sr <[ # rd <- Val (v1) ]>) <[ # rd <- Val (v2) ]>) (# r) =
+ Val ((rsr # rd <- v1) # rd <- v2 r).
+Proof.
+ intros.
+ unfold Pregmap.set; destruct (PregEq.eq r rd).
+ - rewrite e; apply sr_gss; reflexivity.
+ - rewrite sr_update_overwrite. rewrite assign_diff; eauto.
+ unfold not; intros. apply ppos_equal in H. congruence.
+Qed.
+
+Ltac replace_regs_cond_force :=
+ try replace (5) with (#CV) in * by auto;
+ try replace (4) with (#CC) in * by auto;
+ try replace (3) with (#CZ) in * by auto;
+ try replace (2) with (#CN) in * by auto.
+
+Ltac validate_crbit_flags rr v1 v2 :=
+ destruct (R.eq_dec 5 (#rr)) as [e0|n0];
+ destruct (R.eq_dec 4 (#rr)) as [e1|n1];
+ destruct (R.eq_dec 3 (#rr)) as [e2|n2];
+ destruct (R.eq_dec 2 (#rr)) as [e3|n3];
+ replace_regs_cond_force;
+ try apply ppos_equal in e0;
+ try apply ppos_equal in e1;
+ try apply ppos_equal in e2;
+ try apply ppos_equal in e3;
+ try apply ppos_discr in n0;
+ try apply ppos_discr in n1;
+ try apply ppos_discr in n2;
+ try apply ppos_discr in n3;
+ subst.
+
+Ltac Simpl_flags :=
+ try (rewrite Pregmap.gss; reflexivity);
+ try (rewrite Pregmap.gso, Pregmap.gss; [reflexivity|]; try auto);
+ try (rewrite Pregmap.gso; try auto);
+ try (rewrite !Pregmap.gso; try auto).
+
+Lemma compare_single_res: forall sr mr rsr rr v1 v2
+ (HMEM: sr pmem = Memstate mr)
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ ((((sr <[ 2 <- Val (_CN (v_compare_single v1 v2)) ]>) <[ 3 <-
+ Val (_CZ (v_compare_single v1 v2)) ]>) <[ 4 <-
+ Val (_CC (v_compare_single v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_single v1 v2)) ]>) (# rr) =
+Val
+ ((compare_single rsr v1 v2) rr).
+Proof.
+ intros. unfold v_compare_single, compare_single, assign.
+ validate_crbit_flags rr v1 v2.
+ all: destruct v1; destruct v2; Simpl_flags.
+Qed.
+
+Lemma compare_float_res: forall sr mr rsr rr v1 v2
+ (HMEM: sr pmem = Memstate mr)
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ ((((sr <[ 2 <- Val (_CN (v_compare_float v1 v2)) ]>) <[ 3 <-
+ Val (_CZ (v_compare_float v1 v2)) ]>) <[ 4 <-
+ Val (_CC (v_compare_float v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_float v1 v2)) ]>) (# rr) =
+Val
+ ((compare_float rsr v1 v2) rr).
+Proof.
+ intros. unfold v_compare_float, compare_float, assign.
+ validate_crbit_flags rr v1 v2.
+ all: destruct v1; destruct v2; Simpl_flags.
+Qed.
+
+Lemma compare_long_res: forall sr mr rsr rr v1 v2
+ (HMEM: sr pmem = Memstate mr)
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ ((((sr <[ 2 <- Val (_CN (v_compare_long v1 v2)) ]>) <[ 3 <-
+ Val (_CZ (v_compare_long v1 v2)) ]>) <[ 4 <-
+ Val (_CC (v_compare_long v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_long v1 v2)) ]>) (# rr) =
+Val
+ ((compare_long rsr v1 v2) rr).
+Proof.
+ intros. unfold v_compare_long, compare_long, assign.
+ validate_crbit_flags rr v1 v2.
+ all: Simpl_flags.
+Qed.
+
+Lemma compare_int_res: forall sr mr rsr rr v1 v2
+ (HMEM: sr pmem = Memstate mr)
+ (HEQV: forall r : preg, sr (# r) = Val (rsr r)),
+ ((((sr <[ 2 <- Val (_CN (v_compare_int v1 v2)) ]>) <[ 3 <-
+ Val (_CZ (v_compare_int v1 v2)) ]>) <[ 4 <-
+ Val (_CC (v_compare_int v1 v2)) ]>) <[ 5 <-
+ Val (_CV (v_compare_int v1 v2)) ]>) (# rr) =
+Val
+ ((compare_int rsr v1 v2) rr).
+Proof.
+ intros. unfold v_compare_int, compare_int, assign.
+ validate_crbit_flags rr v1 v2.
+ all: Simpl_flags.
+Qed.
+
+Section SECT_SEQ.
+
+Variable Ge: genv.
+
+Lemma trans_arith_correct rsr mr sr rsw' old i:
+ match_states (State rsr mr) sr ->
+ exec_arith_instr Ge.(_lk) i rsr = rsw' ->
+ exists sw,
+ inst_run Ge (trans_arith i) sr old = Some sw
+ /\ match_states (State rsw' mr) sw.
+Proof.
+ induction i.
+ all: intros MS EARITH; subst; inv MS; unfold exec_arith_instr.
+ - (* PArithP *)
+ destruct i.
+ 1,2,3: DIRN1 rd; Simpl sr.
+ (* Special case for Pmovimms/Pmovimmd *)
+ all: simpl;
+ try(destruct (negb (is_immediate_float32 _)));
+ try(destruct (negb (is_immediate_float64 _)));
+ DIRN1 rd; Simpl sr;
+ try (rewrite assign_diff; discriminate_ppos; reflexivity);
+ try (intros rr'; Simpl_update).
+ - (* PArithPP *)
+ DIRN1 rs; DIRN1 rd; Simpl sr.
+ - (* PArithPPP *)
+ DIRN1 r1; DIRN1 r2; DIRN1 rd; Simpl sr.
+ - (* PArithRR0R *)
+ simpl. destruct r1.
+ + (* OArithRR0R *) simpl; Simpl sr.
+ + (* OArithRR0R_XZR *) simpl; destruct (arith_rr0r_isize _); Simpl sr.
+ - (* PArithRR0 *)
+ simpl. destruct r1.
+ + (* OArithRR0 *) simpl; Simpl sr.
+ + (* OArithRR0_XZR *) simpl; destruct (arith_rr0_isize _); Simpl sr.
+ - (* PArithARRRR0 *)
+ simpl. destruct r3.
+ + (* OArithARRRR0 *) simpl; Simpl sr.
+ + (* OArithARRRR0_XZR *) simpl; destruct (arith_arrrr0_isize _); Simpl sr.
+ - (* PArithComparisonPP *)
+ simpl; destruct i;
+ unfold arith_eval_comparison_pp, arith_prepare_comparison_pp; simpl;
+ fold (ppos r2); fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr;
+ Simpl sr;
+ try destruct sz;
+ try (eapply compare_single_res; eauto);
+ try (eapply compare_long_res; eauto);
+ try (eapply compare_float_res; eauto).
+ - (* PArithComparisonR0R *)
+ simpl; destruct r1; simpl; destruct i;
+ repeat (replace_regs_cond_force; try fold (ppos r); try fold(ppos r2);
+ try rewrite !assign_diff; discriminate_ppos; try rewrite !H0);
+ Simpl sr; unfold arith_eval_comparison_r0r, arith_comparison_r0r_isize;
+ destruct arith_prepare_comparison_r0r; destruct is;
+ try (eapply compare_long_res; eauto);
+ try (eapply compare_int_res; eauto).
+ - (* PArithComparisonP *)
+ simpl; destruct i;
+ unfold arith_eval_comparison_p, arith_prepare_comparison_p; simpl;
+ fold(ppos r1); try rewrite !H0; repeat Simpl_rep sr;
+ Simpl sr;
+ try destruct sz;
+ try (eapply compare_int_res; eauto);
+ try (eapply compare_single_res; eauto);
+ try (eapply compare_long_res; eauto);
+ try (eapply compare_float_res; eauto).
+ - (* Pcset *)
+ simpl; unfold eval_testcond, get_testcond_rlocs, cflags_eval;
+ unfold cond_eval_is; unfold flags_testcond_value, list_exp_eval; destruct c; simpl;
+ repeat Simpl_rep sr; Simpl_exists sr;
+ destruct_res_flag rsr;
+ Simpl sr.
+ - (* Pfmovi *)
+ simpl; destruct r1; simpl; destruct fsz; Simpl sr.
+ - (* Pcsel *)
+ destruct c; simpl; DIRN1 rd; fold (ppos r2); fold (ppos r1); Simpl sr.
+ - (* Pfnmul *)
+ simpl; destruct fsz; Simpl sr.
+Qed.
+
+Lemma sp_xsp:
+ SP = XSP.
+Proof.
+ econstructor.
+Qed.
+
+Lemma load_chunk_neutral: forall chk v r,
+ interp_load (trans_ldp_chunk chk r) v = v.
+Proof.
+ intros; destruct chk; destruct r; simpl; reflexivity.
+Qed.
+
+Theorem bisimu_basic rsr mr sr bi:
+ match_states (State rsr mr) sr ->
+ match_outcome (exec_basic Ge.(_lk) Ge.(_genv) bi rsr mr) (inst_run Ge (trans_basic bi) sr sr).
+Proof.
+(* a little tactic to automate reasoning on preg_eq *)
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+Local Ltac preg_eq_discr r rd :=
+ destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
+ rewrite (assign_diff _ (#rd) (#r) _); auto;
+ rewrite Pregmap.gso; auto.
+
+ intros MS; inversion MS as (H & H0).
+ destruct bi; simpl.
+ (* Loadsymbol / Cvtsw2x / Cvtuw2x / Cvtx2w *)
+ 6,7,8,9: Simpl sr.
+ - (* Arith *)
+ exploit trans_arith_correct; eauto.
+ - (* Load *)
+ destruct ld.
+ + unfold exec_load, exec_load_rd_a, eval_addressing_rlocs_ld, exp_eval;
+ destruct ld; destruct a; simpl;
+ try fold (ppos base); try fold (ppos r);
+ erewrite !H0, H; simpl;
+ unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv;
+ try destruct (Mem.loadv _ _ _); simpl; auto.
+ all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update).
+ +
+ unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval;
+ destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto;
+ try fold (ppos base);
+ try erewrite !H0, H; simpl;
+ unfold exec_load1, exec_load2; unfold call_ll_loadv;
+ destruct (Mem.loadv _ _ _); simpl; auto;
+ fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H;
+ try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr;
+ rewrite !load_chunk_neutral;
+ try (rewrite !assign_diff; discriminate_ppos; reflexivity);
+ try (destruct rd1 as [ir1|fr1]; try destruct ir1; destruct rd2 as [ir2|fr2]; try destruct ir2;
+ destruct base; discriminate_ppos);
+ repeat (try fold (ppos r); try fold (ppos r0);
+ try fold (ppos fr1); try fold (ppos fr2); intros; Simpl_update).
+ - (* Store *)
+ destruct st.
+ + unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval;
+ destruct st; destruct a; simpl;
+ try fold (ppos base); try fold (ppos rs); try fold (ppos r);
+ erewrite !H0, H; simpl;
+ unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev;
+ try destruct (Mem.storev _ _ _ _); simpl; auto.
+ all: eexists; split; [| split]; eauto;
+ intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos.
+ + unfold exec_store, exec_store_double, eval_addressing_rlocs_st, exp_eval;
+ destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto;
+ try fold (ppos base); try fold (ppos rs1);
+ erewrite !H0, H; simpl;
+ unfold exec_store1, exec_store2; unfold call_ll_storev;
+ try destruct (Mem.storev _ _ _ _); simpl; auto;
+ fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl;
+ try destruct (Mem.storev _ _ _ _); simpl; auto.
+ all: eexists; split; [| split]; eauto;
+ repeat (try intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos).
+ - (* Alloc *)
+ destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
+ + eexists; repeat split.
+ * rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr;
+ rewrite H; destruct (Mem.alloc _ _ _) eqn:MEMAL2;
+ injection MEMAL; intros Hm Hb; try rewrite Hm, Hb;
+ rewrite sp_xsp in MEMS; rewrite MEMS.
+ rewrite !assign_diff; try discriminate_ppos; Simpl_exists sr; rewrite H;
+ destruct (Mem.alloc _ _ _) eqn:MEMAL3;
+ injection MEMAL2; intros Hm2 Hb2; try rewrite Hm2, Hb2;
+ rewrite Hm, Hb; rewrite MEMS; reflexivity.
+ * eauto.
+ * intros rr. rewrite assign_diff; try apply preg_not_pmem; try rewrite sp_xsp.
+ replace 37 with (#X29) by auto; repeat (Simpl_update; intros).
+ + simpl; repeat Simpl_exists sr. erewrite H. destruct (Mem.alloc _ _ _) eqn:HMEMAL2.
+ injection MEMAL; intros Hm Hb.
+ try rewrite Hm, Hb; clear Hm Hb.
+ try rewrite sp_xsp in MEMS; rewrite MEMS. reflexivity.
+ - (* Free *)
+ destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto;
+ repeat Simpl_exists sr; rewrite H; simpl.
+ + destruct (rsr SP) eqn:EQSP; simpl; rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD; try reflexivity.
+ destruct (Mem.free _ _ _) eqn:EQFREE; try reflexivity. rewrite assign_diff; discriminate_ppos.
+ replace_regs_pos sr; sr_val_rwrt. rewrite <- sp_xsp; rewrite EQSP; rewrite MLOAD. rewrite EQFREE.
+ replace 24 with (#X16) by auto; rewrite sp_xsp; Simpl sr.
+ intros rr'; destruct (PregEq.eq XSP rr').
+ * rewrite e; rewrite Pregmap.gss, sr_gss; auto.
+ * rewrite Pregmap.gso, !assign_diff; auto; apply ppos_discr in n; auto.
+ + rewrite <- sp_xsp; rewrite MLOAD; reflexivity.
+ - (* Nop *)
+ Simpl sr.
+Qed.
+
+Theorem bisimu_body:
+ forall bdy rsr mr sr,
+ match_states (State rsr mr) sr ->
+ match_outcome (exec_body Ge.(_lk) Ge.(_genv) bdy rsr mr) (exec Ge (trans_body bdy) sr).
+Proof.
+ induction bdy as [|i bdy]; simpl; eauto.
+ intros.
+ exploit (bisimu_basic rsr mr sr i); eauto.
+ destruct (exec_basic _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ rewrite X1; simpl; eauto. eapply IHbdy; eauto; simpl.
+ unfold match_states in *. destruct s. unfold Asm._m. eauto.
+ - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_control ex sz rsr mr sr:
+ match_states (State rsr mr) sr ->
+ match_outcome (exec_cfi Ge.(_genv) Ge.(_fn) ex (incrPC (Ptrofs.repr sz) rsr) mr) (inst_run Ge (trans_pcincr sz (trans_exit (Some (PCtlFlow ex)))) sr sr).
+Proof.
+ intros MS.
+ simpl in *. inv MS.
+ destruct ex.
+ (* Obr / Oret *)
+ 6,7: unfold control_eval, incrPC; simpl; destruct Ge;
+ replace_pc; rewrite (H0 PC);
+ repeat Simpl_rep sr; Simpl_exists sr;
+ intros rr; destruct (preg_eq rr PC); [
+ rewrite e; rewrite sr_gss; rewrite Pregmap.gss;
+ try rewrite Pregmap.gso; discriminate_ppos; fold (ppos r); auto |
+ repeat Simpl_rep sr; try rewrite !Pregmap.gso; auto; apply ppos_discr in n; auto ].
+ (* Ocbnz / Ocbz *)
+ 6,7,8,9: unfold control_eval; destruct Ge; simpl;
+ replace_pc; rewrite (H0 PC);
+ unfold eval_branch, eval_neg_branch, eval_testzero, eval_testbit,
+ incrPC, goto_label_deps, goto_label;
+ destruct (PregEq.eq r PC);
+ [ rewrite e; destruct sz0; simpl; Simpl sr |
+ destruct sz0; simpl; replace_pc; rewrite Pregmap.gso; auto; repeat Simpl_rep sr; try rewrite H0;
+ try (destruct (Val.mxcmpu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+ try (destruct (Val.mxcmplu_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+ try (destruct (Val.cmp_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+ try (destruct (Val.cmpl_bool _ _ _) eqn:EQCMP; try reflexivity; destruct b);
+ try (simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr);
+ destruct (label_pos _ _ _); try reflexivity; rewrite Pregmap.gss;
+ destruct Val.offset_ptr; try reflexivity; rewrite sr_update_overwrite;
+ simpl; Simpl sr; (destruct (PregEq.eq rr PC); subst;
+ [ rewrite sr_gss, Pregmap.gss; reflexivity |
+ rewrite !assign_diff, !Pregmap.gso; replace_pc; auto; apply ppos_discr; auto]) ].
+ - (* Ob *)
+ replace_pc. rewrite (H0 PC). simpl.
+ unfold goto_label, control_eval. destruct Ge.
+ unfold goto_label_deps. destruct (label_pos _ _ _); auto.
+ + unfold incrPC. rewrite Pregmap.gss; eauto. destruct (Val.offset_ptr _ _); auto;
+ try (rewrite sr_gss; unfold Stuck; reflexivity).
+ simpl. eexists; split; split.
+ * rewrite sr_update_overwrite. unfold pmem, assign in *. simpl. rewrite H; reflexivity.
+ * intros. rewrite sr_update_overwrite. unfold Pregmap.set, assign.
+ destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct ir as [irr|];
+ try destruct irr; try destruct fr; try destruct cr; simpl; try rewrite <- H0; eauto.
+ + rewrite sr_gss; reflexivity.
+ - (* Obc *)
+ replace_pc. rewrite (H0 PC). simpl.
+ unfold eval_branch, goto_label, control_eval. destruct Ge.
+ unfold goto_label_deps, cflags_eval, eval_testcond, list_exp_eval.
+ destruct c; simpl; unfold incrPC;
+ repeat (replace_ppos; replace_pc; replace_regs_pos sr; sr_val_rwrt; try rewrite !assign_diff; discriminate_ppos).
+ 1,2,3,4,5,6: destruct_res_flag rsr.
+ 7,8,9,10: do 2 (destruct_res_flag rsr).
+ 11,12 : do 3 (destruct_res_flag rsr).
+ 1,2,3,4,5,6,9,10: destruct (Int.eq _ _); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+ destruct (label_pos _ _ _); [| reflexivity]; replace_pc;
+ rewrite !Pregmap.gss; destruct Val.offset_ptr;
+ try (unfold Stuck; reflexivity); Simpl_exists sr; intros rr;
+ apply reg_update_overwrite; eauto.
+ 1,3: destruct (andb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+ destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss;
+ destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr;
+ intros rr; apply reg_update_overwrite; eauto.
+ 1,2: destruct (orb); [| simpl; rewrite sr_update_overwrite; replace_pc; Simpl sr ];
+ destruct (label_pos _ _ _); [| reflexivity]; replace_pc; rewrite !Pregmap.gss;
+ destruct Val.offset_ptr; try (unfold Stuck; reflexivity); Simpl_exists sr;
+ intros rr; apply reg_update_overwrite; eauto.
+ - (* Obl *)
+ replace_pc. rewrite (H0 PC). simpl.
+ unfold control_eval. destruct Ge.
+ rewrite sr_gss. Simpl sr.
+ replace_pc; try replace (38) with (#X30) by eauto; unfold incrPC. Simpl_update.
+ repeat (intros; Simpl_update).
+ - (* Obs *)
+ unfold control_eval. destruct Ge. replace_pc. rewrite (H0 PC). simpl; unfold incrPC.
+ replace_pc; Simpl_exists sr; intros rr; apply reg_update_overwrite; eauto.
+ - (* Oblr *)
+ replace_pc. rewrite (H0 PC).
+ unfold control_eval. destruct Ge. simpl. unfold incrPC.
+ try (eexists; split; [ | split ]); eauto.
+ intros rr; destruct (PregEq.eq PC rr).
+ + replace_pc; rewrite e; rewrite !Pregmap.gss, !sr_gss;
+ rewrite Pregmap.gso; fold (ppos r); try rewrite H0; auto.
+ rewrite <- e; destruct r; discriminate.
+ + replace_pc; rewrite Pregmap.gso, assign_diff; auto; apply ppos_discr in n; auto;
+ rewrite Pregmap.gss, sr_gss. destruct (PregEq.eq X30 rr); replace 38 with (#X30) by auto.
+ * rewrite e; rewrite Pregmap.gss, sr_gss; auto.
+ * replace_pc; rewrite !Pregmap.gso, !assign_diff; auto; apply ppos_discr in n0; auto;
+ apply ppos_discr; eauto.
+ - (* Obtbl *)
+ replace_pc; rewrite (H0 PC);
+ unfold control_eval; destruct Ge; simpl; unfold incrPC.
+ destruct (PregEq.eq X16 r1).
+ + fold (ppos r1); rewrite <- e; rewrite Pregmap.gss, sr_gss; reflexivity.
+ + rewrite !Pregmap.gso; auto; rewrite ppos_discr in n;
+ fold (ppos r1); replace 24 with (#X16) by auto; try rewrite !assign_diff; auto;
+ discriminate_ppos; rewrite H0; destruct (rsr r1) eqn:EQR;
+ try (try rewrite EQR; reflexivity).
+ try destruct (list_nth_z _ _); try reflexivity;
+ unfold goto_label, goto_label_deps; destruct (label_pos _ _ _);
+ try rewrite 2Pregmap.gso, Pregmap.gss; destruct (Val.offset_ptr (rsr PC) (Ptrofs.repr sz));
+ try reflexivity; discriminate_ppos. Simpl sr.
+ destruct (PregEq.eq X16 rr); [ subst; Simpl_update |];
+ destruct (PregEq.eq PC rr); [ subst; Simpl_update |].
+ rewrite !Pregmap.gso; auto;
+ apply ppos_discr in n0; apply ppos_discr in n1;
+ rewrite !assign_diff; auto.
+Qed.
+
+Theorem bisimu_exit ex sz rsr mr sr:
+ match_states (State rsr mr) sr ->
+ (*is_builtin ex = false ->*)
+ match_outcome (estep Ge.(_genv) Ge.(_fn) ex (Ptrofs.repr sz) rsr mr) (inst_run Ge (trans_pcincr sz (trans_exit ex)) sr sr).
+Proof.
+ intros MS; unfold estep.
+ destruct ex.
+ - destruct c.
+ + exploit (bisimu_control i sz rsr mr sr); eauto.
+ + simpl. inv MS. eexists; split; [| split].
+ unfold control_eval; destruct Ge.
+ replace_pc; rewrite (H0 PC); eauto.
+ rewrite assign_diff; auto.
+ intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst.
+ * rewrite sr_gss, Pregmap.gss. reflexivity.
+ * rewrite assign_diff, Pregmap.gso; try rewrite H0;
+ auto; try rewrite ppos_discr in n; auto.
+ - simpl. inv MS. eexists; split; [| split].
+ + unfold control_eval; destruct Ge.
+ replace_pc; rewrite (H0 PC); eauto.
+ + rewrite assign_diff; auto.
+ + intros rr. unfold incrPC. destruct (PregEq.eq rr PC); subst.
+ * rewrite sr_gss, Pregmap.gss. reflexivity.
+ * rewrite assign_diff, Pregmap.gso; try rewrite H0; auto; try rewrite ppos_discr in n; auto.
+Qed.
+
+(* Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil). *)
+
+Theorem bisimu rsr mr sr bb:
+ match_states (State rsr mr) sr ->
+ match_outcome (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) bb rsr mr) (exec Ge (trans_block bb) sr).
+Proof.
+ intros MS. unfold bbstep, trans_block.
+ exploit (bisimu_body (body bb) rsr mr sr); eauto.
+ destruct (exec_body _ _ _ _ _); simpl.
+ - unfold match_states in *. intros (s' & X1 & X2). destruct s.
+ erewrite run_app_Some; eauto.
+ exploit (bisimu_exit (exit bb) (size bb) _rs _m s'); eauto.
+ destruct Ge; simpl. destruct MS as (Y1 & Y2). destruct X2 as (X2 & X3).
+ replace_pc. erewrite !X3; simpl.
+ destruct (inst_run _ _ _ _); simpl; auto.
+ - intros X; erewrite run_app_None; eauto.
+Qed.
+
+Theorem trans_state_match: forall S, match_states S (trans_state S).
+Proof.
+ intros. destruct S as (rs & m). simpl.
+ split. reflexivity.
+ intro. destruct r as [dr|cr|]; try destruct dr as [ir|fr]; try destruct cr;
+ try destruct ir as [irr|]; try destruct irr; try destruct fr; try reflexivity.
+Qed.
+
+Lemma state_eq_decomp:
+ forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2.
+Proof.
+ intros. congruence.
+Qed.
+
+Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2.
+Proof.
+ unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1.
+ apply state_eq_decomp.
+ - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence.
+ - congruence.
+Qed.
+
+End SECT_SEQ.
+
+Section SECT_BBLOCK_EQUIV.
+
+Variable Ge: genv.
+
+Local Hint Resolve trans_state_match: core.
+
+Lemma bblock_simu_reduce_aux:
+ forall p1 p2,
+ L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+ Asmblockprops.bblock_simu_aux Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2.
+Proof.
+ intros p1 p2 H0 rs m EBB.
+ generalize (H0 (trans_state (State rs m))); clear H0.
+ intro H0.
+ exploit (bisimu Ge rs m (trans_state (State rs m)) p1); eauto.
+ exploit (bisimu Ge rs m (trans_state (State rs m)) p2); eauto.
+ destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 rs m); try (unfold Stuck in EBB; congruence).
+ intros H1 (s2' & exp2 & MS'). unfold exec in exp2, H1. rewrite exp2 in H0.
+ destruct H0 as (m2' & H0 & H2). discriminate. rewrite H0 in H1.
+ destruct (bbstep Ge.(_lk) Ge.(_genv) Ge.(_fn) p2 rs m); simpl in H1.
+ * unfold match_states in H1, MS'. destruct s, s0.
+ destruct H1 as (s' & H1 & H3 & H4). inv H1. inv MS'.
+ replace (_rs0) with (_rs).
+ - replace (_m0) with (_m); auto. congruence.
+ - apply functional_extensionality. intros r.
+ generalize (H1 r). intros Hr. congruence.
+ * discriminate.
+Qed.
+
+Lemma incrPC_set_res_commut res: forall d vres rs,
+ incrPC d (set_res (map_builtin_res DR res) vres rs) =
+ set_res (map_builtin_res DR res) vres (incrPC d rs).
+Proof.
+ induction res; simpl; auto.
+ - intros; apply functional_extensionality.
+ unfold incrPC; intros x0.
+ destruct (PregEq.eq x0 PC).
+ + subst; rewrite! Pregmap.gss; auto.
+ + rewrite Pregmap.gso; auto.
+ destruct (PregEq.eq x x0).
+ * subst; rewrite! Pregmap.gss; auto.
+ * rewrite !Pregmap.gso; auto.
+ - intros; rewrite IHres2. f_equal. auto.
+Qed.
+
+Lemma incrPC_undef_regs_commut l : forall d rs,
+ incrPC d (undef_regs l rs) = undef_regs l (incrPC d rs).
+Proof.
+ induction l; simpl; auto.
+ intros. rewrite IHl. unfold incrPC.
+ destruct (PregEq.eq a PC).
+ - rewrite e. rewrite Pregmap.gss.
+ simpl. apply f_equal. unfold Pregmap.set.
+ apply functional_extensionality. intros x.
+ destruct (PregEq.eq x PC); auto.
+ - rewrite Pregmap.gso; auto.
+ apply f_equal. unfold Pregmap.set.
+ apply functional_extensionality. intros x.
+ destruct (PregEq.eq x PC).
+ + subst. destruct a; simpl; auto. congruence.
+ + auto.
+Qed.
+
+Lemma bblock_simu_reduce:
+ forall p1 p2,
+ L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+ (has_builtin p1 = true \/ has_builtin p2 = true -> exit p1 = exit p2) ->
+ Asmblockprops.bblock_simu Ge.(_lk) Ge.(_genv) Ge.(_fn) p1 p2.
+Proof.
+ unfold bblock_simu. intros p1 p2 H0 BLT rs m EBB.
+ unfold exec_bblock.
+ generalize (bblock_simu_reduce_aux p1 p2 H0).
+ unfold bblock_simu_aux. clear H0.
+ unfold exec_bblock, bbstep. intros H0 m' t0 (rs1 & m1 & H1 & H2).
+ assert ((has_builtin p1 = false /\ has_builtin p2 = false) \/ (has_builtin p1 = true \/ has_builtin p2 = true)). { repeat destruct (has_builtin _); simpl; intuition. }
+ destruct H as [[X1 X2]|H].
+ - (* Not a builtin *)
+ exploit (H0); eauto; erewrite H1; simpl; (*gen*)
+ unfold estep; unfold has_builtin in *.
+ { destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *.
+ inversion H2; subst. rewrite H3; discriminate. }
+ destruct (exit p1) as [[]|] eqn: EQEX1; try discriminate; simpl in *.
+ { inversion H2; subst. rewrite H3.
+ destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+ intros H4. eexists; eexists; split; try reflexivity.
+ destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto.
+ inversion H4. econstructor. }
+ { inversion H2; subst.
+ destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+intros H4. eexists; eexists; split; try reflexivity.
+ destruct (exit p2) as [[]|] eqn:EQEX2; simpl in *; try discriminate; try econstructor; eauto.
+ inversion H4. rewrite H3. econstructor. }
+ - (* Builtin *)
+ exploit (BLT); eauto.
+ intros EXIT.
+ unfold has_builtin in H.
+ assert (is_builtin (exit p1) = true). { rewrite <- EXIT in H; intuition. }
+ clear H.
+ generalize (H0 rs m); eauto; erewrite H1; simpl;
+ unfold estep. destruct (exit p1) as [[]|] eqn:EQEX1; try discriminate.
+ rewrite <- EXIT.
+ intros CONTRA.
+ exploit (CONTRA); try discriminate.
+ destruct (exec_body _ _ (body p2) _ _) as [[rs2 m2]|]; try discriminate.
+ intros H4. eexists; eexists; split; try reflexivity.
+ inversion H2; subst. inversion H4; subst.
+ econstructor; eauto.
+ + unfold incrPC in H5.
+ replace (rs2 SP) with (rs1 SP).
+ replace (fun r : dreg => rs2 r) with (fun r : dreg => rs1 r); auto.
+ * apply functional_extensionality.
+ intros r; destruct (PregEq.eq r PC); try discriminate.
+ replace (rs1 r) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) r) by auto.
+ rewrite H5; rewrite Pregmap.gso; auto.
+ * replace (rs1 SP) with (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size p1))) SP) by auto.
+ rewrite H5; rewrite Pregmap.gso; auto; try discriminate.
+ + rewrite !incrPC_set_res_commut.
+ rewrite !incrPC_undef_regs_commut.
+ rewrite H5.
+ reflexivity.
+Qed.
+
+(** Used for debug traces *)
+
+Definition ireg_name (ir: ireg) : pstring :=
+ match ir with
+ | X0 => Str ("X0") | X1 => Str ("X1") | X2 => Str ("X2") | X3 => Str ("X3") | X4 => Str ("X4") | X5 => Str ("X5") | X6 => Str ("X6") | X7 => Str ("X7")
+ | X8 => Str ("X8") | X9 => Str ("X9") | X10 => Str ("X10") | X11 => Str ("X11") | X12 => Str ("X12") | X13 => Str ("X13") | X14 => Str ("X14") | X15 => Str ("X15")
+ | X16 => Str ("X16") | X17 => Str ("X17") | X18 => Str ("X18") | X19 => Str ("X19") | X20 => Str ("X20") | X21 => Str ("X21") | X22 => Str ("X22") | X23 => Str ("X23")
+ | X24 => Str ("X24") | X25 => Str ("X25") | X26 => Str ("X26") | X27 => Str ("X27") | X28 => Str ("X28") | X29 => Str ("X29") | X30 => Str ("X30")
+ end
+.
+
+Definition freg_name (fr: freg) : pstring :=
+ match fr with
+ | D0 => Str ("D0") | D1 => Str ("D1") | D2 => Str ("D2") | D3 => Str ("D3") | D4 => Str ("D4") | D5 => Str ("D5") | D6 => Str ("D6") | D7 => Str ("D7")
+ | D8 => Str ("D8") | D9 => Str ("D9") | D10 => Str ("D10") | D11 => Str ("D11") | D12 => Str ("D12") | D13 => Str ("D13") | D14 => Str ("D14") | D15 => Str ("D15")
+ | D16 => Str ("D16") | D17 => Str ("D17") | D18 => Str ("D18") | D19 => Str ("D19") | D20 => Str ("D20") | D21 => Str ("D21") | D22 => Str ("D22") | D23 => Str ("D23")
+ | D24 => Str ("D24") | D25 => Str ("D25") | D26 => Str ("D26") | D27 => Str ("D27") | D28 => Str ("D28") | D29 => Str ("D29") | D30 => Str ("D30") | D31 => Str ("D31")
+ end
+.
+
+Definition iregsp_name (irsp: iregsp) : pstring :=
+ match irsp with
+ | RR1 ir => ireg_name ir
+ | XSP => Str ("XSP")
+ end.
+
+Definition dreg_name (dr: dreg) : ?? pstring :=
+ match dr with
+ | IR ir => match ir with
+ | XSP => RET (Str ("XSP"))
+ | RR1 irr => RET (ireg_name irr)
+ end
+ | FR fr => RET (freg_name fr)
+ end.
+
+Definition string_of_name (x: P.R.t): ?? pstring :=
+ if (Pos.eqb x pmem) then
+ RET (Str "MEM")
+ else
+ match inv_ppos x with
+ | Some (CR cr) => match cr with
+ | CN => RET (Str ("CN"))
+ | CZ => RET (Str ("CZ"))
+ | CC => RET (Str ("CC"))
+ | CV => RET (Str ("CV"))
+ end
+ | Some (PC) => RET (Str ("PC"))
+ | Some (DR dr) => dreg_name dr
+ | _ => RET (Str ("UNDEFINED"))
+ end.
+
+Definition string_of_name_ArithP (n: arith_p) : pstring :=
+ match n with
+ | Padrp _ _ => "Padrp"
+ | Pmovz _ _ _ => "Pmov"
+ | Pmovn _ _ _ => "Pmov"
+ | Pfmovimms _ => "Pfmovimm"
+ | Pfmovimmd _ => "Pfmovimm"
+ end.
+
+Definition string_of_name_ArithPP (n: arith_pp) : pstring :=
+ match n with
+ | Pmov => "Pmov"
+ | Pmovk _ _ _ => "Pmovk"
+ | Paddadr _ _ => "Paddadr"
+ | Psbfiz _ _ _ => "Psbfiz"
+ | Psbfx _ _ _ => "Psbfx"
+ | Pubfiz _ _ _ => "Pubfiz"
+ | Pubfx _ _ _ => "Pubfx"
+ | Pfmov => "Pfmov"
+ | Pfcvtds => "Pfcvtds"
+ | Pfcvtsd => "Pfcvtsd"
+ | Pfabs _ => "Pfabs"
+ | Pfneg _ => "Pfneg"
+ | Pscvtf _ _ => "Pscvtf"
+ | Pucvtf _ _ => "Pucvtf"
+ | Pfcvtzs _ _ => "Pfcvtzs"
+ | Pfcvtzu _ _ => "Pfcvtzu"
+ | Paddimm _ _ => "Paddimm"
+ | Psubimm _ _ => "Psubimm"
+ end.
+
+Definition string_of_name_ArithPPP (n: arith_ppp) : pstring :=
+ match n with
+ | Pasrv _ => "Pasrv"
+ | Plslv _ => "Plslv"
+ | Plsrv _ => "Plsrv"
+ | Prorv _ => "Prorv"
+ | Psmulh => "Psmulh"
+ | Pumulh => "Pumulh"
+ | Psdiv _ => "Psdiv"
+ | Pudiv _ => "Pudiv"
+ | Paddext _ => "Paddext"
+ | Psubext _ => "Psubext"
+ | Pfadd _ => "Pfadd"
+ | Pfdiv _ => "Pfdiv"
+ | Pfmul _ => "Pfmul"
+ | Pfsub _ => "Pfsub"
+ end.
+
+Definition string_of_name_ArithRR0R (n: arith_rr0r) : pstring :=
+ match n with
+ | Padd _ _ => "ArithRR0R=>Padd"
+ | Psub _ _ => "ArithRR0R=>Psub"
+ | Pand _ _ => "ArithRR0R=>Pand"
+ | Pbic _ _ => "ArithRR0R=>Pbic"
+ | Peon _ _ => "ArithRR0R=>Peon"
+ | Peor _ _ => "ArithRR0R=>Peor"
+ | Porr _ _ => "ArithRR0R=>Porr"
+ | Porn _ _ => "ArithRR0R=>Porn"
+ end.
+
+Definition string_of_name_ArithRR0R_XZR (n: arith_rr0r) : pstring :=
+ match n with
+ | Padd _ _ => "ArithRR0R_XZR=>Padd"
+ | Psub _ _ => "ArithRR0R_XZR=>Psub"
+ | Pand _ _ => "ArithRR0R_XZR=>Pand"
+ | Pbic _ _ => "ArithRR0R_XZR=>Pbic"
+ | Peon _ _ => "ArithRR0R_XZR=>Peon"
+ | Peor _ _ => "ArithRR0R_XZR=>Peor"
+ | Porr _ _ => "ArithRR0R_XZR=>Porr"
+ | Porn _ _ => "ArithRR0R_XZR=>Porn"
+ end.
+
+Definition string_of_name_ArithRR0 (n: arith_rr0) : pstring :=
+ match n with
+ | Pandimm _ _ => "ArithRR0=>Pandimm"
+ | Peorimm _ _ => "ArithRR0=>Peorimm"
+ | Porrimm _ _ => "ArithRR0=>Porrimm"
+ end.
+
+Definition string_of_name_ArithRR0_XZR (n: arith_rr0) : pstring :=
+match n with
+ | Pandimm _ _ => "ArithRR0_XZR=>Pandimm"
+ | Peorimm _ _ => "ArithRR0_XZR=>Peorimm"
+ | Porrimm _ _ => "ArithRR0_XZR=>Porrimm"
+ end.
+
+Definition string_of_name_ArithARRRR0 (n: arith_arrrr0) : pstring :=
+ match n with
+ | Pmadd _ => "ArithARRRR0=>Pmadd"
+ | Pmsub _ => "ArithARRRR0=>Pmsub"
+ end.
+
+Definition string_of_name_ArithARRRR0_XZR (n: arith_arrrr0) : pstring :=
+ match n with
+ | Pmadd _ => "ArithARRRR0_XZR=>Pmadd"
+ | Pmsub _ => "ArithARRRR0_XZR=>Pmsub"
+ end.
+
+Definition string_of_name_ArithComparisonPP_CN (n: arith_comparison_pp) : pstring :=
+ match n with
+ | Pcmpext _ => "ArithComparisonPP_CN=>Pcmpext"
+ | Pcmnext _ => "ArithComparisonPP_CN=>Pcmnext"
+ | Pfcmp _ => "ArithComparisonPP_CN=>Pfcmp"
+ end.
+
+Definition string_of_name_ArithComparisonPP_CZ (n: arith_comparison_pp) : pstring :=
+ match n with
+ | Pcmpext _ => "ArithComparisonPP_CZ=>Pcmpext"
+ | Pcmnext _ => "ArithComparisonPP_CZ=>Pcmnext"
+ | Pfcmp _ => "ArithComparisonPP_CZ=>Pfcmp"
+ end.
+
+Definition string_of_name_ArithComparisonPP_CC (n: arith_comparison_pp) : pstring :=
+match n with
+ | Pcmpext _ => "ArithComparisonPP_CC=>Pcmpext"
+ | Pcmnext _ => "ArithComparisonPP_CC=>Pcmnext"
+ | Pfcmp _ => "ArithComparisonPP_CC=>Pfcmp"
+ end.
+
+Definition string_of_name_ArithComparisonPP_CV (n: arith_comparison_pp) : pstring :=
+match n with
+ | Pcmpext _ => "ArithComparisonPP_CV=>Pcmpext"
+ | Pcmnext _ => "ArithComparisonPP_CV=>Pcmnext"
+ | Pfcmp _ => "ArithComparisonPP_CV=>Pfcmp"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CN (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CN=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CN=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CN=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CZ (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CZ=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CZ=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CZ=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CC (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CC=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CC=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CC=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CV (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CV=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CV=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CV=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CN_XZR (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CN_XZR=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CN_XZR=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CN_XZR=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CZ_XZR (n: arith_comparison_r0r) : pstring :=
+ match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CZ_XZR=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CZ_XZR=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CC_XZR (n: arith_comparison_r0r) : pstring :=
+match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CC_XZR=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CC_XZR=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CC_XZR=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonR0R_CV_XZR (n: arith_comparison_r0r) : pstring :=
+match n with
+ | Pcmp _ _ => "ArithComparisonR0R_CV_XZR=>Pcmp"
+ | Pcmn _ _ => "ArithComparisonR0R_CV_XZR=>Pcmn"
+ | Ptst _ _ => "ArithComparisonR0R_CV_XZR=>Ptst"
+ end.
+
+Definition string_of_name_ArithComparisonP_CN (n: arith_comparison_p) : pstring :=
+ match n with
+ | Pfcmp0 _ => "ArithComparisonP_CN=>Pfcmp0"
+ | Pcmpimm _ _ => "ArithComparisonP_CN=>Pcmpimm"
+ | Pcmnimm _ _ => "ArithComparisonP_CN=>Pcmnimm"
+ | Ptstimm _ _ => "ArithComparisonP_CN=>Ptstimm"
+ end.
+
+Definition string_of_name_ArithComparisonP_CZ (n: arith_comparison_p) : pstring :=
+ match n with
+ | Pfcmp0 _ => "ArithComparisonP_CZ=>Pfcmp0"
+ | Pcmpimm _ _ => "ArithComparisonP_CZ=>Pcmpimm"
+ | Pcmnimm _ _ => "ArithComparisonP_CZ=>Pcmnimm"
+ | Ptstimm _ _ => "ArithComparisonP_CZ=>Ptstimm"
+ end.
+
+Definition string_of_name_ArithComparisonP_CC (n: arith_comparison_p) : pstring :=
+ match n with
+ | Pfcmp0 _ => "ArithComparisonP_CC=>Pfcmp0"
+ | Pcmpimm _ _ => "ArithComparisonP_CC=>Pcmpimm"
+ | Pcmnimm _ _ => "ArithComparisonP_CC=>Pcmnimm"
+ | Ptstimm _ _ => "ArithComparisonP_CC=>Ptstimm"
+ end.
+
+Definition string_of_name_ArithComparisonP_CV (n: arith_comparison_p) : pstring :=
+ match n with
+ | Pfcmp0 _ => "ArithComparisonP_CV=>Pfcmp0"
+ | Pcmpimm _ _ => "ArithComparisonP_CV=>Pcmpimm"
+ | Pcmnimm _ _ => "ArithComparisonP_CV=>Pcmnimm"
+ | Ptstimm _ _ => "ArithComparisonP_CV=>Ptstimm"
+ end.
+
+Definition string_of_name_cset (c: testcond) : pstring :=
+ match c with
+ | TCeq => "Cset=>TCeq"
+ | TCne => "Cset=>TCne"
+ | TChs => "Cset=>TChs"
+ | TClo => "Cset=>TClo"
+ | TCmi => "Cset=>TCmi"
+ | TCpl => "Cset=>TCpl"
+ | TChi => "Cset=>TChi"
+ | TCls => "Cset=>TCls"
+ | TCge => "Cset=>TCge"
+ | TClt => "Cset=>TClt"
+ | TCgt => "Cset=>TCgt"
+ | TCle => "Cset=>TCle"
+ end.
+
+Definition string_of_arith (op: arith_op): pstring :=
+ match op with
+ | OArithP n => string_of_name_ArithP n
+ | OArithPP n => string_of_name_ArithPP n
+ | OArithPPP n => string_of_name_ArithPPP n
+ | OArithRR0R n => string_of_name_ArithRR0R n
+ | OArithRR0R_XZR n _ => string_of_name_ArithRR0R_XZR n
+ | OArithRR0 n => string_of_name_ArithRR0 n
+ | OArithRR0_XZR n _ => string_of_name_ArithRR0_XZR n
+ | OArithARRRR0 n => string_of_name_ArithARRRR0 n
+ | OArithARRRR0_XZR n _ => string_of_name_ArithARRRR0_XZR n
+ | OArithComparisonPP_CN n => string_of_name_ArithComparisonPP_CN n
+ | OArithComparisonPP_CZ n => string_of_name_ArithComparisonPP_CZ n
+ | OArithComparisonPP_CC n => string_of_name_ArithComparisonPP_CC n
+ | OArithComparisonPP_CV n => string_of_name_ArithComparisonPP_CV n
+ | OArithComparisonR0R_CN n _ => string_of_name_ArithComparisonR0R_CN n
+ | OArithComparisonR0R_CZ n _ => string_of_name_ArithComparisonR0R_CZ n
+ | OArithComparisonR0R_CC n _ => string_of_name_ArithComparisonR0R_CC n
+ | OArithComparisonR0R_CV n _ => string_of_name_ArithComparisonR0R_CV n
+ | OArithComparisonR0R_CN_XZR n _ _ => string_of_name_ArithComparisonR0R_CN_XZR n
+ | OArithComparisonR0R_CZ_XZR n _ _ => string_of_name_ArithComparisonR0R_CZ_XZR n
+ | OArithComparisonR0R_CC_XZR n _ _ => string_of_name_ArithComparisonR0R_CC_XZR n
+ | OArithComparisonR0R_CV_XZR n _ _ => string_of_name_ArithComparisonR0R_CV_XZR n
+ | OArithComparisonP_CN n => string_of_name_ArithComparisonP_CN n
+ | OArithComparisonP_CZ n => string_of_name_ArithComparisonP_CZ n
+ | OArithComparisonP_CC n => string_of_name_ArithComparisonP_CC n
+ | OArithComparisonP_CV n => string_of_name_ArithComparisonP_CV n
+ | Ocset c => string_of_name_cset c
+ | Ofmovi _ => "Ofmovi"
+ | Ofmovi_XZR _ => "Ofmovi_XZR"
+ | Ocsel _ => "Ocsel"
+ | Ofnmul _ => "Ofnmul"
+ end.
+
+Definition string_of_ofs (ofs: ptrofs) : ?? pstring :=
+ (string_of_Z (Ptrofs.signed ofs)).
+
+Definition string_of_int (n: int) : ?? pstring :=
+ (string_of_Z (Int.signed n)).
+
+Definition string_of_int64 (n: int64) : ?? pstring :=
+ (string_of_Z (Int64.signed n)).
+
+Notation "x +; y" := (Concat x y).
+
+Definition string_of_addressing (a: addressing) : ?? pstring :=
+ match a with
+ | ADimm base n =>
+ DO n' <~ string_of_int64 n;;
+ RET ((Str "[ADimm ") +; (iregsp_name base) +; " " +; n' +; "]")
+ | ADreg base r =>
+ RET ((Str "[ADreg ") +; (iregsp_name base) +; " " +; (ireg_name r) +; "]")
+ | ADlsl base r n =>
+ DO n' <~ string_of_int n;;
+ RET ((Str "[ADlsl ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+ | ADsxt base r n =>
+ DO n' <~ string_of_int n;;
+ RET ((Str "[ADsxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+ | ADuxt base r n =>
+ DO n' <~ string_of_int n;;
+ RET ((Str "[ADuxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]")
+ | ADadr base id ofs =>
+ DO id' <~ string_of_Z (Zpos id);;
+ DO ofs' <~ string_of_ofs ofs;;
+ RET ((Str "[ADadr ") +; (iregsp_name base) +; " " +; id' +; " " +; ofs' +; "]")
+ | ADpostincr base n =>
+ DO n' <~ string_of_int64 n;;
+ RET ((Str "[ADpostincr ") +; (iregsp_name base) +; " " +; n' +; "]")
+ end.
+
+Definition string_of_ld_rd_a (ld: load_rd_a) : pstring :=
+ match ld with
+ | Pldrw => Str "Pldrw"
+ | Pldrw_a => Str "Pldrw_a"
+ | Pldrx => Str "Pldrx"
+ | Pldrx_a => Str "Pldrx_a"
+ | Pldrb _ => Str "Pldrb"
+ | Pldrsb _ => Str "Pldrsb"
+ | Pldrh _ => Str "Pldrh"
+ | Pldrsh _ => Str "Pldrsh"
+ | Pldrzw => Str "Pldrzw"
+ | Pldrsw => Str "Pldrsw"
+ | Pldrs => Str "Pldrs"
+ | Pldrd => Str "Pldrd"
+ | Pldrd_a => Str "Pldrd_a"
+ end.
+
+Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:=
+ match ldi with
+ | PLd_rd_a ld rd a =>
+ DO a' <~ string_of_addressing a;;
+ DO rd' <~ dreg_name rd;;
+ RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")")
+ | Pldp _ _ _ _ _ a =>
+ DO a' <~ string_of_addressing a;;
+ RET (Str "Pldp (" +; a' +; ")")
+ end.
+
+Definition string_of_load (op: load_op) : ?? pstring :=
+ match op with
+ | Oload1 ld _ a =>
+ DO a' <~ string_of_addressing a;;
+ (*DO ld' <~ string_of_ldi ld;;*)
+ RET((Str "Oload1 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+ | Oload2 ld _ a =>
+ DO a' <~ string_of_addressing a;;
+ (*DO ld' <~ string_of_ldi ld;;*)
+ RET((Str "Oload2 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+ | OloadU _ _ _ => RET (Str "OloadU")
+ end.
+
+Definition string_of_st_rs_a (st: store_rs_a) : pstring :=
+ match st with
+ | Pstrw => Str "Pstrw"
+ | Pstrw_a => Str "Pstrw_a"
+ | Pstrx => Str "Pstrx"
+ | Pstrx_a => Str "Pstrx_a"
+ | Pstrb => Str "Pstrb"
+ | Pstrh => Str "Pstrh"
+ | Pstrs => Str "Pstrs"
+ | Pstrd => Str "Pstrd"
+ | Pstrd_a => Str "Pstrd_a"
+ end.
+
+Definition string_of_sti (sti: st_instruction) : ?? pstring:=
+ match sti with
+ | PSt_rs_a st rs a =>
+ DO a' <~ string_of_addressing a;;
+ DO rs' <~ dreg_name rs;;
+ RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")")
+ | Pstp _ _ _ _ _ a =>
+ DO a' <~ string_of_addressing a;;
+ RET (Str "Pstp (" +; a' +; ")")
+ end.
+
+Definition string_of_store (op: store_op) : ?? pstring :=
+ match op with
+ | Ostore1 st _ a =>
+ DO a' <~ string_of_addressing a;;
+ (*DO st' <~ string_of_sti st;;*)
+ RET((Str "Ostore1 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+ | Ostore2 st _ a =>
+ DO a' <~ string_of_addressing a;;
+ (*DO st' <~ string_of_sti st;;*)
+ RET((Str "Ostore2 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+ | OstoreU _ _ _ => RET (Str "OstoreU")
+ end.
+
+Definition string_of_control (op: control_op) : pstring :=
+ match op with
+ | Ob _ => "Ob"
+ | Obc _ _ => "Obc"
+ | Obl _ => "Obl"
+ | Obs _ => "Obs"
+ | Ocbnz _ _ => "Ocbnz"
+ | Ocbz _ _ => "Ocbz"
+ | Otbnz _ _ _ => "Otbnz"
+ | Otbz _ _ _ => "Otbz"
+ | Obtbl _ => "Obtbl"
+ | OIncremPC _ => "OIncremPC"
+ | OError => "OError"
+ end.
+
+Definition string_of_allocf (op: allocf_op) : pstring :=
+ match op with
+ | OAllocf_SP _ _ => "OAllocf_SP"
+ | OAllocf_Mem _ _ => "OAllocf_Mem"
+ end.
+
+Definition string_of_freef (op: freef_op) : pstring :=
+ match op with
+ | OFreef_SP _ _ => "OFreef_SP"
+ | OFreef_Mem _ _ => "OFreef_Mem"
+ end.
+
+Definition string_of_op (op: P.op): ?? pstring :=
+ match op with
+ | Arith op => RET (string_of_arith op)
+ | Load op => string_of_load op
+ | Store op => string_of_store op
+ | Control op => RET (string_of_control op)
+ | Allocframe op => RET (string_of_allocf op)
+ | Freeframe op => RET (string_of_freef op)
+ | Loadsymbol _ => RET (Str "Loadsymbol")
+ | Constant _ => RET (Str "Constant")
+ | Cvtsw2x => RET (Str "Cvtsw2x")
+ | Cvtuw2x => RET (Str "Cvtuw2x")
+ | Cvtx2w => RET (Str "Cvtx2w")
+ (*| Fail => RET (Str "Fail")*)
+ end.
+End SECT_BBLOCK_EQUIV.
+
+(** REWRITE RULES *)
+
+Definition is_constant (o: op): bool :=
+ match o with
+ | OArithP _ | OArithRR0_XZR _ _ | Ofmovi_XZR _ | Loadsymbol _ | Constant _ | Obl _ | Obs _ => true
+ | _ => false
+ end.
+
+Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None.
+Proof.
+ destruct o; simpl in * |- *; try congruence.
+ destruct op0; simpl in * |- *; try congruence.
+ destruct co; simpl in * |- *; try congruence;
+ unfold control_eval; destruct ge; simpl in * |- *; try congruence.
+Qed.
+
+Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t).
+
+Local Hint Resolve is_constant_correct: wlp.
+
+Lemma main_reduce_correct t:
+ WHEN main_reduce t ~> pt THEN Terms.match_pt t pt.
+Proof.
+ wlp_simplify.
+Qed.
+
+Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}.
+
+Definition bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock) : ?? bool :=
+ assert_same_builtin p1 p2;;
+ if verb then
+ IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2)
+ else
+ IST.bblock_simu_test reduce (trans_block p1) (trans_block p2).
+
+Local Hint Resolve IST.bblock_simu_test_correct IST.verb_bblock_simu_test_correct: wlp.
+
+(** Main simulation (Impure) theorem *)
+Theorem bblock_simu_test_correct verb p1 p2 :
+ WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn lk, Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+ wlp_simplify; eapply bblock_simu_reduce with (Ge:={| _genv := ge; _fn := fn; _lk := lk |}); eauto;
+ intros; destruct H; auto.
+Qed.
+
+Hint Resolve bblock_simu_test_correct: wlp.
+
+(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmblock.bblock): bool :=
+ match unsafe_coerce (bblock_simu_test verb p1 p2) with
+ | Some b => b
+ | None => false
+ end.
+
+Theorem pure_bblock_simu_test_correct verb p1 p2 lk ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+ unfold pure_bblock_simu_test.
+ destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate.
+ intros; subst. eapply bblock_simu_test_correct; eauto.
+ apply unsafe_coerce_not_really_correct; eauto.
+Qed.
+
+Definition bblock_simub: Asmblock.bblock -> Asmblock.bblock -> bool := pure_bblock_simu_test true.
+
+Lemma bblock_simub_correct p1 p2 lk ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu lk ge fn p1 p2.
+Proof.
+ eapply (pure_bblock_simu_test_correct true).
+Qed.
diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v
new file mode 100644
index 00000000..acb5a1e6
--- /dev/null
+++ b/aarch64/Asmblockgen.v
@@ -0,0 +1,1252 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Translation from Machblock to AArch64 assembly block language (Asmblock)
+ Inspired from the Mach->Asm pass of original Leroy's backend, but adapted to the block structure like the KVX backend. *)
+
+Require Import Recdef Coqlib Zwf Zbits.
+Require Import Errors AST Integers Floats Op.
+Require Import Locations Machblock Asm Asmblock.
+
+Local Open Scope string_scope.
+Local Open Scope list_scope.
+Local Open Scope error_monad_scope.
+
+(** Extracting integer or float registers. *)
+
+Definition ireg_of (r: mreg) : res ireg :=
+ match preg_of r with
+ | IR irs => match irs with
+ | RR1 mr => OK mr
+ | _ => Error(msg "Asmgenblock.ireg_of")
+ end
+ | _ => Error(msg "Asmgenblock.iregsp_of")
+ end.
+
+Definition freg_of (r: mreg) : res freg :=
+ match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
+
+(** Recognition of immediate arguments for logical integer operations.*)
+
+(** Valid immediate arguments are repetitions of a bit pattern [B]
+ of length [e] = 2, 4, 8, 16, 32 or 64.
+ The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
+ but must not be all zeros or all ones. *)
+
+(** The following automaton recognizes [0*1*0*|1*0*1*].
+<<
+ 0 1 0
+ / \ / \ / \
+ \ / \ / \ /
+ -0--> [B] --1--> [D] --0--> [F]
+ /
+ [A]
+ \
+ -1--> [C] --0--> [E] --1--> [G]
+ / \ / \ / \
+ \ / \ / \ /
+ 1 0 1
+>>
+*)
+
+Module Automaton.
+
+Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
+
+Definition start := SA.
+
+Definition next (s: state) (b: bool) :=
+ match s, b with
+ | SA,false => SB | SA,true => SC
+ | SB,false => SB | SB,true => SD
+ | SC,false => SE | SC,true => SC
+ | SD,false => SF | SD,true => SD
+ | SE,false => SE | SE,true => SG
+ | SF,false => SF | SF,true => Sbad
+ | SG,false => Sbad | SG,true => SG
+ | Sbad,_ => Sbad
+ end.
+
+Definition accepting (s: state) :=
+ match s with
+ | SA | SB | SC | SD | SE | SF | SG => true
+ | Sbad => false
+ end.
+
+Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
+ match len with
+ | Datatypes.O => accepting s
+ | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
+ end.
+
+End Automaton.
+
+(** The following function determines the candidate length [e],
+ ensuring that [x] is a repetition [BB...B]
+ of a bit pattern [B] of length [e]. *)
+
+Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
+ (** [test n] checks that the low [2n] bits of [x] are of the
+ form [BB], that is, two occurrences of the same [n] bits *)
+ let test (n: Z) : bool :=
+ Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
+ (** If [test n] fails, we know that the candidate length [e] is
+ at least [2n]. Hence we test with decreasing values of [n]:
+ 32, 16, 8, 4, 2. *)
+ if sixtyfour && negb (test 32) then 64%nat
+ else if negb (test 16) then 32%nat
+ else if negb (test 8) then 16%nat
+ else if negb (test 4) then 8%nat
+ else if negb (test 2) then 4%nat
+ else 2%nat.
+
+(** A valid logical immediate is
+- neither [0] nor [-1];
+- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
+- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
+*)
+
+Definition is_logical_imm32 (x: int) : bool :=
+ negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
+ Automaton.run (logical_imm_length (Int.unsigned x) false)
+ Automaton.start (Int.unsigned x).
+
+Definition is_logical_imm64 (x: int64) : bool :=
+ negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
+ Automaton.run (logical_imm_length (Int64.unsigned x) true)
+ Automaton.start (Int64.unsigned x).
+
+(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *)
+
+Definition is_arith_imm32 (x: int) : bool :=
+ Int.eq x (Int.zero_ext 12 x)
+ || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)).
+
+Definition is_arith_imm64 (x: int64) : bool :=
+ Int64.eq x (Int64.zero_ext 12 x)
+ || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)).
+
+Definition bcode := list basic.
+
+Program Definition single_basic (bi: basic): bblock :=
+ {| header := nil; body:= bi::nil; exit := None |}.
+
+(* insert [bi] at the head of [k] *)
+Program Definition insert_basic (bi: basic) (k:bblocks): bblocks :=
+ match k with
+ | bb::k' =>
+ match bb.(header) with
+ | nil => {| header := nil; body := bi :: (body bb); exit := exit bb |}::k'
+ | _ => (single_basic bi)::k
+ end
+ | _ => (single_basic bi)::k
+ end.
+
+Notation "bi ::b k" := (insert_basic bi k) (at level 49, right associativity).
+
+(* NB: this notation helps the Coq typechecker to infer coercion [PArith] in [bcode] expressions *)
+(** Alignment check for symbols *)
+Notation "i ::bi k" := (cons (A:=basic) i k) (at level 49, right associativity).
+Notation "a @@ b" := (app a b) (at level 49, right associativity).
+
+(* The pop_bc and push_bc functions are used to adapt the output of some definitions
+ in bblocks format and avoid some redefinitions. *)
+
+(* pop_bc takes the body of the first bblock in the list if it does not have a header. *)
+Definition pop_bc (k: bblocks): bcode :=
+ match k with
+ | bb :: k' => match bb.(header) with
+ | nil => (body bb)
+ | _ => nil
+ end
+ | _ => nil
+ end.
+
+(* push_bc tries to overwrite code in the first bblock if it does not have a header,
+ otherwise, a new bblock is created and appended to the list. *)
+Program Definition push_bc (bc: bcode) (k:bblocks): bblocks :=
+ match bc with
+ | bi :: bc' => match k with
+ | bb :: k' => match bb.(header) with
+ | nil => {| header := nil; body := bc; exit := exit bb |} :: k'
+ | _ => {| header := nil; body := bc; exit := None |} :: k
+ end
+ | _ => {| header := nil; body := bc; exit := None |} :: nil
+ end
+ | nil => k
+ end.
+Next Obligation.
+ simpl; auto.
+Qed.
+Next Obligation.
+ simpl; auto.
+Qed.
+Next Obligation.
+ simpl; auto.
+Qed.
+
+Parameter symbol_is_aligned : ident -> Z -> bool.
+(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
+
+(***************************************************************************************)
+
+(** Decompose integer literals into 16-bit fragments *)
+
+Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) :=
+ match N with
+ | Datatypes.O => nil
+ | Datatypes.S N =>
+ let frag := Zzero_ext 16 (Z.shiftr n p) in
+ if Z.eqb frag 0 then
+ decompose_int N n (p + 16)
+ else
+ (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16)
+ end.
+
+Definition negate_decomposition (l: list (Z * Z)) :=
+ List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l.
+
+Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode :=
+ List.fold_right (fun np k => Pmovk sz (fst np) (snd np) rd rd ::bi k) k l.
+
+Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode :=
+ match l with
+ | nil => Pmovz sz 0 0 rd ::bi k
+ | (n1, p1) :: l => (Pmovz sz n1 p1 rd) ::bi loadimm_k sz rd l k
+ end.
+
+Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: bcode) : bcode :=
+ match l with
+ | nil => Pmovn sz 0 0 rd ::bi k
+ | (n1, p1) :: l => Pmovn sz n1 p1 rd ::bi loadimm_k sz rd (negate_decomposition l) k
+ end.
+
+Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: bcode) : bcode :=
+ let N := match sz with W => 2%nat | X => 4%nat end in
+ let dz := decompose_int N n 0 in
+ let dn := decompose_int N (Z.lnot n) 0 in
+ if Nat.leb (List.length dz) (List.length dn)
+ then loadimm_z sz rd dz k
+ else loadimm_n sz rd dn k.
+
+Definition loadimm32 (rd: ireg) (n: int) (k: bcode) : bcode :=
+ if is_logical_imm32 n
+ then Porrimm W (Int.unsigned n) rd XZR ::bi k
+ else loadimm W rd (Int.unsigned n) k.
+
+Definition loadimm64 (rd: ireg) (n: int64) (k: bcode) : bcode :=
+ if is_logical_imm64 n
+ then Porrimm X (Int64.unsigned n) rd XZR ::bi k
+ else loadimm X rd (Int64.unsigned n) k.
+
+Definition offset_representable (sz: Z) (ofs: int64) : bool :=
+ let isz := Int64.repr sz in
+ (** either unscaled 9-bit signed *)
+ Int64.eq ofs (Int64.sign_ext 9 ofs) ||
+ (** or scaled 12-bit unsigned *)
+ (Int64.eq (Int64.modu ofs isz) Int64.zero
+ && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
+
+Definition indexed_memory_access_bc (insn: addressing -> basic)
+ (sz: Z) (base: iregsp) (ofs: ptrofs) (k: bcode) : bcode :=
+ let ofs := Ptrofs.to_int64 ofs in
+ if offset_representable sz ofs
+ then insn (ADimm base ofs) :: k
+ else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
+
+Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
+ match ty, preg_of dst with
+ | Tint, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw rd) 4 base ofs k)
+ | Tlong, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx rd) 8 base ofs k)
+ | Tsingle, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrs rd) 4 base ofs k)
+ | Tfloat, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd rd) 8 base ofs k)
+ | Tany32, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw_a rd) 4 base ofs k)
+ | Tany64, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx_a rd) 8 base ofs k)
+ | Tany64, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd_a rd) 8 base ofs k)
+ | _, _ => Error (msg "Asmgen.loadind")
+ end.
+
+Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode) :=
+ match ty, preg_of src with
+ | Tint, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw rd) 4 base ofs k)
+ | Tlong, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx rd) 8 base ofs k)
+ | Tsingle, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrs rd) 4 base ofs k)
+ | Tfloat, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd rd) 8 base ofs k)
+ | Tany32, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw_a rd) 4 base ofs k)
+ | Tany64, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx_a rd) 8 base ofs k)
+ | Tany64, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd_a rd) 8 base ofs k)
+ | _, _ => Error (msg "Asmgen.storeind")
+ end.
+
+Definition loadptr_bc (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: bcode): bcode :=
+ indexed_memory_access_bc (PLd_rd_a Pldrx dst) 8 base ofs k.
+
+Definition storeptr_bc (src: ireg) (base: iregsp) (ofs: ptrofs) (k: bcode): bcode :=
+ indexed_memory_access_bc (PSt_rs_a Pstrx src) 8 base ofs k.
+
+(** Function epilogue *)
+
+Definition make_epilogue (f: Machblock.function) : bcode :=
+ loadptr_bc XSP f.(fn_retaddr_ofs) RA
+ (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs)::nil).
+
+(** Add immediate *)
+
+Definition addimm_aux (insn: Z -> arith_pp)
+ (rd r1: iregsp) (n: Z) (k: bcode) :=
+ let nlo := Zzero_ext 12 n in
+ let nhi := n - nlo in
+ if Z.eqb nhi 0 then
+ insn nlo rd r1 ::bi k
+ else if Z.eqb nlo 0 then
+ insn nhi rd r1 ::bi k
+ else
+ insn nhi rd r1 ::bi insn nlo rd rd ::bi k.
+
+Definition addimm32 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+ let m := Int.neg n in
+ if Int.eq n (Int.zero_ext 24 n) then
+ addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k
+ else if Int.eq m (Int.zero_ext 24 m) then
+ addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k
+ else if Int.lt n Int.zero then
+ loadimm32 X16 m (Psub W SOnone rd r1 X16 ::bi k)
+ else
+ loadimm32 X16 n (Padd W SOnone rd r1 X16 ::bi k).
+
+Definition addimm64 (rd r1: iregsp) (n: int64) (k: bcode) : bcode :=
+ let m := Int64.neg n in
+ if Int64.eq n (Int64.zero_ext 24 n) then
+ addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k
+ else if Int64.eq m (Int64.zero_ext 24 m) then
+ addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k
+ else if Int64.lt n Int64.zero then
+ loadimm64 X16 m (Psubext (EOuxtx Int.zero) rd r1 X16 ::bi k)
+ else
+ loadimm64 X16 n (Paddext (EOuxtx Int.zero) rd r1 X16 ::bi k).
+
+(** Logical immediate *)
+
+Definition logicalimm32
+ (insn1: Z -> arith_rr0)
+ (insn2: shift_op -> arith_rr0r)
+ (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+ if is_logical_imm32 n
+ then insn1 (Int.unsigned n) rd r1 ::bi k
+ else loadimm32 X16 n (insn2 SOnone rd r1 X16 ::bi k).
+
+Definition logicalimm64
+ (insn1: Z -> arith_rr0)
+ (insn2: shift_op -> arith_rr0r)
+ (rd r1: ireg) (n: int64) (k: bcode) : bcode :=
+ if is_logical_imm64 n
+ then insn1 (Int64.unsigned n) rd r1 ::bi k
+ else loadimm64 X16 n (insn2 SOnone rd r1 X16 ::bi k).
+
+(** Sign- or zero-extended arithmetic *)
+
+Definition transl_extension (ex: extension) (a: int) : extend_op :=
+ match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
+
+Definition move_extended_base
+ (rd: ireg) (r1: ireg) (ex: extension) (k: bcode) : bcode :=
+ match ex with
+ | Xsgn32 => Pcvtsw2x rd r1 ::bi k
+ | Xuns32 => Pcvtuw2x rd r1 ::bi k
+ end.
+
+Definition move_extended
+ (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: bcode) : bcode :=
+ if Int.eq a Int.zero then
+ move_extended_base rd r1 ex k
+ else
+ move_extended_base rd r1 ex (Padd X (SOlsl a) rd XZR rd ::bi k).
+
+Definition arith_extended
+ (insnX: extend_op -> arith_ppp)
+ (insnS: shift_op -> arith_rr0r)
+ (rd r1 r2: ireg) (ex: extension) (a: int) (k: bcode) : bcode :=
+ if Int.ltu a (Int.repr 5) then
+ insnX (transl_extension ex a) rd r1 r2 ::bi k
+ else
+ move_extended_base X16 r2 ex (insnS (SOlsl a) rd r1 X16 ::bi k).
+
+(** Extended right shift *)
+
+Definition shrx32 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+ if Int.eq n Int.zero then
+ Pmov rd r1 ::bi k
+ else
+ Porr W (SOasr (Int.repr 31)) X16 XZR r1 ::bi
+ Padd W (SOlsr (Int.sub Int.iwordsize n)) X16 r1 X16 ::bi
+ Porr W (SOasr n) rd XZR X16 ::bi k.
+
+Definition shrx64 (rd r1: ireg) (n: int) (k: bcode) : bcode :=
+ if Int.eq n Int.zero then
+ Pmov rd r1 ::bi k
+ else
+ Porr X (SOasr (Int.repr 63)) X16 XZR r1 ::bi
+ Padd X (SOlsr (Int.sub Int64.iwordsize' n)) X16 r1 X16 ::bi
+ Porr X (SOasr n) rd XZR X16 ::bi k.
+
+(** Load the address [id + ofs] in [rd] *)
+
+Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: bcode) : bcode :=
+ if Archi.pic_code tt then
+ if Ptrofs.eq ofs Ptrofs.zero then
+ Ploadsymbol rd id ::bi k
+ else
+ Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
+ else
+ Padrp id ofs rd ::bi Paddadr id ofs rd rd ::bi k.
+
+(** Translate a shifted operand *)
+
+Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op :=
+ match s with
+ | Slsl => SOlsl a
+ | Slsr => SOlsr a
+ | Sasr => SOasr a
+ | Sror => SOror a
+ end.
+
+(** Translation of a condition. Prepends to [k] the instructions
+ that evaluate the condition and leave its boolean result in one of
+ the bits of the condition register. The bit in question is
+ determined by the [crbit_for_cond] function. *)
+
+Definition transl_cond
+ (cond: condition) (args: list mreg) (k: bcode) :=
+ match cond, args with
+ | (Ccomp c | Ccompu c), a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pcmp W SOnone r1 r2 ::bi k)
+ | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pcmp W (transl_shift s a) r1 r2 ::bi k)
+ | (Ccompimm c n | Ccompuimm c n), a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if is_arith_imm32 n then
+ Pcmpimm W (Int.unsigned n) r1 ::bi k
+ else if is_arith_imm32 (Int.neg n) then
+ Pcmnimm W (Int.unsigned (Int.neg n)) r1 ::bi k
+ else
+ loadimm32 X16 n (Pcmp W SOnone r1 X16 ::bi k))
+ | (Cmaskzero n | Cmasknotzero n), a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if is_logical_imm32 n then
+ Ptstimm W (Int.unsigned n) r1 ::bi k
+ else
+ loadimm32 X16 n (Ptst W SOnone r1 X16 ::bi k))
+ | (Ccompl c | Ccomplu c), a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pcmp X SOnone r1 r2 ::bi k)
+ | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pcmp X (transl_shift s a) r1 r2 ::bi k)
+ | (Ccomplimm c n | Ccompluimm c n), a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if is_arith_imm64 n then
+ Pcmpimm X (Int64.unsigned n) r1 ::bi k
+ else if is_arith_imm64 (Int64.neg n) then
+ Pcmnimm X (Int64.unsigned (Int64.neg n)) r1 ::bi k
+ else
+ loadimm64 X16 n (Pcmp X SOnone r1 X16 ::bi k))
+ | (Cmasklzero n | Cmasklnotzero n), a1 :: nil =>
+ do r1 <- ireg_of a1;
+ OK (if is_logical_imm64 n then
+ Ptstimm X (Int64.unsigned n) r1 ::bi k
+ else
+ loadimm64 X16 n (Ptst X SOnone r1 X16 ::bi k))
+ | Ccompf cmp, a1 :: a2 :: nil =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2;
+ OK (Pfcmp D r1 r2 ::bi k)
+ | Cnotcompf cmp, a1 :: a2 :: nil =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2;
+ OK (Pfcmp D r1 r2 ::bi k)
+ | Ccompfzero cmp, a1 :: nil =>
+ do r1 <- freg_of a1;
+ OK (Pfcmp0 D r1 ::bi k)
+ | Cnotcompfzero cmp, a1 :: nil =>
+ do r1 <- freg_of a1;
+ OK (Pfcmp0 D r1 ::bi k)
+ | Ccompfs cmp, a1 :: a2 :: nil =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2;
+ OK (Pfcmp S r1 r2 ::bi k)
+ | Cnotcompfs cmp, a1 :: a2 :: nil =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2;
+ OK (Pfcmp S r1 r2 ::bi k)
+ | Ccompfszero cmp, a1 :: nil =>
+ do r1 <- freg_of a1;
+ OK (Pfcmp0 S r1 ::bi k)
+ | Cnotcompfszero cmp, a1 :: nil =>
+ do r1 <- freg_of a1;
+ OK (Pfcmp0 S r1 ::bi k)
+ | _, _ =>
+ Error(msg "Asmgenblock.transl_cond")
+ end.
+
+Definition cond_for_signed_cmp (cmp: comparison) :=
+ match cmp with
+ | Ceq => TCeq
+ | Cne => TCne
+ | Clt => TClt
+ | Cle => TCle
+ | Cgt => TCgt
+ | Cge => TCge
+ end.
+
+Definition cond_for_unsigned_cmp (cmp: comparison) :=
+ match cmp with
+ | Ceq => TCeq
+ | Cne => TCne
+ | Clt => TClo
+ | Cle => TCls
+ | Cgt => TChi
+ | Cge => TChs
+ end.
+
+Definition cond_for_float_cmp (cmp: comparison) :=
+ match cmp with
+ | Ceq => TCeq
+ | Cne => TCne
+ | Clt => TCmi
+ | Cle => TCls
+ | Cgt => TCgt
+ | Cge => TCge
+ end.
+
+Definition cond_for_float_not_cmp (cmp: comparison) :=
+ match cmp with
+ | Ceq => TCne
+ | Cne => TCeq
+ | Clt => TCpl
+ | Cle => TChi
+ | Cgt => TCle
+ | Cge => TClt
+ end.
+
+Definition cond_for_cond (cond: condition) :=
+ match cond with
+ | Ccomp cmp => cond_for_signed_cmp cmp
+ | Ccompu cmp => cond_for_unsigned_cmp cmp
+ | Ccompshift cmp s a => cond_for_signed_cmp cmp
+ | Ccompushift cmp s a => cond_for_unsigned_cmp cmp
+ | Ccompimm cmp n => cond_for_signed_cmp cmp
+ | Ccompuimm cmp n => cond_for_unsigned_cmp cmp
+ | Cmaskzero n => TCeq
+ | Cmasknotzero n => TCne
+ | Ccompl cmp => cond_for_signed_cmp cmp
+ | Ccomplu cmp => cond_for_unsigned_cmp cmp
+ | Ccomplshift cmp s a => cond_for_signed_cmp cmp
+ | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp
+ | Ccomplimm cmp n => cond_for_signed_cmp cmp
+ | Ccompluimm cmp n => cond_for_unsigned_cmp cmp
+ | Cmasklzero n => TCeq
+ | Cmasklnotzero n => TCne
+ | Ccompf cmp => cond_for_float_cmp cmp
+ | Cnotcompf cmp => cond_for_float_not_cmp cmp
+ | Ccompfzero cmp => cond_for_float_cmp cmp
+ | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
+ | Ccompfs cmp => cond_for_float_cmp cmp
+ | Cnotcompfs cmp => cond_for_float_not_cmp cmp
+ | Ccompfszero cmp => cond_for_float_cmp cmp
+ | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
+ end.
+
+(** Translation of a conditional branch. Prepends to [k] the instructions
+ that evaluate the condition and ranch to [lbl] if it holds.
+ We recognize some conditional branches that can be implemented
+ without setting then testing condition flags. *)
+
+Definition transl_cond_branch_default
+ (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) :=
+ do ccode <- transl_cond c args k;
+ OK(ccode, Pbc (cond_for_cond c) lbl).
+
+Definition transl_cond_branch
+ (c: condition) (args: list mreg) (lbl: label) (k: bcode) : res (bcode*cf_instruction) :=
+ match args, c with
+ | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) =>
+ if Int.eq n Int.zero
+ then (do r1 <- ireg_of a1; OK (k, Pcbnz W r1 lbl))
+ else transl_cond_branch_default c args lbl k
+ | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) =>
+ if Int.eq n Int.zero
+ then (do r1 <- ireg_of a1; OK (k, Pcbz W r1 lbl))
+ else transl_cond_branch_default c args lbl k
+ | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) =>
+ if Int64.eq n Int64.zero
+ then (do r1 <- ireg_of a1; OK (k, Pcbnz X r1 lbl))
+ else transl_cond_branch_default c args lbl k
+ | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) =>
+ if Int64.eq n Int64.zero
+ then (do r1 <- ireg_of a1; OK (k, Pcbz X r1 lbl))
+ else transl_cond_branch_default c args lbl k
+ | a1 :: nil, Cmaskzero n =>
+ match Int.is_power2 n with
+ | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz W r1 bit lbl)
+ | None => transl_cond_branch_default c args lbl k
+ end
+ | a1 :: nil, Cmasknotzero n =>
+ match Int.is_power2 n with
+ | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz W r1 bit lbl)
+ | None => transl_cond_branch_default c args lbl k
+ end
+ | a1 :: nil, Cmasklzero n =>
+ match Int64.is_power2' n with
+ | Some bit => do r1 <- ireg_of a1; OK (k, Ptbz X r1 bit lbl)
+ | None => transl_cond_branch_default c args lbl k
+ end
+ | a1 :: nil, Cmasklnotzero n =>
+ match Int64.is_power2' n with
+ | Some bit => do r1 <- ireg_of a1; OK (k, Ptbnz X r1 bit lbl)
+ | None => transl_cond_branch_default c args lbl k
+ end
+ | _, _ =>
+ transl_cond_branch_default c args lbl k
+ end.
+
+(** Translation of the arithmetic operation [r <- op(args)].
+ The corresponding instructions are prepended to [k]. *)
+
+Definition transl_op
+ (op: operation) (args: list mreg) (res: mreg) (k: bcode) :=
+ match op, args with
+ | Omove, a1 :: nil =>
+ match preg_of res, preg_of a1 with
+ | IR r, IR a => OK (Pmov r a ::bi k)
+ | FR r, FR a => OK (Pfmov r a ::bi k)
+ | _ , _ => Error(msg "Asmgenblock.Omove")
+ end
+ | Ointconst n, nil =>
+ do rd <- ireg_of res;
+ OK (loadimm32 rd n k)
+ | Olongconst n, nil =>
+ do rd <- ireg_of res;
+ OK (loadimm64 rd n k)
+ | Ofloatconst f, nil =>
+ do rd <- freg_of res;
+ OK (if Float.eq_dec f Float.zero
+ then Pfmovi D rd XZR ::bi k
+ else Pfmovimmd f rd ::bi k)
+ | Osingleconst f, nil =>
+ do rd <- freg_of res;
+ OK (if Float32.eq_dec f Float32.zero
+ then Pfmovi S rd XZR ::bi k
+ else Pfmovimms f rd ::bi k)
+ | Oaddrsymbol id ofs, nil =>
+ do rd <- ireg_of res;
+ OK (loadsymbol rd id ofs k)
+ | Oaddrstack ofs, nil =>
+ do rd <- ireg_of res;
+ OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k)
+ (** 32-bit integer arithmetic *)
+ | Oshift s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porr W (transl_shift s a) rd XZR r1 ::bi k)
+ | Oadd, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Padd W SOnone rd r1 r2 ::bi k)
+ | Oaddshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Padd W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oaddimm n, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- ireg_of a1;
+ OK (addimm32 rd rs n k)
+ | Oneg, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psub W SOnone rd XZR r1 ::bi k)
+ | Onegshift s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psub W (transl_shift s a) rd XZR r1 ::bi k)
+ | Osub, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psub W SOnone rd r1 r2 ::bi k)
+ | Osubshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psub W (transl_shift s a) rd r1 r2 ::bi k)
+ | Omul, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do rs1 <- ireg_of a1; do rs2 <- ireg_of a2;
+ OK (Pmadd W rd rs1 rs2 XZR ::bi k)
+ | Omuladd, a1 :: a2 :: a3 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+ OK (Pmadd W rd r2 r3 r1 ::bi k)
+ | Omulsub, a1 :: a2 :: a3 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+ OK (Pmsub W rd r2 r3 r1 ::bi k)
+ | Odiv, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psdiv W rd r1 r2 ::bi k)
+ | Odivu, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pudiv W rd r1 r2 ::bi k)
+ | Oand, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pand W SOnone rd r1 r2 ::bi k)
+ | Oandshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pand W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oandimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k)
+ | Oor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porr W SOnone rd r1 r2 ::bi k)
+ | Oorshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porr W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k)
+ | Oxor, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peor W SOnone rd r1 r2 ::bi k)
+ | Oxorshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peor W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oxorimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k)
+ | Onot, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porn W SOnone rd XZR r1 ::bi k)
+ | Onotshift s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porn W (transl_shift s a) rd XZR r1 ::bi k)
+ | Obic, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pbic W SOnone rd r1 r2 ::bi k)
+ | Obicshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pbic W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oorn, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porn W SOnone rd r1 r2 ::bi k)
+ | Oornshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porn W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oeqv, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peon W SOnone rd r1 r2 ::bi k)
+ | Oeqvshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peon W (transl_shift s a) rd r1 r2 ::bi k)
+ | Oshl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Plslv W rd r1 r2 ::bi k)
+ | Oshr, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pasrv W rd r1 r2 ::bi k)
+ | Oshru, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Plsrv W rd r1 r2 ::bi k)
+ | Oshrximm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (shrx32 rd r1 n k)
+ | Ozext s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfiz W Int.zero s rd r1 ::bi k)
+ | Osext s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfiz W Int.zero s rd r1 ::bi k)
+ | Oshlzext s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Oshlsext s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfiz W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Ozextshr a s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Osextshr a s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfx W a (Z.min s (Int.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ (** 64-bit integer arithmetic *)
+ | Oshiftl s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porr X (transl_shift s a) rd XZR r1 ::bi k)
+ | Oextend x a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (move_extended rd r1 x a k)
+ (* [Omakelong] and [Ohighlong] should not occur *)
+ | Olowlong, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ assertion (ireg_eq rd r1);
+ OK (Pcvtx2w rd ::bi k)
+ | Oaddl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Padd X SOnone rd r1 r2 ::bi k)
+ | Oaddlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Padd X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oaddlext x a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (arith_extended Paddext (Padd X) rd r1 r2 x a k)
+ | Oaddlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (addimm64 rd r1 n k)
+ | Onegl, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psub X SOnone rd XZR r1 ::bi k)
+ | Oneglshift s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psub X (transl_shift s a) rd XZR r1 ::bi k)
+ | Osubl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psub X SOnone rd r1 r2 ::bi k)
+ | Osublshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psub X (transl_shift s a) rd r1 r2 ::bi k)
+ | Osublext x a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (arith_extended Psubext (Psub X) rd r1 r2 x a k)
+ | Omull, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pmadd X rd r1 r2 XZR ::bi k)
+ | Omulladd, a1 :: a2 :: a3 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+ OK (Pmadd X rd r2 r3 r1 ::bi k)
+ | Omullsub, a1 :: a2 :: a3 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
+ OK (Pmsub X rd r2 r3 r1 ::bi k)
+ | Omullhs, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psmulh rd r1 r2 ::bi k)
+ | Omullhu, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pumulh rd r1 r2 ::bi k)
+ | Odivl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Psdiv X rd r1 r2 ::bi k)
+ | Odivlu, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pudiv X rd r1 r2 ::bi k)
+ | Oandl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pand X SOnone rd r1 r2 ::bi k)
+ | Oandlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pand X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oandlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k)
+ | Oorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porr X SOnone rd r1 r2 ::bi k)
+ | Oorlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porr X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k)
+ | Oxorl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peor X SOnone rd r1 r2 ::bi k)
+ | Oxorlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peor X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oxorlimm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k)
+ | Onotl, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porn X SOnone rd XZR r1 ::bi k)
+ | Onotlshift s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Porn X (transl_shift s a) rd XZR r1 ::bi k)
+ | Obicl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pbic X SOnone rd r1 r2 ::bi k)
+ | Obiclshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pbic X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oornl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porn X SOnone rd r1 r2 ::bi k)
+ | Oornlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Porn X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oeqvl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peon X SOnone rd r1 r2 ::bi k)
+ | Oeqvlshift s a, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Peon X (transl_shift s a) rd r1 r2 ::bi k)
+ | Oshll, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Plslv X rd r1 r2 ::bi k)
+ | Oshrl, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Pasrv X rd r1 r2 ::bi k)
+ | Oshrlu, a1 :: a2 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (Plsrv X rd r1 r2 ::bi k)
+ | Oshrlximm n, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (shrx64 rd r1 n k)
+ | Ozextl s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfiz X Int.zero s rd r1 ::bi k)
+ | Osextl s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfiz X Int.zero s rd r1 ::bi k)
+ | Oshllzext s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Oshllsext s a, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfiz X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Ozextshrl a s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Pubfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ | Osextshrl a s, a1 :: nil =>
+ do rd <- ireg_of res; do r1 <- ireg_of a1;
+ OK (Psbfx X a (Z.min s (Int64.zwordsize - Int.unsigned a)) rd r1 ::bi k)
+ (** 64-bit floating-point arithmetic *)
+ | Onegf, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfneg D rd rs ::bi k)
+ | Oabsf, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfabs D rd rs ::bi k)
+ | Oaddf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfadd D rd rs1 rs2 ::bi k)
+ | Osubf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfsub D rd rs1 rs2 ::bi k)
+ | Omulf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmul D rd rs1 rs2 ::bi k)
+ | Odivf, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfdiv D rd rs1 rs2 ::bi k)
+ (** 32-bit floating-point arithmetic *)
+ | Onegfs, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfneg S rd rs ::bi k)
+ | Oabsfs, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfabs S rd rs ::bi k)
+ | Oaddfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfadd S rd rs1 rs2 ::bi k)
+ | Osubfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfsub S rd rs1 rs2 ::bi k)
+ | Omulfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfmul S rd rs1 rs2 ::bi k)
+ | Odivfs, a1 :: a2 :: nil =>
+ do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
+ OK (Pfdiv S rd rs1 rs2 ::bi k)
+ | Osingleoffloat, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfcvtsd rd rs ::bi k)
+ | Ofloatofsingle, a1 :: nil =>
+ do rd <- freg_of res; do rs <- freg_of a1;
+ OK (Pfcvtds rd rs ::bi k)
+ (** Conversions between int and float *)
+ | Ointoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzs W D rd rs ::bi k)
+ | Ointuoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzu W D rd rs ::bi k)
+ | Ofloatofint, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pscvtf D W rd rs ::bi k)
+ | Ofloatofintu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pucvtf D W rd rs ::bi k)
+ | Ointofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzs W S rd rs ::bi k)
+ | Ointuofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzu W S rd rs ::bi k)
+ | Osingleofint, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pscvtf S W rd rs ::bi k)
+ | Osingleofintu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pucvtf S W rd rs ::bi k)
+ | Olongoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzs X D rd rs ::bi k)
+ | Olonguoffloat, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzu X D rd rs ::bi k)
+ | Ofloatoflong, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pscvtf D X rd rs ::bi k)
+ | Ofloatoflongu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pucvtf D X rd rs ::bi k)
+ | Olongofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzs X S rd rs ::bi k)
+ | Olonguofsingle, a1 :: nil =>
+ do rd <- ireg_of res; do rs <- freg_of a1;
+ OK (Pfcvtzu X S rd rs ::bi k)
+ | Osingleoflong, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pscvtf S X rd rs ::bi k)
+ | Osingleoflongu, a1 :: nil =>
+ do rd <- freg_of res; do rs <- ireg_of a1;
+ OK (Pucvtf S X rd rs ::bi k)
+ (** Boolean tests *)
+ | Ocmp c, _ =>
+ do rd <- ireg_of res;
+ transl_cond c args (Pcset rd (cond_for_cond c) ::bi k)
+ (** Conditional move *)
+ | Osel cmp ty, a1 :: a2 :: args =>
+ match preg_of res with
+ | IR r =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
+ | FR r =>
+ do r1 <- freg_of a1; do r2 <- freg_of a2;
+ transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
+ | _ =>
+ Error(msg "Asmgenblock.Osel")
+ end
+ | _, _ => Error(msg "Asmgenblock.transl_op")
+ end.
+
+(** Translation of addressing modes *)
+
+Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
+ (insn: Asm.addressing -> basic) (k: bcode) : res bcode :=
+ match addr, args with
+ | Aindexed ofs, a1 :: nil =>
+ do r1 <- ireg_of a1;
+ if offset_representable sz ofs then
+ OK (insn (ADimm r1 ofs) ::bi k)
+ else
+ OK (loadimm64 X16 ofs (insn (ADreg r1 X16) ::bi k))
+ | Aindexed2, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ OK (insn (ADreg r1 r2) ::bi k)
+ | Aindexed2shift a, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ if Int.eq a Int.zero then
+ OK (insn (ADreg r1 r2) ::bi k)
+ else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
+ OK (insn (ADlsl r1 r2 a) ::bi k)
+ else
+ OK (Padd X (SOlsl a) X16 r1 r2 ::bi insn (ADimm X16 Int64.zero) ::bi k)
+ | Aindexed2ext x a, a1 :: a2 :: nil =>
+ do r1 <- ireg_of a1; do r2 <- ireg_of a2;
+ if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
+ OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
+ | Xuns32 => ADuxt r1 r2 a end) ::bi k)
+ else
+ OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
+ (insn (ADimm X16 Int64.zero) ::bi k))
+ | Aglobal id ofs, nil =>
+ assertion (negb (Archi.pic_code tt));
+ if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
+ then OK (Padrp id ofs X16 ::bi insn (ADadr X16 id ofs) ::bi k)
+ else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) ::bi k))
+ | Ainstack ofs, nil =>
+ let ofs := Ptrofs.to_int64 ofs in
+ if offset_representable sz ofs then
+ OK (insn (ADimm XSP ofs) ::bi k)
+ else
+ OK (loadimm64 X16 ofs (insn (ADreg XSP X16) ::bi k))
+ | _, _ =>
+ Error(msg "Asmgen.transl_addressing")
+ end.
+
+(** Translation of loads and stores *)
+
+Definition transl_load (chunk: memory_chunk) (addr: Op.addressing)
+ (args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
+ match chunk with
+ | Mint8unsigned =>
+ do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrb W) rd) k
+ | Mint8signed =>
+ do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrsb W) rd) k
+ | Mint16unsigned =>
+ do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrh W) rd) k
+ | Mint16signed =>
+ do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrsh W) rd) k
+ | Mint32 =>
+ do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw rd) k
+ | Mint64 =>
+ do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx rd) k
+ | Mfloat32 =>
+ do rd <- freg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrs rd) k
+ | Mfloat64 =>
+ do rd <- freg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrd rd) k
+ | Many32 =>
+ do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw_a rd) k
+ | Many64 =>
+ do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx_a rd) k
+ end.
+
+Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
+ (args: list mreg) (src: mreg) (k: bcode) : res bcode :=
+ match chunk with
+ | Mint8unsigned | Mint8signed =>
+ do r1 <- ireg_of src; transl_addressing 1 addr args (PSt_rs_a Pstrb r1) k
+ | Mint16unsigned | Mint16signed =>
+ do r1 <- ireg_of src; transl_addressing 2 addr args (PSt_rs_a Pstrh r1) k
+ | Mint32 =>
+ do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw r1) k
+ | Mint64 =>
+ do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx r1) k
+ | Mfloat32 =>
+ do r1 <- freg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrs r1) k
+ | Mfloat64 =>
+ do r1 <- freg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrd r1) k
+ | Many32 =>
+ do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw_a r1) k
+ | Many64 =>
+ do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx_a r1) k
+ end.
+
+(** Translation of a Machblock instruction. *)
+
+Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
+ (ep: bool) (k: bcode) :=
+ match i with
+ | MBgetstack ofs ty dst =>
+ loadind XSP ofs ty dst k
+ | MBsetstack src ofs ty =>
+ storeind src XSP ofs ty k
+ | MBgetparam ofs ty dst =>
+ do c <- loadind X29 ofs ty dst k;
+ OK (if ep then c else loadptr_bc XSP f.(fn_link_ofs) X29 c)
+ | MBop op args res =>
+ transl_op op args res k
+ | MBload t chunk addr args dst =>
+ match t with
+ | TRAP => transl_load chunk addr args dst k
+ | NOTRAP => Error(msg "Asmgenblock.transl_instr_basic: NOTRAP load not supported in aarch64.")
+ end
+ | MBstore chunk addr args src =>
+ transl_store chunk addr args src k
+ end.
+
+(** Translation of a code sequence *)
+
+Definition it1_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
+ match i with
+ (*| MBgetstack ofs ty dst => before && negb (mreg_eq dst R29)*)
+ | MBsetstack src ofs ty => before
+ | MBgetparam ofs ty dst => negb (mreg_eq dst R29)
+ | MBop op args res => before && negb (mreg_eq res R29)
+ (*| MBload trapping_mode chunk addr args dst => before && negb (mreg_eq dst R29)*)
+ | _ => false
+ end.
+
+Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
+ match il with
+ | nil => OK (nil)
+ | i1 :: il' =>
+ do k1 <- transl_basic_code f il' (it1_is_parent it1p i1);
+ transl_instr_basic f i1 it1p k1
+ end.
+
+Program Definition cons_bblocks (ll: list label) (bdy: list basic) (ex: option control): bblocks :=
+ match ex with
+ | None =>
+ match bdy with
+ | nil => {| header := ll; body:= Pnop::nil; exit := None |} :: nil
+ | _ => {| header := ll; body:= bdy; exit := None |} :: nil
+ end
+ | _ =>
+ match bdy with
+ | nil => {| header := ll; body:= nil; exit := ex |} :: nil
+ | _ => {| header := ll; body:= bdy; exit := ex |} :: nil
+ end
+ end.
+Next Obligation.
+ induction bdy. congruence.
+ simpl. auto.
+Qed.
+Next Obligation.
+ destruct ex. simpl. auto.
+ congruence.
+Qed.
+Next Obligation.
+ induction bdy. congruence.
+ simpl. auto.
+Qed.
+
+Definition transl_control (f: Machblock.function) (ctl: control_flow_inst) : res (bcode*control) :=
+ match ctl with
+ | MBcall sig (inl r) => do r1 <- ireg_of r;
+ OK (nil, PCtlFlow (Pblr r1 sig))
+ | MBcall sig (inr symb) => OK (nil, PCtlFlow (Pbl symb sig))
+ | MBtailcall sig (inr symb) => OK(make_epilogue f, PCtlFlow (Pbs symb sig))
+ | MBtailcall sig (inl r) => do r1 <- ireg_of r;
+ OK (make_epilogue f, PCtlFlow (Pbr r1 sig))
+ | MBbuiltin ef args res => OK (nil, Pbuiltin ef (List.map (map_builtin_arg dreg_of) args) (map_builtin_res dreg_of res))
+ | MBgoto lbl => OK (nil, PCtlFlow (Pb lbl))
+ | MBcond cond args lbl => do (bc, c) <- transl_cond_branch cond args lbl nil; OK (bc, PCtlFlow c)
+ | MBreturn => OK (make_epilogue f, PCtlFlow (Pret RA))
+ | MBjumptable arg tbl => do r <- ireg_of arg;
+ OK (nil, PCtlFlow (Pbtbl r tbl))
+ end.
+
+Definition transl_exit (f: Machblock.function) (ext: option control_flow_inst) : res (bcode*option control) :=
+ match ext with
+ Some ctl => do (b,c) <- transl_control f ctl; OK (b, Some c)
+ | None => OK (nil, None)
+ end.
+
+Definition transl_block (f: Machblock.function) (fb: Machblock.bblock) (ep: bool) : res (list bblock) :=
+ do (bdy2, ex) <- transl_exit f fb.(Machblock.exit);
+ do bdy1 <- transl_basic_code f fb.(Machblock.body) ep;
+ OK (cons_bblocks fb.(Machblock.header) (bdy1 @@ bdy2) ex)
+ .
+
+Fixpoint transl_blocks (f: Machblock.function) (lmb: list Machblock.bblock) (ep: bool) :=
+ match lmb with
+ | nil => OK nil
+ | mb :: lmb =>
+ do lb <- transl_block f mb (if Machblock.header mb then ep else false);
+ do lb' <- transl_blocks f lmb false;
+ OK (lb @@ lb')
+ end
+.
+
+Program Definition make_prologue (f: Machblock.function) (k:bblocks) :=
+ {| header := nil; body := Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::bi
+ ((PSt_rs_a Pstrx RA) (ADimm XSP (Ptrofs.to_int64 (f.(fn_retaddr_ofs))))) ::bi nil;
+ exit := None |} :: k.
+
+Definition transl_function (f: Machblock.function) : res Asmblock.function :=
+ do lb <- transl_blocks f f.(Machblock.fn_code) true;
+ OK (mkfunction f.(Machblock.fn_sig)
+ (make_prologue f lb)).
+
+Definition transf_function (f: Machblock.function) : res Asmblock.function :=
+ do tf <- transl_function f;
+ if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+ then Error (msg "code size exceeded")
+ else OK tf.
+
+Definition transf_fundef (f: Machblock.fundef) : res Asmblock.fundef :=
+ transf_partial_fundef transf_function f.
+
+Definition transf_program (p: Machblock.program) : res Asmblock.program :=
+ transform_partial_program transf_fundef p.
+
diff --git a/aarch64/Asmblockgenproof.v b/aarch64/Asmblockgenproof.v
new file mode 100644
index 00000000..11219928
--- /dev/null
+++ b/aarch64/Asmblockgenproof.v
@@ -0,0 +1,1547 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock IterList.
+Require Import Asmblockgen Asmblockgenproof0 Asmblockgenproof1 Asmblockprops.
+Require Import Lia.
+
+Module MB := Machblock.
+Module AB := Asmblock.
+
+Definition match_prog (p: MB.program) (tp: AB.program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable lk: aarch64_linker.
+Variable prog: Machblock.program.
+Variable tprog: Asmblock.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+ forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof (Genv.find_symbol_match TRANSF).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSF).
+
+Lemma functions_translated:
+ forall b f,
+ Genv.find_funct_ptr ge b = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSF).
+
+Lemma functions_transl:
+ forall fb f tf,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transf_function f = OK tf ->
+ Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+ intros. exploit functions_translated; eauto. intros [tf' [A B]].
+ monadInv B. rewrite H0 in EQ; inv EQ; auto.
+Qed.
+
+Lemma transf_function_no_overflow:
+ forall f tf,
+ transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+ lia.
+Qed.
+
+Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs),
+ Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address tge id ofs.
+
+(** * Proof of semantic preservation *)
+
+(** Semantic preservation is proved using a complex simulation diagram
+ of the following form.
+<<
+ MB.step
+ ---------------------------------------->
+ header body exit
+ st1 -----> st2 -----> st3 ------------------> st4
+ | | | |
+ | (A) | (B) | (C) |
+ match_codestate | | | |
+ | header | body1 | body2 | match_states
+ cs1 -----> cs2 -----> cs3 ------> cs4 |
+ | / \ exit |
+ match_asmstate | --------------- --->--- |
+ | / match_asmstate \ |
+ st'1 ---------------------------------------> st'2
+ AB.step *
+>>
+ The invariant between each MB.step/AB.step is the [match_states] predicate below.
+ However, we also need to introduce an intermediary state [Codestate] which allows
+ us to reason on a finer grain, executing header, body and exit separately.
+
+ This [Codestate] consists in a state like [Asmblock.State], except that the
+ code is directly stored in the state, much like [Machblock.State]. It also features
+ additional useful elements to keep track of while executing a bblock.
+*)
+
+Inductive match_states: Machblock.state -> Asm.state -> Prop :=
+ | match_states_intro:
+ forall s fb sp c ep ms m m' rs f tf tc
+ (STACKS: match_stack ge s)
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (MEXT: Mem.extends m m')
+ (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
+ (AG: agree ms sp rs)
+ (DXP: ep = true -> rs#X29 = parent_sp s),
+ match_states (Machblock.State s fb sp c ms m)
+ (Asm.State rs m')
+ | match_states_call:
+ forall s fb ms m m' rs
+ (STACKS: match_stack ge s)
+ (MEXT: Mem.extends m m')
+ (AG: agree ms (parent_sp s) rs)
+ (ATPC: rs PC = Vptr fb Ptrofs.zero)
+ (ATLR: rs RA = parent_ra s),
+ match_states (Machblock.Callstate s fb ms m)
+ (Asm.State rs m')
+ | match_states_return:
+ forall s ms m m' rs
+ (STACKS: match_stack ge s)
+ (MEXT: Mem.extends m m')
+ (AG: agree ms (parent_sp s) rs)
+ (ATPC: rs PC = parent_ra s),
+ match_states (Machblock.Returnstate s ms m)
+ (Asm.State rs m').
+
+Section TRANSL_LABEL. (* Lemmas on translation of MB.is_label into AB.is_label *)
+
+Lemma cons_bblocks_label:
+ forall hd bdy ex tbb tc,
+ cons_bblocks hd bdy ex = tbb::tc ->
+ header tbb = hd.
+Proof.
+ intros until tc. intros CONSB. unfold cons_bblocks in CONSB.
+ destruct ex; try destruct bdy; try destruct c; try destruct i.
+ all: inv CONSB; simpl; auto.
+Qed.
+
+Lemma cons_bblocks_label2:
+ forall hd bdy ex tbb1 tbb2,
+ cons_bblocks hd bdy ex = tbb1::tbb2::nil ->
+ header tbb2 = nil.
+Proof.
+ intros until tbb2. intros CONSB. unfold cons_bblocks in CONSB.
+ destruct ex; try destruct bdy; try destruct c; try destruct i.
+ all: inv CONSB; simpl; auto.
+Qed.
+
+Remark in_dec_transl:
+ forall lbl hd,
+ (if in_dec lbl hd then true else false) = (if MB.in_dec lbl hd then true else false).
+Proof.
+ intros. destruct (in_dec lbl hd), (MB.in_dec lbl hd). all: tauto.
+Qed.
+
+Lemma transl_is_label:
+ forall lbl bb tbb f ep tc,
+ transl_block f bb ep = OK (tbb::tc) ->
+ is_label lbl tbb = MB.is_label lbl bb.
+Proof.
+ intros until tc. intros TLB.
+ destruct tbb as [thd tbdy tex]; simpl in *.
+ monadInv TLB.
+ unfold is_label. simpl.
+ apply cons_bblocks_label in H0. simpl in H0. subst.
+ rewrite in_dec_transl. auto.
+Qed.
+
+Lemma transl_is_label_false2:
+ forall lbl bb f ep tbb1 tbb2,
+ transl_block f bb ep = OK (tbb1::tbb2::nil) ->
+ is_label lbl tbb2 = false.
+Proof.
+ intros until tbb2. intros TLB.
+ destruct tbb2 as [thd tbdy tex]; simpl in *.
+ monadInv TLB. apply cons_bblocks_label2 in H0. simpl in H0. subst.
+ apply is_label_correct_false. simpl. auto.
+Qed.
+
+Lemma transl_block_nonil:
+ forall f c ep tc,
+ transl_block f c ep = OK tc ->
+ tc <> nil.
+Proof.
+ intros. monadInv H. unfold cons_bblocks.
+ destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
+ all: discriminate.
+Qed.
+
+Lemma transl_block_limit: forall f bb ep tbb1 tbb2 tbb3 tc,
+ ~transl_block f bb ep = OK (tbb1 :: tbb2 :: tbb3 :: tc).
+Proof.
+ intros. intro. monadInv H.
+ unfold cons_bblocks in H0.
+ destruct x0; try destruct (x1 @@ x); try destruct c0; try destruct i.
+ all: discriminate.
+Qed.
+
+Lemma find_label_transl_false:
+ forall x f lbl bb ep x',
+ transl_block f bb ep = OK x ->
+ MB.is_label lbl bb = false ->
+ find_label lbl (x++x') = find_label lbl x'.
+Proof.
+ intros until x'. intros TLB MBis; simpl; auto.
+ destruct x as [|x0 x1]; simpl; auto.
+ destruct x1 as [|x1 x2]; simpl; auto.
+ - erewrite <- transl_is_label in MBis; eauto. rewrite MBis. auto.
+ - destruct x2 as [|x2 x3]; simpl; auto.
+ + erewrite <- transl_is_label in MBis; eauto. rewrite MBis.
+ erewrite transl_is_label_false2; eauto.
+ + apply transl_block_limit in TLB. destruct TLB.
+Qed.
+
+Lemma transl_blocks_label:
+ forall lbl f c tc ep,
+ transl_blocks f c ep = OK tc ->
+ match MB.find_label lbl c with
+ | None => find_label lbl tc = None
+ | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_blocks f c' false = OK tc'
+ end.
+Proof.
+ induction c; simpl; intros.
+ inv H. auto.
+ monadInv H.
+ destruct (MB.is_label lbl a) eqn:MBis.
+ - destruct x as [|tbb tc]. { apply transl_block_nonil in EQ. contradiction. }
+ simpl find_label. exploit transl_is_label; eauto. intros ABis. rewrite MBis in ABis.
+ rewrite ABis.
+ eexists. eexists. split; eauto. simpl transl_blocks.
+ assert (MB.header a <> nil).
+ { apply MB.is_label_correct_true in MBis.
+ destruct (MB.header a). contradiction. discriminate. }
+ destruct (MB.header a); try contradiction.
+ rewrite EQ. simpl. rewrite EQ1. simpl. auto.
+ - apply IHc in EQ1. destruct (MB.find_label lbl c).
+ + destruct EQ1 as (tc' & FIND & TLBS). exists tc'; eexists; auto.
+ erewrite find_label_transl_false; eauto.
+ + erewrite find_label_transl_false; eauto.
+Qed.
+
+Lemma find_label_nil:
+ forall bb lbl c,
+ header bb = nil ->
+ find_label lbl (bb::c) = find_label lbl c.
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *. subst.
+ assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+ { erewrite <- is_label_correct_false. simpl. auto. }
+ rewrite H. auto.
+Qed.
+
+Theorem transl_find_label:
+ forall lbl f tf,
+ transf_function f = OK tf ->
+ match MB.find_label lbl f.(MB.fn_code) with
+ | None => find_label lbl tf.(fn_blocks) = None
+ | Some c => exists tc, find_label lbl tf.(fn_blocks) = Some tc /\ transl_blocks f c false = OK tc
+ end.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); inv EQ0. clear g.
+ monadInv EQ. unfold make_prologue. simpl fn_blocks. repeat (rewrite find_label_nil); simpl; auto.
+ eapply transl_blocks_label; eauto.
+Qed.
+
+End TRANSL_LABEL.
+
+(** A valid branch in a piece of Machblock code translates to a valid ``go to''
+ transition in the generated Asmblock code. *)
+
+Lemma find_label_goto_label:
+ forall f tf lbl rs m c' b ofs,
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ transf_function f = OK tf ->
+ rs PC = Vptr b ofs ->
+ MB.find_label lbl f.(MB.fn_code) = Some c' ->
+ exists tc', exists rs',
+ goto_label tf lbl rs m = Next rs' m
+ /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
+ /\ forall r, r <> PC -> rs'#r = rs#r.
+Proof.
+ intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
+ intros (tc & A & B).
+ exploit label_pos_code_tail; eauto. instantiate (1 := 0).
+ intros [pos' [P [Q R]]].
+ exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
+ split. unfold goto_label. rewrite P. rewrite H1. auto.
+ split. rewrite Pregmap.gss. constructor; auto.
+ rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
+ auto. lia.
+ generalize (transf_function_no_overflow _ _ H0). lia.
+ intros. apply Pregmap.gso; auto.
+Qed.
+
+(** Existence of return addresses *)
+
+Lemma return_address_exists:
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros. eapply Asmblockgenproof0.return_address_exists; eauto.
+
+- intros. monadInv H0.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0. monadInv EQ. simpl.
+ exists x; exists true; split; auto.
+ repeat constructor.
+- exact transf_function_no_overflow.
+Qed.
+
+(* Useful for dealing with the many cases in some proofs *)
+Ltac exploreInst :=
+ repeat match goal with
+ | [ H : match ?var with | _ => _ end = _ |- _ ] => destruct var
+ | [ H : OK _ = OK _ |- _ ] => monadInv H
+ | [ |- context[if ?b then _ else _] ] => destruct b
+ | [ |- context[match ?m with | _ => _ end] ] => destruct m
+ | [ |- context[match ?m as _ return _ with | _ => _ end]] => destruct m
+ | [ H : bind _ _ = OK _ |- _ ] => monadInv H
+ | [ H : Error _ = OK _ |- _ ] => inversion H
+ end.
+
+(** Some translation properties *)
+
+Lemma transl_blocks_distrib:
+ forall c f bb tbb tc ep,
+ transl_blocks f (bb::c) ep = OK (tbb::tc)
+ -> transl_block f bb (if MB.header bb then ep else false) = OK (tbb :: nil)
+ /\ transl_blocks f c false = OK tc.
+Proof.
+ intros until ep. intros TLBS.
+ destruct bb as [hd bdy ex].
+ monadInv TLBS. monadInv EQ.
+ unfold transl_block.
+ rewrite EQ0; simpl.
+ simpl in EQ; rewrite EQ; simpl.
+ unfold cons_bblocks in *. simpl in EQ0.
+ destruct ex.
+ - simpl in *. monadInv EQ0.
+ destruct (x3 @@ x1) eqn: CBDY; inv H0; inv EQ1; auto.
+ - simpl in *. inv EQ0. destruct (x3 @@ nil) eqn: CBDY; inv H0; inv EQ1; auto.
+Qed.
+
+Lemma cons_bblocks_decomp:
+ forall thd tbdy tex tbb,
+ (tbdy <> nil \/ tex <> None) ->
+ cons_bblocks thd tbdy tex = tbb :: nil ->
+ header tbb = thd
+ /\ body tbb = tbdy
+ /\ exit tbb = tex.
+Proof.
+ intros until tbb. intros Hnonil CONSB. unfold cons_bblocks in CONSB.
+ destruct (tex) eqn:ECTL.
+ - destruct tbdy; inv CONSB; simpl; auto.
+ - inversion Hnonil.
+ + destruct tbdy as [|bi tbdy]; [ contradiction H; auto | inv CONSB; auto].
+ + contradict H; simpl; auto.
+Qed.
+
+Lemma transl_blocks_nonil:
+ forall f bb c tc ep,
+ transl_blocks f (bb::c) ep = OK tc ->
+ exists tbb tc', tc = tbb :: tc'.
+Proof.
+ intros until ep. intros TLBS. monadInv TLBS. monadInv EQ. unfold cons_bblocks.
+ destruct (x2);
+ destruct (x3 @@ x1); simpl; eauto.
+Qed.
+
+Definition mb_remove_header bb := {| MB.header := nil; MB.body := MB.body bb; MB.exit := MB.exit bb |}.
+
+Definition mb_remove_body (bb: MB.bblock) :=
+ {| MB.header := MB.header bb; MB.body := nil; MB.exit := MB.exit bb |}.
+
+Definition mbsize (bb: MB.bblock) := (length (MB.body bb) + length_opt (MB.exit bb))%nat.
+
+Lemma mbsize_eqz:
+ forall bb, mbsize bb = 0%nat -> MB.body bb = nil /\ MB.exit bb = None.
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *. unfold mbsize in H.
+ remember (length _) as a. remember (length_opt _) as b.
+ assert (a = 0%nat) by lia. assert (b = 0%nat) by lia. subst. clear H.
+ inv H0. inv H1. destruct bdy; destruct ex; auto.
+ all: try discriminate.
+Qed.
+
+Lemma mbsize_neqz:
+ forall bb, mbsize bb <> 0%nat -> (MB.body bb <> nil \/ MB.exit bb <> None).
+Proof.
+ intros. destruct bb as [hd bdy ex]; simpl in *.
+ destruct bdy; destruct ex; try (right; discriminate); try (left; discriminate).
+ contradict H. unfold mbsize. simpl. auto.
+Qed.
+
+Record codestate :=
+ Codestate { pstate: state; (**r projection to Asmblock.state *)
+ pheader: list label;
+ pbody1: list basic; (**r list of basic instructions coming from the translation of the Machblock body *)
+ pbody2: list basic; (**r list of basic instructions coming from the translation of the Machblock exit *)
+ pctl: option control; (**r exit instruction, coming from the translation of the Machblock exit *)
+ ep: bool; (**r reflects the [ep] variable used in the translation *)
+ rem: list AB.bblock; (**r remaining bblocks to execute *)
+ cur: bblock (**r current bblock to execute - to keep track of its size when incrementing PC *)
+ }.
+
+(* The part that deals with Machblock <-> Codestate agreement
+ * Note about DXP: the property of [ep] only matters if the current block doesn't have a header, hence the condition *)
+Inductive match_codestate fb: Machblock.state -> codestate -> Prop :=
+ | match_codestate_intro:
+ forall s sp ms m rs0 m0 f tc ep c bb tbb tbc1 tbc2 ex
+ (STACKS: match_stack ge s)
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (MEXT: Mem.extends m m0)
+ (TBC: transl_basic_code f (MB.body bb) (if MB.header bb then ep else false) = OK tbc1)
+ (TIC: transl_exit f (MB.exit bb) = OK (tbc2, ex))
+ (TBLS: transl_blocks f c false = OK tc)
+ (AG: agree ms sp rs0)
+ (DXP: (if MB.header bb then ep else false) = true -> rs0#X29 = parent_sp s)
+ ,
+ match_codestate fb (Machblock.State s fb sp (bb::c) ms m)
+ {| pstate := (Asm.State rs0 m0);
+ pheader := (MB.header bb);
+ pbody1 := tbc1;
+ pbody2 := tbc2;
+ pctl := ex;
+ ep := ep;
+ rem := tc;
+ cur := tbb
+ |}
+.
+
+(* The part ensuring that the code in Codestate actually resides at [rs PC] *)
+Inductive match_asmstate fb: codestate -> Asm.state -> Prop :=
+ | match_asmstate_some:
+ forall rs f tf tc m tbb ofs ep tbdy1 tbdy2 tex lhd
+ (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
+ (TRANSF: transf_function f = OK tf)
+ (PCeq: rs PC = Vptr fb ofs)
+ (TAIL: code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (tbb::tc))
+ ,
+ match_asmstate fb
+ {| pstate := (Asm.State rs m);
+ pheader := lhd;
+ pbody1 := tbdy1;
+ pbody2 := tbdy2;
+ pctl := tex;
+ ep := ep;
+ rem := tc;
+ cur := tbb |}
+ (Asm.State rs m)
+.
+
+Lemma indexed_memory_access_nonil: forall f ofs r i k,
+ indexed_memory_access_bc f ofs r i k <> nil.
+Proof.
+ intros.
+ unfold indexed_memory_access_bc, loadimm64, loadimm, loadimm_z, loadimm_n;
+ desif; try congruence.
+ all: destruct decompose_int; try destruct p; try congruence.
+Qed.
+
+Lemma loadimm_nonil: forall sz x n k,
+ loadimm sz x n k <> nil.
+Proof.
+ intros.
+ unfold loadimm. desif;
+ unfold loadimm_n, loadimm_z.
+ all: destruct decompose_int; try destruct p; try congruence.
+Qed.
+
+Lemma loadimm32_nonil: forall sz x n,
+ loadimm32 sz x n <> nil.
+Proof.
+ intros.
+ unfold loadimm32. desif; try congruence.
+ apply loadimm_nonil.
+Qed.
+
+Lemma loadimm64_nonil: forall sz x n,
+ loadimm64 sz x n <> nil.
+Proof.
+ intros.
+ unfold loadimm64. desif; try congruence.
+ apply loadimm_nonil.
+Qed.
+
+Lemma loadsymbol_nonil: forall sz x n k,
+ loadsymbol sz x n k <> nil.
+Proof.
+ intros.
+ unfold loadsymbol. desif; try congruence.
+Qed.
+
+Lemma move_extended_nonil: forall x0 x1 x2 a k,
+ move_extended x1 x2 x0 a k <> nil.
+Proof.
+ intros. unfold move_extended, move_extended_base.
+ desif; try congruence.
+Qed.
+
+Lemma arith_extended_nonil: forall insnX insnS x0 x1 x2 x3 a k,
+ arith_extended insnX insnS x1 x2 x3 x0 a k <> nil.
+Proof.
+ intros. unfold arith_extended, move_extended_base.
+ desif; try congruence.
+Qed.
+
+Lemma transl_instr_basic_nonil:
+ forall k f bi ep x,
+ transl_instr_basic f bi ep k = OK x ->
+ x <> nil.
+Proof.
+ intros until x. intros TIB.
+ destruct bi.
+ - simpl in TIB. unfold loadind in TIB;
+ exploreInst; try discriminate; apply indexed_memory_access_nonil.
+ - simpl in TIB. unfold storeind in TIB;
+ exploreInst; try discriminate; apply indexed_memory_access_nonil.
+ - simpl in TIB. monadInv TIB. unfold loadind in EQ. exploreInst; try discriminate;
+ unfold loadptr_bc; apply indexed_memory_access_nonil.
+ - simpl in TIB. unfold transl_op in TIB. exploreInst; try discriminate;
+ unfold addimm32, addimm64, shrx32, shrx64,
+ logicalimm32, logicalimm64, addimm_aux.
+ all: desif; try congruence;
+ try apply loadimm32_nonil; try apply loadimm64_nonil; try apply loadsymbol_nonil;
+ try apply move_extended_nonil; try apply arith_extended_nonil.
+ all: unfold transl_cond in *; exploreInst; try discriminate;
+ try apply loadimm32_nonil; try apply loadimm64_nonil.
+ - simpl in TIB. unfold transl_load in TIB. exploreInst; try discriminate;
+ unfold transl_addressing in *; exploreInst; try discriminate.
+ all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil.
+ - simpl in TIB. unfold transl_store in TIB. exploreInst; try discriminate;
+ unfold transl_addressing in *; exploreInst; try discriminate.
+ all: try apply loadimm64_nonil; try apply arith_extended_nonil; try apply loadsymbol_nonil.
+Qed.
+
+Lemma transl_basic_code_nonil:
+ forall bdy f x ep,
+ bdy <> nil ->
+ transl_basic_code f bdy ep = OK x ->
+ x <> nil.
+Proof.
+ induction bdy as [|bi bdy].
+ intros. contradict H0; auto.
+ destruct bdy as [|bi2 bdy].
+ - clear IHbdy. intros f x b _ TBC. simpl in TBC. eapply transl_instr_basic_nonil; eauto.
+ - intros f x b Hnonil TBC. remember (bi2 :: bdy) as bdy'.
+ monadInv TBC.
+ assert (x0 <> nil).
+ eapply IHbdy; eauto. subst bdy'. discriminate.
+ eapply transl_instr_basic_nonil; eauto.
+Qed.
+
+Lemma transl_exit_nonil:
+ forall ex f bdy x,
+ ex <> None ->
+ transl_exit f ex = OK(bdy, x) ->
+ x <> None.
+Proof.
+ intros ex f bdy x Hnonil TIC.
+ destruct ex as [ex|].
+ - clear Hnonil. destruct ex.
+ all: try (simpl in TIC; try monadInv TIC; exploreInst; discriminate).
+ - contradict Hnonil; auto.
+Qed.
+
+Theorem app_nonil: forall A (l1 l2: list A),
+ l1 <> nil ->
+ l1 @@ l2 <> nil.
+Proof.
+ induction l2.
+ - intros; rewrite app_nil_r; auto.
+ - intros. unfold not. intros. symmetry in H0.
+ generalize (app_cons_not_nil); intros. unfold not in H1.
+ generalize (H0). apply H1.
+Qed.
+
+Theorem match_state_codestate:
+ forall mbs abs s fb sp bb c ms m,
+ (MB.body bb <> nil \/ MB.exit bb <> None) ->
+ mbs = (Machblock.State s fb sp (bb::c) ms m) ->
+ match_states mbs abs ->
+ exists cs fb f tbb tc ep,
+ match_codestate fb mbs cs /\ match_asmstate fb cs abs
+ /\ Genv.find_funct_ptr ge fb = Some (Internal f)
+ /\ transl_blocks f (bb::c) ep = OK (tbb::tc)
+ /\ body tbb = pbody1 cs ++ pbody2 cs
+ /\ exit tbb = pctl cs
+ /\ cur cs = tbb /\ rem cs = tc
+ /\ pstate cs = abs.
+Proof.
+ intros until m. intros Hnotempty Hmbs MS. subst. inv MS.
+ inv AT. clear H0. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst.
+ exploit transl_blocks_distrib; eauto. intros (TLB & TLBS). clear H2.
+ monadInv TLB. exploit cons_bblocks_decomp; eauto.
+ { inversion Hnotempty.
+ - destruct (MB.body bb) as [|bi bdy]; try (contradict H0; simpl; auto; fail).
+ left. apply app_nonil. eapply transl_basic_code_nonil; eauto.
+ - destruct (MB.exit bb) as [ei|]; try (contradict H0; simpl; auto; fail).
+ right. eapply transl_exit_nonil; eauto. }
+ intros (Hth & Htbdy & Htexit).
+ exists {| pstate := (State rs m'); pheader := (Machblock.header bb); pbody1 := x1; pbody2 := x;
+ pctl := x0; ep := ep0; rem := tc'; cur := tbb |}, fb, f, tbb, tc', ep0.
+ repeat split. 1-2: econstructor; eauto.
+ { destruct (MB.header bb). eauto. discriminate. } eauto.
+ unfold transl_blocks. fold transl_blocks. unfold transl_block. rewrite EQ. simpl. rewrite EQ1; simpl.
+ rewrite TLBS. simpl. rewrite H2.
+ all: simpl; auto.
+Qed.
+
+Lemma exec_straight_body:
+ forall c c' rs1 m1 rs2 m2,
+ exec_straight tge lk c rs1 m1 c' rs2 m2 ->
+ exists l,
+ c = l ++ c'
+ /\ exec_body lk tge l rs1 m1 = Next rs2 m2.
+Proof.
+ induction c; try (intros; inv H; fail).
+ intros until m2. intros EXES. inv EXES.
+ - exists (a :: nil). repeat (split; simpl; auto). rewrite H6. auto.
+ - eapply IHc in H7; eauto. destruct H7 as (l' & Hc & EXECB). subst.
+ exists (a :: l'). repeat (split; simpl; auto).
+ rewrite H1. auto.
+Qed.
+
+Lemma exec_straight_body2:
+ forall c rs1 m1 c' rs2 m2,
+ exec_straight tge lk c rs1 m1 c' rs2 m2 ->
+ exists body,
+ exec_body lk tge body rs1 m1 = Next rs2 m2
+ /\ body ++ c' = c.
+Proof.
+ intros until m2. induction 1.
+ - exists (i1::nil). split; auto. simpl. rewrite H. auto.
+ - destruct IHexec_straight as (bdy & EXEB & BTC).
+ exists (i:: bdy). split; simpl.
+ + rewrite H. auto.
+ + congruence.
+Qed.
+
+Lemma exec_straight_opt_body2:
+ forall c rs1 m1 c' rs2 m2,
+ exec_straight_opt tge lk c rs1 m1 c' rs2 m2 ->
+ exists body,
+ exec_body lk tge body rs1 m1 = Next rs2 m2
+ /\ body ++ c' = c.
+Proof.
+ intros until m2. intros EXES.
+ inv EXES.
+ - exists nil. split; auto.
+ - eapply exec_straight_body2. auto.
+Qed.
+
+Lemma PC_not_data_preg: forall r ,
+ data_preg r = true ->
+ r <> PC.
+Proof.
+ intros. destruct (PregEq.eq r PC); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma X30_not_data_preg: forall r ,
+ data_preg r = true ->
+ r <> X30.
+Proof.
+ intros. destruct (PregEq.eq r X30); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma X16_not_data_preg: forall r ,
+ data_preg r = true ->
+ r <> X16.
+Proof.
+ intros. destruct (PregEq.eq r X16); [ rewrite e in H; simpl in H; discriminate | auto ].
+Qed.
+
+Lemma undef_regs_other_2':
+ forall r rl rs,
+ data_preg r = true ->
+ preg_notin r rl ->
+ undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of rl) rs r = rs r.
+Proof.
+ intros. apply undef_regs_other. intros. simpl in H1.
+ destruct H1 as [HX16 | [HX30 | HDES]]; subst.
+ apply X16_not_data_preg; auto. apply X30_not_data_preg; auto.
+ exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+ rewrite preg_notin_charact in H0. auto.
+Qed.
+
+Ltac Simpl :=
+ rewrite Pregmap.gso; try apply PC_not_data_preg; try apply X30_not_data_preg.
+
+(* See (C) in the diagram. The proofs are mostly adapted from the previous Mach->Asm proofs, but are
+ unfortunately quite cumbersome. To reproduce them, it's best to have a Coq IDE with you and see by
+ yourself the steps *)
+Theorem step_simu_control:
+ forall bb' fb fn s sp c ms' m' rs2 m2 t S'' rs1 m1 tbb tbdy2 tex cs2,
+ MB.body bb' = nil ->
+ Genv.find_funct_ptr tge fb = Some (Internal fn) ->
+ pstate cs2 = (State rs2 m2) ->
+ pbody1 cs2 = nil -> pbody2 cs2 = tbdy2 -> pctl cs2 = tex ->
+ cur cs2 = tbb ->
+ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2 ->
+ match_asmstate fb cs2 (State rs1 m1) ->
+ exit_step return_address_offset ge (MB.exit bb') (MB.State s fb sp (bb'::c) ms' m') t S'' ->
+ (exists rs3 m3 rs4 m4,
+ exec_body lk tge tbdy2 rs2 m2 = Next rs3 m3
+ /\ exec_exit tge fn (Ptrofs.repr (size tbb)) rs3 m3 tex t rs4 m4
+ /\ match_states S'' (State rs4 m4)).
+Proof.
+ intros until cs2. intros Hbody FIND Hpstate Hpbody1 Hpbody2 Hpctl Hcur MCS MAS ESTEP.
+ inv ESTEP.
+ - inv MCS. inv MAS. simpl in *.
+ inv Hpstate.
+ destruct ctl.
+ + (* MBcall *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ destruct s1 as [rf|fid]; simpl in H1.
+ * (* Indirect call *)
+ monadInv H1. monadInv EQ.
+ assert (ms' rf = Vptr f' Ptrofs.zero).
+ { unfold find_function_ptr in H12. destruct (ms' rf); try discriminate.
+ revert H12; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+ assert (rs2 x = Vptr f' Ptrofs.zero).
+ { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+ remember (Ptrofs.add _ _) as ofs'.
+ assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+ { econstructor; eauto. }
+ assert (f1 = f) by congruence. subst f1.
+ exploit return_address_offset_correct; eauto. intros; subst ra.
+
+ repeat eexists.
+ econstructor; eauto. econstructor.
+ econstructor; eauto. econstructor; eauto.
+ eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros.
+ unfold incrPC; repeat Simpl; auto.
+ simpl. unfold incrPC; rewrite Pregmap.gso; auto; try discriminate.
+ rewrite !Pregmap.gss. rewrite PCeq. rewrite Heqofs'. simpl. auto.
+
+ * (* Direct call *)
+ monadInv H1.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+ remember (Ptrofs.add _ _) as ofs'.
+ assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+ econstructor; eauto.
+ assert (f1 = f) by congruence. subst f1.
+ exploit return_address_offset_correct; eauto. intros; subst ra.
+ repeat eexists.
+ econstructor; eauto. econstructor.
+ econstructor; eauto. econstructor; eauto. eapply agree_sp_def; eauto. simpl. eapply agree_exten; eauto. intros.
+ unfold incrPC; repeat Simpl; auto. unfold Genv.symbol_address. rewrite symbols_preserved. simpl in H12. rewrite H12. auto.
+ unfold incrPC; simpl; rewrite Pregmap.gso; try discriminate. rewrite !Pregmap.gss.
+ subst. unfold Val.offset_ptr. rewrite PCeq. auto.
+ + (* MBtailcall *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ exploit Mem.loadv_extends. eauto. eexact H13. auto. simpl. intros [parent' [A B]].
+ destruct s1 as [rf|fid]; simpl in H11.
+ * monadInv H1. monadInv EQ.
+ assert (ms' rf = Vptr f' Ptrofs.zero).
+ { destruct (ms' rf); try discriminate. revert H11. predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
+ assert (rs2 x = Vptr f' Ptrofs.zero).
+ { exploit ireg_val; eauto. rewrite H; intros LD; inv LD; auto. }
+
+ assert (f = f1) by congruence. subst f1. clear FIND1. clear H12.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ intros (l & MKEPI & EXEB).
+ repeat eexists. rewrite app_nil_r in MKEPI.
+ rewrite <- MKEPI in EXEB.
+ eauto. econstructor. simpl. unfold incrPC.
+ rewrite !Pregmap.gso; try discriminate. eauto.
+ econstructor; eauto.
+ { apply agree_set_other.
+ - econstructor; auto with asmgen.
+ + apply V.
+ + intro r. destruct r; apply V; auto.
+ - eauto with asmgen. }
+ rewrite Pregmap.gss. rewrite Z; auto; try discriminate.
+ eapply ireg_of_not_X30''; eauto.
+ eapply ireg_of_not_X16''; eauto.
+ * monadInv H1. assert (f = f1) by congruence. subst f1. clear FIND1. clear H12.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ intros (l & MKEPI & EXEB).
+ repeat eexists. inv EQ. rewrite app_nil_r in MKEPI.
+ rewrite <- MKEPI in EXEB.
+ eauto. inv EQ. econstructor. simpl. unfold incrPC.
+ eauto.
+ econstructor; eauto.
+ { apply agree_set_other.
+ - econstructor; auto with asmgen.
+ + apply V.
+ + intro r. destruct r; apply V; auto.
+ - eauto with asmgen. }
+ { rewrite Pregmap.gss. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H11. auto. }
+ + (* MBbuiltin *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+ remember (Ptrofs.add _ _) as ofs'.
+ assert (TCA: transl_code_at_pc ge (Vptr fb ofs') fb f c false tf tc).
+ econstructor; eauto.
+
+ monadInv TBC. monadInv TIC. inv H0.
+
+ exploit builtin_args_match; eauto. intros [vargs' [P Q]].
+ exploit external_call_mem_extends; eauto.
+ intros [vres' [m2' [A [B [C D]]]]].
+
+ repeat eexists. econstructor. erewrite <- sp_val by eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved. eauto.
+ econstructor; eauto.
+ unfold incrPC. rewrite Pregmap.gss.
+ rewrite set_res_other. rewrite undef_regs_other. unfold Val.offset_ptr. rewrite PCeq.
+ eauto.
+ intros; simpl in *; destruct H as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+ exploit list_in_map_inv; eauto; intros [mr [E F]]; subst; discriminate.
+ auto with asmgen. apply agree_nextblock. eapply agree_set_res; auto.
+ eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2'; auto.
+ intros. discriminate.
+ + (* MBgoto *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0. assert (f1 = f) by congruence. subst f1. clear H9.
+ remember (incrPC (Ptrofs.repr (size tbb)) rs2) as rs2'.
+ exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+ exploit find_label_goto_label.
+ eauto. eauto.
+ instantiate (2 := rs2').
+ { subst. unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. eauto. }
+ eauto.
+ intros (tc' & rs' & GOTO & AT2 & INV).
+
+ eexists. eexists. repeat eexists. repeat split.
+ econstructor; eauto.
+ rewrite Heqrs2' in INV. unfold incrPC in INV.
+ rewrite Heqrs2' in GOTO; simpl; eauto.
+ econstructor; eauto.
+ eapply agree_exten; eauto with asmgen.
+ assert (forall r : preg, r <> PC -> rs' r = rs2 r).
+ { intros. rewrite Heqrs2' in INV.
+ rewrite INV; unfold incrPC; try rewrite Pregmap.gso; auto. }
+ eauto with asmgen.
+ congruence.
+ + (* MBcond *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0. monadInv H1. monadInv EQ.
+
+ * (* MBcond true *)
+ assert (f0 = f) by congruence. subst f0.
+ exploit eval_condition_lessdef.
+ eapply preg_vals; eauto.
+ all: eauto.
+ intros EC.
+ exploit transl_cbranch_correct_1; eauto. intros (rs', H).
+ destruct H as [ES [ECFI]].
+ exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+ assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. }
+ rewrite PCeq' in PCeq.
+ assert (f1 = f) by congruence. subst f1.
+ exploit find_label_goto_label.
+ 4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs')).
+ unfold incrPC, Val.offset_ptr. rewrite PCeq. rewrite Pregmap.gss. eauto.
+ intros (tc' & rs3 & GOTOL & TLPC & Hrs3).
+ exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+
+ repeat eexists.
+ rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
+ rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto.
+
+ econstructor; eauto.
+ eapply agree_exten with rs2; eauto with asmgen.
+ { intros. rewrite Hrs3; unfold incrPC. Simpl. rewrite H. all: auto. apply PC_not_data_preg; auto. }
+ intros. discriminate.
+ * (* MBcond false *)
+ assert (f0 = f) by congruence. subst f0. monadInv H1. monadInv EQ.
+ exploit eval_condition_lessdef.
+ eapply preg_vals; eauto.
+ all: eauto.
+ intros EC.
+
+ exploit transl_cbranch_correct_1; eauto. intros (rs', H).
+ destruct H as [ES [ECFI]].
+ exploit exec_straight_opt_body2. eauto. intros (bdy & EXEB & BTC).
+ assert (PCeq': rs2 PC = rs' PC). { inv ES; auto. erewrite <- exec_straight_pc. 2: eapply H0. eauto. }
+ rewrite PCeq' in PCeq.
+ exploit functions_transl. eapply FIND1. eapply TRANSF0. intros FIND'.
+ assert (tf = fn) by congruence. subst tf.
+
+ assert (NOOV: size_blocks fn.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1.
+
+ repeat eexists.
+ rewrite <- BTC. simpl. rewrite app_nil_r. eauto.
+ rewrite <- BTC. simpl. econstructor. rewrite ECFI. eauto.
+
+ econstructor; eauto.
+ unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq. econstructor; eauto.
+ eapply agree_exten with rs2; eauto with asmgen.
+ { intros. unfold incrPC. Simpl. rewrite H. all: auto. }
+ intros. discriminate.
+ + (* MBjumptable *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ monadInv H1. monadInv EQ.
+ generalize (transf_function_no_overflow _ _ TRANSF0); intro NOOV.
+ assert (f1 = f) by congruence. subst f1.
+ exploit find_label_goto_label. 4: eapply H14. 1-2: eauto. instantiate (2 := (incrPC (Ptrofs.repr (size tbb)) rs2) # X16 <- Vundef).
+ unfold incrPC. Simpl. unfold Val.offset_ptr. rewrite PCeq. reflexivity. discriminate.
+ exploit functions_transl. eapply FIND0. eapply TRANSF0. intros FIND3. assert (fn = tf) by congruence. subst fn.
+
+ intros [tc' [rs' [A [B C]]]].
+ exploit ireg_val; eauto. rewrite H11. intros LD; inv LD.
+
+ repeat eexists. econstructor. simpl. Simpl. 2: { eapply ireg_of_not_X16''; eauto. }
+ unfold incrPC. rewrite Pregmap.gso; try discriminate. rewrite <- H1.
+ simpl. unfold Mach.label in H12. unfold label. rewrite H12. eapply A.
+ econstructor; eauto.
+ eapply agree_undef_regs; eauto. intros. rewrite C; auto with asmgen.
+ { unfold incrPC. repeat Simpl; auto. apply X16_not_data_preg; auto. }
+ discriminate.
+ + (* MBreturn *)
+ destruct bb' as [mhd' mbdy' mex']; simpl in *. subst.
+ inv TBC. inv TIC. inv H0.
+
+ assert (f0 = f) by congruence. subst f0.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
+ exploit exec_straight_body; eauto.
+ simpl. eauto.
+ intros EXEB. destruct EXEB as [l [MKEPI EXEB]].
+ assert (f1 = f) by congruence. subst f1.
+
+ repeat eexists.
+ rewrite app_nil_r in MKEPI. rewrite <- MKEPI in EXEB. eauto.
+ econstructor. simpl. reflexivity.
+ econstructor; eauto.
+ unfold incrPC. repeat apply agree_set_other; auto with asmgen.
+
+ - inv MCS. inv MAS. simpl in *. subst. inv Hpstate.
+ destruct bb' as [hd' bdy' ex']; simpl in *. subst.
+ monadInv TBC. monadInv TIC. simpl in *.
+ simpl. repeat eexists.
+ econstructor. econstructor. 4: instantiate (3 := false). all:eauto.
+ unfold incrPC. rewrite Pregmap.gss. unfold Val.offset_ptr. rewrite PCeq.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+ assert (f = f0) by congruence. subst f0. econstructor; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV TAIL). intro CT1. eauto.
+ eapply agree_exten; eauto. intros. unfold incrPC; Simpl; auto.
+ discriminate.
+Qed.
+
+(* Handling the individual instructions of theorem (B) in the above diagram. A bit less cumbersome, but still tough *)
+Theorem step_simu_basic:
+ forall bb bb' s fb sp c ms m rs1 m1 ms' m' bi cs1 tbdy bdy,
+ MB.header bb = nil -> MB.body bb = bi::(bdy) ->
+ bb' = {| MB.header := nil; MB.body := bdy; MB.exit := MB.exit bb |} ->
+ basic_step ge s fb sp ms m bi ms' m' ->
+ pstate cs1 = (State rs1 m1) -> pbody1 cs1 = tbdy ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists rs2 m2 l cs2 tbdy',
+ cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := tbdy'; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := it1_is_parent (ep cs1) bi; rem := rem cs1; cur := cur cs1 |}
+ /\ tbdy = l ++ tbdy'
+ /\ exec_body lk tge l rs1 m1 = Next rs2 m2
+ /\ match_codestate fb (MB.State s fb sp (bb'::c) ms' m') cs2).
+Proof.
+ intros until bdy. intros Hheader Hbody (* Hnotempty *) Hbb' BSTEP Hpstate Hpbody1 MCS. inv MCS.
+ simpl in *. inv Hpstate.
+ rewrite Hbody in TBC. monadInv TBC.
+ inv BSTEP.
+
+ - (* MBgetstack *)
+ simpl in EQ0.
+ unfold Mach.load_stack in H.
+ exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+ rewrite (sp_val _ _ _ AG) in A.
+ exploit loadind_correct; eauto with asmgen.
+ intros (rs2 & EXECS & Hrs'1 & Hrs'2).
+ eapply exec_straight_body in EXECS.
+ destruct EXECS as (l & Hlbi & EXECB).
+ exists rs2, m1, l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+ subst. simpl in Hheadereq.
+
+ eapply match_codestate_intro; eauto.
+ eapply agree_set_mreg; eauto with asmgen.
+ intro Hep. simpl in Hep. discriminate.
+ - (* MBsetstack *)
+ simpl in EQ0.
+ unfold Mach.store_stack in H.
+ assert (Val.lessdef (ms src) (rs1 (preg_of src))). { eapply preg_val; eauto. }
+ exploit Mem.storev_extends; eauto. intros [m2' [A B]].
+ exploit storeind_correct; eauto with asmgen.
+ rewrite (sp_val _ _ _ AG) in A. eauto. intros [rs' [P Q]].
+
+ eapply exec_straight_body in P.
+ destruct P as (l & ll & EXECB).
+ exists rs', m2', l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+ eapply agree_undef_regs; eauto with asmgen.
+ simpl; intros. rewrite Q; auto with asmgen. rewrite Hheader in DXP. auto.
+ - (* MBgetparam *)
+ simpl in EQ0.
+
+ assert (f0 = f) by congruence; subst f0.
+ unfold Mach.load_stack in *.
+ exploit Mem.loadv_extends. eauto. eexact H0. auto.
+ intros [parent' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
+ exploit Mem.loadv_extends. eauto. eexact H1. auto.
+ intros [v' [C D]].
+
+ monadInv EQ0. rewrite Hheader. rewrite Hheader in DXP.
+ destruct ep0 eqn:EPeq.
+
+ (* X29 contains parent *)
+ + exploit loadind_correct. eexact EQ1.
+ instantiate (2 := rs1). rewrite DXP; eauto. discriminate.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ destruct P as (l & ll & EXECB).
+ exists rs2, m1, l. eexists.
+ eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. simpl. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+
+ eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
+ simpl; intros. rewrite R; auto with asmgen. unfold preg_of.
+ apply preg_of_not_X29; auto.
+
+ (* X29 does not contain parent *)
+ + rewrite chunk_of_Tptr in A.
+ exploit loadptr_correct. eexact A. discriminate. intros [rs2 [P [Q R]]].
+ exploit loadind_correct. eexact EQ1. instantiate (2 := rs2). rewrite Q. eauto.
+ discriminate.
+ intros [rs3 [S [T U]]].
+
+ exploit exec_straight_trans.
+ eapply P.
+ eapply S.
+ intros EXES.
+
+ eapply exec_straight_body in EXES.
+ destruct EXES as (l & ll & EXECB).
+ exists rs3, m1, l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+ eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
+ instantiate (1 := rs2#X29 <- (rs3#X29)). intros.
+ rewrite Pregmap.gso; auto with asmgen.
+ congruence.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
+ simpl; intros. rewrite U; auto with asmgen.
+ apply preg_of_not_X29; auto.
+ - (* MBop *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (eval_operation tge sp op (map ms args) m' = Some v).
+ rewrite <- H. apply eval_operation_preserved. exact symbols_preserved.
+ exploit eval_operation_lessdef.
+ eapply preg_vals; eauto.
+ 2: eexact H0.
+ all: eauto.
+ intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ destruct P as (l & ll & EXECB).
+ exists rs2, m1, l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ subst.
+ eapply match_codestate_intro; eauto. simpl. simpl in EQ. rewrite Hheader in EQ. auto.
+ apply agree_set_undef_mreg with rs1; auto.
+ apply Val.lessdef_trans with v'; auto.
+ simpl; intros. destruct (andb_prop _ _ H1); clear H1.
+ rewrite R; auto. apply preg_of_not_X29; auto.
+Local Transparent destroyed_by_op.
+ destruct op; simpl; auto; try discriminate.
+ - (* MBload *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
+ rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+ exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+ intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ exploit Mem.loadv_extends; eauto. intros [v' [C D]]. destruct trap; try discriminate.
+ exploit transl_load_correct; eauto.
+ intros [rs2 [P [Q R]]].
+
+ eapply exec_straight_body in P.
+ destruct P as (l & ll & EXECB).
+ exists rs2, m1, l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+ eapply agree_set_mreg; eauto with asmgen.
+ intro Hep. simpl in Hep. discriminate.
+ - (* MBload notrap1 *)
+ simpl in EQ0. unfold transl_load in EQ0. discriminate.
+ - (* MBload notrap2 *)
+ simpl in EQ0. unfold transl_load in EQ0. discriminate.
+ - (* MBstore *)
+ simpl in EQ0. rewrite Hheader in DXP.
+
+ assert (Op.eval_addressing tge sp addr (map ms args) = Some a).
+ rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
+ exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
+ intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
+ assert (Val.lessdef (ms src) (rs1 (preg_of src))). eapply preg_val; eauto.
+ exploit Mem.storev_extends; eauto. intros [m2' [C D]].
+ exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+
+ eapply exec_straight_body in P.
+ destruct P as (l & ll & EXECB).
+ exists rs2, m2', l.
+ eexists. eexists. split. instantiate (1 := x). eauto.
+ repeat (split; auto).
+ remember {| MB.header := _; MB.body := _; MB.exit := _ |} as bb'.
+ assert (Hheadereq: MB.header bb' = MB.header bb). { subst. auto. }
+ subst.
+ eapply match_codestate_intro; eauto.
+ eapply agree_undef_regs; eauto with asmgen.
+ intro Hep. simpl in Hep. discriminate.
+Qed.
+
+Lemma exec_body_trans:
+ forall l l' rs0 m0 rs1 m1 rs2 m2,
+ exec_body lk tge l rs0 m0 = Next rs1 m1 ->
+ exec_body lk tge l' rs1 m1 = Next rs2 m2 ->
+ exec_body lk tge (l++l') rs0 m0 = Next rs2 m2.
+Proof.
+ induction l.
+ - simpl. induction l'. intros.
+ + simpl in *. congruence.
+ + intros. inv H. auto.
+ - intros until m2. intros EXEB1 EXEB2.
+ inv EXEB1. destruct (exec_basic _) eqn:EBI; try discriminate.
+ simpl. rewrite EBI. eapply IHl; eauto.
+Qed.
+
+Lemma exec_body_control:
+ forall b t rs1 m1 rs2 m2 rs3 m3 fn,
+ exec_body lk tge (body b) rs1 m1 = Next rs2 m2 ->
+ exec_exit tge fn (Ptrofs.repr (size b)) rs2 m2 (exit b) t rs3 m3 ->
+ exec_bblock lk tge fn b rs1 m1 t rs3 m3.
+Proof.
+ intros until fn. intros EXEB EXECTL.
+ econstructor; eauto.
+Qed.
+
+Inductive exec_header: codestate -> codestate -> Prop :=
+ | exec_header_cons: forall cs1,
+ exec_header cs1 {| pstate := pstate cs1; pheader := nil; pbody1 := pbody1 cs1; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := (if pheader cs1 then ep cs1 else false); rem := rem cs1;
+ cur := cur cs1 |}.
+
+(* Theorem (A) in the diagram, the easiest of all *)
+Theorem step_simu_header:
+ forall bb s fb sp c ms m rs1 m1 cs1,
+ pstate cs1 = (State rs1 m1) ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists cs1',
+ exec_header cs1 cs1'
+ /\ match_codestate fb (MB.State s fb sp (mb_remove_header bb::c) ms m) cs1').
+Proof.
+ intros until cs1. intros Hpstate MCS.
+ eexists. split; eauto.
+ econstructor; eauto.
+ inv MCS. simpl in *. inv Hpstate.
+ econstructor; eauto.
+Qed.
+
+(* Theorem (B) in the diagram, using step_simu_basic + induction on the Machblock body *)
+Theorem step_simu_body:
+ forall bb s fb sp c ms m rs1 m1 ms' cs1 m',
+ MB.header bb = nil ->
+ body_step ge s fb sp (MB.body bb) ms m ms' m' ->
+ pstate cs1 = (State rs1 m1) ->
+ match_codestate fb (MB.State s fb sp (bb::c) ms m) cs1 ->
+ (exists rs2 m2 cs2 ep,
+ cs2 = {| pstate := (State rs2 m2); pheader := nil; pbody1 := nil; pbody2 := pbody2 cs1;
+ pctl := pctl cs1; ep := ep; rem := rem cs1; cur := cur cs1 |}
+ /\ exec_body lk tge (pbody1 cs1) rs1 m1 = Next rs2 m2
+ /\ match_codestate fb (MB.State s fb sp ({| MB.header := nil; MB.body := nil; MB.exit := MB.exit bb |}::c) ms' m') cs2).
+Proof.
+ intros bb. destruct bb as [hd bdy ex]; simpl; auto. induction bdy as [|bi bdy].
+ - intros until m'. intros Hheader BSTEP Hpstate MCS.
+ inv BSTEP.
+ exists rs1, m1, cs1, (ep cs1).
+ inv MCS. inv Hpstate. simpl in *. monadInv TBC. repeat (split; simpl; auto).
+ econstructor; eauto.
+ - intros until m'. intros Hheader BSTEP Hpstate MCS. inv BSTEP.
+ rename ms' into ms''. rename m' into m''. rename rs' into ms'. rename m'0 into m'.
+ exploit (step_simu_basic); eauto. simpl. eauto. simpl; auto. simpl; auto.
+ intros (rs2 & m2 & l & cs2 & tbdy' & Hcs2 & Happ & EXEB & MCS').
+ simpl in *.
+ exploit IHbdy. auto. eapply H6. 2: eapply MCS'. all: eauto. subst; eauto. simpl; auto.
+ intros (rs3 & m3 & cs3 & ep & Hcs3 & EXEB' & MCS'').
+ exists rs3, m3, cs3, ep.
+ repeat (split; simpl; auto). subst. simpl in *. auto.
+ rewrite Happ. eapply exec_body_trans; eauto. rewrite Hcs2 in EXEB'; simpl in EXEB'. auto.
+Qed.
+
+(* Bringing theorems (A), (B) and (C) together, for the case of the absence of builtin instruction *)
+(* This more general form is easier to prove, but the actual theorem is step_simulation_bblock further below *)
+Lemma step_simulation_bblock':
+ forall t sf f sp bb bb' bb'' rs m rs' m' s'' c S1,
+ bb' = mb_remove_header bb ->
+ body_step ge sf f sp (Machblock.body bb') rs m rs' m' ->
+ bb'' = mb_remove_body bb' ->
+ exit_step return_address_offset ge (Machblock.exit bb'') (Machblock.State sf f sp (bb'' :: c) rs' m') t s'' ->
+ match_states (Machblock.State sf f sp (bb :: c) rs m) S1 ->
+ exists S2 : state, plus (step lk) tge S1 t S2 /\ match_states s'' S2.
+Proof.
+ intros until S1. intros Hbb' BSTEP Hbb'' ESTEP MS.
+ destruct (mbsize bb) eqn:SIZE.
+ - apply mbsize_eqz in SIZE. destruct SIZE as (Hbody & Hexit).
+ destruct bb as [hd bdy ex]; simpl in *; subst.
+ inv MS. inv AT. exploit transl_blocks_nonil; eauto. intros (tbb & tc' & Htc). subst. rename tc' into tc.
+ monadInv H2. simpl in *. inv ESTEP. inv BSTEP.
+ eexists. split.
+ + eapply plus_one.
+ exploit functions_translated; eauto. intros (tf0 & FIND' & TRANSF'). monadInv TRANSF'.
+ assert (x = tf) by congruence. subst x.
+ eapply exec_step_internal; eauto. eapply find_bblock_tail; eauto.
+ unfold exec_bblock. simpl.
+ eexists; eexists; split; eauto.
+ econstructor.
+ + econstructor.
+ 1,2,3: eauto.
+ *
+ unfold incrPC. rewrite Pregmap.gss.
+ unfold Val.offset_ptr. rewrite <- H.
+ assert (NOOV: size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned).
+ { eapply transf_function_no_overflow; eauto. }
+ econstructor; eauto.
+ generalize (code_tail_next_int _ _ _ _ NOOV H3). intro CT1. eauto.
+ *
+ eapply agree_exten; eauto. intros. unfold incrPC. rewrite Pregmap.gso; auto.
+ unfold data_preg in H2. destruct r; try congruence.
+ *
+ intros. discriminate.
+ - subst. exploit mbsize_neqz. { instantiate (1 := bb). rewrite SIZE. discriminate. }
+ intros Hnotempty.
+
+ (* initial setting *)
+ exploit match_state_codestate.
+ eapply Hnotempty.
+ all: eauto.
+ intros (cs1 & fb & f0 & tbb & tc & ep & MCS & MAS & FIND & TLBS & Hbody & Hexit & Hcur & Hrem & Hpstate).
+
+ (* step_simu_header part *)
+ assert (exists rs1 m1, pstate cs1 = State rs1 m1). { inv MAS. simpl. eauto. }
+ destruct H as (rs1 & m1 & Hpstate2). subst.
+ assert (f = fb). { inv MCS. auto. } subst fb.
+ exploit step_simu_header.
+ 2: eapply MCS.
+ all: eauto.
+ intros (cs1' & EXEH & MCS2).
+
+ (* step_simu_body part *)
+ assert (Hpstate': pstate cs1' = pstate cs1). { inv EXEH; auto. }
+ exploit step_simu_body.
+ 2: eapply BSTEP.
+ 3: eapply MCS2.
+ all: eauto. rewrite Hpstate'. eauto.
+ intros (rs2 & m2 & cs2 & ep' & Hcs2 & EXEB & MCS').
+
+ (* step_simu_control part *)
+ assert (exists tf, Genv.find_funct_ptr tge f = Some (Internal tf)).
+ { exploit functions_translated; eauto. intros (tf & FIND' & TRANSF'). monadInv TRANSF'. eauto. }
+ destruct H as (tf & FIND').
+ inv EXEH. simpl in *.
+ subst. exploit step_simu_control.
+ 8: eapply MCS'. all: simpl.
+ 9: eapply ESTEP.
+ all: simpl; eauto.
+ { inv MAS; simpl in *. inv Hpstate2. eapply match_asmstate_some; eauto.
+ erewrite exec_body_pc; eauto. }
+ intros (rs3 & m3 & rs4 & m4 & EXEB' & EXECTL' & MS').
+
+ (* bringing the pieces together *)
+ exploit exec_body_trans.
+ eapply EXEB.
+ eauto.
+ intros EXEB2.
+ exploit exec_body_control; eauto.
+ rewrite <- Hbody in EXEB2. eauto.
+ rewrite Hexit. eauto.
+ intros EXECB. (* inv EXECB. *)
+ exists (State rs4 m4).
+ split; auto. eapply plus_one. rewrite Hpstate2.
+ assert (exists ofs, rs1 PC = Vptr f ofs).
+ { rewrite Hpstate2 in MAS. inv MAS. simpl in *. eauto. }
+ destruct H as (ofs & Hrs1pc).
+ eapply exec_step_internal; eauto.
+
+ (* proving the initial find_bblock *)
+ rewrite Hpstate2 in MAS. inv MAS. simpl in *.
+ assert (f1 = f0) by congruence. subst f0.
+ rewrite PCeq in Hrs1pc. inv Hrs1pc.
+ exploit functions_translated; eauto. intros (tf1 & FIND'' & TRANS''). rewrite FIND' in FIND''.
+ inv FIND''. monadInv TRANS''. rewrite TRANSF0 in EQ. inv EQ.
+ eapply find_bblock_tail; eauto.
+Qed.
+
+Theorem step_simulation_bblock:
+ forall t sf f sp bb ms m ms' m' S2 c,
+ body_step ge sf f sp (Machblock.body bb) ms m ms' m' ->
+ exit_step return_address_offset ge (Machblock.exit bb) (Machblock.State sf f sp (bb :: c) ms' m') t S2 ->
+ forall S1', match_states (Machblock.State sf f sp (bb :: c) ms m) S1' ->
+ exists S2' : state, plus (step lk) tge S1' t S2' /\ match_states S2 S2'.
+Proof.
+ intros until c. intros BSTEP ESTEP S1' MS.
+ eapply step_simulation_bblock'; eauto.
+ all: destruct bb as [hd bdy ex]; simpl in *; eauto.
+ inv ESTEP.
+ - econstructor. inv H; try (econstructor; eauto; fail).
+ - econstructor.
+Qed.
+
+(* Measure to prove finite stuttering, see the other backends *)
+Definition measure (s: MB.state) : nat :=
+ match s with
+ | MB.State _ _ _ _ _ _ => 0%nat
+ | MB.Callstate _ _ _ _ => 0%nat
+ | MB.Returnstate _ _ _ => 1%nat
+ end.
+
+Lemma next_sep:
+ forall rs m rs' m', rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+ congruence.
+Qed.
+
+(* The actual MB.step/AB.step simulation, using the above theorems, plus extra proofs
+ for the internal and external function cases *)
+Theorem step_simulation:
+ forall S1 t S2, MB.step return_address_offset ge S1 t S2 ->
+ forall S1' (MS: match_states S1 S1'),
+ (exists S2', plus (step lk) tge S1' t S2' /\ match_states S2 S2')
+ \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
+Proof.
+ induction 1; intros.
+
+- (* bblock *)
+ left. destruct (Machblock.exit bb) eqn:MBE; try destruct c0.
+ all: try(inversion H0; subst; inv H2; eapply step_simulation_bblock;
+ try (rewrite MBE; try discriminate); eauto).
+ + inversion H0. subst. eapply step_simulation_bblock; try (rewrite MBE; try discriminate); eauto.
+- (* internal function *)
+ inv MS.
+ exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
+ generalize EQ; intros EQ'. monadInv EQ'.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks x0.(fn_blocks))); inversion EQ1. clear EQ1. subst x0.
+ unfold Mach.store_stack in *.
+ exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
+ intros [m1' [C D]].
+ exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
+ intros [m2' [F G]].
+ simpl chunk_of_type in F.
+ exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
+ intros [m3' [P Q]].
+ (* Execution of function prologue *)
+ monadInv EQ0.
+ set (tfbody := make_prologue f x0) in *.
+ set (tf := {| fn_sig := MB.fn_sig f; fn_blocks := tfbody |}) in *.
+ set (rs2 := rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef).
+ exploit (storeptr_correct lk tge XSP (fn_retaddr_ofs f) RA nil m2' m3' rs2).
+ { rewrite chunk_of_Tptr in P.
+ assert (rs0 X30 = rs2 RA) by auto.
+ rewrite <- H3.
+ rewrite ATLR.
+ change (rs2 XSP) with sp. eexact P. }
+ 1-2: discriminate.
+ intros (rs3 & U & V).
+ assert (EXEC_PROLOGUE: exists rs3',
+ exec_straight_blocks tge lk tf
+ tf.(fn_blocks) rs0 m'
+ x0 rs3' m3'
+ /\ forall r, r <> PC -> r <> X16 -> rs3' r = rs3 r).
+ { eexists. split.
+ - change (fn_blocks tf) with tfbody; unfold tfbody.
+ econstructor; eauto.
+ assert (Archi.ptr64 = true) as SF; auto.
+ + unfold exec_bblock. simpl exec_body.
+ rewrite C. fold sp. rewrite <- (sp_val _ _ _ AG). rewrite chunk_of_Tptr in F.
+ assert (Mptr = Mint64) by auto. rewrite H3 in F. simpl in F. rewrite F. simpl.
+ unfold exec_store_rs_a. repeat Simpl; try discriminate.
+ exists rs2. exists m3'. split.
+ * unfold eval_addressing. Simpl; try discriminate. rewrite Pregmap.gss.
+ rewrite chunk_of_Tptr in P. rewrite H3 in P.
+ unfold Val.addl. unfold Val.offset_ptr in P.
+ destruct sp; simpl; try discriminate. rewrite SF; simpl.
+ rewrite Ptrofs.of_int64_to_int64. unfold Mem.storev in P. rewrite ATLR.
+ rewrite P. simpl. apply next_sep; eauto. apply SF.
+ * econstructor.
+ + eauto.
+ - intros. unfold incrPC.
+ rewrite Pregmap.gso; auto. rewrite V; auto.
+ } destruct EXEC_PROLOGUE as (rs3' & EXEC_PROLOGUE & Heqrs3').
+ exploit exec_straight_steps_2; eauto using functions_transl.
+ simpl fn_blocks. simpl fn_blocks in g. lia. constructor.
+ intros (ofs' & X & Y).
+ left; exists (State rs3' m3'); split.
+ eapply exec_straight_steps_1; eauto.
+ simpl fn_blocks. simpl fn_blocks in g. lia.
+ constructor.
+ econstructor; eauto.
+ rewrite X; econstructor; eauto.
+ apply agree_exten with rs2; eauto with asmgen.
+ unfold rs2.
+ apply agree_set_other; auto with asmgen.
+ apply agree_change_sp with (parent_sp s).
+ apply agree_undef_regs with rs0. auto.
+Local Transparent destroyed_at_function_entry.
+ simpl; intros; Simpl. auto.
+ assert (r' <> X29). { contradict H3; rewrite H3; unfold data_preg; auto. } auto.
+ unfold sp; congruence.
+
+ intros.
+
+ rewrite Heqrs3'. rewrite V. 2-5: try apply X16_not_data_preg; try apply PC_not_data_preg; auto.
+ auto.
+ intros. rewrite Heqrs3'; try discriminate. rewrite V by auto with asmgen. reflexivity.
+- (* external function *)
+ inv MS.
+ exploit functions_translated; eauto.
+ intros [tf [A B]]. simpl in B. inv B.
+ exploit extcall_arguments_match; eauto.
+ intros [args' [C D]].
+ exploit external_call_mem_extends; eauto.
+ intros [res' [m2' [P [Q [R S]]]]].
+ left; econstructor; split.
+ apply plus_one. eapply exec_step_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ econstructor; eauto.
+ unfold loc_external_result.
+ apply agree_set_other; auto.
+ apply agree_set_pair; auto.
+ apply agree_undef_caller_save_regs; auto.
+
+- (* return *)
+ inv MS.
+ inv STACKS. simpl in *.
+ right. split. lia. split. auto.
+ rewrite <- ATPC in H5.
+ econstructor; eauto. congruence.
+Qed.
+
+Lemma transf_initial_states:
+ forall st1, MB.initial_state prog st1 ->
+ exists st2, AB.initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+ intros. inversion H. unfold ge0 in *.
+ econstructor; split.
+ econstructor.
+ eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+ replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
+ with (Vptr fb Ptrofs.zero).
+ econstructor; eauto.
+ constructor.
+ apply Mem.extends_refl.
+ split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
+ intros. rewrite Mach.Regmap.gi. auto.
+ unfold Genv.symbol_address.
+ rewrite (match_program_main TRANSF).
+ rewrite symbols_preserved.
+ unfold ge; rewrite H1. auto.
+Qed.
+
+Lemma transf_final_states:
+ forall st1 st2 r,
+ match_states st1 st2 -> MB.final_state st1 r -> AB.final_state st2 r.
+Proof.
+ intros. inv H0. inv H. constructor. assumption.
+ compute in H1. inv H1.
+ generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+Qed.
+
+Definition return_address_offset : Machblock.function -> Machblock.code -> ptrofs -> Prop :=
+ Asmblockgenproof0.return_address_offset.
+
+Lemma transf_program_correct:
+ forward_simulation (MB.semantics return_address_offset prog) (AB.semantics lk tprog).
+Proof.
+ eapply forward_simulation_star with (measure := measure).
+ - apply senv_preserved.
+ - eexact transf_initial_states.
+ - eexact transf_final_states.
+ - exact step_simulation.
+Qed.
+
+End PRESERVATION.
diff --git a/aarch64/Asmblockgenproof0.v b/aarch64/Asmblockgenproof0.v
new file mode 100644
index 00000000..004cfd5c
--- /dev/null
+++ b/aarch64/Asmblockgenproof0.v
@@ -0,0 +1,885 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * "block" version of Asmgenproof0
+
+ This module is largely adapted from Asmgenproof0.v of the other backends
+ It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+ It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
+Require Import Coqlib.
+Require Intv.
+Require Import AST.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Events.
+Require Import Smallstep.
+Require Import Locations.
+Require Import Machblock.
+Require Import Asmblock.
+Require Import Asmblockgen.
+Require Import Conventions1.
+Require Import Axioms.
+Require Import Asmblockprops.
+Require Import Lia.
+
+Module MB:=Machblock.
+Module AB:=Asmblock.
+
+(** * Agreement between Mach registers and processor registers *)
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma ireg_of_eq:
+ forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r) as [[[rr1|]|]|xsp|]; inv H; auto.
+Qed.
+
+Lemma freg_of_eq:
+ forall r r', freg_of r = OK r' -> preg_of r = FR r'.
+Proof.
+ unfold freg_of; intros. destruct (preg_of r) as [[fr|]|xsp|]; inv H; auto.
+Qed.
+
+Lemma ireg_of_eq':
+ forall r r', ireg_of r = OK r' -> dreg_of r = IR r'.
+Proof.
+ unfold ireg_of; intros. destruct r; simpl in *; inv H; auto.
+Qed.
+
+Lemma freg_of_eq':
+ forall r r', freg_of r = OK r' -> dreg_of r = FR r'.
+Proof.
+ unfold freg_of; intros. destruct r; simpl in *; inv H; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+ match rl with
+ | nil => True
+ | r1 :: nil => r <> preg_of r1
+ | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+ end.
+
+Remark preg_notin_charact:
+ forall r rl,
+ preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+ induction rl; simpl; intros.
+ tauto.
+ destruct rl.
+ simpl. split. intros. intuition congruence. auto.
+ rewrite IHrl. split.
+ intros [A B]. intros. destruct H. congruence. auto.
+ auto.
+Qed.
+
+Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree {
+ agree_sp: rs#SP = sp;
+ agree_sp_def: sp <> Vundef;
+ agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r))
+}.
+
+Lemma agree_exten:
+ forall ms sp rs rs',
+ agree ms sp rs ->
+ (forall r, data_preg r = true -> rs'#r = rs#r) ->
+ agree ms sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H0; auto. auto.
+ intros. rewrite H0; auto. apply preg_of_data.
+Qed.
+
+Lemma preg_val:
+ forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r).
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Lemma preg_vals:
+ forall ms sp rs, agree ms sp rs ->
+ forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)).
+Proof.
+ induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto.
+Qed.
+
+Lemma preg_of_injective:
+ forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
+Proof.
+ destruct r1; destruct r2; simpl; intros; reflexivity || discriminate.
+Qed.
+
+Lemma sp_val:
+ forall ms sp rs, agree ms sp rs -> sp = rs#SP.
+Proof.
+ intros. destruct H; auto.
+Qed.
+
+Lemma ireg_val:
+ forall ms sp rs r r',
+ agree ms sp rs ->
+ ireg_of r = OK r' ->
+ Val.lessdef (ms r) rs#r'.
+Proof.
+ intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma preg_of_not_X29: forall dst,
+ negb (mreg_eq dst R29) = true ->
+ DR (IR X29) <> preg_of dst.
+Proof.
+ intros. destruct dst; try discriminate.
+Qed.
+
+Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
+
+(** Preservation of register agreement under various assignments. *)
+
+Lemma agree_set_mreg:
+ forall ms sp rs r v rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP.
+ intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence.
+ rewrite H1. auto. apply preg_of_data.
+ red; intros; elim n. eapply preg_of_injective; eauto.
+Qed.
+
+Corollary agree_set_mreg_parallel:
+ forall ms sp rs r v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs).
+Proof.
+ intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto.
+Qed.
+
+Lemma agree_set_other:
+ forall ms sp rs r v,
+ agree ms sp rs ->
+ data_preg r = false ->
+ agree ms sp (rs#r <- v).
+Proof.
+ intros. apply agree_exten with rs. auto.
+ intros. apply Pregmap.gso. congruence.
+Qed.
+
+Lemma agree_nextblock:
+ forall ms sp rs b,
+ agree ms sp rs -> agree ms sp (incrPC (Ptrofs.repr (size b)) rs).
+Proof.
+ intros. unfold incrPC. apply agree_set_other. auto. auto.
+Qed.
+
+Lemma agree_set_pair:
+ forall sp p v v' ms rs,
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs).
+Proof.
+ intros. destruct p; simpl.
+ - apply agree_set_mreg_parallel; auto.
+ - apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto.
+ apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_set_res:
+ forall res ms sp rs v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_res res v ms) sp (set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v' rs).
+Proof.
+ induction res; simpl; intros.
+ - eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto.
+ intros. apply Pregmap.gso; auto.
+ - auto.
+ - apply IHres2. apply IHres1. auto.
+ apply Val.hiword_lessdef; auto.
+ apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_undef_regs:
+ forall ms sp rl rs rs',
+ agree ms sp rs ->
+ (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.undef_regs rl ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite <- agree_sp0. apply H0; auto.
+ rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
+ intros. destruct (In_dec mreg_eq r rl).
+ rewrite Mach.undef_regs_same; auto.
+ rewrite Mach.undef_regs_other; auto. rewrite H0; auto.
+ apply preg_of_data.
+ rewrite preg_notin_charact. intros; red; intros. elim n.
+ exploit preg_of_injective; eauto. congruence.
+Qed.
+
+Lemma agree_set_undef_mreg:
+ forall ms sp rs r v rl rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'.
+Proof.
+ intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto.
+ apply agree_undef_regs with rs; auto.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)).
+ congruence. auto.
+ intros. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma agree_undef_caller_save_regs:
+ forall ms sp rs,
+ agree ms sp rs ->
+ agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs).
+Proof.
+ intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split.
+ - unfold proj_sumbool; rewrite dec_eq_true. auto.
+ - auto.
+ - intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP).
+ destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl.
+ + apply list_in_map_inv in i. destruct i as (mr & A & B).
+ assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A.
+ apply List.filter_In in B. destruct B as [C D]. rewrite D. auto.
+ + destruct (is_callee_save r) eqn:CS; auto.
+ elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete.
+Qed.
+
+Lemma agree_change_sp:
+ forall ms sp rs sp',
+ agree ms sp rs -> sp' <> Vundef ->
+ agree ms sp' (rs#SP <- sp').
+Proof.
+ intros. inv H. split; auto.
+ intros. rewrite Pregmap.gso; auto with asmgen.
+Qed.
+
+Remark builtin_arg_match:
+ forall ge (rs: regset) sp m a v,
+ eval_builtin_arg ge (fun r => rs (dreg_of r)) sp m a v ->
+ eval_builtin_arg ge (fun r => rs (DR r)) sp m (map_builtin_arg dreg_of a) v.
+Proof.
+ induction 1; simpl; eauto with barg. econstructor.
+Qed.
+
+Lemma builtin_args_match:
+ forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall al vl, eval_builtin_args ge ms sp m al vl ->
+ exists vl', eval_builtin_args ge (fun r => rs (DR r)) sp m' (map (map_builtin_arg dreg_of) al) vl'
+ /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros; simpl.
+ exists (@nil val); split; constructor.
+ exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto.
+ intros; eapply preg_val; eauto.
+ intros (v1' & A & B).
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto.
+Qed.
+
+(** Connection between Mach and Asm calling conventions for external
+ functions. *)
+
+Lemma extcall_arg_match:
+ forall ms sp rs m m' l v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg ms m sp l v ->
+ exists v', extcall_arg rs m' l v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+ exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto.
+ unfold Mach.load_stack in H2.
+ exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+ rewrite (sp_val _ _ _ H) in A.
+ exists v'; split; auto.
+ econstructor. eauto. assumption.
+Qed.
+
+Lemma extcall_arg_pair_match:
+ forall ms sp rs m m' p v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg_pair ms m sp p v ->
+ exists v', extcall_arg_pair rs m' p v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+ - exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto.
+ - exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1).
+ exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2).
+ exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto.
+Qed.
+
+Lemma extcall_args_match:
+ forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall ll vl,
+ list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl ->
+ exists vl', list_forall2 (extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros.
+ exists (@nil val); split. constructor. constructor.
+ exploit extcall_arg_pair_match; eauto. intros [v1' [A B]].
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto.
+Qed.
+
+Lemma extcall_arguments_match:
+ forall ms m m' sp rs sg args,
+ agree ms sp rs -> Mem.extends m m' ->
+ Mach.extcall_arguments ms m sp sg args ->
+ exists args', extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'.
+Proof.
+ unfold Mach.extcall_arguments, extcall_arguments; intros.
+ eapply extcall_args_match; eauto.
+Qed.
+
+Lemma set_res_other:
+ forall r res v rs,
+ data_preg r = false ->
+ set_res (map_builtin_res DR (map_builtin_res dreg_of res)) v rs r = rs r.
+Proof.
+ induction res; simpl; intros.
+ - apply Pregmap.gso. red; intros; subst r. rewrite dreg_of_data in H; discriminate.
+ - auto.
+ - rewrite IHres2, IHres1; auto.
+Qed.
+
+Lemma undef_regs_other:
+ forall r rl rs,
+ (forall r', In r' rl -> r <> r') ->
+ undef_regs rl rs r = rs r.
+Proof.
+ induction rl; simpl; intros. auto.
+ rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma undef_regs_other_2:
+ forall r rl rs,
+ preg_notin r rl ->
+ undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+ intros. apply undef_regs_other. intros.
+ exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+ rewrite preg_notin_charact in H. auto.
+Qed.
+
+Inductive code_tail: Z -> bblocks -> bblocks -> Prop :=
+ | code_tail_0: forall c,
+ code_tail 0 c c
+ | code_tail_S: forall pos bi c1 c2,
+ code_tail pos c1 c2 ->
+ code_tail (pos + (size bi)) (bi :: c1) c2.
+
+Lemma code_tail_pos:
+ forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0.
+Proof.
+ induction 1. lia. generalize (size_positive bi); intros; lia.
+Qed.
+
+Lemma find_bblock_tail:
+ forall c1 bi c2 pos,
+ code_tail pos c1 (bi :: c2) ->
+ find_bblock pos c1 = Some bi.
+Proof.
+ induction c1; simpl; intros.
+ inversion H.
+ destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; lia.
+ destruct (zeq pos 0). subst pos.
+ inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; lia.
+ inv H. congruence. replace (pos0 + size a - size a) with pos0 by lia.
+ eauto.
+Qed.
+
+Local Hint Resolve code_tail_0 code_tail_S: core.
+
+Lemma code_tail_next:
+ forall fn ofs c0,
+ code_tail ofs fn c0 ->
+ forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1.
+Proof.
+ induction 1; intros.
+ - subst; eauto.
+ - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto.
+ lia.
+Qed.
+
+Lemma size_blocks_pos c: 0 <= size_blocks c.
+Proof.
+ induction c as [| a l ]; simpl; try lia.
+ generalize (size_positive a); lia.
+Qed.
+
+Remark code_tail_positive:
+ forall fn ofs c,
+ code_tail ofs fn c -> 0 <= ofs.
+Proof.
+ induction 1; intros; simpl.
+ - lia.
+ - generalize (size_positive bi). lia.
+Qed.
+
+Remark code_tail_size:
+ forall fn ofs c,
+ code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c.
+Proof.
+ induction 1; intros; simpl; try lia.
+Qed.
+
+Remark code_tail_bounds fn ofs c:
+ code_tail ofs fn c -> 0 <= ofs <= size_blocks fn.
+Proof.
+ intro H;
+ exploit code_tail_size; eauto.
+ generalize (code_tail_positive _ _ _ H), (size_blocks_pos c).
+ lia.
+Qed.
+
+Local Hint Resolve code_tail_next: core.
+
+Lemma code_tail_next_int:
+ forall fn ofs bi c,
+ size_blocks fn <= Ptrofs.max_unsigned ->
+ code_tail (Ptrofs.unsigned ofs) fn (bi :: c) ->
+ code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c.
+Proof.
+ intros.
+ exploit code_tail_size; eauto.
+ simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c).
+ intros.
+ rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr.
+ - rewrite Ptrofs.unsigned_repr; eauto.
+ lia.
+ - rewrite Ptrofs.unsigned_repr; lia.
+Qed.
+
+(** The [find_label] function returns the code tail starting at the
+ given label. A connection with [code_tail] is then established. *)
+
+Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks :=
+ match c with
+ | nil => None
+ | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl
+ end.
+
+(* inspired from Mach *)
+
+Lemma find_label_tail:
+ forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c.
+Proof.
+ induction c; simpl; intros. discriminate.
+ destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib.
+Qed.
+
+Lemma label_pos_code_tail:
+ forall lbl c pos c',
+ find_label lbl c = Some c' ->
+ exists pos',
+ label_pos lbl pos c = Some pos'
+ /\ code_tail (pos' - pos) c c'
+ /\ pos <= pos' <= pos + size_blocks c.
+Proof.
+ induction c.
+ simpl; intros. discriminate.
+ simpl; intros until c'.
+ case (is_label lbl a).
+ - intros. inv H. exists pos. split; auto. split.
+ replace (pos - pos) with 0 by lia. constructor. constructor; try lia.
+ generalize (size_blocks_pos c). generalize (size_positive a). lia.
+ - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]].
+ exists pos'. split. auto. split.
+ replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by lia.
+ constructor. auto. generalize (size_positive a). lia.
+Qed.
+
+(** Predictor for return addresses in generated Asm code.
+
+ The [return_address_offset] predicate defined here is used in the
+ semantics for Mach to determine the return addresses that are
+ stored in activation records. *)
+
+(** Consider a Mach function [f] and a sequence [c] of Mach instructions
+ representing the Mach code that remains to be executed after a
+ function call returns. The predicate [return_address_offset f c ofs]
+ holds if [ofs] is the integer offset of the PPC instruction
+ following the call in the Asm code obtained by translating the
+ code of [f]. Graphically:
+<<
+ Mach function f |--------- Mcall ---------|
+ Mach code c | |--------|
+ | \ \
+ | \ \
+ | \ \
+ Asm code | |--------|
+ Asm function |------------- Pcall ---------|
+
+ <-------- ofs ------->
+>>
+*)
+
+Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop :=
+ forall tf tc,
+ transf_function f = OK tf ->
+ transl_blocks f c false = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc.
+
+Lemma transl_blocks_tail:
+ forall f c1 c2, is_tail c1 c2 ->
+ forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 ->
+ exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2.
+Proof.
+ induction 1; simpl; intros.
+ exists tc2; exists ep2; split; auto with coqlib.
+ monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B).
+ exists tc1; exists ep1; split. auto.
+ eapply is_tail_trans with x0; eauto with coqlib.
+Qed.
+
+Lemma is_tail_code_tail:
+ forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1.
+Proof.
+ induction 1; eauto.
+ destruct IHis_tail; eauto.
+Qed.
+
+Section RETADDR_EXISTS.
+
+Hypothesis transf_function_inv:
+ forall f tf, transf_function f = OK tf ->
+ exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf).
+
+Hypothesis transf_function_len:
+ forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
+
+
+Lemma return_address_exists:
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros. destruct (transf_function f) as [tf|] eqn:TF.
+ + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
+ exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
+ monadInv TR2.
+ assert (TL3: is_tail x0 (fn_blocks tf)).
+ { apply is_tail_trans with tc1; auto.
+ apply is_tail_trans with (x++x0); auto. eapply is_tail_app.
+ }
+ exploit is_tail_code_tail. eexact TL3. intros [ofs CT].
+ exists (Ptrofs.repr ofs). red; intros.
+ rewrite Ptrofs.unsigned_repr. congruence.
+ exploit code_tail_bounds; eauto.
+ intros; apply transf_function_len in TF. lia.
+ + exists Ptrofs.zero; red; intros. congruence.
+Qed.
+
+End RETADDR_EXISTS.
+
+(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
+ within the Asmblock code generated by translating Machblock function [f],
+ and [tc] is the tail of the generated code at the position corresponding
+ to the code pointer [pc]. *)
+
+Inductive transl_code_at_pc (ge: MB.genv):
+ val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop :=
+ transl_code_at_pc_intro:
+ forall b ofs f c ep tf tc,
+ Genv.find_funct_ptr ge b = Some(Internal f) ->
+ transf_function f = Errors.OK tf ->
+ transl_blocks f c ep = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc ->
+ transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc.
+
+Remark code_tail_no_bigger:
+ forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat.
+Proof.
+ induction 1; simpl; lia.
+Qed.
+
+Remark code_tail_unique:
+ forall fn c pos pos',
+ code_tail pos fn c -> code_tail pos' fn c -> pos = pos'.
+Proof.
+ induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; lia.
+ f_equal. eauto.
+Qed.
+
+Lemma return_address_offset_correct:
+ forall ge b ofs fb f c tf tc ofs',
+ transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc ->
+ return_address_offset f c ofs' ->
+ ofs' = ofs.
+Proof.
+ intros. inv H. red in H0.
+ exploit code_tail_unique. eexact H12. eapply H0; eauto. intro.
+ rewrite <- (Ptrofs.repr_unsigned ofs).
+ rewrite <- (Ptrofs.repr_unsigned ofs').
+ congruence.
+Qed.
+
+Section STRAIGHTLINE.
+
+Variable ge: genv.
+Variable lk: aarch64_linker.
+Variable fn: function.
+
+(** Straight-line code is composed of processor instructions that execute
+ in sequence (no branches, no function calls and returns).
+ The following inductive predicate relates the machine states
+ before and after executing a straight-line sequence of instructions.
+ Instructions are taken from the first list instead of being fetched
+ from memory. *)
+
+Inductive exec_straight: list basic -> regset -> mem ->
+ list basic -> regset -> mem -> Prop :=
+ | exec_straight_one:
+ forall i1 c rs1 m1 rs2 m2,
+ exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_straight (i1 :: c) rs1 m1 c rs2 m2
+ | exec_straight_step:
+ forall i c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+ exec_straight c rs2 m2 c' rs3 m3 ->
+ exec_straight (i :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_step with rs2 m2; auto.
+Qed.
+
+Lemma exec_straight_two:
+ forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
+ exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic lk ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_straight (i1 :: i2 :: c) rs1 m1 c rs3 m3.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_one; auto.
+Qed.
+
+Lemma exec_straight_three:
+ forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4,
+ exec_basic lk ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic lk ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_basic lk ge i3 rs3 m3 = Next rs4 m4 ->
+ exec_straight (i1 :: i2 :: i3 :: c) rs1 m1 c rs4 m4.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ eapply exec_straight_two; eauto.
+Qed.
+
+Inductive exec_straight_opt: list basic -> regset -> mem -> list basic -> regset -> mem -> Prop :=
+ | exec_straight_opt_refl: forall c rs m,
+ exec_straight_opt c rs m c rs m
+ | exec_straight_opt_intro: forall c1 rs1 m1 c2 rs2 m2,
+ exec_straight c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight_opt c1 rs1 m1 c2 rs2 m2.
+
+Remark exec_straight_opt_right:
+ forall c3 rs3 m3 c1 rs1 m1 c2 rs2 m2,
+ exec_straight_opt c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+ destruct 1; intros. auto. eapply exec_straight_trans; eauto.
+Qed.
+
+Lemma exec_straight_opt_step:
+ forall i c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+ exec_straight_opt c rs2 m2 c' rs3 m3 ->
+ exec_straight (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+ intros. inv H0.
+ - apply exec_straight_one; auto.
+ - eapply exec_straight_step; eauto.
+Qed.
+
+Lemma exec_straight_opt_step_opt:
+ forall i c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_basic lk ge i rs1 m1 = Next rs2 m2 ->
+ exec_straight_opt c rs2 m2 c' rs3 m3 ->
+ exec_straight_opt (i :: c) rs1 m1 c' rs3 m3.
+Proof.
+ intros. apply exec_straight_opt_intro. eapply exec_straight_opt_step; eauto.
+Qed.
+
+(** Like exec_straight predicate, but on blocks *)
+
+Inductive exec_straight_blocks: bblocks -> regset -> mem ->
+ bblocks -> regset -> mem -> Prop :=
+ | exec_straight_blocks_one:
+ forall b1 c rs1 m1 rs2 m2,
+ exec_bblock lk ge fn b1 rs1 m1 E0 rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) ->
+ exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2
+ | exec_straight_blocks_step:
+ forall b c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_bblock lk ge fn b rs1 m1 E0 rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) ->
+ exec_straight_blocks c rs2 m2 c' rs3 m3 ->
+ exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_blocks_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight_blocks c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight_blocks c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight_blocks c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ eapply exec_straight_blocks_step; eauto.
+ eapply exec_straight_blocks_step; eauto.
+Qed.
+
+(** Linking exec_straight with exec_straight_blocks *)
+
+Lemma exec_straight_pc:
+ forall c c' rs1 m1 rs2 m2,
+ exec_straight c rs1 m1 c' rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction c; intros; try (inv H; fail).
+ inv H.
+ - eapply exec_basic_instr_pc; eauto.
+ - rewrite (IHc c' rs3 m3 rs2 m2); auto.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Lemma exec_body_pc:
+ forall ge l rs1 m1 rs2 m2,
+ exec_body lk ge l rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction l.
+ - intros. inv H. auto.
+ - intros until m2. intro EXEB.
+ inv EXEB. destruct (exec_basic _ _ _ _ _) eqn:EBI; try discriminate.
+ eapply IHl in H0. rewrite H0.
+ destruct s.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+(** The following lemmas show that straight-line executions
+ (predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
+
+Lemma exec_straight_steps_1:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ plus (step lk) ge (State rs m) E0 (State rs' m').
+Proof.
+ induction 1; intros.
+ apply plus_one.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ eapply plus_left'.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity.
+ auto.
+ apply code_tail_next_int; auto.
+ traceEq.
+Qed.
+
+Lemma exec_straight_steps_2:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ exists ofs',
+ rs'#PC = Vptr b ofs'
+ /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'.
+Proof.
+ induction 1; intros.
+ exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split.
+ rewrite H0. rewrite H2. auto.
+ apply code_tail_next_int; auto.
+ apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity. auto.
+ apply code_tail_next_int; auto.
+Qed.
+
+End STRAIGHTLINE.
+
+(** * Properties of the Machblock call stack *)
+
+Section MATCH_STACK.
+
+Variable ge: MB.genv.
+
+Inductive match_stack: list MB.stackframe -> Prop :=
+ | match_stack_nil:
+ match_stack nil
+ | match_stack_cons: forall fb sp ra c s f tf tc,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transl_code_at_pc ge ra fb f c false tf tc ->
+ sp <> Vundef ->
+ match_stack s ->
+ match_stack (Stackframe fb sp ra c :: s).
+
+Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ auto.
+Qed.
+
+Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ inv H0. congruence.
+Qed.
+
+Lemma lessdef_parent_sp:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s.
+Proof.
+ intros. inv H0. auto. exploit parent_sp_def; eauto. tauto.
+Qed.
+
+Lemma lessdef_parent_ra:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s.
+Proof.
+ intros. inv H0. auto. exploit parent_ra_def; eauto. tauto.
+Qed.
+
+End MATCH_STACK.
diff --git a/aarch64/Asmblockgenproof1.v b/aarch64/Asmblockgenproof1.v
new file mode 100644
index 00000000..61d77881
--- /dev/null
+++ b/aarch64/Asmblockgenproof1.v
@@ -0,0 +1,1955 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Proof of correctness for individual instructions *)
+
+Require Import Coqlib Errors Maps Zbits.
+Require Import AST Integers Floats Values Memory Globalenvs Linking.
+Require Import Op Locations Machblock Conventions Lia.
+Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
+
+Module MB := Machblock.
+Module AB := Asmblock.
+
+Section CONSTRUCTORS.
+
+Variable lk: aarch64_linker.
+Variable ge: genv.
+Variable fn: function.
+
+Hypothesis symbol_high_low: forall (id: ident) (ofs: ptrofs),
+ Val.addl (symbol_high lk id ofs) (symbol_low lk id ofs) = Genv.symbol_address ge id ofs.
+
+Ltac Simplif :=
+ ((rewrite Pregmap.gss)
+ || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Ltac ArgsInv :=
+ repeat (match goal with
+ | [ H: Error _ = OK _ |- _ ] => discriminate
+ | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
+ | [ H: bind _ _ = OK _ |- _ ] => monadInv H
+ | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+ | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+ end);
+ subst;
+ repeat (match goal with
+ | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *
+ | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
+ end).
+
+Ltac SimplEval H :=
+ match type of H with
+ | Some _ = None _ => discriminate
+ | Some _ = Some _ => inversion H; subst
+ | ?a = Some ?b => let A := fresh in assert (A: Val.maketotal a = b) by (rewrite H; reflexivity)
+end.
+
+Ltac TranslOpSimpl :=
+ econstructor; split;
+ [ apply exec_straight_one; reflexivity
+ | split; [ apply Val.lessdef_same; simpl; Simpl; fail | intros; simpl; Simpl; fail ] ].
+
+Ltac TranslOpSimplN :=
+ econstructor; split;
+ try apply exec_straight_one; try reflexivity; try split; try apply Val.lessdef_same;
+ Simpl; simpl; try destruct negb; Simpl; try intros; Simpl; simpl; try destruct negb; Simpl.
+
+Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
+Proof.
+ destruct r; simpl; try discriminate.
+Qed.
+Hint Resolve preg_of_iregsp_not_PC: asmgen.
+
+Lemma preg_of_not_X16: forall r, preg_of r <> X16.
+Proof.
+ destruct r; simpl; try discriminate.
+Qed.
+
+Lemma preg_of_not_X30: forall r, preg_of r <> X30.
+Proof.
+ destruct r; simpl; try discriminate.
+Qed.
+
+Lemma ireg_of_not_X16: forall r x, ireg_of r = OK x -> x <> X16.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+ red; intros; subst x. elim (preg_of_not_X16 r); auto.
+ destruct d. destruct i. inv H1; auto.
+ all: discriminate.
+Qed.
+
+Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
+Proof.
+ intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X16'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X16).
+Proof.
+ intros. apply ireg_of_not_X16 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X30: forall r x, ireg_of r = OK x -> x <> X30.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+ red; intros; subst x. elim (preg_of_not_X30 r); auto.
+ destruct d. destruct i. inv H1; auto.
+ all: discriminate.
+Qed.
+
+Lemma ireg_of_not_X30': forall r x, ireg_of r = OK x -> IR x <> IR X30.
+Proof.
+ intros. apply ireg_of_not_X30 in H. congruence.
+Qed.
+
+Lemma ireg_of_not_X30'': forall r x, ireg_of r = OK x -> DR (IR x) <> DR (IR X30).
+Proof.
+ intros. apply ireg_of_not_X30 in H. congruence.
+Qed.
+
+Hint Resolve preg_of_not_X16 ireg_of_not_X16 ireg_of_not_X16' ireg_of_not_X16'': asmgen.
+Hint Resolve preg_of_not_X30 ireg_of_not_X30 ireg_of_not_X30' ireg_of_not_X30'': asmgen.
+
+Inductive wf_decomposition: list (Z * Z) -> Prop :=
+ | wf_decomp_nil:
+ wf_decomposition nil
+ | wf_decomp_cons: forall m n p l,
+ n = Zzero_ext 16 m -> 0 <= p -> wf_decomposition l ->
+ wf_decomposition ((n, p) :: l).
+
+Lemma decompose_int_wf:
+ forall N n p, 0 <= p -> wf_decomposition (decompose_int N n p).
+Proof.
+Local Opaque Zzero_ext.
+ induction N as [ | N]; simpl; intros.
+ - constructor.
+ - set (frag := Zzero_ext 16 (Z.shiftr n p)) in *. destruct (Z.eqb frag 0).
+ + apply IHN. lia.
+ + econstructor. reflexivity. lia. apply IHN; lia.
+Qed.
+
+Fixpoint recompose_int (accu: Z) (l: list (Z * Z)) : Z :=
+ match l with
+ | nil => accu
+ | (n, p) :: l => recompose_int (Zinsert accu n p 16) l
+ end.
+
+Lemma decompose_int_correct:
+ forall N n p accu,
+ 0 <= p ->
+ (forall i, p <= i -> Z.testbit accu i = false) ->
+ (forall i, 0 <= i < p + Z.of_nat N * 16 ->
+ Z.testbit (recompose_int accu (decompose_int N n p)) i =
+ if zlt i p then Z.testbit accu i else Z.testbit n i).
+Proof.
+ induction N as [ | N]; intros until accu; intros PPOS ABOVE i RANGE.
+ - simpl. rewrite zlt_true; auto. lia.
+ - rewrite inj_S in RANGE. simpl.
+ set (frag := Zzero_ext 16 (Z.shiftr n p)).
+ assert (FRAG: forall i, p <= i < p + 16 -> Z.testbit n i = Z.testbit frag (i - p)).
+ { unfold frag; intros. rewrite Zzero_ext_spec by lia. rewrite zlt_true by lia.
+ rewrite Z.shiftr_spec by lia. f_equal; lia. }
+ destruct (Z.eqb_spec frag 0).
+ + rewrite IHN.
+ * destruct (zlt i p). rewrite zlt_true by lia. auto.
+ destruct (zlt i (p + 16)); auto.
+ rewrite ABOVE by lia. rewrite FRAG by lia. rewrite e, Z.testbit_0_l. auto.
+ * lia.
+ * intros; apply ABOVE; lia.
+ * lia.
+ + simpl. rewrite IHN.
+ * destruct (zlt i (p + 16)).
+ ** rewrite Zinsert_spec by lia. unfold proj_sumbool.
+ rewrite zlt_true by lia.
+ destruct (zlt i p).
+ rewrite zle_false by lia. auto.
+ rewrite zle_true by lia. simpl. symmetry; apply FRAG; lia.
+ ** rewrite Z.ldiff_spec, Z.shiftl_spec by lia.
+ change 65535 with (two_p 16 - 1). rewrite Ztestbit_two_p_m1 by lia.
+ rewrite zlt_false by lia. rewrite zlt_false by lia. apply andb_true_r.
+ * lia.
+ * intros. rewrite Zinsert_spec by lia. unfold proj_sumbool.
+ rewrite zle_true by lia. rewrite zlt_false by lia. simpl.
+ apply ABOVE. lia.
+ * lia.
+Qed.
+
+Corollary decompose_int_eqmod: forall N n,
+ eqmod (two_power_nat (N * 16)%nat) (recompose_int 0 (decompose_int N n 0)) n.
+Proof.
+ intros; apply eqmod_same_bits; intros.
+ rewrite decompose_int_correct. apply zlt_false; lia.
+ lia. intros; apply Z.testbit_0_l. lia.
+Qed.
+
+Corollary decompose_notint_eqmod: forall N n,
+ eqmod (two_power_nat (N * 16)%nat)
+ (Z.lnot (recompose_int 0 (decompose_int N (Z.lnot n) 0))) n.
+Proof.
+ intros; apply eqmod_same_bits; intros.
+ rewrite Z.lnot_spec, decompose_int_correct.
+ rewrite zlt_false by lia. rewrite Z.lnot_spec by lia. apply negb_involutive.
+ lia. intros; apply Z.testbit_0_l. lia. lia.
+Qed.
+
+Lemma negate_decomposition_wf:
+ forall l, wf_decomposition l -> wf_decomposition (negate_decomposition l).
+Proof.
+ induction 1; simpl; econstructor; auto.
+ instantiate (1 := (Z.lnot m)).
+ apply equal_same_bits; intros.
+ rewrite H. change 65535 with (two_p 16 - 1).
+ rewrite Z.lxor_spec, !Zzero_ext_spec, Z.lnot_spec, Ztestbit_two_p_m1 by lia.
+ destruct (zlt i 16).
+ apply xorb_true_r.
+ auto.
+Qed.
+
+Lemma Zinsert_eqmod:
+ forall n x1 x2 y p l, 0 <= p -> 0 <= l ->
+ eqmod (two_power_nat n) x1 x2 ->
+ eqmod (two_power_nat n) (Zinsert x1 y p l) (Zinsert x2 y p l).
+Proof.
+ intros. apply eqmod_same_bits; intros. rewrite ! Zinsert_spec by lia.
+ destruct (zle p i && zlt i (p + l)); auto.
+ apply same_bits_eqmod with n; auto.
+Qed.
+
+Lemma Zinsert_0_l:
+ forall y p l,
+ 0 <= p -> 0 <= l ->
+ Z.shiftl (Zzero_ext l y) p = Zinsert 0 (Zzero_ext l y) p l.
+Proof.
+ intros. apply equal_same_bits; intros.
+ rewrite Zinsert_spec by lia. unfold proj_sumbool.
+ destruct (zlt i p); [rewrite zle_false by lia|rewrite zle_true by lia]; simpl.
+ - rewrite Z.testbit_0_l, Z.shiftl_spec_low by auto. auto.
+ - rewrite Z.shiftl_spec by lia.
+ destruct (zlt i (p + l)); auto.
+ rewrite Zzero_ext_spec, zlt_false, Z.testbit_0_l by lia. auto.
+Qed.
+
+Lemma recompose_int_negated:
+ forall l, wf_decomposition l ->
+ forall accu, recompose_int (Z.lnot accu) (negate_decomposition l) = Z.lnot (recompose_int accu l).
+Proof.
+ induction 1; intros accu; simpl.
+ - auto.
+ - rewrite <- IHwf_decomposition. f_equal. apply equal_same_bits; intros.
+ rewrite Z.lnot_spec, ! Zinsert_spec, Z.lxor_spec, Z.lnot_spec by lia.
+ unfold proj_sumbool.
+ destruct (zle p i); simpl; auto.
+ destruct (zlt i (p + 16)); simpl; auto.
+ change 65535 with (two_p 16 - 1).
+ rewrite Ztestbit_two_p_m1 by lia. rewrite zlt_true by lia.
+ apply xorb_true_r.
+Qed.
+
+Lemma exec_loadimm_k_w:
+ forall (rd: ireg) k m l,
+ wf_decomposition l ->
+ forall (rs: regset) accu,
+ rs#rd = Vint (Int.repr accu) ->
+ exists rs',
+ exec_straight_opt ge lk (loadimm_k W rd l k) rs m k rs' m
+ /\ rs'#rd = Vint (Int.repr (recompose_int accu l))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ induction 1; intros rs accu ACCU; simpl.
+ - exists rs; split. apply exec_straight_opt_refl. auto.
+ - destruct (IHwf_decomposition
+ ((rs#rd <- (insert_in_int rs#rd n p 16)))
+ (Zinsert accu n p 16))
+ as (rs' & P & Q & R).
+ Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr.
+ apply Zinsert_eqmod. auto. lia. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
+ exists rs'; split.
+ eapply exec_straight_opt_step_opt. simpl. eauto. auto.
+ split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_w:
+ forall rd l k rs m,
+ wf_decomposition l ->
+ exists rs',
+ exec_straight ge lk (loadimm_z W rd l k) rs m k rs' m
+ /\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm_z; destruct 1.
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+ - set (accu0 := Zinsert 0 n p 16).
+ set (rs1 := rs#rd <- (Vint (Int.repr accu0))).
+ destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+ unfold rs1; Simpl.
+ exists rs2; split.
+ eapply exec_straight_opt_step; eauto.
+ simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+ split. exact Q.
+ intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_w:
+ forall rd l k rs m,
+ wf_decomposition l ->
+ exists rs',
+ exec_straight ge lk (loadimm_n W rd l k) rs m k rs' m
+ /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm_n; destruct 1.
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+ - set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+ set (rs1 := rs#rd <- (Vint (Int.repr accu0))).
+ destruct (exec_loadimm_k_w rd k m (negate_decomposition l)
+ (negate_decomposition_wf l H1)
+ rs1 accu0) as (rs2 & P & Q & R).
+ unfold rs1; Simpl.
+ exists rs2; split.
+ eapply exec_straight_opt_step; eauto.
+ simpl. unfold rs1. do 5 f_equal.
+ unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+ split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+ intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm32:
+ forall rd n k rs m,
+ exists rs',
+ exec_straight ge lk (loadimm32 rd n k) rs m k rs' m
+ /\ rs'#rd = Vint n
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm32, loadimm; intros.
+ destruct (is_logical_imm32 n).
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. rewrite Int.repr_unsigned, Int.or_zero_l; auto.
+ intros; Simpl.
+ - set (dz := decompose_int 2%nat (Int.unsigned n) 0).
+ set (dn := decompose_int 2%nat (Z.lnot (Int.unsigned n)) 0).
+ assert (A: Int.repr (recompose_int 0 dz) = n).
+ { transitivity (Int.repr (Int.unsigned n)).
+ apply Int.eqm_samerepr. apply decompose_int_eqmod.
+ apply Int.repr_unsigned. }
+ assert (B: Int.repr (Z.lnot (recompose_int 0 dn)) = n).
+ { transitivity (Int.repr (Int.unsigned n)).
+ apply Int.eqm_samerepr. apply decompose_notint_eqmod.
+ apply Int.repr_unsigned. }
+ destruct Nat.leb.
+ + rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; lia.
+ + rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; lia.
+Qed.
+
+Lemma exec_loadimm_k_x:
+ forall (rd: ireg) k m l,
+ wf_decomposition l ->
+ forall (rs: regset) accu,
+ rs#rd = Vlong (Int64.repr accu) ->
+ exists rs',
+ exec_straight_opt ge lk (loadimm_k X rd l k) rs m k rs' m
+ /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ induction 1; intros rs accu ACCU; simpl.
+ - exists rs; split. apply exec_straight_opt_refl. auto.
+ - destruct (IHwf_decomposition
+ (rs#rd <- (insert_in_long rs#rd n p 16))
+ (Zinsert accu n p 16))
+ as (rs' & P & Q & R).
+ Simpl. rewrite ACCU. simpl. f_equal. apply Int64.eqm_samerepr.
+ apply Zinsert_eqmod. auto. lia. apply Int64.eqm_sym; apply Int64.eqm_unsigned_repr.
+ exists rs'; split.
+ eapply exec_straight_opt_step_opt. simpl; eauto. auto.
+ split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+Qed.
+
+Lemma exec_loadimm_z_x:
+ forall rd l k rs m,
+ wf_decomposition l ->
+ exists rs',
+ exec_straight ge lk (loadimm_z X rd l k) rs m k rs' m
+ /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm_z; destruct 1.
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+ - set (accu0 := Zinsert 0 n p 16).
+ set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))).
+ destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+ unfold rs1; Simpl.
+ exists rs2; split.
+ eapply exec_straight_opt_step; eauto.
+ simpl. unfold rs1. do 5 f_equal. unfold accu0. rewrite H. apply Zinsert_0_l; lia.
+ split. exact Q.
+ intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm_n_x:
+ forall rd l k rs m,
+ wf_decomposition l ->
+ exists rs',
+ exec_straight ge lk (loadimm_n X rd l k) rs m k rs' m
+ /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm_n; destruct 1.
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl.
+ intros; Simpl.
+ - set (accu0 := Z.lnot (Zinsert 0 n p 16)).
+ set (rs1 := rs#rd <- (Vlong (Int64.repr accu0))).
+ destruct (exec_loadimm_k_x rd k m (negate_decomposition l)
+ (negate_decomposition_wf l H1)
+ rs1 accu0) as (rs2 & P & Q & R).
+ unfold rs1; Simpl.
+ exists rs2; split.
+ eapply exec_straight_opt_step; eauto.
+ simpl. unfold rs1. do 5 f_equal.
+ unfold accu0. f_equal. rewrite H. apply Zinsert_0_l; lia.
+ split. unfold accu0 in Q; rewrite recompose_int_negated in Q by auto. exact Q.
+ intros. rewrite R by auto. unfold rs1; Simpl.
+Qed.
+
+Lemma exec_loadimm64:
+ forall rd n k rs m,
+ exists rs',
+ exec_straight ge lk (loadimm64 rd n k) rs m k rs' m
+ /\ rs'#rd = Vlong n
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadimm64, loadimm; intros.
+ destruct (is_logical_imm64 n).
+ - econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. rewrite Int64.repr_unsigned, Int64.or_zero_l; auto.
+ intros; Simpl.
+ - set (dz := decompose_int 4%nat (Int64.unsigned n) 0).
+ set (dn := decompose_int 4%nat (Z.lnot (Int64.unsigned n)) 0).
+ assert (A: Int64.repr (recompose_int 0 dz) = n).
+ { transitivity (Int64.repr (Int64.unsigned n)).
+ apply Int64.eqm_samerepr. apply decompose_int_eqmod.
+ apply Int64.repr_unsigned. }
+ assert (B: Int64.repr (Z.lnot (recompose_int 0 dn)) = n).
+ { transitivity (Int64.repr (Int64.unsigned n)).
+ apply Int64.eqm_samerepr. apply decompose_notint_eqmod.
+ apply Int64.repr_unsigned. }
+ destruct Nat.leb.
+ + rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; lia.
+ + rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; lia.
+Qed.
+
+(** Add immediate *)
+
+Lemma exec_addimm_aux_32:
+ forall (insn: Z -> arith_pp) (sem: val -> val -> val),
+ (forall rd r1 n rs m,
+ exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m =
+ Next (rs#rd <- (sem rs#r1 (Vint (Int.repr n)))) m) ->
+ (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
+ forall rd r1 n k rs m,
+ exists rs',
+ exec_straight ge lk (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Vint n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+ set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
+ assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; lia).
+ rewrite <- (Int.repr_unsigned n).
+ destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+ - econstructor; split. apply exec_straight_one. apply SEM. Simpl.
+ split. Simpl. do 3 f_equal; lia.
+ intros; Simpl.
+ - econstructor; split. apply exec_straight_one. apply SEM. Simpl.
+ split. Simpl. do 3 f_equal; lia.
+ intros; Simpl.
+ - econstructor; split. eapply exec_straight_two.
+ apply SEM. apply SEM. simpl. Simpl.
+ split. Simpl. simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
+ rewrite E. auto with ints.
+ intros; Simpl.
+Qed.
+
+Lemma exec_addimm32:
+ forall (rd r1: ireg) n k rs m,
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (addimm32 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = Val.add rs#r1 (Vint n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros. unfold addimm32. set (nn := Int.neg n).
+ destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
+ - apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc.
+ - rewrite <- Val.sub_opp_add.
+ apply exec_addimm_aux_32 with (sem := Val.sub). auto.
+ intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
+ - destruct (Int.lt n Int.zero).
+ + rewrite <- Val.sub_opp_add; fold nn.
+ edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. rewrite B, C; eauto with asmgen.
+ intros; Simpl.
+ + edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. rewrite B, C; eauto with asmgen.
+ intros; Simpl.
+Qed.
+
+Lemma exec_addimm_aux_64:
+ forall (insn: Z -> arith_pp) (sem: val -> val -> val),
+ (forall rd r1 n rs m,
+ exec_basic lk ge (PArith (PArithPP (insn n) rd r1)) rs m =
+ Next (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n)))) m) ->
+ (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
+ forall rd r1 n k rs m,
+ exists rs',
+ exec_straight ge lk (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Vlong n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+ set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
+ assert (E: Int64.unsigned n = nhi + nlo) by (unfold nhi; lia).
+ rewrite <- (Int64.repr_unsigned n).
+ destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
+ - econstructor; split. apply exec_straight_one. apply SEM. Simpl.
+ split. Simpl. do 3 f_equal; lia.
+ intros; Simpl.
+ - econstructor; split. apply exec_straight_one. apply SEM. Simpl.
+ split. Simpl. do 3 f_equal; lia.
+ intros; Simpl.
+ - econstructor; split. eapply exec_straight_two.
+ apply SEM. apply SEM. Simpl. Simpl.
+ split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
+ rewrite E. auto with ints.
+ intros; Simpl.
+Qed.
+
+Lemma exec_addimm64:
+ forall (rd r1: iregsp) n k rs m,
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (addimm64 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = Val.addl rs#r1 (Vlong n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros.
+ unfold addimm64. set (nn := Int64.neg n).
+ destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
+ - apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc.
+ - rewrite <- Val.subl_opp_addl.
+ apply exec_addimm_aux_64 with (sem := Val.subl). auto.
+ intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
+ - destruct (Int64.lt n Int64.zero).
+ + rewrite <- Val.subl_opp_addl; fold nn.
+ edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl.
+ split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+ intros; Simpl.
+ + edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl.
+ split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
+ intros; Simpl.
+Qed.
+
+(** Logical immediate *)
+
+Lemma exec_logicalimm32:
+ forall (insn1: Z -> arith_rr0)
+ (insn2: shift_op -> arith_rr0r)
+ (sem: val -> val -> val),
+ (forall rd r1 n rs m,
+ exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m =
+ Next (rs#rd <- (sem rs##r1 (Vint (Int.repr n)))) m) ->
+ (forall rd r1 r2 s rs m,
+ exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m =
+ Next (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s))) m) ->
+ forall rd r1 n k rs m,
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Vint n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
+ destruct (is_logical_imm32 n).
+ - econstructor; split.
+ apply exec_straight_one. apply SEM1.
+ split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl.
+ - edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. apply SEM2.
+ split. Simpl. f_equal; auto. apply C; auto with asmgen.
+ intros; Simpl.
+Qed.
+
+Lemma exec_logicalimm64:
+ forall (insn1: Z -> arith_rr0)
+ (insn2: shift_op -> arith_rr0r)
+ (sem: val -> val -> val),
+ (forall rd r1 n rs m,
+ exec_basic lk ge (PArith (PArithRR0 (insn1 n) rd r1)) rs m =
+ Next (rs#rd <- (sem rs###r1 (Vlong (Int64.repr n)))) m) ->
+ (forall rd r1 r2 s rs m,
+ exec_basic lk ge (PArith (PArithRR0R (insn2 s) rd r1 r2)) rs m =
+ Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) ->
+ forall rd r1 n k rs m,
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Vlong n)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
+ destruct (is_logical_imm64 n).
+ - econstructor; split.
+ apply exec_straight_one. apply SEM1.
+ split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl.
+ - edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. apply SEM2.
+ split. Simpl. f_equal; auto. apply C; auto with asmgen.
+ intros; Simpl.
+Qed.
+
+(** Load address of symbol *)
+
+Lemma exec_loadsymbol: forall rd s ofs k rs m,
+ rd <> X16 \/ Archi.pic_code tt = false ->
+ exists rs',
+ exec_straight ge lk (loadsymbol rd s ofs k) rs m k rs' m
+ /\ rs'#rd = Genv.symbol_address ge s ofs
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold loadsymbol; intros. destruct (Archi.pic_code tt).
+ - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
+ + subst ofs. econstructor; split.
+ apply exec_straight_one. simpl; eauto.
+ split. Simpl. intros; Simpl.
+ + exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
+ intros (rs1 & A & B & C).
+ econstructor; split.
+ econstructor. simpl; eauto. auto. eexact A.
+ split. simpl in B; rewrite B. Simpl.
+ rewrite <- Genv.shift_symbol_address_64 by auto.
+ rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
+ intros. rewrite C by auto. Simpl.
+ - econstructor; split.
+ eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
+ split. Simpl.
+ intros; Simpl.
+Qed.
+
+(** Shifted operands *)
+
+Remark transl_shift_not_none:
+ forall s a, transl_shift s a <> SOnone.
+Proof.
+ destruct s; intros; simpl; congruence.
+Qed.
+
+Remark or_zero_eval_shift_op_int:
+ forall v s, s <> SOnone -> Val.or (Vint Int.zero) (eval_shift_op_int v s) = eval_shift_op_int v s.
+Proof.
+ intros; destruct s; try congruence; destruct v; auto; simpl;
+ destruct (Int.ltu n Int.iwordsize); auto; rewrite Int.or_zero_l; auto.
+Qed.
+
+Remark or_zero_eval_shift_op_long:
+ forall v s, s <> SOnone -> Val.orl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+ intros; destruct s; try congruence; destruct v; auto; simpl;
+ destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.or_zero_l; auto.
+Qed.
+
+Remark add_zero_eval_shift_op_long:
+ forall v s, s <> SOnone -> Val.addl (Vlong Int64.zero) (eval_shift_op_long v s) = eval_shift_op_long v s.
+Proof.
+ intros; destruct s; try congruence; destruct v; auto; simpl;
+ destruct (Int.ltu n Int64.iwordsize'); auto; rewrite Int64.add_zero_l; auto.
+Qed.
+
+Lemma transl_eval_shift: forall s v (a: amount32),
+ eval_shift_op_int v (transl_shift s a) = eval_shift s v a.
+Proof.
+ intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shift': forall s v (a: amount32),
+ Val.or (Vint Int.zero) (eval_shift_op_int v (transl_shift s a)) = eval_shift s v a.
+Proof.
+ intros. rewrite or_zero_eval_shift_op_int by (apply transl_shift_not_none).
+ apply transl_eval_shift.
+Qed.
+
+Lemma transl_eval_shiftl: forall s v (a: amount64),
+ eval_shift_op_long v (transl_shift s a) = eval_shiftl s v a.
+Proof.
+ intros. destruct s; simpl; auto.
+Qed.
+
+Lemma transl_eval_shiftl': forall s v (a: amount64),
+ Val.orl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+ intros. rewrite or_zero_eval_shift_op_long by (apply transl_shift_not_none).
+ apply transl_eval_shiftl.
+Qed.
+
+Lemma transl_eval_shiftl'': forall s v (a: amount64),
+ Val.addl (Vlong Int64.zero) (eval_shift_op_long v (transl_shift s a)) = eval_shiftl s v a.
+Proof.
+ intros. rewrite add_zero_eval_shift_op_long by (apply transl_shift_not_none).
+ apply transl_eval_shiftl.
+Qed.
+
+(** Zero- and Sign- extensions *)
+
+Lemma exec_move_extended_base: forall rd r1 ex k rs m,
+ exists rs',
+ exec_straight ge lk (move_extended_base rd r1 ex k) rs m k rs' m
+ /\ rs' rd = match ex with Xsgn32 => Val.longofint rs#r1 | Xuns32 => Val.longofintu rs#r1 end
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold move_extended_base; destruct ex; econstructor;
+ (split; [apply exec_straight_one; simpl; eauto | split; [Simpl|intros;Simpl]]).
+Qed.
+
+Lemma exec_move_extended: forall rd r1 ex (a: amount64) k rs m,
+ exists rs',
+ exec_straight ge lk (move_extended rd r1 ex a k) rs m k rs' m
+ /\ rs' rd = Op.eval_extend ex rs#r1 a
+ /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold move_extended; intros. predSpec Int.eq Int.eq_spec a Int.zero.
+ - exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. unfold Op.eval_extend. rewrite H. rewrite B.
+ destruct ex, (rs r1); simpl; auto; rewrite Int64.shl'_zero; auto.
+ auto.
+ - Local Opaque Val.addl.
+ exploit (exec_move_extended_base rd r1 ex). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ change (SOlsl a) with (transl_shift Slsl a). rewrite transl_eval_shiftl''. eauto. auto.
+ split. Simpl. rewrite B. auto.
+ intros; Simpl.
+Qed.
+
+Lemma exec_arith_extended:
+ forall (sem: val -> val -> val)
+ (insnX: extend_op -> arith_ppp)
+ (insnS: shift_op -> arith_rr0r),
+ (forall rd r1 r2 x rs m,
+ exec_basic lk ge (PArith (PArithPPP (insnX x) rd r1 r2)) rs m =
+ Next (rs#rd <- (sem rs#r1 (eval_extend rs#r2 x))) m) ->
+ (forall rd r1 r2 s rs m,
+ exec_basic lk ge (PArith (PArithRR0R (insnS s) rd r1 r2)) rs m =
+ Next (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s))) m) ->
+ forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: bcode) rs m,
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
+ /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
+ - econstructor; split.
+ apply exec_straight_one. rewrite EX; eauto. auto.
+ split. Simpl. f_equal. destruct ex; auto.
+ intros; Simpl.
+ - exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ rewrite ES. eauto. auto.
+ split. Simpl. unfold ir0. rewrite C by eauto with asmgen. f_equal.
+ rewrite B. destruct ex; auto.
+ intros; Simpl.
+Qed.
+
+(** Extended right shift *)
+
+Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+ Val.shrx rs#r1 (Vint n) = Some v ->
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = v
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold shrx32; intros. apply Val.shrx_shr_2 in H.
+ destruct (Int.eq n Int.zero) eqn:E0.
+ - econstructor; split. apply exec_straight_one; simpl; eauto.
+ split. Simpl. subst v; auto. intros; Simpl.
+ - econstructor; split. eapply exec_straight_three.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_int by congruence. eauto.
+ simpl; eauto.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_int by congruence. eauto.
+ split. subst v; Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m,
+ Val.shrx rs#r1 (Vint n) = None ->
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (shrx32 rd r1 n k) rs m k rs' m
+ /\ Val.lessdef (Val.maketotal None) (rs' x)
+ /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r.
+Proof.
+ unfold shrx32; intros.
+ destruct (Int.eq n Int.zero) eqn:E0.
+ - econstructor; split. apply exec_straight_one; simpl; eauto.
+ split. Simpl. auto. intros; Simpl.
+ - econstructor; split. eapply exec_straight_three.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_int by congruence. eauto.
+ simpl; eauto.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_int by congruence. eauto.
+ split. Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
+ Val.shrxl rs#r1 (Vint n) = Some v ->
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m
+ /\ rs'#rd = v
+ /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+Proof.
+ unfold shrx64; intros. apply Val.shrxl_shrl_2 in H.
+ destruct (Int.eq n Int.zero) eqn:E.
+ - econstructor; split. apply exec_straight_one; simpl; eauto.
+ split. Simpl. subst v; auto. intros; Simpl.
+ - econstructor; split. eapply exec_straight_three.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_long by congruence. eauto.
+ simpl; eauto.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_long by congruence. eauto.
+ split. subst v; Simpl. intros; Simpl.
+Qed.
+
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k x (rs: regset) m,
+ Val.shrxl rs#r1 (Vint n) = None ->
+ r1 <> X16 ->
+ exists rs',
+ exec_straight ge lk (shrx64 rd r1 n k) rs m k rs' m
+ /\ Val.lessdef (Val.maketotal None) (rs' x)
+ /\ forall r, data_preg r = true -> r <> rd -> preg_notin r (destroyed_by_op (Oshrximm n)) -> rs'#r = rs#r.
+Proof.
+ unfold shrx64; intros.
+ destruct (Int.eq n Int.zero) eqn:E.
+ - econstructor; split. apply exec_straight_one; simpl; eauto.
+ split. Simpl. auto. intros; Simpl.
+ - econstructor; split. eapply exec_straight_three.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_long by congruence. eauto.
+ simpl; eauto.
+ unfold exec_basic, exec_arith_instr, arith_eval_rr0r.
+ rewrite or_zero_eval_shift_op_long by congruence. eauto.
+ split. Simpl. intros; Simpl.
+Qed.
+
+Ltac TranslOpBase :=
+ econstructor; split;
+ [ apply exec_straight_one; [simpl; eauto ]
+ | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
+ | intros; Simpl; fail ] ].
+
+(** Condition bits *)
+
+Lemma compare_int_spec: forall rs v1 v2,
+ let rs' := compare_int rs v1 v2 in
+ rs'#CN = (Val.negative (Val.sub v1 v2))
+ /\ rs'#CZ = (Val.mxcmpu Ceq v1 v2)
+ /\ rs'#CC = (Val.mxcmpu Cge v1 v2)
+ /\ rs'#CV = (Val.sub_overflow v1 v2).
+Proof.
+ intros; unfold rs'; auto.
+Qed.
+
+Lemma eval_testcond_compare_sint: forall c v1 v2 b rs,
+ Val.cmp_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_signed_cmp c) (compare_int rs v1 v2) = Some b.
+Proof.
+ intros. generalize (compare_int_spec rs v1 v2).
+ set (rs' := compare_int rs v1 v2). intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+ destruct v1; try discriminate; destruct v2; try discriminate.
+ simpl in H; inv H.
+ unfold Val.mxcmpu; simpl. destruct c; simpl.
+ - destruct (Int.eq i i0); auto.
+ - destruct (Int.eq i i0); auto.
+ - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+ - rewrite Int.lt_sub_overflow, Int.not_lt.
+ destruct (Int.eq i i0), (Int.lt i i0); auto.
+ - rewrite Int.lt_sub_overflow, (Int.lt_not i).
+ destruct (Int.eq i i0), (Int.lt i i0); auto.
+ - rewrite Int.lt_sub_overflow. destruct (Int.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_uint: forall c v1 v2 b rs,
+ Val.mxcmpu_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_unsigned_cmp c) (compare_int rs v1 v2) = Some b.
+Proof.
+ intros. generalize (compare_int_spec rs v1 v2).
+ set (rs' := compare_int rs v1 v2). intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+ destruct v1; try discriminate; destruct v2; try discriminate.
+ simpl in H; inv H.
+ unfold Val.mxcmpu; simpl. destruct c; simpl.
+ - destruct (Int.eq i i0); auto.
+ - destruct (Int.eq i i0); auto.
+ - destruct (Int.ltu i i0); auto.
+ - rewrite (Int.not_ltu i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+ - rewrite (Int.ltu_not i). destruct (Int.eq i i0), (Int.ltu i i0); auto.
+ - destruct (Int.ltu i i0); auto.
+Qed.
+
+Lemma compare_long_spec: forall rs v1 v2,
+ let rs' := compare_long rs v1 v2 in
+ rs'#CN = (Val.negativel (Val.subl v1 v2))
+ /\ rs'#CZ = (Val.mxcmplu Ceq v1 v2)
+ /\ rs'#CC = (Val.mxcmplu Cge v1 v2)
+ /\ rs'#CV = (Val.subl_overflow v1 v2).
+Proof.
+ intros; unfold rs'; auto.
+Qed.
+
+Remark int64_sub_overflow:
+ forall x y,
+ Int.xor (Int.repr (Int64.unsigned (Int64.sub_overflow x y Int64.zero)))
+ (Int.repr (Int64.unsigned (Int64.negative (Int64.sub x y)))) =
+ (if Int64.lt x y then Int.one else Int.zero).
+Proof.
+ intros.
+ transitivity (Int.repr (Int64.unsigned (if Int64.lt x y then Int64.one else Int64.zero))).
+ rewrite <- (Int64.lt_sub_overflow x y).
+ unfold Int64.sub_overflow, Int64.negative.
+ set (s := Int64.signed x - Int64.signed y - Int64.signed Int64.zero).
+ destruct (zle Int64.min_signed s && zle s Int64.max_signed);
+ destruct (Int64.lt (Int64.sub x y) Int64.zero);
+ auto.
+ destruct (Int64.lt x y); auto.
+Qed.
+
+Lemma eval_testcond_compare_slong: forall c v1 v2 b rs,
+ Val.cmpl_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_signed_cmp c) (compare_long rs v1 v2) = Some b.
+Proof.
+ intros. generalize (compare_long_spec rs v1 v2).
+ set (rs' := compare_long rs v1 v2). intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+ destruct v1; try discriminate; destruct v2; try discriminate.
+ simpl in H; inv H.
+ unfold Val.mxcmplu; simpl. destruct c; simpl.
+ - destruct (Int64.eq i i0); auto.
+ - destruct (Int64.eq i i0); auto.
+ - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+ - rewrite int64_sub_overflow, Int64.not_lt.
+ destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+ - rewrite int64_sub_overflow, (Int64.lt_not i).
+ destruct (Int64.eq i i0), (Int64.lt i i0); auto.
+ - rewrite int64_sub_overflow. destruct (Int64.lt i i0); auto.
+Qed.
+
+Lemma eval_testcond_compare_ulong: forall c v1 v2 b rs,
+ Val.mxcmplu_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_unsigned_cmp c) (compare_long rs v1 v2) = Some b.
+Proof.
+ intros. generalize (compare_long_spec rs v1 v2).
+ set (rs' := compare_long rs v1 v2). intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E; unfold Val.mxcmplu.
+ destruct v1; try discriminate; destruct v2; try discriminate; simpl in H.
+ - (* int-int *)
+ inv H. destruct c; simpl.
+ + destruct (Int64.eq i i0); auto.
+ + destruct (Int64.eq i i0); auto.
+ + destruct (Int64.ltu i i0); auto.
+ + rewrite (Int64.not_ltu i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
+ + rewrite (Int64.ltu_not i). destruct (Int64.eq i i0), (Int64.ltu i i0); auto.
+ + destruct (Int64.ltu i i0); auto.
+ - (* int-ptr *)
+ simpl.
+ destruct (Archi.ptr64); simpl; try discriminate.
+ destruct (Int64.eq i Int64.zero); simpl; try discriminate.
+ destruct c; simpl in H; inv H; reflexivity.
+ - (* ptr-int *)
+ simpl.
+ destruct (Archi.ptr64); simpl; try discriminate.
+ destruct (Int64.eq i0 Int64.zero); try discriminate.
+ destruct c; simpl in H; inv H; reflexivity.
+ - (* ptr-ptr *)
+ simpl.
+ destruct (eq_block b0 b1).
+ destruct (Archi.ptr64); simpl; try discriminate.
+ inv H.
+ destruct c; simpl.
+ * destruct (Ptrofs.eq i i0); auto.
+ * destruct (Ptrofs.eq i i0); auto.
+ * destruct (Ptrofs.ltu i i0); auto.
+ * rewrite (Ptrofs.not_ltu i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+ * rewrite (Ptrofs.ltu_not i). destruct (Ptrofs.eq i i0), (Ptrofs.ltu i i0); auto.
+ * destruct (Ptrofs.ltu i i0); auto.
+ * destruct c; simpl in H; inv H; reflexivity.
+Qed.
+
+Lemma compare_float_spec: forall rs f1 f2,
+ let rs' := compare_float rs (Vfloat f1) (Vfloat f2) in
+ rs'#CN = (Val.of_bool (Float.cmp Clt f1 f2))
+ /\ rs'#CZ = (Val.of_bool (Float.cmp Ceq f1 f2))
+ /\ rs'#CC = (Val.of_bool (negb (Float.cmp Clt f1 f2)))
+ /\ rs'#CV = (Val.of_bool (negb (Float.ordered f1 f2))).
+Proof.
+ intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_float: forall c v1 v2 b rs,
+ Val.cmpf_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_float_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+ intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+ generalize (compare_float_spec rs f f0).
+ set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+ intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+ unfold Float.cmp, Float.ordered;
+ destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_float: forall c v1 v2 b rs,
+ option_map negb (Val.cmpf_bool c v1 v2) = Some b ->
+ eval_testcond (cond_for_float_not_cmp c) (compare_float rs v1 v2) = Some b.
+Proof.
+ intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+ generalize (compare_float_spec rs f f0).
+ set (rs' := compare_float rs (Vfloat f) (Vfloat f0)).
+ intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float.cmp Float.ordered.
+ unfold Float.cmp, Float.ordered;
+ destruct c; destruct (Float.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma compare_single_spec: forall rs f1 f2,
+ let rs' := compare_single rs (Vsingle f1) (Vsingle f2) in
+ rs'#CN = (Val.of_bool (Float32.cmp Clt f1 f2))
+ /\ rs'#CZ = (Val.of_bool (Float32.cmp Ceq f1 f2))
+ /\ rs'#CC = (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+ /\ rs'#CV = (Val.of_bool (negb (Float32.ordered f1 f2))).
+Proof.
+ intros; auto.
+Qed.
+
+Lemma eval_testcond_compare_single: forall c v1 v2 b rs,
+ Val.cmpfs_bool c v1 v2 = Some b ->
+ eval_testcond (cond_for_float_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+ intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+ generalize (compare_single_spec rs f f0).
+ set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+ intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+ unfold Float32.cmp, Float32.ordered;
+ destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Lemma eval_testcond_compare_not_single: forall c v1 v2 b rs,
+ option_map negb (Val.cmpfs_bool c v1 v2) = Some b ->
+ eval_testcond (cond_for_float_not_cmp c) (compare_single rs v1 v2) = Some b.
+Proof.
+ intros. destruct v1; try discriminate; destruct v2; simpl in H; inv H.
+ generalize (compare_single_spec rs f f0).
+ set (rs' := compare_single rs (Vsingle f) (Vsingle f0)).
+ intros (B & C & D & E).
+ unfold eval_testcond; rewrite B, C, D, E.
+Local Transparent Float32.cmp Float32.ordered.
+ unfold Float32.cmp, Float32.ordered;
+ destruct c; destruct (Float32.compare f f0) as [[]|]; reflexivity.
+Qed.
+
+Remark compare_float_inv: forall rs v1 v2 r,
+ match r with CR _ => False | _ => True end ->
+ (compare_float rs v1 v2)#r = rs#r.
+Proof.
+ intros; unfold compare_float.
+ destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Remark compare_single_inv: forall rs v1 v2 r,
+ match r with CR _ => False | _ => True end ->
+ (compare_single rs v1 v2)#r = rs#r.
+Proof.
+ intros; unfold compare_single.
+ destruct r; try contradiction; destruct v1; auto; destruct v2; auto.
+Qed.
+
+Lemma transl_cond_correct:
+ forall cond args k c rs m,
+ transl_cond cond args k = OK c ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m
+ /\ (forall b,
+ eval_condition cond (map rs (map preg_of args)) m = Some b ->
+ eval_testcond (cond_for_cond cond) rs' = Some b)
+ /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+ intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
+ - (* Ccomp *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_sint; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccompu *)
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. apply eval_testcond_compare_uint; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccompimm *)
+ destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto.
+ destruct r; reflexivity || discriminate.
+ + econstructor; split.
+ apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+ split; intros. apply eval_testcond_compare_sint; auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ simpl. rewrite B, C by eauto with asmgen. eauto.
+ split; intros. apply eval_testcond_compare_sint; auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccompuimm *)
+ destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto.
+ destruct r; reflexivity || discriminate.
+ + econstructor; split.
+ apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
+ split; intros. apply eval_testcond_compare_uint; auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+ split; intros. apply eval_testcond_compare_uint; auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccompshift *)
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccompushift *)
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Cmaskzero *)
+ destruct (is_logical_imm32 n).
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+ split; intros. apply (eval_testcond_compare_sint Ceq); auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Cmasknotzero *)
+ destruct (is_logical_imm32 n).
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+ split; intros. apply (eval_testcond_compare_sint Cne); auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccompl *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_slong; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccomplu *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_ulong; auto.
+ erewrite Val.mxcmplu_bool_correct; eauto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccomplimm *)
+ destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto.
+ destruct r; reflexivity || discriminate.
+ + econstructor; split.
+ apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto.
+ split; intros. apply eval_testcond_compare_slong; auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ simpl. rewrite B, C by eauto with asmgen. eauto. auto.
+ split; intros. apply eval_testcond_compare_slong; auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccompluimm *)
+ destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+ + econstructor; split. apply exec_straight_one. simpl; eauto. auto.
+ split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto.
+ erewrite Val.mxcmplu_bool_correct; eauto.
+ destruct r; reflexivity || discriminate.
+ + econstructor; split.
+ apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto.
+ split; intros. apply eval_testcond_compare_ulong; auto.
+ erewrite Val.mxcmplu_bool_correct; eauto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one.
+ simpl. rewrite B, C by eauto with asmgen. eauto.
+ split; intros. apply eval_testcond_compare_ulong; auto.
+ erewrite Val.mxcmplu_bool_correct; eauto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccomplshift *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto.
+ destruct r; reflexivity || discriminate.
+ - (* Ccomplushift *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto.
+ erewrite Val.mxcmplu_bool_correct; eauto.
+ destruct r; reflexivity || discriminate.
+ - (* Cmasklzero *)
+ destruct (is_logical_imm64 n).
+ + econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto.
+ split; intros. apply (eval_testcond_compare_slong Ceq); auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Cmasknotzero *)
+ destruct (is_logical_imm64 n).
+ + econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
+ destruct r; reflexivity || discriminate.
+ + exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A.
+ apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto.
+ split; intros. apply (eval_testcond_compare_slong Cne); auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ - (* Ccompf *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_float; auto.
+ destruct r; discriminate || rewrite compare_float_inv; auto.
+ - (* Cnotcompf *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_not_float; auto.
+ destruct r; discriminate || rewrite compare_float_inv; auto.
+ - (* Ccompfzero *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_float; auto.
+ destruct r; discriminate || rewrite compare_float_inv; auto.
+ - (* Cnotcompfzero *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_not_float; auto.
+ destruct r; discriminate || rewrite compare_float_inv; auto.
+ - (* Ccompfs *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_single; auto.
+ destruct r; discriminate || rewrite compare_single_inv; auto.
+ - (* Cnotcompfs *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_not_single; auto.
+ destruct r; discriminate || rewrite compare_single_inv; auto.
+ - (* Ccompfszero *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_single; auto.
+ destruct r; discriminate || rewrite compare_single_inv; auto.
+ - (* Cnotcompfszero *)
+ econstructor; split. apply exec_straight_one. simpl; eauto.
+ split; intros. apply eval_testcond_compare_not_single; auto.
+ destruct r; discriminate || rewrite compare_single_inv; auto.
+Qed.
+
+Lemma transl_cond_correct':
+ forall cond args k c tbb rs m,
+ transl_cond cond args k = OK c ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m
+ /\ (forall b,
+ eval_condition cond (map rs (map preg_of args)) m = Some b ->
+ eval_testcond (cond_for_cond cond) (incrPC (Ptrofs.repr (size tbb)) rs') = Some b)
+ /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+ intros until m; intros TR.
+ eapply transl_cond_correct; eauto.
+Qed.
+
+Lemma transl_cbranch_correct_1:
+ forall cond args lbl c k b m rs tbb bdy,
+ transl_cond_branch cond args lbl k = OK (bdy,c) ->
+ eval_condition cond (map rs (map preg_of args)) m = Some b ->
+ exists rs',
+ exec_straight_opt ge lk bdy rs m k rs' m
+ /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m =
+ (if b then goto_label fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m else
+ Next (incrPC (Ptrofs.repr (size tbb)) rs') m)
+ /\ forall r, data_preg r = true -> rs'#r = rs#r.
+Proof.
+intros until bdy; intros TR EV.
+ assert (Archi.ptr64 = true) as SF; auto.
+ assert (DFL:
+ transl_cond_branch_default cond args lbl k = OK (bdy,c) ->
+ exists rs',
+ exec_straight_opt ge lk bdy rs m k rs' m
+ /\ exec_cfi ge fn c (incrPC (Ptrofs.repr (size tbb)) rs') m = eval_branch fn lbl (incrPC (Ptrofs.repr (size tbb)) rs') m (Some b)
+ /\ forall r, data_preg r = true -> rs'#r = rs#r).
+ {
+ unfold transl_cond_branch_default; intros. monadInv H.
+ exploit transl_cond_correct'; eauto. intros (rs' & A & B & C).
+ eexists; split.
+ apply exec_straight_opt_intro. eexact A.
+ split; auto. simpl. rewrite (B b) by auto. auto.
+ }
+Local Opaque transl_cond transl_cond_branch_default.
+ destruct args as [ | a1 args]; simpl in TR; auto.
+ destruct args as [ | a2 args]; simpl in TR; auto.
+ destruct cond; simpl in TR; auto.
+ - (* Ccompimm *)
+ destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto;
+ apply Int.same_if_eq in N0; subst n; ArgsInv.
+ + (* Ccompimm Cne 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl. auto.
+ + (* Ccompimm Ceq 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl.
+ destruct (Int.eq i Int.zero); auto.
+ - (* Ccompuimm *)
+ destruct c0; auto; destruct (Int.eq n Int.zero) eqn:N0; auto.
+ apply Int.same_if_eq in N0; subst n; ArgsInv.
+ + (* Ccompuimm Cne 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. apply Val.mxcmpu_bool_correct in EV.
+ unfold incrPC. Simpl. rewrite EV. auto.
+ + (* Ccompuimm Ceq 0 *)
+ monadInv TR. ArgsInv. simpl in *.
+ econstructor; split. econstructor.
+ split; auto. simpl. unfold incrPC. Simpl.
+ apply Int.same_if_eq in N0; subst.
+ rewrite (Val.negate_cmpu_bool (Mem.valid_pointer m) Cne), EV.
+ destruct b; auto.
+ - (* Cmaskzero *)
+ destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+ econstructor; split. econstructor.
+ split; auto. simpl.
+ erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+ unfold incrPC. Simpl.
+ rewrite (Val.negate_cmp_bool Ceq), EV. destruct b; auto.
+ - (* Cmasknotzero *)
+ destruct (Int.is_power2 n) as [bit|] eqn:P2; auto. ArgsInv.
+ econstructor; split. econstructor.
+ split; auto. simpl.
+ erewrite <- Int.mul_pow2, Int.mul_commut, Int.mul_one by eauto.
+ unfold incrPC. Simpl.
+ rewrite EV. auto.
+ - (* Ccomplimm *)
+ destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
+ apply Int64.same_if_eq in N0; subst n; ArgsInv.
+ + (* Ccomplimm Cne 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl. auto.
+ + (* Ccomplimm Ceq 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl.
+ destruct (Int64.eq i Int64.zero); auto.
+ - (* Ccompluimm *)
+ destruct c0; auto; destruct (Int64.eq n Int64.zero) eqn:N0; auto;
+ apply Int64.same_if_eq in N0; subst n; ArgsInv.
+ + (* Ccompluimm Cne 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. apply Val.mxcmplu_bool_correct in EV.
+ unfold incrPC. Simpl. rewrite EV. auto.
+ + (* Ccompluimm Ceq 0 *)
+ econstructor; split. econstructor.
+ split; auto. simpl. destruct (rs x) eqn:EQRSX; simpl in EV; inv EV. simpl.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl.
+ destruct (Int64.eq i Int64.zero); auto.
+ unfold incrPC. Simpl. rewrite EQRSX. simpl.
+ rewrite SF in *; simpl in *.
+ rewrite Int64.eq_true in *.
+ destruct ((Mem.valid_pointer m b0 (Ptrofs.unsigned i) || Mem.valid_pointer m b0 (Ptrofs.unsigned i - 1))); simpl in *.
+ assert (b = true). { destruct b; try congruence. }
+ rewrite H; auto. discriminate.
+ - (* Cmasklzero *)
+ destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+ econstructor; split. econstructor.
+ split; auto.
+ erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+ unfold incrPC. Simpl.
+ rewrite (Val.negate_cmpl_bool Ceq), EV. destruct b; auto.
+ - (* Cmasklnotzero *)
+ destruct (Int64.is_power2' n) as [bit|] eqn:P2; auto. ArgsInv.
+ econstructor; split. econstructor.
+ split; auto.
+ erewrite <- Int64.mul_pow2', Int64.mul_commut, Int64.mul_one by eauto.
+ unfold incrPC. Simpl.
+ rewrite EV. auto.
+Qed.
+
+Lemma transl_op_correct:
+ forall op args res k (rs: regset) m v c,
+ transl_op op args res k = OK c ->
+ eval_operation ge (rs#SP) op (map rs (map preg_of args)) m = Some v ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m
+ /\ Val.lessdef v rs'#(preg_of res)
+ /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+Proof.
+ (* assert (SAME: forall v1 v2, v1 = v2 -> Val.lessdef v2 v1). { intros; subst; auto. } *)
+Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
+ intros until c; intros TR EV.
+ unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl;
+ try (rewrite <- transl_eval_shift; TranslOpSimpl).
+ - (* move *)
+ destruct (preg_of res), (preg_of m0); try destruct d; try destruct d0; inv TR; TranslOpSimpl.
+ - (* intconst *)
+ exploit exec_loadimm32. intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+ - (* longconst *)
+ exploit exec_loadimm64. intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+ - (* floatconst *)
+ destruct (Float.eq_dec n Float.zero).
+ + subst n. TranslOpSimpl.
+ + TranslOpSimplN.
+ - (* singleconst *)
+ destruct (Float32.eq_dec n Float32.zero).
+ + subst n. TranslOpSimpl.
+ + TranslOpSimplN.
+ - (* loadsymbol *)
+ exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* addrstack *)
+ exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)); try discriminate. simpl; eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. replace (DR XSP) with (SP) in B by auto. rewrite B.
+ Local Transparent Val.addl.
+ destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
+ auto.
+ - (* shift *)
+ rewrite <- transl_eval_shift'. TranslOpSimpl.
+ - (* addimm *)
+ exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* mul *)
+ TranslOpBase.
+ Local Transparent Val.add.
+ destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
+ - (* andimm *)
+ exploit (exec_logicalimm32 (Pandimm W) (Pand W)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* orimm *)
+ exploit (exec_logicalimm32 (Porrimm W) (Porr W)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* xorimm *)
+ exploit (exec_logicalimm32 (Peorimm W) (Peor W)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* not *)
+ TranslOpBase.
+ destruct (rs x0); auto. simpl. rewrite Int.or_zero_l; auto.
+ - (* notshift *)
+ TranslOpBase.
+ destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
+ - (* shrx *)
+ assert (Val.maketotal (Val.shrx (rs x0) (Vint n)) = Val.maketotal (Val.shrx (rs x0) (Vint n))) by eauto.
+ destruct (Val.shrx) eqn:E.
+ + exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+ econstructor; split. eexact A. split. rewrite B; auto. auto.
+ + exploit (exec_shrx32_none x x0 n); eauto with asmgen.
+ - (* zero-ext *)
+ TranslOpBase.
+ destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+ - (* sign-ext *)
+ TranslOpBase.
+ destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
+ - (* shlzext *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite <- Int.shl_zero_ext_min; auto using a32_range.
+ - (* shlsext *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite <- Int.shl_sign_ext_min; auto using a32_range.
+ - (* zextshr *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.zero_ext_shru_min; auto using a32_range.
+ - (* sextshr *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite ! a32_range; simpl. rewrite <- Int.sign_ext_shr_min; auto using a32_range.
+ - (* shiftl *)
+ rewrite <- transl_eval_shiftl'. TranslOpSimpl.
+ - (* extend *)
+ exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
+ econstructor; split. eexact A.
+ split. rewrite B; auto. eauto with asmgen.
+ - (* addlshift *)
+ TranslOpBase.
+ - (* addext *)
+ exploit (exec_arith_extended Val.addl Paddext (Padd X)).
+ auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+ econstructor; split. eexact A. split. rewrite B; auto. auto.
+ - (* addlimm *)
+ exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
+ - (* neglshift *)
+ TranslOpBase.
+ - (* sublshift *)
+ TranslOpBase.
+ - (* subext *)
+ exploit (exec_arith_extended Val.subl Psubext (Psub X)).
+ auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
+ econstructor; split. eexact A. split. rewrite B; auto. auto.
+ - (* mull *)
+ TranslOpBase.
+ destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
+ - (* andlshift *)
+ TranslOpBase.
+ - (* andlimm *)
+ exploit (exec_logicalimm64 (Pandimm X) (Pand X)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* orlshift *)
+ TranslOpBase.
+ - (* orlimm *)
+ exploit (exec_logicalimm64 (Porrimm X) (Porr X)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* orlshift *)
+ TranslOpBase.
+ - (* xorlimm *)
+ exploit (exec_logicalimm64 (Peorimm X) (Peor X)).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ - (* notl *)
+ TranslOpBase.
+ destruct (rs x0); auto. simpl. rewrite Int64.or_zero_l; auto.
+ - (* notlshift *)
+ TranslOpBase.
+ destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
+ - (* biclshift *)
+ TranslOpBase.
+ - (* ornlshift *)
+ TranslOpBase.
+ - (* eqvlshift *)
+ TranslOpBase.
+ - (* shrx *)
+ assert (Val.maketotal (Val.shrxl (rs x0) (Vint n)) = Val.maketotal (Val.shrxl (rs x0) (Vint n))) by eauto.
+ destruct (Val.shrxl) eqn:E.
+ + exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+ econstructor; split. eexact A. split. rewrite B; auto. auto.
+ + exploit (exec_shrx64_none x x0 n); eauto with asmgen.
+ - (* zero-ext-l *)
+ TranslOpBase.
+ destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+ - (* sign-ext-l *)
+ TranslOpBase.
+ destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
+ - (* shllzext *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_zero_ext_min; auto using a64_range.
+ - (* shllsext *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite <- Int64.shl'_sign_ext_min; auto using a64_range.
+ - (* zextshrl *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.zero_ext_shru'_min; auto using a64_range.
+ - (* sextshrl *)
+ TranslOpBase.
+ destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
+ - (* condition *)
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+ split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+ rewrite (B b) by auto. auto.
+ auto.
+ intros; Simpl.
+ - (* select *)
+ destruct (preg_of res) as [[ir|fr]|cr|] eqn:RES; monadInv TR.
+ + (* integer *)
+ generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+ split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+ rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+ rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+ auto.
+ intros; Simpl.
+ + (* FP *)
+ generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto.
+ split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
+ rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
+ rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
+ auto.
+ intros; Simpl.
+Qed.
+
+(** Translation of addressing modes, loads, stores *)
+
+Lemma transl_addressing_correct:
+ forall sz addr args (insn: Asm.addressing -> basic) k (rs: regset) m c b o,
+ transl_addressing sz addr args insn k = OK c ->
+ Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some (Vptr b o) ->
+ exists ad rs',
+ exec_straight_opt ge lk c rs m (insn ad :: k) rs' m
+ /\ eval_addressing lk ad rs' = Vptr b o
+ /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+ intros until o; intros TR EV.
+ unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
+ - (* Aindexed *)
+ destruct (offset_representable sz ofs); inv EQ0.
+ + econstructor; econstructor; split. apply exec_straight_opt_refl.
+ auto.
+ + exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C).
+ econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
+ split. simpl. rewrite B, C by eauto with asmgen. auto.
+ eauto with asmgen.
+ - (* Aindexed2 *)
+ econstructor; econstructor; split. apply exec_straight_opt_refl.
+ auto.
+ - (* Aindexed2shift *)
+ destruct (Int.eq a Int.zero) eqn:E; [|destruct (Int.eq (Int.shl Int.one a) (Int.repr sz))]; inv EQ2.
+ + apply Int.same_if_eq in E. rewrite E.
+ econstructor; econstructor; split. apply exec_straight_opt_refl.
+ split; auto. simpl.
+ rewrite Val.addl_commut in H0. destruct (rs x0); try discriminate.
+ unfold Val.shll. rewrite Int64.shl'_zero. auto.
+ + econstructor; econstructor; split. apply exec_straight_opt_refl.
+ auto.
+ + econstructor; econstructor; split.
+ apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto.
+ split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
+ intros; Simpl.
+ - (* Aindexed2ext *)
+ destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
+ + econstructor; econstructor; split. apply exec_straight_opt_refl.
+ split; auto. destruct x; auto.
+ + exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
+ instantiate (1 := x0). eauto with asmgen.
+ intros (rs' & A & B & C).
+ econstructor; exists rs'; split.
+ apply exec_straight_opt_intro. eexact A.
+ split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
+ unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
+ simpl; rewrite Int64.add_zero; auto.
+ intros. apply C; eauto with asmgen.
+ - (* Aglobal *)
+ destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
+ + econstructor; econstructor; split.
+ apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto.
+ split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
+ intros; Simpl.
+ + exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C).
+ econstructor; exists rs'; split.
+ apply exec_straight_opt_intro. eexact A.
+ split. simpl.
+ rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto.
+ simpl in EV. congruence.
+ auto with asmgen.
+ - (* Ainstrack *)
+ assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
+ { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl.
+ rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
+ destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
+ + econstructor; econstructor; split. apply exec_straight_opt_refl.
+ auto.
+ + exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C).
+ econstructor; exists rs'; split.
+ apply exec_straight_opt_intro. eexact A.
+ split. simpl. rewrite B, C by eauto with asmgen. auto.
+ auto with asmgen.
+Qed.
+
+Lemma transl_load_correct:
+ forall chunk addr args dst k c (rs: regset) m vaddr v,
+ transl_load chunk addr args dst k = OK c ->
+ Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+ Mem.loadv chunk m vaddr = Some v ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m
+ /\ rs'#(preg_of dst) = v
+ /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+Proof.
+ intros. destruct vaddr; try discriminate.
+ assert (A: exists sz insn,
+ transl_addressing sz addr args insn k = OK c
+ /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+ exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m)).
+ {
+ destruct chunk; monadInv H;
+ try rewrite (ireg_of_eq' _ _ EQ); try rewrite (freg_of_eq' _ _ EQ);
+ do 2 econstructor; (split; [eassumption|auto]).
+ }
+ destruct A as (sz & insn & B & C).
+ exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+ assert (X: exec_load_rd_a lk chunk (fun v => v) ad (dreg_of dst) rs' m =
+ Next (rs'#(preg_of dst) <- v) m).
+ { unfold exec_load_rd_a. rewrite Q, H1. auto. }
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact P.
+ apply exec_straight_one. rewrite C, X; eauto. Simpl.
+ split. auto. intros; Simpl.
+Qed.
+
+Lemma transl_store_correct:
+ forall chunk addr args src k c (rs: regset) m vaddr m',
+ transl_store chunk addr args src k = OK c ->
+ Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr ->
+ Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m'
+ /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+ intros. destruct vaddr; try discriminate.
+ set (chunk' := match chunk with Mint8signed => Mint8unsigned
+ | Mint16signed => Mint16unsigned
+ | _ => chunk end).
+ assert (A: exists sz insn,
+ transl_addressing sz addr args insn k = OK c
+ /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+ exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m)).
+ {
+ unfold chunk'; destruct chunk; monadInv H;
+ try rewrite (ireg_of_eq _ _ EQ); try rewrite (freg_of_eq _ _ EQ);
+ do 2 econstructor; (split; [eassumption|auto]).
+ }
+ destruct A as (sz & insn & B & C).
+ exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+ assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
+ { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
+ apply Mem.store_signed_unsigned_8.
+ apply Mem.store_signed_unsigned_16. }
+ assert (Y: exec_store_rs_a lk chunk' ad rs'#(preg_of src) rs' m =
+ Next rs' m').
+ { unfold exec_store_rs_a. rewrite Q, R, X by auto with asmgen. auto. }
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact P.
+ apply exec_straight_one. rewrite C, Y; eauto.
+ intros; Simpl. rewrite R; auto.
+Qed.
+
+(** Memory accesses *)
+
+Lemma indexed_memory_access_correct: forall insn sz (base: iregsp) ofs k (rs: regset) m b i,
+ preg_of_iregsp base <> X16 ->
+ Val.offset_ptr rs#base ofs = Vptr b i ->
+ exists ad rs',
+ exec_straight_opt ge lk (indexed_memory_access_bc insn sz base ofs k) rs m (insn ad :: k) rs' m
+ /\ eval_addressing lk ad rs' = Vptr b i
+ /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+ unfold indexed_memory_access_bc; intros.
+ assert (Val.addl rs#base (Vlong (Ptrofs.to_int64 ofs)) = Vptr b i).
+ { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
+ destruct offset_representable.
+ - econstructor; econstructor; split. apply exec_straight_opt_refl. auto.
+ - exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C).
+ econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
+ split. simpl. rewrite B, C; eauto; try discriminate.
+ unfold preg_of_iregsp in H. destruct base; auto. auto.
+Qed.
+
+Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
+ Mem.loadv Mint64 m (Val.offset_ptr rs#base ofs) = Some v ->
+ preg_of_iregsp base <> IR X16 ->
+ exists rs',
+ exec_straight ge lk (loadptr_bc base ofs dst k) rs m k rs' m
+ /\ rs'#dst = v
+ /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r.
+Proof.
+ intros.
+ destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+ exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A.
+ apply exec_straight_one. simpl. unfold exec_load_rd_a. rewrite B, H. eauto.
+ split. Simpl. intros; Simpl.
+Qed.
+
+Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
+ Mem.storev Mint64 m (Val.offset_ptr rs#base ofs) rs#src = Some m' ->
+ preg_of_iregsp base <> IR X16 ->
+ (DR (IR (RR1 src))) <> (DR (IR (RR1 X16))) ->
+ exists rs',
+ exec_straight ge lk (storeptr_bc src base ofs k) rs m k rs' m'
+ /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+Proof.
+ intros.
+ destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+ exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A.
+ apply exec_straight_one. simpl. unfold exec_store_rs_a. rewrite B, C, H. eauto.
+ discriminate. auto.
+ intros; Simpl. rewrite C; auto.
+Qed.
+
+Lemma loadind_correct:
+ forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
+ loadind base ofs ty dst k = OK c ->
+ Mem.loadv (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) = Some v ->
+ preg_of_iregsp base <> IR X16 ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m
+ /\ rs'#(preg_of dst) = v
+ /\ forall r, data_preg r = true -> r <> preg_of dst -> rs'#r = rs#r.
+Proof.
+ intros.
+ destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+ assert (X: exists sz (insn: addressing -> ld_instruction),
+ c = indexed_memory_access_bc insn sz base ofs k
+ /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+ exec_load_rd_a lk (chunk_of_type ty) (fun v => v) ad (dreg_of dst) rs' m)).
+ {
+ unfold loadind in H; destruct ty; destruct (dst); inv H;
+ do 2 econstructor; split; eauto.
+ }
+ destruct X as (sz & insn & EQ & SEM). subst c.
+ exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A.
+ apply exec_straight_one. rewrite SEM. unfold exec_load.
+ unfold exec_load_rd_a. rewrite B, H0. eauto. Simpl.
+ split. auto. intros; Simpl.
+Qed.
+
+Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
+ storeind src base ofs ty k = OK c ->
+ Mem.storev (chunk_of_type ty) m (Val.offset_ptr rs#base ofs) rs#(preg_of src) = Some m' ->
+ preg_of_iregsp base <> IR X16 ->
+ exists rs',
+ exec_straight ge lk c rs m k rs' m'
+ /\ forall r, data_preg r = true -> rs' r = rs r.
+Proof.
+ intros.
+ destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
+ assert (X: exists sz (insn: addressing -> st_instruction),
+ c = indexed_memory_access_bc insn sz base ofs k
+ /\ (forall ad rs', exec_basic lk ge (insn ad) rs' m =
+ exec_store_rs_a lk (chunk_of_type ty) ad rs'#(preg_of src) rs' m)).
+ {
+ unfold storeind in H; destruct ty; destruct (preg_of src) as [[ir|fr]|cr|]; inv H; do 2 econstructor; split; eauto.
+ }
+ destruct X as (sz & insn & EQ & SEM). subst c.
+ exploit indexed_memory_access_correct; eauto. intros (ad & rs' & A & B & C).
+ econstructor; split.
+ eapply exec_straight_opt_right. eexact A.
+ apply exec_straight_one. rewrite SEM.
+ unfold exec_store. unfold exec_store_rs_a.
+ rewrite B, C, H0 by eauto with asmgen. eauto.
+ intros; Simpl. unfold data_preg in H2. destruct r as [[[ir|]|fr]|cr|].
+ all: rewrite C; auto; try discriminate;
+ destruct ir; try discriminate.
+Qed.
+
+Lemma make_epilogue_correct:
+ forall ge0 f m stk soff cs m' ms rs tm,
+ Mach.load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
+ Mach.load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+ Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
+ agree ms (Vptr stk soff) rs ->
+ Mem.extends m tm ->
+ match_stack ge0 cs ->
+ exists rs', exists tm',
+ exec_straight ge lk (make_epilogue f) rs tm nil rs' tm'
+ /\ agree ms (parent_sp cs) rs'
+ /\ Mem.extends m' tm'
+ /\ rs'#RA = parent_ra cs
+ /\ rs'#SP = parent_sp cs
+ /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r).
+Proof.
+ assert (Archi.ptr64 = true) as SF; auto.
+ intros until tm; intros LP LRA FREE AG MEXT MCS.
+ exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+ exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
+ exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+ exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
+ exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+ unfold make_epilogue.
+ rewrite chunk_of_Tptr in *. unfold Mptr in *. rewrite SF in *.
+
+ exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
+ instantiate (2 := rs). simpl.
+ replace (rs XSP) with (rs SP) by auto.
+ rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; discriminate.
+
+ intros (rs1 & A1 & B1 & C1).
+ econstructor; econstructor; split.
+ eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl.
+ simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'.
+ rewrite FREE'. eauto.
+ split. apply agree_set_other; auto.
+ apply agree_change_sp with (Vptr stk soff).
+ apply agree_exten with rs; auto. intros; apply C1; auto with asmgen.
+ eapply parent_sp_def; eauto.
+ split. auto.
+ split. Simpl.
+ split. Simpl.
+ intros. Simpl.
+Qed.
+
+End CONSTRUCTORS.
diff --git a/aarch64/Asmblockprops.v b/aarch64/Asmblockprops.v
new file mode 100644
index 00000000..38fbd6d3
--- /dev/null
+++ b/aarch64/Asmblockprops.v
@@ -0,0 +1,119 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Common definition and proofs on Asmblock required by various modules *)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Values.
+Require Import Asmblock.
+Require Import Axioms.
+
+Definition bblock_simu (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+ forall rs m rs' m' t,
+ exec_bblock lk ge f bb rs m t rs' m' -> exec_bblock lk ge f bb' rs m t rs' m'.
+
+Definition bblock_simu_aux (lk: aarch64_linker) (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+ forall rs m,
+ bbstep lk ge f bb rs m <> Stuck ->
+ bbstep lk ge f bb rs m = bbstep lk ge f bb' rs m.
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma preg_of_data:
+ forall r, data_preg (preg_of r) = true.
+Proof.
+ intros. destruct r; reflexivity.
+Qed.
+
+Lemma dreg_of_data:
+ forall r, data_preg (dreg_of r) = true.
+Proof.
+ intros. destruct r; reflexivity.
+Qed.
+Hint Resolve preg_of_data dreg_of_data: asmgen.
+
+Lemma data_diff:
+ forall r r',
+ data_preg r = true -> data_preg r' = false -> r <> r'.
+Proof.
+ congruence.
+Qed.
+Hint Resolve data_diff: asmgen.
+
+Lemma preg_of_not_PC:
+ forall r, preg_of r <> PC.
+Proof.
+ intros. apply data_diff; auto with asmgen.
+Qed.
+
+Lemma preg_of_not_SP:
+ forall r, preg_of r <> SP.
+Proof.
+ intros. unfold preg_of; destruct r; simpl; try discriminate.
+Qed.
+
+Ltac desif :=
+ repeat match goal with
+ | [ |- context[ if ?f then _ else _ ] ] => destruct f
+ end.
+
+Ltac decomp :=
+ repeat match goal with
+ | [ |- context[ match (?rs ?r) with | _ => _ end ] ] => destruct (rs r)
+ end.
+
+Ltac Simplif :=
+ ((desif)
+ || (try unfold compare_long)
+ || (try unfold compare_int)
+ || (try unfold compare_float)
+ || (try unfold compare_single)
+ || decomp
+ || (rewrite Pregmap.gss)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Section EPC.
+
+Variable lk: aarch64_linker.
+
+(* For Asmblockgenproof0 *)
+
+Theorem exec_basic_instr_pc:
+ forall ge b rs1 m1 rs2 m2,
+ exec_basic lk ge b rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ intros. destruct b; try destruct i; try destruct i; try destruct ld; try destruct ld;
+ try destruct st; try destruct st; try destruct a.
+ all: try (inv H; simpl in *; auto; Simpl).
+ all: try (try unfold exec_load_rd_a in H1; try destruct (Mem.loadv _ _ _); inv H1; Simpl).
+ all: try (try unfold exec_load_double in H0; destruct (Mem.loadv _ _ _); simpl in *;
+ try destruct (Mem.loadv _ _ _); simpl in *; inv H0; Simpl).
+ all: try (try unfold exec_store_rs_a in H0; try destruct (Mem.storev _ _ _); inv H0; auto; Simpl).
+ all: try (try unfold exec_store_double in H1; destruct (Mem.storev _ _ _); simpl in *;
+ try destruct (Mem.storev _ _ _); simpl in *; inv H1; auto; Simpl).
+ - (* alloc *)
+ destruct (Mem.alloc _ _ _); destruct (Mem.store _ _ _); inv H1; auto; Simpl.
+ - (* free *)
+ destruct (rs1 SP); try discriminate; destruct (Mem.free _ _ _ _); inv H1; Simpl.
+Qed.
+
+End EPC.
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 573e8b92..eb9ec600 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -34,13 +34,13 @@ let _m1 = Z.of_sint (-1)
(* Emit instruction sequences that set or offset a register by a constant. *)
let expand_loadimm32 (dst: ireg) n =
- List.iter emit (Asmgen.loadimm32 dst n [])
+ List.iter emit (Asmgen.Asmgen_expand.loadimm32 dst n [])
let expand_addimm64 (dst: iregsp) (src: iregsp) n =
- List.iter emit (Asmgen.addimm64 dst src n [])
+ List.iter emit (Asmgen.Asmgen_expand.addimm64 dst src n [])
let expand_storeptr (src: ireg) (base: iregsp) ofs =
- List.iter emit (Asmgen.storeptr src base ofs [])
+ List.iter emit (Asmgen.Asmgen_expand.storeptr src base ofs [])
(* Handling of varargs *)
@@ -84,8 +84,8 @@ let save_parameter_registers ir fr =
while !i < 8 do
let pos = 8*16 + !i*8 in
if !i land 1 = 0 then begin
- emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1),
- ADimm(XSP, Z.of_uint pos)));
+ emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1),
+ Mint64, Mint64, ADimm(XSP, Z.of_uint pos)));
i := !i + 2
end else begin
emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
@@ -160,9 +160,9 @@ let expand_builtin_va_start r =
let expand_annot_val kind txt targ args res =
emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
match args, res with
- | [BA(IR src)], BR(IR dst) ->
- if dst <> src then emit (Pmov (RR1 dst, RR1 src))
- | [BA(FR src)], BR(FR dst) ->
+ | [BA(DR(IR src))], BR(DR(IR dst)) ->
+ if dst <> src then emit (Pmov (dst, src))
+ | [BA(DR(FR src))], BR(DR(FR dst)) ->
if dst <> src then emit (Pfmov (dst, src))
| _, _ ->
raise (Error "ill-formed __builtin_annot_val")
@@ -180,8 +180,8 @@ let offset_in_range ofs =
let memcpy_small_arg sz arg tmp =
match arg with
- | BA (IR r) ->
- (RR1 r, _0)
+ | BA (DR(IR r)) ->
+ (r, _0)
| BA_addrstack ofs ->
if offset_in_range ofs
&& offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
@@ -192,14 +192,14 @@ let memcpy_small_arg sz arg tmp =
assert false
let expand_builtin_memcpy_small sz al src dst =
- let tsrc = if dst <> BA (IR X17) then X17 else X29 in
- let tdst = if src <> BA (IR X29) then X29 else X17 in
+ let tsrc = if dst <> BA (DR(IR(RR1 X17))) then X17 else X29 in
+ let tdst = if src <> BA (DR(IR(RR1 X29))) then X29 else X17 in
let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
if sz >= 16 then begin
- emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
- emit (Pstp(X16, X30, ADimm(rdst, odst)));
+ emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc)));
+ emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
end
else if sz >= 8 then begin
@@ -226,7 +226,7 @@ let expand_builtin_memcpy_small sz al src dst =
let memcpy_big_arg arg tmp =
match arg with
- | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r))
+ | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r))
| BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
| _ -> assert false
@@ -237,8 +237,8 @@ let expand_builtin_memcpy_big sz al src dst =
let lbl = new_label () in
expand_loadimm32 X15 (Z.of_uint (sz / 16));
emit (Plabel lbl);
- emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
- emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
+ emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16)));
+ emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16)));
emit (Psubimm(W, RR1 X15, RR1 X15, _1));
emit (Pcbnz(W, X15, lbl));
if sz mod 16 >= 8 then begin
@@ -270,29 +270,29 @@ let expand_builtin_memcpy sz al args =
let expand_builtin_vload_common chunk base ofs res =
let addr = ADimm(base, ofs) in
match chunk, res with
- | Mint8unsigned, BR(IR res) ->
+ | Mint8unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrb(W, res, addr))
- | Mint8signed, BR(IR res) ->
+ | Mint8signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsb(W, res, addr))
- | Mint16unsigned, BR(IR res) ->
+ | Mint16unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrh(W, res, addr))
- | Mint16signed, BR(IR res) ->
+ | Mint16signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsh(W, res, addr))
- | Mint32, BR(IR res) ->
+ | Mint32, BR(DR(IR(RR1 res))) ->
emit (Pldrw(res, addr))
- | Mint64, BR(IR res) ->
+ | Mint64, BR(DR(IR(RR1 res))) ->
emit (Pldrx(res, addr))
- | Mfloat32, BR(FR res) ->
+ | Mfloat32, BR(DR(FR res)) ->
emit (Pldrs(res, addr))
- | Mfloat64, BR(FR res) ->
+ | Mfloat64, BR(DR(FR res)) ->
emit (Pldrd(res, addr))
| _ ->
assert false
let expand_builtin_vload chunk args res =
match args with
- | [BA(IR addr)] ->
- expand_builtin_vload_common chunk (RR1 addr) _0 res
+ | [BA(DR(IR addr))] ->
+ expand_builtin_vload_common chunk addr _0 res
| [BA_addrstack ofs] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vload_common chunk XSP ofs res
@@ -300,11 +300,11 @@ let expand_builtin_vload chunk args res =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
- | [BA_addptr(BA(IR addr), BA_long ofs)] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
- expand_builtin_vload_common chunk (RR1 addr) ofs res
+ expand_builtin_vload_common chunk addr ofs res
else begin
- expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+ expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
| _ ->
@@ -313,25 +313,25 @@ let expand_builtin_vload chunk args res =
let expand_builtin_vstore_common chunk base ofs src =
let addr = ADimm(base, ofs) in
match chunk, src with
- | (Mint8signed | Mint8unsigned), BA(IR src) ->
+ | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrb(src, addr))
- | (Mint16signed | Mint16unsigned), BA(IR src) ->
+ | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrh(src, addr))
- | Mint32, BA(IR src) ->
+ | Mint32, BA(DR(IR(RR1 src))) ->
emit (Pstrw(src, addr))
- | Mint64, BA(IR src) ->
+ | Mint64, BA(DR(IR(RR1 src))) ->
emit (Pstrx(src, addr))
- | Mfloat32, BA(FR src) ->
+ | Mfloat32, BA(DR(FR src)) ->
emit (Pstrs(src, addr))
- | Mfloat64, BA(FR src) ->
+ | Mfloat64, BA(DR(FR src)) ->
emit (Pstrd(src, addr))
| _ ->
assert false
let expand_builtin_vstore chunk args =
match args with
- | [BA(IR addr); src] ->
- expand_builtin_vstore_common chunk (RR1 addr) _0 src
+ | [BA(DR(IR addr)); src] ->
+ expand_builtin_vstore_common chunk addr _0 src
| [BA_addrstack ofs; src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vstore_common chunk XSP ofs src
@@ -339,11 +339,11 @@ let expand_builtin_vstore chunk args =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
- | [BA_addptr(BA(IR addr), BA_long ofs); src] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
- expand_builtin_vstore_common chunk (RR1 addr) ofs src
+ expand_builtin_vstore_common chunk addr ofs src
else begin
- expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+ expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
| _ ->
@@ -363,45 +363,47 @@ let expand_builtin_inline name args res =
| "__builtin_unreachable", [], _ ->
()
(* Byte swap *)
- | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(W, res, a1))
- | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(X, res, a1))
- | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev16(W, res, a1));
emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
(* Count leading zeros, leading sign bits, trailing zeros *)
- | "__builtin_clz", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_clz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(W, res, a1))
- | ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(X, res, a1))
- | "__builtin_cls", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_cls", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(W, res, a1))
- | ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(X, res, a1))
- | "__builtin_ctz", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_ctz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prbit(W, res, a1));
emit (Pclz(W, res, res))
- | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prbit(X, res, a1));
emit (Pclz(X, res, res))
(* Float arithmetic *)
- | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
+ | "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) ->
+ emit (Pfabs(D, res, a1))
+ | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfsqrt(D, res, a1))
- | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmadd(D, res, a1, a2, a3))
- | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmsub(D, res, a1, a2, a3))
- | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmadd(D, res, a1, a2, a3))
- | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmsub(D, res, a1, a2, a3))
- | "__builtin_fmax", [BA (FR a1); BA (FR a2)], BR (FR res) ->
+ | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
emit (Pfmax (D, res, a1, a2))
- | "__builtin_fmin", [BA (FR a1); BA (FR a2)], BR (FR res) ->
+ | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
emit (Pfmin (D, res, a1, a2))
(* Vararg *)
- | "__builtin_va_start", [BA(IR a)], _ ->
+ | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
expand_builtin_va_start a
(* Catch-all *)
| _ ->
@@ -441,7 +443,7 @@ let expand_instruction instr =
expand_annot_val kind txt targ args res
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
- | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
emit instr
| _ ->
assert false
@@ -468,9 +470,9 @@ let float_reg_to_dwarf = function
| D30 -> 94 | D31 -> 95
let preg_to_dwarf = function
- | IR r -> int_reg_to_dwarf r
- | FR r -> float_reg_to_dwarf r
- | SP -> 31
+ | DR(IR(RR1 r)) -> int_reg_to_dwarf r
+ | DR(FR r) -> float_reg_to_dwarf r
+ | DR(IR(XSP)) -> 31
| _ -> assert false
let expand_function id fn =
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index baaab6c4..6bb791c4 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -1,168 +1,46 @@
-(* *********************************************************************)
-(* *)
-(* The Compcert verified compiler *)
-(* *)
-(* Xavier Leroy, Collège de France and INRIA Paris *)
-(* *)
-(* Copyright Institut National de Recherche en Informatique et en *)
-(* Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the INRIA Non-Commercial License Agreement. *)
-(* *)
-(* *********************************************************************)
-
-(** Translation from Mach to AArch64. *)
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Justus Fasse UGA, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
Require Import Recdef Coqlib Zwf Zbits.
Require Import Errors AST Integers Floats Op.
-Require Import Locations Mach Asm.
-Require SelectOp.
+Require Import Locations Compopts.
+Require Import Mach Asm Asmblock Asmblockgen Machblockgen PostpassScheduling.
-Local Open Scope string_scope.
-Local Open Scope list_scope.
Local Open Scope error_monad_scope.
-(** Alignment check for symbols *)
-
-Parameter symbol_is_aligned : ident -> Z -> bool.
-(** [symbol_is_aligned id sz] checks whether the symbol [id] is [sz] aligned *)
-
-(** Extracting integer or float registers. *)
-
-Definition ireg_of (r: mreg) : res ireg :=
- match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgen.ireg_of") end.
-
-Definition freg_of (r: mreg) : res freg :=
- match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
-
-(** Recognition of immediate arguments for logical integer operations.*)
-
-(** Valid immediate arguments are repetitions of a bit pattern [B]
- of length [e] = 2, 4, 8, 16, 32 or 64.
- The bit pattern [B] must be of the form [0*1*0*] or [1*0*1*]
- but must not be all zeros or all ones. *)
-
-(** The following automaton recognizes [0*1*0*|1*0*1*].
-<<
- 0 1 0
- / \ / \ / \
- \ / \ / \ /
- -0--> [B] --1--> [D] --0--> [F]
- /
- [A]
- \
- -1--> [C] --0--> [E] --1--> [G]
- / \ / \ / \
- \ / \ / \ /
- 1 0 1
->>
-*)
-
-Module Automaton.
-
-Inductive state : Type := SA | SB | SC | SD | SE | SF | SG | Sbad.
-
-Definition start := SA.
-
-Definition next (s: state) (b: bool) :=
- match s, b with
- | SA,false => SB | SA,true => SC
- | SB,false => SB | SB,true => SD
- | SC,false => SE | SC,true => SC
- | SD,false => SF | SD,true => SD
- | SE,false => SE | SE,true => SG
- | SF,false => SF | SF,true => Sbad
- | SG,false => Sbad | SG,true => SG
- | Sbad,_ => Sbad
- end.
-
-Definition accepting (s: state) :=
- match s with
- | SA | SB | SC | SD | SE | SF | SG => true
- | Sbad => false
- end.
-
-Fixpoint run (len: nat) (s: state) (x: Z) : bool :=
- match len with
- | Datatypes.O => accepting s
- | Datatypes.S len => run len (next s (Z.odd x)) (Z.div2 x)
- end.
-
-End Automaton.
-
-(** The following function determines the candidate length [e],
- ensuring that [x] is a repetition [BB...B]
- of a bit pattern [B] of length [e]. *)
-
-Definition logical_imm_length (x: Z) (sixtyfour: bool) : nat :=
- (** [test n] checks that the low [2n] bits of [x] are of the
- form [BB], that is, two occurrences of the same [n] bits *)
- let test (n: Z) : bool :=
- Z.eqb (Zzero_ext n x) (Zzero_ext n (Z.shiftr x n)) in
- (** If [test n] fails, we know that the candidate length [e] is
- at least [2n]. Hence we test with decreasing values of [n]:
- 32, 16, 8, 4, 2. *)
- if sixtyfour && negb (test 32) then 64%nat
- else if negb (test 16) then 32%nat
- else if negb (test 8) then 16%nat
- else if negb (test 4) then 8%nat
- else if negb (test 2) then 4%nat
- else 2%nat.
+(** Functions called by the Asmexpand ocaml file, inspired and adapted from Asmblockgen.v *)
-(** A valid logical immediate is
-- neither [0] nor [-1];
-- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e]
-- the low [e] bits of the number, that is, [B], match [0*1*0*] or [1*0*1*].
-*)
+Module Asmgen_expand.
-Definition is_logical_imm32 (x: int) : bool :=
- negb (Int.eq x Int.zero) && negb (Int.eq x Int.mone) &&
- Automaton.run (logical_imm_length (Int.unsigned x) false)
- Automaton.start (Int.unsigned x).
-
-Definition is_logical_imm64 (x: int64) : bool :=
- negb (Int64.eq x Int64.zero) && negb (Int64.eq x Int64.mone) &&
- Automaton.run (logical_imm_length (Int64.unsigned x) true)
- Automaton.start (Int64.unsigned x).
-
-(** Arithmetic immediates are 12-bit unsigned numbers, possibly shifted left 12 bits *)
-
-Definition is_arith_imm32 (x: int) : bool :=
- Int.eq x (Int.zero_ext 12 x)
- || Int.eq x (Int.shl (Int.zero_ext 12 (Int.shru x (Int.repr 12))) (Int.repr 12)).
-
-Definition is_arith_imm64 (x: int64) : bool :=
- Int64.eq x (Int64.zero_ext 12 x)
- || Int64.eq x (Int64.shl (Int64.zero_ext 12 (Int64.shru x (Int64.repr 12))) (Int64.repr 12)).
-
-(** Decompose integer literals into 16-bit fragments *)
-
-Fixpoint decompose_int (N: nat) (n p: Z) {struct N} : list (Z * Z) :=
- match N with
- | Datatypes.O => nil
- | Datatypes.S N =>
- let frag := Zzero_ext 16 (Z.shiftr n p) in
- if Z.eqb frag 0 then
- decompose_int N n (p + 16)
- else
- (frag, p) :: decompose_int N (Z.ldiff n (Z.shiftl 65535 p)) (p + 16)
- end.
-
-Definition negate_decomposition (l: list (Z * Z)) :=
- List.map (fun np => (Z.lxor (fst np) 65535, snd np)) l.
+(* Load immediate *)
Definition loadimm_k (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
- List.fold_right (fun np k => Pmovk sz rd (fst np) (snd np) :: k) k l.
+ List.fold_right (fun np k => Asm.Pmovk sz rd (fst np) (snd np) :: k) k l.
Definition loadimm_z (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
match l with
- | nil => Pmovz sz rd 0 0 :: k
- | (n1, p1) :: l => Pmovz sz rd n1 p1 :: loadimm_k sz rd l k
+ | nil => Asm.Pmovz sz rd 0 0 :: k
+ | (n1, p1) :: l => Asm.Pmovz sz rd n1 p1 :: loadimm_k sz rd l k
end.
Definition loadimm_n (sz: isize) (rd: ireg) (l: list (Z * Z)) (k: code) : code :=
match l with
- | nil => Pmovn sz rd 0 0 :: k
- | (n1, p1) :: l => Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k
+ | nil => Asm.Pmovn sz rd 0 0 :: k
+ | (n1, p1) :: l => Asm.Pmovn sz rd n1 p1 :: loadimm_k sz rd (negate_decomposition l) k
end.
Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code :=
@@ -175,15 +53,15 @@ Definition loadimm (sz: isize) (rd: ireg) (n: Z) (k: code) : code :=
Definition loadimm32 (rd: ireg) (n: int) (k: code) : code :=
if is_logical_imm32 n
- then Porrimm W rd XZR (Int.unsigned n) :: k
+ then Asm.Porrimm W rd XZR (Int.unsigned n) :: k
else loadimm W rd (Int.unsigned n) k.
Definition loadimm64 (rd: ireg) (n: int64) (k: code) : code :=
if is_logical_imm64 n
- then Porrimm X rd XZR (Int64.unsigned n) :: k
+ then Asm.Porrimm X rd XZR (Int64.unsigned n) :: k
else loadimm X rd (Int64.unsigned n) k.
-(** Add immediate *)
+(* Add immediate *)
Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction)
(rd r1: iregsp) (n: Z) (k: code) :=
@@ -196,962 +74,393 @@ Definition addimm_aux (insn: iregsp -> iregsp -> Z -> instruction)
else
insn rd r1 nhi :: insn rd rd nlo :: k.
-Definition addimm32 (rd r1: ireg) (n: int) (k: code) : code :=
- let m := Int.neg n in
- if Int.eq n (Int.zero_ext 24 n) then
- addimm_aux (Paddimm W) rd r1 (Int.unsigned n) k
- else if Int.eq m (Int.zero_ext 24 m) then
- addimm_aux (Psubimm W) rd r1 (Int.unsigned m) k
- else if Int.lt n Int.zero then
- loadimm32 X16 m (Psub W rd r1 X16 SOnone :: k)
- else
- loadimm32 X16 n (Padd W rd r1 X16 SOnone :: k).
-
Definition addimm64 (rd r1: iregsp) (n: int64) (k: code) : code :=
let m := Int64.neg n in
if Int64.eq n (Int64.zero_ext 24 n) then
- addimm_aux (Paddimm X) rd r1 (Int64.unsigned n) k
+ addimm_aux (Asm.Paddimm X) rd r1 (Int64.unsigned n) k
else if Int64.eq m (Int64.zero_ext 24 m) then
- addimm_aux (Psubimm X) rd r1 (Int64.unsigned m) k
+ addimm_aux (Asm.Psubimm X) rd r1 (Int64.unsigned m) k
else if Int64.lt n Int64.zero then
- loadimm64 X16 m (Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
- else
- loadimm64 X16 n (Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
-
-(** Logical immediate *)
-
-Definition logicalimm32
- (insn1: ireg -> ireg0 -> Z -> instruction)
- (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
- (rd r1: ireg) (n: int) (k: code) : code :=
- if is_logical_imm32 n
- then insn1 rd r1 (Int.unsigned n) :: k
- else loadimm32 X16 n (insn2 rd r1 X16 SOnone :: k).
-
-Definition logicalimm64
- (insn1: ireg -> ireg0 -> Z -> instruction)
- (insn2: ireg -> ireg0 -> ireg -> shift_op -> instruction)
- (rd r1: ireg) (n: int64) (k: code) : code :=
- if is_logical_imm64 n
- then insn1 rd r1 (Int64.unsigned n) :: k
- else loadimm64 X16 n (insn2 rd r1 X16 SOnone :: k).
-
-(** Sign- or zero-extended arithmetic *)
-
-Definition transl_extension (ex: extension) (a: int) : extend_op :=
- match ex with Xsgn32 => EOsxtw a | Xuns32 => EOuxtw a end.
-
-Definition move_extended_base
- (rd: ireg) (r1: ireg) (ex: extension) (k: code) : code :=
- match ex with
- | Xsgn32 => Pcvtsw2x rd r1 :: k
- | Xuns32 => Pcvtuw2x rd r1 :: k
- end.
-
-Definition move_extended
- (rd: ireg) (r1: ireg) (ex: extension) (a: int) (k: code) : code :=
- if Int.eq a Int.zero then
- move_extended_base rd r1 ex k
- else
- move_extended_base rd r1 ex (Padd X rd XZR rd (SOlsl a) :: k).
-
-Definition arith_extended
- (insnX: iregsp -> iregsp -> ireg -> extend_op -> instruction)
- (insnS: ireg -> ireg0 -> ireg -> shift_op -> instruction)
- (rd r1 r2: ireg) (ex: extension) (a: int) (k: code) : code :=
- if Int.ltu a (Int.repr 5) then
- insnX rd r1 r2 (transl_extension ex a) :: k
- else
- move_extended_base X16 r2 ex (insnS rd r1 X16 (SOlsl a) :: k).
-
-(** Extended right shift *)
-
-Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code :=
- if Int.eq n Int.zero then
- Pmov rd r1 :: k
- else
- Porr W X16 XZR r1 (SOasr (Int.repr 31)) ::
- Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) ::
- Porr W rd XZR X16 (SOasr n) :: k.
-
-Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code :=
- if Int.eq n Int.zero then
- Pmov rd r1 :: k
- else
- Porr X X16 XZR r1 (SOasr (Int.repr 63)) ::
- Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) ::
- Porr X rd XZR X16 (SOasr n) :: k.
-
-(** Load the address [id + ofs] in [rd] *)
-
-Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code :=
- if SelectOp.symbol_is_relocatable id then
- if Ptrofs.eq ofs Ptrofs.zero then
- Ploadsymbol rd id :: k
- else
- Ploadsymbol rd id :: addimm64 rd rd (Ptrofs.to_int64 ofs) k
+ loadimm64 X16 m (Asm.Psubext rd r1 X16 (EOuxtx Int.zero) :: k)
else
- Padrp rd id ofs :: Paddadr rd rd id ofs :: k.
+ loadimm64 X16 n (Asm.Paddext rd r1 X16 (EOuxtx Int.zero) :: k).
-(** Translate a shifted operand *)
+(** Register-indexed stores *)
-Definition transl_shift (s: Op.shift) (a: int): Asm.shift_op :=
- match s with
- | Slsl => SOlsl a
- | Slsr => SOlsr a
- | Sasr => SOasr a
- | Sror => SOror a
- end.
-
-(** Translation of a condition. Prepends to [k] the instructions
- that evaluate the condition and leave its boolean result in one of
- the bits of the condition register. The bit in question is
- determined by the [crbit_for_cond] function. *)
+Definition indexed_memory_access (insn: Asm.addressing -> instruction)
+ (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) :=
+ let ofs := Ptrofs.to_int64 ofs in
+ if offset_representable sz ofs
+ then insn (ADimm base ofs) :: k
+ else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
-Definition transl_cond
- (cond: condition) (args: list mreg) (k: code) :=
- match cond, args with
- | (Ccomp c | Ccompu c), a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pcmp W r1 r2 SOnone :: k)
- | (Ccompshift c s a | Ccompushift c s a), a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pcmp W r1 r2 (transl_shift s a) :: k)
- | (Ccompimm c n | Ccompuimm c n), a1 :: nil =>
- do r1 <- ireg_of a1;
- OK (if is_arith_imm32 n then
- Pcmpimm W r1 (Int.unsigned n) :: k
- else if is_arith_imm32 (Int.neg n) then
- Pcmnimm W r1 (Int.unsigned (Int.neg n)) :: k
- else
- loadimm32 X16 n (Pcmp W r1 X16 SOnone :: k))
- | (Cmaskzero n | Cmasknotzero n), a1 :: nil =>
- do r1 <- ireg_of a1;
- OK (if is_logical_imm32 n then
- Ptstimm W r1 (Int.unsigned n) :: k
- else
- loadimm32 X16 n (Ptst W r1 X16 SOnone :: k))
- | (Ccompl c | Ccomplu c), a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pcmp X r1 r2 SOnone :: k)
- | (Ccomplshift c s a | Ccomplushift c s a), a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pcmp X r1 r2 (transl_shift s a) :: k)
- | (Ccomplimm c n | Ccompluimm c n), a1 :: nil =>
- do r1 <- ireg_of a1;
- OK (if is_arith_imm64 n then
- Pcmpimm X r1 (Int64.unsigned n) :: k
- else if is_arith_imm64 (Int64.neg n) then
- Pcmnimm X r1 (Int64.unsigned (Int64.neg n)) :: k
- else
- loadimm64 X16 n (Pcmp X r1 X16 SOnone :: k))
- | (Cmasklzero n | Cmasklnotzero n), a1 :: nil =>
- do r1 <- ireg_of a1;
- OK (if is_logical_imm64 n then
- Ptstimm X r1 (Int64.unsigned n) :: k
- else
- loadimm64 X16 n (Ptst X r1 X16 SOnone :: k))
- | Ccompf cmp, a1 :: a2 :: nil =>
- do r1 <- freg_of a1; do r2 <- freg_of a2;
- OK (Pfcmp D r1 r2 :: k)
- | Cnotcompf cmp, a1 :: a2 :: nil =>
- do r1 <- freg_of a1; do r2 <- freg_of a2;
- OK (Pfcmp D r1 r2 :: k)
- | Ccompfzero cmp, a1 :: nil =>
- do r1 <- freg_of a1;
- OK (Pfcmp0 D r1 :: k)
- | Cnotcompfzero cmp, a1 :: nil =>
- do r1 <- freg_of a1;
- OK (Pfcmp0 D r1 :: k)
- | Ccompfs cmp, a1 :: a2 :: nil =>
- do r1 <- freg_of a1; do r2 <- freg_of a2;
- OK (Pfcmp S r1 r2 :: k)
- | Cnotcompfs cmp, a1 :: a2 :: nil =>
- do r1 <- freg_of a1; do r2 <- freg_of a2;
- OK (Pfcmp S r1 r2 :: k)
- | Ccompfszero cmp, a1 :: nil =>
- do r1 <- freg_of a1;
- OK (Pfcmp0 S r1 :: k)
- | Cnotcompfszero cmp, a1 :: nil =>
- do r1 <- freg_of a1;
- OK (Pfcmp0 S r1 :: k)
- | _, _ =>
- Error(msg "Asmgen.transl_cond")
- end.
+Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
+ indexed_memory_access (Asm.Pstrx src) 8 base ofs k.
-Definition cond_for_signed_cmp (cmp: comparison) :=
- match cmp with
- | Ceq => TCeq
- | Cne => TCne
- | Clt => TClt
- | Cle => TCle
- | Cgt => TCgt
- | Cge => TCge
- end.
+End Asmgen_expand.
-Definition cond_for_unsigned_cmp (cmp: comparison) :=
- match cmp with
- | Ceq => TCeq
- | Cne => TCne
- | Clt => TClo
- | Cle => TCls
- | Cgt => TChi
- | Cge => TChs
- end.
+(** * Translation from Asmblock to assembly language
+ Inspired from the KVX backend (see kvx/Asm.v and kvx/Asmgen.v) *)
-Definition cond_for_float_cmp (cmp: comparison) :=
- match cmp with
- | Ceq => TCeq
- | Cne => TCne
- | Clt => TCmi
- | Cle => TCls
- | Cgt => TCgt
- | Cge => TCge
- end.
+Module Asmblock_TRANSF.
+(* STUB *)
-Definition cond_for_float_not_cmp (cmp: comparison) :=
- match cmp with
- | Ceq => TCne
- | Cne => TCeq
- | Clt => TCpl
- | Cle => TChi
- | Cgt => TCle
- | Cge => TClt
+Definition ireg_of_preg (p : Asm.preg) : res ireg :=
+ match p with
+ | DR (IR (RR1 r)) => OK r
+ | _ => Error (msg "Asmgen.ireg_of_preg")
end.
-Definition cond_for_cond (cond: condition) :=
- match cond with
- | Ccomp cmp => cond_for_signed_cmp cmp
- | Ccompu cmp => cond_for_unsigned_cmp cmp
- | Ccompshift cmp s a => cond_for_signed_cmp cmp
- | Ccompushift cmp s a => cond_for_unsigned_cmp cmp
- | Ccompimm cmp n => cond_for_signed_cmp cmp
- | Ccompuimm cmp n => cond_for_unsigned_cmp cmp
- | Cmaskzero n => TCeq
- | Cmasknotzero n => TCne
- | Ccompl cmp => cond_for_signed_cmp cmp
- | Ccomplu cmp => cond_for_unsigned_cmp cmp
- | Ccomplshift cmp s a => cond_for_signed_cmp cmp
- | Ccomplushift cmp s a => cond_for_unsigned_cmp cmp
- | Ccomplimm cmp n => cond_for_signed_cmp cmp
- | Ccompluimm cmp n => cond_for_unsigned_cmp cmp
- | Cmasklzero n => TCeq
- | Cmasklnotzero n => TCne
- | Ccompf cmp => cond_for_float_cmp cmp
- | Cnotcompf cmp => cond_for_float_not_cmp cmp
- | Ccompfzero cmp => cond_for_float_cmp cmp
- | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
- | Ccompfs cmp => cond_for_float_cmp cmp
- | Cnotcompfs cmp => cond_for_float_not_cmp cmp
- | Ccompfszero cmp => cond_for_float_cmp cmp
- | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
+Definition freg_of_preg (p : Asm.preg) : res freg :=
+ match p with
+ | DR (FR r) => OK r
+ | _ => Error (msg "Asmgen.freg_of_preg")
end.
-(** Translation of a conditional branch. Prepends to [k] the instructions
- that evaluate the condition and ranch to [lbl] if it holds.
- We recognize some conditional branches that can be implemented
- without setting then testing condition flags. *)
-
-Definition transl_cond_branch_default
- (c: condition) (args: list mreg) (lbl: label) (k: code) :=
- transl_cond c args (Pbc (cond_for_cond c) lbl :: k).
-
-Definition transl_cond_branch
- (c: condition) (args: list mreg) (lbl: label) (k: code) :=
- match args, c with
- | a1 :: nil, (Ccompimm Cne n | Ccompuimm Cne n) =>
- if Int.eq n Int.zero
- then (do r1 <- ireg_of a1; OK (Pcbnz W r1 lbl :: k))
- else transl_cond_branch_default c args lbl k
- | a1 :: nil, (Ccompimm Ceq n | Ccompuimm Ceq n) =>
- if Int.eq n Int.zero
- then (do r1 <- ireg_of a1; OK (Pcbz W r1 lbl :: k))
- else transl_cond_branch_default c args lbl k
- | a1 :: nil, (Ccomplimm Cne n | Ccompluimm Cne n) =>
- if Int64.eq n Int64.zero
- then (do r1 <- ireg_of a1; OK (Pcbnz X r1 lbl :: k))
- else transl_cond_branch_default c args lbl k
- | a1 :: nil, (Ccomplimm Ceq n | Ccompluimm Ceq n) =>
- if Int64.eq n Int64.zero
- then (do r1 <- ireg_of a1; OK (Pcbz X r1 lbl :: k))
- else transl_cond_branch_default c args lbl k
- | a1 :: nil, Cmaskzero n =>
- match Int.is_power2 n with
- | Some bit => do r1 <- ireg_of a1; OK (Ptbz W r1 bit lbl :: k)
- | None => transl_cond_branch_default c args lbl k
- end
- | a1 :: nil, Cmasknotzero n =>
- match Int.is_power2 n with
- | Some bit => do r1 <- ireg_of a1; OK (Ptbnz W r1 bit lbl :: k)
- | None => transl_cond_branch_default c args lbl k
- end
- | a1 :: nil, Cmasklzero n =>
- match Int64.is_power2' n with
- | Some bit => do r1 <- ireg_of a1; OK (Ptbz X r1 bit lbl :: k)
- | None => transl_cond_branch_default c args lbl k
- end
- | a1 :: nil, Cmasklnotzero n =>
- match Int64.is_power2' n with
- | Some bit => do r1 <- ireg_of a1; OK (Ptbnz X r1 bit lbl :: k)
- | None => transl_cond_branch_default c args lbl k
- end
- | _, _ =>
- transl_cond_branch_default c args lbl k
+Definition iregsp_of_preg (p : Asm.preg) : res iregsp :=
+ match p with
+ | DR (IR r) => OK r
+ | _ => Error (msg "Asmgen.iregsp_of_preg")
end.
-
-(** Translation of the arithmetic operation [res <- op(args)].
- The corresponding instructions are prepended to [k]. *)
-Definition transl_op
- (op: operation) (args: list mreg) (res: mreg) (k: code) :=
- match op, args with
- | Omove, a1 :: nil =>
- match preg_of res, preg_of a1 with
- | IR r, IR a => OK (Pmov r a :: k)
- | FR r, FR a => OK (Pfmov r a :: k)
- | _ , _ => Error(msg "Asmgen.Omove")
- end
- | Ointconst n, nil =>
- do rd <- ireg_of res;
- OK (loadimm32 rd n k)
- | Olongconst n, nil =>
- do rd <- ireg_of res;
- OK (loadimm64 rd n k)
- | Ofloatconst f, nil =>
- do rd <- freg_of res;
- OK (if Float.eq_dec f Float.zero
- then Pfmovi D rd XZR :: k
- else Pfmovimmd rd f :: k)
- | Osingleconst f, nil =>
- do rd <- freg_of res;
- OK (if Float32.eq_dec f Float32.zero
- then Pfmovi S rd XZR :: k
- else Pfmovimms rd f :: k)
- | Oaddrsymbol id ofs, nil =>
- do rd <- ireg_of res;
- OK (loadsymbol rd id ofs k)
- | Oaddrstack ofs, nil =>
- do rd <- ireg_of res;
- OK (addimm64 rd XSP (Ptrofs.to_int64 ofs) k)
-(** 32-bit integer arithmetic *)
- | Oshift s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porr W rd XZR r1 (transl_shift s a) :: k)
- | Oadd, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Padd W rd r1 r2 SOnone :: k)
- | Oaddshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Padd W rd r1 r2 (transl_shift s a) :: k)
- | Oaddimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (addimm32 rd r1 n k)
- | Oneg, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psub W rd XZR r1 SOnone :: k)
- | Onegshift s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psub W rd XZR r1 (transl_shift s a) :: k)
- | Osub, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psub W rd r1 r2 SOnone :: k)
- | Osubshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psub W rd r1 r2 (transl_shift s a) :: k)
- | Omul, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pmadd W rd r1 r2 XZR :: k)
- | Omuladd, a1 :: a2 :: a3 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
- OK (Pmadd W rd r2 r3 r1 :: k)
- | Omulsub, a1 :: a2 :: a3 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
- OK (Pmsub W rd r2 r3 r1 :: k)
- | Odiv, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psdiv W rd r1 r2 :: k)
- | Odivu, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pudiv W rd r1 r2 :: k)
- | Oand, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pand W rd r1 r2 SOnone :: k)
- | Oandshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pand W rd r1 r2 (transl_shift s a) :: k)
- | Oandimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm32 (Pandimm W) (Pand W) rd r1 n k)
- | Oor, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porr W rd r1 r2 SOnone :: k)
- | Oorshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porr W rd r1 r2 (transl_shift s a) :: k)
- | Oorimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm32 (Porrimm W) (Porr W) rd r1 n k)
- | Oxor, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peor W rd r1 r2 SOnone :: k)
- | Oxorshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peor W rd r1 r2 (transl_shift s a) :: k)
- | Oxorimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm32 (Peorimm W) (Peor W) rd r1 n k)
- | Onot, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porn W rd XZR r1 SOnone :: k)
- | Onotshift s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porn W rd XZR r1 (transl_shift s a) :: k)
- | Obic, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pbic W rd r1 r2 SOnone :: k)
- | Obicshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pbic W rd r1 r2 (transl_shift s a) :: k)
- | Oorn, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porn W rd r1 r2 SOnone :: k)
- | Oornshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porn W rd r1 r2 (transl_shift s a) :: k)
- | Oeqv, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peon W rd r1 r2 SOnone :: k)
- | Oeqvshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peon W rd r1 r2 (transl_shift s a) :: k)
- | Oshl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Plslv W rd r1 r2 :: k)
- | Oshr, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pasrv W rd r1 r2 :: k)
- | Oshru, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Plsrv W rd r1 r2 :: k)
- | Oshrximm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (shrx32 rd r1 n k)
- | Ozext s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfiz W rd r1 Int.zero s :: k)
- | Osext s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfiz W rd r1 Int.zero s :: k)
- | Oshlzext s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
- | Oshlsext s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfiz W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
- | Ozextshr a s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
- | Osextshr a s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfx W rd r1 a (Z.min s (Int.zwordsize - Int.unsigned a)) :: k)
-(** 64-bit integer arithmetic *)
- | Oshiftl s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porr X rd XZR r1 (transl_shift s a) :: k)
- | Oextend x a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (move_extended rd r1 x a k)
- (* [Omakelong] and [Ohighlong] should not occur *)
- | Olowlong, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- assertion (ireg_eq rd r1);
- OK (Pcvtx2w rd :: k)
- | Oaddl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Padd X rd r1 r2 SOnone :: k)
- | Oaddlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Padd X rd r1 r2 (transl_shift s a) :: k)
- | Oaddlext x a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (arith_extended Paddext (Padd X) rd r1 r2 x a k)
- | Oaddlimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (addimm64 rd r1 n k)
- | Onegl, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psub X rd XZR r1 SOnone :: k)
- | Oneglshift s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psub X rd XZR r1 (transl_shift s a) :: k)
- | Osubl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psub X rd r1 r2 SOnone :: k)
- | Osublshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psub X rd r1 r2 (transl_shift s a) :: k)
- | Osublext x a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (arith_extended Psubext (Psub X) rd r1 r2 x a k)
- | Omull, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pmadd X rd r1 r2 XZR :: k)
- | Omulladd, a1 :: a2 :: a3 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
- OK (Pmadd X rd r2 r3 r1 :: k)
- | Omullsub, a1 :: a2 :: a3 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3;
- OK (Pmsub X rd r2 r3 r1 :: k)
- | Omullhs, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psmulh rd r1 r2 :: k)
- | Omullhu, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pumulh rd r1 r2 :: k)
- | Odivl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Psdiv X rd r1 r2 :: k)
- | Odivlu, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pudiv X rd r1 r2 :: k)
- | Oandl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pand X rd r1 r2 SOnone :: k)
- | Oandlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pand X rd r1 r2 (transl_shift s a) :: k)
- | Oandlimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm64 (Pandimm X) (Pand X) rd r1 n k)
- | Oorl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porr X rd r1 r2 SOnone :: k)
- | Oorlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porr X rd r1 r2 (transl_shift s a) :: k)
- | Oorlimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm64 (Porrimm X) (Porr X) rd r1 n k)
- | Oxorl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peor X rd r1 r2 SOnone :: k)
- | Oxorlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peor X rd r1 r2 (transl_shift s a) :: k)
- | Oxorlimm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (logicalimm64 (Peorimm X) (Peor X) rd r1 n k)
- | Onotl, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porn X rd XZR r1 SOnone :: k)
- | Onotlshift s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Porn X rd XZR r1 (transl_shift s a) :: k)
- | Obicl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pbic X rd r1 r2 SOnone :: k)
- | Obiclshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pbic X rd r1 r2 (transl_shift s a) :: k)
- | Oornl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porn X rd r1 r2 SOnone :: k)
- | Oornlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Porn X rd r1 r2 (transl_shift s a) :: k)
- | Oeqvl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peon X rd r1 r2 SOnone :: k)
- | Oeqvlshift s a, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Peon X rd r1 r2 (transl_shift s a) :: k)
- | Oshll, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Plslv X rd r1 r2 :: k)
- | Oshrl, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pasrv X rd r1 r2 :: k)
- | Oshrlu, a1 :: a2 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Plsrv X rd r1 r2 :: k)
- | Oshrlximm n, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (shrx64 rd r1 n k)
- | Ozextl s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfiz X rd r1 Int.zero s :: k)
- | Osextl s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfiz X rd r1 Int.zero s :: k)
- | Oshllzext s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
- | Oshllsext s a, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfiz X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
- | Ozextshrl a s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pubfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
- | Osextshrl a s, a1 :: nil =>
- do rd <- ireg_of res; do r1 <- ireg_of a1;
- OK (Psbfx X rd r1 a (Z.min s (Int64.zwordsize - Int.unsigned a)) :: k)
-(** 64-bit floating-point arithmetic *)
- | Onegf, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfneg D rd rs :: k)
- | Oabsf, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfabs D rd rs :: k)
- | Oaddf, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfadd D rd rs1 rs2 :: k)
- | Osubf, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfsub D rd rs1 rs2 :: k)
- | Omulf, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfmul D rd rs1 rs2 :: k)
- | Odivf, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfdiv D rd rs1 rs2 :: k)
-(** 32-bit floating-point arithmetic *)
- | Onegfs, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfneg S rd rs :: k)
- | Oabsfs, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfabs S rd rs :: k)
- | Oaddfs, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfadd S rd rs1 rs2 :: k)
- | Osubfs, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfsub S rd rs1 rs2 :: k)
- | Omulfs, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfmul S rd rs1 rs2 :: k)
- | Odivfs, a1 :: a2 :: nil =>
- do rd <- freg_of res; do rs1 <- freg_of a1; do rs2 <- freg_of a2;
- OK (Pfdiv S rd rs1 rs2 :: k)
- | Osingleoffloat, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfcvtsd rd rs :: k)
- | Ofloatofsingle, a1 :: nil =>
- do rd <- freg_of res; do rs <- freg_of a1;
- OK (Pfcvtds rd rs :: k)
-(** Conversions between int and float *)
- | Ointoffloat, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzs W D rd rs :: k)
- | Ointuoffloat, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzu W D rd rs :: k)
- | Ofloatofint, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pscvtf D W rd rs :: k)
- | Ofloatofintu, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pucvtf D W rd rs :: k)
- | Ointofsingle, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzs W S rd rs :: k)
- | Ointuofsingle, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzu W S rd rs :: k)
- | Osingleofint, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pscvtf S W rd rs :: k)
- | Osingleofintu, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pucvtf S W rd rs :: k)
- | Olongoffloat, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzs X D rd rs :: k)
- | Olonguoffloat, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzu X D rd rs :: k)
- | Ofloatoflong, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pscvtf D X rd rs :: k)
- | Ofloatoflongu, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pucvtf D X rd rs :: k)
- | Olongofsingle, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzs X S rd rs :: k)
- | Olonguofsingle, a1 :: nil =>
- do rd <- ireg_of res; do rs <- freg_of a1;
- OK (Pfcvtzu X S rd rs :: k)
- | Osingleoflong, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pscvtf S X rd rs :: k)
- | Osingleoflongu, a1 :: nil =>
- do rd <- freg_of res; do rs <- ireg_of a1;
- OK (Pucvtf S X rd rs :: k)
-(** Boolean tests *)
- | Ocmp c, _ =>
- do rd <- ireg_of res;
- transl_cond c args (Pcset rd (cond_for_cond c) :: k)
-(** Conditional move *)
- | Osel cmp ty, a1 :: a2 :: args =>
- match preg_of res with
- | IR r =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) :: k)
- | FR r =>
- do r1 <- freg_of a1; do r2 <- freg_of a2;
- transl_cond cmp args (Pfsel r r1 r2 (cond_for_cond cmp) :: k)
- | _ =>
- Error(msg "Asmgen.Osel")
+Definition basic_to_instruction (b: basic) : res Asm.instruction :=
+ match b with
+ (* Aithmetic instructions *)
+ | PArith (PArithP (Padrp id ofs) rd) => do rd' <- ireg_of_preg rd;
+ OK (Asm.Padrp rd' id ofs)
+ | PArith (PArithP (Pmovz sz n pos) rd) => do rd' <- ireg_of_preg rd;
+ OK (Asm.Pmovz sz rd' n pos)
+ | PArith (PArithP (Pmovn sz n pos) rd) => do rd' <- ireg_of_preg rd;
+ OK (Asm.Pmovn sz rd' n pos)
+ | PArith (PArithP (Pfmovimms f) rd) => do rd' <- freg_of_preg rd;
+ OK (Asm.Pfmovimms rd' f)
+ | PArith (PArithP (Pfmovimmd f) rd) => do rd' <- freg_of_preg rd;
+ OK (Asm.Pfmovimmd rd' f)
+
+ | PArith (PArithPP (Pmovk sz n pos) rd rs) =>
+ if (Asm.preg_eq rd rs) then (
+ do rd' <- ireg_of_preg rd;
+ OK (Asm.Pmovk sz rd' n pos)
+ ) else
+ Error (msg "Asmgen.basic_to_instruction: Pmovk uses a single register as both source and target")
+ | PArith (PArithPP Pmov rd rs) => do rd' <- iregsp_of_preg rd;
+ do rs' <- iregsp_of_preg rs;
+ OK (Asm.Pmov rd' rs')
+ | PArith (PArithPP (Paddadr id ofs) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Paddadr rd' rs' id ofs)
+ | PArith (PArithPP (Psbfiz sz r s) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Psbfiz sz rd' rs' r s)
+ | PArith (PArithPP (Psbfx sz r s) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Psbfx sz rd' rs' r s)
+ | PArith (PArithPP (Pubfiz sz r s) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Pubfiz sz rd' rs' r s)
+ | PArith (PArithPP (Pubfx sz r s) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Pubfx sz rd' rs' r s)
+ | PArith (PArithPP Pfmov rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfmov rd' rs')
+ | PArith (PArithPP Pfcvtds rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfcvtds rd' rs')
+ | PArith (PArithPP Pfcvtsd rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfcvtsd rd' rs')
+ | PArith (PArithPP (Pfabs sz) rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfabs sz rd' rs')
+ | PArith (PArithPP (Pfneg sz) rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfneg sz rd' rs')
+ | PArith (PArithPP (Pscvtf fsz isz) rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Pscvtf fsz isz rd' rs')
+ | PArith (PArithPP (Pucvtf fsz isz) rd rs) => do rd' <- freg_of_preg rd;
+ do rs' <- ireg_of_preg rs;
+ OK (Asm.Pucvtf fsz isz rd' rs')
+ | PArith (PArithPP (Pfcvtzs isz fsz) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfcvtzs isz fsz rd' rs')
+ | PArith (PArithPP (Pfcvtzu isz fsz) rd rs) => do rd' <- ireg_of_preg rd;
+ do rs' <- freg_of_preg rs;
+ OK (Asm.Pfcvtzu isz fsz rd' rs')
+ | PArith (PArithPP (Paddimm sz n) rd rs) => do rd' <- iregsp_of_preg rd;
+ do rs' <- iregsp_of_preg rs;
+ OK (Asm.Paddimm sz rd' rs' n)
+ | PArith (PArithPP (Psubimm sz n) rd rs) => do rd' <- iregsp_of_preg rd;
+ do rs' <- iregsp_of_preg rs;
+ OK (Asm.Psubimm sz rd' rs' n)
+
+ | PArith (PArithPPP (Pasrv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Pasrv sz rd' r1' r2')
+ | PArith (PArithPPP (Plslv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Plslv sz rd' r1' r2')
+ | PArith (PArithPPP (Plsrv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Plsrv sz rd' r1' r2')
+ | PArith (PArithPPP (Prorv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Prorv sz rd' r1' r2')
+ | PArith (PArithPPP Psmulh rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Psmulh rd' r1' r2')
+ | PArith (PArithPPP Pumulh rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Pumulh rd' r1' r2')
+ | PArith (PArithPPP (Psdiv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Psdiv sz rd' r1' r2')
+ | PArith (PArithPPP (Pudiv sz) rd r1 r2) => do rd' <- ireg_of_preg rd;
+ do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Pudiv sz rd' r1' r2')
+ | PArith (PArithPPP (Paddext x) rd r1 r2) => do rd' <- iregsp_of_preg rd;
+ do r1' <- iregsp_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Paddext rd' r1' r2' x)
+ | PArith (PArithPPP (Psubext x) rd r1 r2) => do rd' <- iregsp_of_preg rd;
+ do r1' <- iregsp_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Psubext rd' r1' r2' x)
+ | PArith (PArithPPP (Pfadd sz) rd r1 r2) => do rd' <- freg_of_preg rd;
+ do r1' <- freg_of_preg r1;
+ do r2' <- freg_of_preg r2;
+ OK (Asm.Pfadd sz rd' r1' r2')
+ | PArith (PArithPPP (Pfdiv sz) rd r1 r2) => do rd' <- freg_of_preg rd;
+ do r1' <- freg_of_preg r1;
+ do r2' <- freg_of_preg r2;
+ OK (Asm.Pfdiv sz rd' r1' r2')
+ | PArith (PArithPPP (Pfmul sz) rd r1 r2) => do rd' <- freg_of_preg rd;
+ do r1' <- freg_of_preg r1;
+ do r2' <- freg_of_preg r2;
+ OK (Asm.Pfmul sz rd' r1' r2')
+ | PArith (PArithPPP (Pfsub sz) rd r1 r2) => do rd' <- freg_of_preg rd;
+ do r1' <- freg_of_preg r1;
+ do r2' <- freg_of_preg r2;
+ OK (Asm.Pfsub sz rd' r1' r2')
+
+ | PArith (PArithRR0 (Pandimm sz n) rd r1) => OK (Asm.Pandimm sz rd r1 n)
+ | PArith (PArithRR0 (Peorimm sz n) rd r1) => OK (Asm.Peorimm sz rd r1 n)
+ | PArith (PArithRR0 (Porrimm sz n) rd r1) => OK (Asm.Porrimm sz rd r1 n)
+
+
+ | PArith (PArithRR0R (Padd sz s) rd r1 r2) => OK (Asm.Padd sz rd r1 r2 s)
+ | PArith (PArithRR0R (Psub sz s) rd r1 r2) => OK (Asm.Psub sz rd r1 r2 s)
+ | PArith (PArithRR0R (Pand sz s) rd r1 r2) => OK (Asm.Pand sz rd r1 r2 s)
+ | PArith (PArithRR0R (Pbic sz s) rd r1 r2) => OK (Asm.Pbic sz rd r1 r2 s)
+ | PArith (PArithRR0R (Peon sz s) rd r1 r2) => OK (Asm.Peon sz rd r1 r2 s)
+ | PArith (PArithRR0R (Peor sz s) rd r1 r2) => OK (Asm.Peor sz rd r1 r2 s)
+ | PArith (PArithRR0R (Porr sz s) rd r1 r2) => OK (Asm.Porr sz rd r1 r2 s)
+ | PArith (PArithRR0R (Porn sz s) rd r1 r2) => OK (Asm.Porn sz rd r1 r2 s)
+
+ | PArith (PArithARRRR0 (Pmadd sz) rd r1 r2 r3) => OK (Asm.Pmadd sz rd r1 r2 r3)
+ | PArith (PArithARRRR0 (Pmsub sz) rd r1 r2 r3) => OK (Asm.Pmsub sz rd r1 r2 r3)
+
+ | PArith (PArithComparisonPP (Pcmpext x) r1 r2) => do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Pcmpext r1' r2' x)
+ | PArith (PArithComparisonPP (Pcmnext x) r1 r2) => do r1' <- ireg_of_preg r1;
+ do r2' <- ireg_of_preg r2;
+ OK (Asm.Pcmnext r1' r2' x)
+ | PArith (PArithComparisonPP (Pfcmp sz) r1 r2) => do r1' <- freg_of_preg r1;
+ do r2' <- freg_of_preg r2;
+ OK (Asm.Pfcmp sz r1' r2')
+
+ | PArith (PArithComparisonR0R (Pcmp is s) r1 r2) => OK (Asm.Pcmp is r1 r2 s)
+ | PArith (PArithComparisonR0R (Pcmn is s) r1 r2) => OK (Asm.Pcmn is r1 r2 s)
+ | PArith (PArithComparisonR0R (Ptst is s) r1 r2) => OK (Asm.Ptst is r1 r2 s)
+
+ | PArith (PArithComparisonP (Pcmpimm sz n) r1) => do r1' <- ireg_of_preg r1;
+ OK (Asm.Pcmpimm sz r1' n)
+ | PArith (PArithComparisonP (Pcmnimm sz n) r1) => do r1' <- ireg_of_preg r1;
+ OK (Asm.Pcmnimm sz r1' n)
+ | PArith (PArithComparisonP (Ptstimm sz n) r1) => do r1' <- ireg_of_preg r1;
+ OK (Asm.Ptstimm sz r1' n)
+ | PArith (PArithComparisonP (Pfcmp0 sz) r1) => do r1' <- freg_of_preg r1;
+ OK (Asm.Pfcmp0 sz r1')
+
+ | PArith (Pcset rd c) => OK (Asm.Pcset rd c)
+ | PArith (Pfmovi fsz rd r1) => OK (Asm.Pfmovi fsz rd r1)
+ | PArith (Pcsel rd r1 r2 c) =>
+ match r1, r2 with
+ | IR r1', IR r2' => do rd' <- ireg_of_preg rd;
+ do r1'' <- ireg_of_preg r1';
+ do r2'' <- ireg_of_preg r2';
+ OK (Asm.Pcsel rd' r1'' r2'' c)
+ | FR r1', FR r2' => do rd' <- freg_of_preg rd;
+ do r1'' <- freg_of_preg r1';
+ do r2'' <- freg_of_preg r2';
+ OK (Asm.Pfsel rd' r1'' r2'' c)
+ | _, _ => Error (msg "Asmgen.basic_to_instruction: Pcsel is only defind on iregs and fregs.")
end
- | _, _ =>
- Error(msg "Asmgen.transl_op")
+ | PArith (Pfnmul fsz rd r1 r2) => OK (Asm.Pfnmul fsz rd r1 r2)
+
+ | PLoad (PLd_rd_a Pldrw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw rd' a)
+ | PLoad (PLd_rd_a Pldrw_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw_a rd' a)
+ | PLoad (PLd_rd_a Pldrx rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx rd' a)
+ | PLoad (PLd_rd_a Pldrx_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx_a rd' a)
+ | PLoad (PLd_rd_a (Pldrb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrb sz rd' a)
+ | PLoad (PLd_rd_a (Pldrsb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsb sz rd' a)
+ | PLoad (PLd_rd_a (Pldrh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrh sz rd' a)
+ | PLoad (PLd_rd_a (Pldrsh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsh sz rd' a)
+ | PLoad (PLd_rd_a Pldrzw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrzw rd' a)
+ | PLoad (PLd_rd_a Pldrsw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsw rd' a)
+
+ | PLoad (PLd_rd_a Pldrs rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrs rd' a)
+ | PLoad (PLd_rd_a Pldrd rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a)
+ | PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a)
+
+ | PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
+ do rd2' <- ireg_of_preg rd2;
+ OK (Asm.Pldpw rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
+ do rd2' <- ireg_of_preg rd2;
+ OK (Asm.Pldpx rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldps rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldps rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldpd rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldpd rd1' rd2' chk1 chk2 a)
+
+ | PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a)
+ | PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
+ | PStore (PSt_rs_a Pstrx r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx r' a)
+ | PStore (PSt_rs_a Pstrx_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx_a r' a)
+ | PStore (PSt_rs_a Pstrb r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrb r' a)
+ | PStore (PSt_rs_a Pstrh r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrh r' a)
+
+ | PStore (PSt_rs_a Pstrs r a) => do r' <- freg_of_preg r; OK (Asm.Pstrs r' a)
+ | PStore (PSt_rs_a Pstrd r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a)
+ | PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a)
+
+ | PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
+ do rs2' <- ireg_of_preg rs2;
+ OK (Asm.Pstpw rs1' rs2' chk1 chk2 a)
+ | PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
+ do rs2' <- ireg_of_preg rs2;
+ OK (Asm.Pstpx rs1' rs2' chk1 chk2 a)
+ | PStore (Pstp Pstps rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+ do rs2' <- freg_of_preg rs2;
+ OK (Asm.Pstps rs1' rs2' chk1 chk2 a)
+ | PStore (Pstp Pstpd rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+ do rs2' <- freg_of_preg rs2;
+ OK (Asm.Pstpd rs1' rs2' chk1 chk2 a)
+
+ | Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs)
+ | Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs)
+
+ | Ploadsymbol rd id => OK (Asm.Ploadsymbol rd id)
+
+ | Pcvtsw2x rd r1 => OK (Asm.Pcvtsw2x rd r1)
+
+ | Pcvtuw2x rd r1 => OK (Asm.Pcvtuw2x rd r1)
+
+ | Pcvtx2w rd => OK (Asm.Pcvtx2w rd)
+ | Pnop => OK (Asm.Pnop)
end.
-(** Translation of addressing modes *)
-
-Definition offset_representable (sz: Z) (ofs: int64) : bool :=
- let isz := Int64.repr sz in
- (** either unscaled 9-bit signed *)
- Int64.eq ofs (Int64.sign_ext 9 ofs) ||
- (** or scaled 12-bit unsigned *)
- (Int64.eq (Int64.modu ofs isz) Int64.zero
- && Int64.ltu ofs (Int64.shl isz (Int64.repr 12))).
-
-Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg)
- (insn: Asm.addressing -> instruction) (k: code) : res code :=
- match addr, args with
- | Aindexed ofs, a1 :: nil =>
- do r1 <- ireg_of a1;
- if offset_representable sz ofs then
- OK (insn (ADimm r1 ofs) :: k)
- else
- OK (loadimm64 X16 ofs (insn (ADreg r1 X16) :: k))
- | Aindexed2, a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (insn (ADreg r1 r2) :: k)
- | Aindexed2shift a, a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- if Int.eq a Int.zero then
- OK (insn (ADreg r1 r2) :: k)
- else if Int.eq (Int.shl Int.one a) (Int.repr sz) then
- OK (insn (ADlsl r1 r2 a) :: k)
- else
- OK (Padd X X16 r1 r2 (SOlsl a) :: insn (ADimm X16 Int64.zero) :: k)
- | Aindexed2ext x a, a1 :: a2 :: nil =>
- do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- if Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz) then
- OK (insn (match x with Xsgn32 => ADsxt r1 r2 a
- | Xuns32 => ADuxt r1 r2 a end) :: k)
- else
- OK (arith_extended Paddext (Padd X) X16 r1 r2 x a
- (insn (ADimm X16 Int64.zero) :: k))
- | Aglobal id ofs, nil =>
- assertion (negb (SelectOp.symbol_is_relocatable id));
- if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz
- then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k)
- else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k))
- | Ainstack ofs, nil =>
- let ofs := Ptrofs.to_int64 ofs in
- if offset_representable sz ofs then
- OK (insn (ADimm XSP ofs) :: k)
- else
- OK (loadimm64 X16 ofs (insn (ADreg XSP X16) :: k))
- | _, _ =>
- Error(msg "Asmgen.transl_addressing")
+Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction :=
+ match cfi with
+ | Pb l => Asm.Pb l
+ | Pbc c lbl => Asm.Pbc c lbl
+ | Pbl id sg => Asm.Pbl id sg
+ | Pbs id sg => Asm.Pbs id sg
+ | Pblr r sg => Asm.Pblr r sg
+ | Pbr r sg => Asm.Pbr r sg
+ | Pret r => Asm.Pret r
+ | Pcbnz sz r lbl => Asm.Pcbnz sz r lbl
+ | Pcbz sz r lbl => Asm.Pcbz sz r lbl
+ | Ptbnz sz r n lbl => Asm.Ptbnz sz r n lbl
+ | Ptbz sz r n lbl => Asm.Ptbz sz r n lbl
+ | Pbtbl r1 tbl => Asm.Pbtbl r1 tbl
end.
-(** Translation of loads and stores *)
-
-Definition transl_load (chunk: memory_chunk) (addr: Op.addressing)
- (args: list mreg) (dst: mreg) (k: code) : res code :=
- match chunk with
- | Mint8unsigned =>
- do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrb W rd) k
- | Mint8signed =>
- do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrsb W rd) k
- | Mint16unsigned =>
- do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrh W rd) k
- | Mint16signed =>
- do rd <- ireg_of dst; transl_addressing 2 addr args (Pldrsh W rd) k
- | Mint32 =>
- do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw rd) k
- | Mint64 =>
- do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx rd) k
- | Mfloat32 =>
- do rd <- freg_of dst; transl_addressing 4 addr args (Pldrs rd) k
- | Mfloat64 =>
- do rd <- freg_of dst; transl_addressing 8 addr args (Pldrd rd) k
- | Many32 =>
- do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw_a rd) k
- | Many64 =>
- do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx_a rd) k
+Definition control_to_instruction (c: control) :=
+ match c with
+ | PCtlFlow i => cf_instruction_to_instruction i
+ | Pbuiltin ef args res => Asm.Pbuiltin ef (List.map (map_builtin_arg DR) args) (map_builtin_res DR res)
end.
-Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
- (args: list mreg) (src: mreg) (k: code) : res code :=
- match chunk with
- | Mint8unsigned | Mint8signed =>
- do r1 <- ireg_of src; transl_addressing 1 addr args (Pstrb r1) k
- | Mint16unsigned | Mint16signed =>
- do r1 <- ireg_of src; transl_addressing 2 addr args (Pstrh r1) k
- | Mint32 =>
- do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw r1) k
- | Mint64 =>
- do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx r1) k
- | Mfloat32 =>
- do r1 <- freg_of src; transl_addressing 4 addr args (Pstrs r1) k
- | Mfloat64 =>
- do r1 <- freg_of src; transl_addressing 8 addr args (Pstrd r1) k
- | Many32 =>
- do r1 <- ireg_of src; transl_addressing 4 addr args (Pstrw_a r1) k
- | Many64 =>
- do r1 <- ireg_of src; transl_addressing 8 addr args (Pstrx_a r1) k
+Fixpoint unfold_label (ll: list label) :=
+ match ll with
+ | nil => nil
+ | l :: ll => Plabel l :: unfold_label ll
end.
-(** Register-indexed loads and stores *)
-
-Definition indexed_memory_access (insn: Asm.addressing -> instruction)
- (sz: Z) (base: iregsp) (ofs: ptrofs) (k: code) :=
- let ofs := Ptrofs.to_int64 ofs in
- if offset_representable sz ofs
- then insn (ADimm base ofs) :: k
- else loadimm64 X16 ofs (insn (ADreg base X16) :: k).
-
-Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: code) :=
- match ty, preg_of dst with
- | Tint, IR rd => OK (indexed_memory_access (Pldrw rd) 4 base ofs k)
- | Tlong, IR rd => OK (indexed_memory_access (Pldrx rd) 8 base ofs k)
- | Tsingle, FR rd => OK (indexed_memory_access (Pldrs rd) 4 base ofs k)
- | Tfloat, FR rd => OK (indexed_memory_access (Pldrd rd) 8 base ofs k)
- | Tany32, IR rd => OK (indexed_memory_access (Pldrw_a rd) 4 base ofs k)
- | Tany64, IR rd => OK (indexed_memory_access (Pldrx_a rd) 8 base ofs k)
- | Tany64, FR rd => OK (indexed_memory_access (Pldrd_a rd) 8 base ofs k)
- | _, _ => Error (msg "Asmgen.loadind")
- end.
-
-Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: code) :=
- match ty, preg_of src with
- | Tint, IR rd => OK (indexed_memory_access (Pstrw rd) 4 base ofs k)
- | Tlong, IR rd => OK (indexed_memory_access (Pstrx rd) 8 base ofs k)
- | Tsingle, FR rd => OK (indexed_memory_access (Pstrs rd) 4 base ofs k)
- | Tfloat, FR rd => OK (indexed_memory_access (Pstrd rd) 8 base ofs k)
- | Tany32, IR rd => OK (indexed_memory_access (Pstrw_a rd) 4 base ofs k)
- | Tany64, IR rd => OK (indexed_memory_access (Pstrx_a rd) 8 base ofs k)
- | Tany64, FR rd => OK (indexed_memory_access (Pstrd_a rd) 8 base ofs k)
- | _, _ => Error (msg "Asmgen.storeind")
- end.
-
-Definition loadptr (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: code) :=
- indexed_memory_access (Pldrx dst) 8 base ofs k.
-
-Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
- indexed_memory_access (Pstrx src) 8 base ofs k.
-
-(** Function epilogue *)
-
-Definition make_epilogue (f: Mach.function) (k: code) :=
- loadptr XSP f.(fn_retaddr_ofs) RA
- (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
-
-(** Translation of a Mach instruction. *)
-
-Definition transl_instr (f: Mach.function) (i: Mach.instruction)
- (r29_is_parent: bool) (k: code) : res code :=
- match i with
- | Mgetstack ofs ty dst =>
- loadind XSP ofs ty dst k
- | Msetstack src ofs ty =>
- storeind src XSP ofs ty k
- | Mgetparam ofs ty dst =>
- (* load via the frame pointer if it is valid *)
- do c <- loadind X29 ofs ty dst k;
- OK (if r29_is_parent then c else loadptr XSP f.(fn_link_ofs) X29 c)
- | Mop op args res =>
- transl_op op args res k
- | Mload chunk addr args dst =>
- transl_load chunk addr args dst k
- | Mstore chunk addr args src =>
- transl_store chunk addr args src k
- | Mcall sig (inl r) =>
- do r1 <- ireg_of r; OK (Pblr r1 sig :: k)
- | Mcall sig (inr symb) =>
- OK (Pbl symb sig :: k)
- | Mtailcall sig (inl r) =>
- do r1 <- ireg_of r;
- OK (make_epilogue f (Pbr r1 sig :: k))
- | Mtailcall sig (inr symb) =>
- OK (make_epilogue f (Pbs symb sig :: k))
- | Mbuiltin ef args res =>
- OK (Pbuiltin ef (List.map (map_builtin_arg preg_of) args) (map_builtin_res preg_of res) :: k)
- | Mlabel lbl =>
- OK (Plabel lbl :: k)
- | Mgoto lbl =>
- OK (Pb lbl :: k)
- | Mcond cond args lbl =>
- transl_cond_branch cond args lbl k
- | Mjumptable arg tbl =>
- do r <- ireg_of arg;
- OK (Pbtbl r tbl :: k)
- | Mreturn =>
- OK (make_epilogue f (Pret RA :: k))
- end.
-
-(** Translation of a code sequence *)
-
-Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
- match i with
- | Msetstack src ofs ty => before
- | Mgetparam ofs ty dst => negb (mreg_eq dst R29)
- | Mop op args res => before && negb (mreg_eq res R29)
- | _ => false
- end.
-
-(** This is the naive definition that we no longer use because it
- is not tail-recursive. It is kept as specification. *)
-
-Fixpoint transl_code (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
- match il with
+Fixpoint unfold_body (lb: list basic) : res Asm.code :=
+ match lb with
| nil => OK nil
- | i1 :: il' =>
- do k <- transl_code f il' (it1_is_parent it1p i1);
- transl_instr f i1 it1p k
+ | b :: lb =>
+ (* x_is: x's instructions *)
+ do b_is <- basic_to_instruction b;
+ do lb_is <- unfold_body lb;
+ OK (b_is :: lb_is)
end.
-(** This is an equivalent definition in continuation-passing style
- that runs in constant stack space. *)
-
-Fixpoint transl_code_rec (f: Mach.function) (il: list Mach.instruction)
- (it1p: bool) (k: code -> res code) :=
- match il with
- | nil => k nil
- | i1 :: il' =>
- transl_code_rec f il' (it1_is_parent it1p i1)
- (fun c1 => do c2 <- transl_instr f i1 it1p c1; k c2)
+Definition unfold_exit (oc: option control) :=
+ match oc with
+ | None => nil
+ | Some c => control_to_instruction c :: nil
end.
-Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bool) :=
- transl_code_rec f il it1p (fun c => OK c).
+Definition unfold_bblock (bb: bblock) :=
+ let lbl := unfold_label (header bb) in
+ (*
+ * With this dynamically checked assumption on a previous optimization we
+ * can show that [Asmblock.label_pos] and [Asm.label_pos] retrieve the same
+ * exact address. Maintaining this property allows us to use the simple
+ * formulation of match_states defined as equality.
+ * Otherwise we would have to deal with the case of a basic block header
+ * that has multiple labels. Asmblock.label_pos will, for all labels, point
+ * to the same location at the beginning of the basic block. Asm.label_pos
+ * on the other hand could return a position pointing into the original
+ * basic block.
+ *)
+ if zle (list_length_z (header bb)) 1 then
+ do bo_is <- unfold_body (body bb);
+ OK (lbl ++ bo_is ++ unfold_exit (exit bb))
+ else
+ Error (msg "Asmgen.unfold_bblock: Multiple labels were generated.").
+
+Fixpoint unfold (bbs: Asmblock.bblocks) : res Asm.code :=
+ match bbs with
+ | nil => OK (nil)
+ | bb :: bbs' =>
+ do bb_is <- unfold_bblock bb;
+ do bbs'_is <- unfold bbs';
+ OK (bb_is ++ bbs'_is)
+ end.
-(** Translation of a whole function. Note that we must check
- that the generated code contains less than [2^32] instructions,
- otherwise the offset part of the [PC] code pointer could wrap
- around, leading to incorrect executions. *)
+Definition transf_function (f: Asmblock.function) : res Asm.function :=
+ do c <- unfold (Asmblock.fn_blocks f);
+ if zlt Ptrofs.max_unsigned (list_length_z c)
+ then Error (msg "Asmgen.trans_function: code size exceeded")
+ else OK {| Asm.fn_sig := Asmblock.fn_sig f; Asm.fn_code := c |}.
-Definition transl_function (f: Mach.function) :=
- do c <- transl_code' f f.(Mach.fn_code) true;
- OK (mkfunction f.(Mach.fn_sig)
- (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
- storeptr RA XSP f.(fn_retaddr_ofs) c)).
+Definition transf_fundef (f: Asmblock.fundef) : res Asm.fundef :=
+ transf_partial_fundef transf_function f.
-Definition transf_function (f: Mach.function) : res Asm.function :=
- do tf <- transl_function f;
- if zlt Ptrofs.max_unsigned (list_length_z tf.(fn_code))
- then Error (msg "code size exceeded")
- else OK tf.
+Definition transf_program (p: Asmblock.program) : res Asm.program :=
+ transform_partial_program transf_fundef p.
-Definition transf_fundef (f: Mach.fundef) : res Asm.fundef :=
- transf_partial_fundef transf_function f.
+End Asmblock_TRANSF.
Definition transf_program (p: Mach.program) : res Asm.program :=
- transform_partial_program transf_fundef p.
+ let mbp := Machblockgen.transf_program p in
+ do abp <- Asmblockgen.transf_program mbp;
+ do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;
+ Asmblock_TRANSF.transf_program abp'.
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index dc0bc509..d27b3f8c 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1,24 +1,35 @@
-(* *********************************************************************)
-(* *)
-(* The Compcert verified compiler *)
-(* *)
-(* Xavier Leroy, Collège de France and INRIA Paris *)
-(* *)
-(* Copyright Institut National de Recherche en Informatique et en *)
-(* Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the INRIA Non-Commercial License Agreement. *)
-(* *)
-(* *********************************************************************)
-
-(** Correctness proof for AArch64 code generation. *)
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* Justus Fasse UGA, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
Require Import Coqlib Errors.
Require Import Integers Floats AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
-Require Import Op Locations Mach Conventions Asm.
-Require Import Asmgen Asmgenproof0 Asmgenproof1.
+Require Import Op Locations Machblock Conventions Asm Asmblock.
+Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
+Require Import Asmgen.
+Require Import Axioms.
+Require Import IterList.
+Require Import Ring Lia.
-Definition match_prog (p: Mach.program) (tp: Asm.program) :=
+Module Asmblock_PRESERVATION.
+
+Import Asmblock_TRANSF.
+
+Definition match_prog (p: Asmblock.program) (tp: Asm.program) :=
match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
Lemma transf_program_match:
@@ -29,20 +40,36 @@ Qed.
Section PRESERVATION.
-Variable prog: Mach.program.
+Variable prog: Asmblock.program.
Variable tprog: Asm.program.
Hypothesis TRANSF: match_prog prog tprog.
Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.
+Definition lk :aarch64_linker := {| Asmblock.symbol_low:=Asm.symbol_low tge; Asmblock.symbol_high:=Asm.symbol_high tge|}.
+
Lemma symbols_preserved:
forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
Proof (Genv.find_symbol_match TRANSF).
+Lemma symbol_addresses_preserved:
+ forall (s: ident) (ofs: ptrofs),
+ Genv.symbol_address tge s ofs = Genv.symbol_address ge s ofs.
+Proof.
+ intros; unfold Genv.symbol_address; rewrite symbols_preserved; reflexivity.
+Qed.
+
Lemma senv_preserved:
Senv.equiv ge tge.
Proof (Genv.senv_match TRANSF).
+Lemma symbol_high_low: forall (id: ident) (ofs: ptrofs),
+ Val.addl (Asmblock.symbol_high lk id ofs) (Asmblock.symbol_low lk id ofs) = Genv.symbol_address ge id ofs.
+Proof.
+ unfold lk; simpl. intros; rewrite Asm.symbol_high_low; unfold Genv.symbol_address;
+ rewrite symbols_preserved; reflexivity.
+Qed.
+
Lemma functions_translated:
forall b f,
Genv.find_funct_ptr ge b = Some f ->
@@ -50,980 +77,2242 @@ Lemma functions_translated:
Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
Proof (Genv.find_funct_ptr_transf_partial TRANSF).
-Lemma functions_transl:
- forall fb f tf,
+Lemma internal_functions_translated:
+ forall b f,
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ exists tf,
+ Genv.find_funct_ptr tge b = Some (Internal tf) /\ transf_function f = OK tf.
+Proof.
+ intros; exploit functions_translated; eauto.
+ intros (x & FIND & TRANSf).
+ apply bind_inversion in TRANSf.
+ destruct TRANSf as (tf & TRANSf & X).
+ inv X.
+ eauto.
+Qed.
+
+Lemma internal_functions_unfold:
+ forall b f,
+ Genv.find_funct_ptr ge b = Some (Internal f) ->
+ exists tc,
+ Genv.find_funct_ptr tge b = Some (Internal (Asm.mkfunction (fn_sig f) tc))
+ /\ unfold (fn_blocks f) = OK tc
+ /\ list_length_z tc <= Ptrofs.max_unsigned.
+Proof.
+ intros.
+ exploit internal_functions_translated; eauto.
+ intros (tf & FINDtf & TRANStf).
+ unfold transf_function in TRANStf.
+ monadInv TRANStf.
+ destruct (zlt _ _); try congruence.
+ inv EQ. inv EQ0.
+ eexists; intuition eauto.
+ lia.
+Qed.
+
+
+Inductive is_nth_inst (bb: bblock) (n:Z) (i:Asm.instruction): Prop :=
+ | is_nth_label l:
+ list_nth_z (header bb) n = Some l ->
+ i = Asm.Plabel l ->
+ is_nth_inst bb n i
+ | is_nth_basic bi:
+ list_nth_z (body bb) (n - list_length_z (header bb)) = Some bi ->
+ basic_to_instruction bi = OK i ->
+ is_nth_inst bb n i
+ | is_nth_ctlflow cfi:
+ (exit bb) = Some cfi ->
+ n = size bb - 1 ->
+ i = control_to_instruction cfi ->
+ is_nth_inst bb n i.
+
+(* Asmblock and Asm share the same definition of state *)
+Definition match_states (s1 s2 : state) := s1 = s2.
+
+Inductive match_internal: forall n, state -> state -> Prop :=
+ | match_internal_intro n rs1 m1 rs2 m2
+ (MEM: m1 = m2)
+ (AG: forall r, r <> PC -> rs1 r = rs2 r)
+ (AGPC: Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC)
+ : match_internal n (State rs1 m1) (State rs2 m2).
+
+Lemma match_internal_set_parallel:
+ forall n rs1 m1 rs2 m2 r val,
+ match_internal n (State rs1 m1) (State rs2 m2) ->
+ r <> PC ->
+ match_internal n (State (rs1#r <- val) m1) (State (rs2#r <- val ) m2).
+Proof.
+ intros n rs1 m1 rs2 m2 r v MI.
+ inversion MI; constructor; auto.
+ - intros r' NOTPC.
+ unfold Pregmap.set; rewrite AG. reflexivity. assumption.
+ - unfold Pregmap.set; destruct (PregEq.eq PC r); congruence.
+Qed.
+
+Lemma agree_match_states:
+ forall rs1 m1 rs2 m2,
+ match_states (State rs1 m1) (State rs2 m2) ->
+ forall r : preg, rs1#r = rs2#r.
+Proof.
+ intros.
+ unfold match_states in *.
+ assert (rs1 = rs2) as EQ. { congruence. }
+ rewrite EQ. reflexivity.
+Qed.
+
+Lemma match_states_set_parallel:
+ forall rs1 m1 rs2 m2 r v,
+ match_states (State rs1 m1) (State rs2 m2) ->
+ match_states (State (rs1#r <- v) m1) (State (rs2#r <- v) m2).
+Proof.
+ intros; unfold match_states in *.
+ assert (rs1 = rs2) as RSEQ. { congruence. }
+ assert (m1 = m2) as MEQ. { congruence. }
+ rewrite RSEQ in *; rewrite MEQ in *; unfold Pregmap.set; reflexivity.
+Qed.
+
+(* match_internal from match_states *)
+Lemma mi_from_ms:
+ forall rs1 m1 rs2 m2 b ofs,
+ match_states (State rs1 m1) (State rs2 m2) ->
+ rs1#PC = Vptr b ofs ->
+ match_internal 0 (State rs1 m1) (State rs2 m2).
+Proof.
+ intros rs1 m1 rs2 m2 b ofs MS PCVAL.
+ inv MS; constructor; auto; unfold Val.offset_ptr;
+ rewrite PCVAL; rewrite Ptrofs.add_zero; reflexivity.
+Qed.
+
+Lemma transf_initial_states:
+ forall s1, Asmblock.initial_state prog s1 ->
+ exists s2, Asm.initial_state tprog s2 /\ match_states s1 s2.
+Proof.
+ intros ? INIT_s1.
+ inversion INIT_s1 as (m, ?, ge0, rs). unfold ge0 in *.
+ econstructor; split.
+ - econstructor.
+ eapply (Genv.init_mem_transf_partial TRANSF); eauto.
+ - rewrite (match_program_main TRANSF); rewrite symbol_addresses_preserved.
+ unfold rs; reflexivity.
+Qed.
+
+Lemma transf_final_states:
+ forall s1 s2 r,
+ match_states s1 s2 -> Asmblock.final_state s1 r -> Asm.final_state s2 r.
+Proof.
+ intros s1 s2 r MATCH FINAL_s1.
+ inv FINAL_s1; inv MATCH; constructor; assumption.
+Qed.
+
+Definition max_pos (f : Asm.function) := list_length_z f.(Asm.fn_code).
+
+Lemma functions_bound_max_pos: forall fb f tf,
Genv.find_funct_ptr ge fb = Some (Internal f) ->
transf_function f = OK tf ->
- Genv.find_funct_ptr tge fb = Some (Internal tf).
+ max_pos tf <= Ptrofs.max_unsigned.
+Proof.
+ intros fb f tf FINDf TRANSf.
+ unfold transf_function in TRANSf.
+ apply bind_inversion in TRANSf.
+ destruct TRANSf as (c & TRANSf).
+ destruct TRANSf as (_ & TRANSf).
+ destruct (zlt _ _).
+ - inversion TRANSf.
+ - unfold max_pos.
+ assert (Asm.fn_code tf = c) as H. { inversion TRANSf as (H'); auto. }
+ rewrite H; lia.
+Qed.
+
+Lemma one_le_max_unsigned:
+ 1 <= Ptrofs.max_unsigned.
+Proof.
+ unfold Ptrofs.max_unsigned; simpl; unfold Ptrofs.wordsize;
+ unfold Wordsize_Ptrofs.wordsize; destruct Archi.ptr64; simpl; lia.
+Qed.
+
+(* NB: does not seem useful anymore, with the [exec_header_simulation] proof below
+Lemma match_internal_exec_label:
+ forall n rs1 m1 rs2 m2 l fb f tf,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transf_function f = OK tf ->
+ match_internal n (State rs1 m1) (State rs2 m2) ->
+ n >= 0 ->
+ (* There is no step if n is already max_pos *)
+ n < (max_pos tf) ->
+ exists rs2' m2', Asm.exec_instr tge tf (Asm.Plabel l) rs2 m2 = Next rs2' m2'
+ /\ match_internal (n+1) (State rs1 m1) (State rs2' m2').
+Proof.
+ intros. (* XXX auto generated names *)
+ unfold Asm.exec_instr.
+ eexists; eexists; split; eauto.
+ inversion H1; constructor; auto.
+ - intros; unfold Asm.nextinstr; unfold Pregmap.set;
+ destruct (PregEq.eq r PC); auto; contradiction.
+ - unfold Asm.nextinstr; rewrite Pregmap.gss; unfold Ptrofs.one.
+ rewrite <- AGPC; rewrite Val.offset_ptr_assoc; unfold Ptrofs.add;
+ rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; trivial.
+ + split.
+ * apply Z.le_0_1.
+ * apply one_le_max_unsigned.
+ + split.
+ * apply Z.ge_le; assumption.
+ * rewrite <- functions_bound_max_pos; eauto; lia.
+Qed.
+*)
+
+Lemma incrPC_agree_but_pc:
+ forall rs r ofs,
+ r <> PC ->
+ (incrPC ofs rs)#r = rs#r.
+Proof.
+ intros rs r ofs NOTPC.
+ unfold incrPC; unfold Pregmap.set; destruct (PregEq.eq r PC).
+ - contradiction.
+ - reflexivity.
+Qed.
+
+Lemma bblock_non_empty bb: body bb <> nil \/ exit bb <> None.
+Proof.
+ destruct bb. simpl.
+ unfold non_empty_bblockb in correct.
+ unfold non_empty_body, non_empty_exit, Is_true in correct.
+ destruct body, exit.
+ - right. discriminate.
+ - contradiction.
+ - right. discriminate.
+ - left. discriminate.
+Qed.
+
+Lemma list_length_z_aux_increase A (l: list A): forall acc,
+ list_length_z_aux l acc >= acc.
+Proof.
+ induction l; simpl; intros.
+ - lia.
+ - generalize (IHl (Z.succ acc)). lia.
+Qed.
+
+Lemma bblock_size_aux_pos bb: list_length_z (body bb) + Z.of_nat (length_opt (exit bb)) >= 1.
+Proof.
+ destruct (bblock_non_empty bb), (body bb) as [|hd tl], (exit bb); simpl;
+ try (congruence || lia);
+ unfold list_length_z; simpl;
+ generalize (list_length_z_aux_increase _ tl 1); lia.
+Qed.
+
+
+Lemma list_length_add_acc A (l : list A) acc:
+ list_length_z_aux l acc = (list_length_z l) + acc.
+Proof.
+ unfold list_length_z, list_length_z_aux. simpl.
+ fold list_length_z_aux.
+ rewrite (list_length_z_aux_shift l acc 0).
+ lia.
+Qed.
+
+Lemma list_length_z_cons A hd (tl : list A):
+ list_length_z (hd :: tl) = list_length_z tl + 1.
Proof.
- intros. exploit functions_translated; eauto. intros [tf' [A B]].
- monadInv B. rewrite H0 in EQ; inv EQ; auto.
+ unfold list_length_z; simpl; rewrite list_length_add_acc; reflexivity.
Qed.
-(** * Properties of control flow *)
+Lemma bblock_size_aux bb: size bb = list_length_z (header bb) + list_length_z (body bb) + Z.of_nat (length_opt (exit bb)).
+Proof.
+ unfold size.
+ repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). reflexivity.
+Qed.
+
+Lemma header_size_lt_block_size bb:
+ list_length_z (header bb) < size bb.
+Proof.
+ rewrite bblock_size_aux.
+ generalize (bblock_non_empty bb); intros NEMPTY; destruct NEMPTY as [HDR|EXIT].
+ - destruct (body bb); try contradiction; rewrite list_length_z_cons;
+ repeat rewrite list_length_z_nat; lia.
+ - destruct (exit bb); try contradiction; simpl; repeat rewrite list_length_z_nat; lia.
+Qed.
-Lemma transf_function_no_overflow:
- forall f tf,
- transf_function f = OK tf -> list_length_z tf.(fn_code) <= Ptrofs.max_unsigned.
+Lemma body_size_le_block_size bb:
+ list_length_z (body bb) <= size bb.
Proof.
- intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
+ rewrite bblock_size_aux; repeat rewrite list_length_z_nat; lia.
+Qed.
+
+
+Lemma bblock_size_pos bb: size bb >= 1.
+Proof.
+ rewrite (bblock_size_aux bb).
+ generalize (bblock_size_aux_pos bb).
+ generalize (list_length_z_pos (header bb)).
lia.
Qed.
-Lemma exec_straight_exec:
- forall fb f c ep tf tc c' rs m rs' m',
- transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
- exec_straight tge tf tc rs m c' rs' m' ->
- plus step tge (State rs m) E0 (State rs' m').
-Proof.
- intros. inv H.
- eapply exec_straight_steps_1; eauto.
- eapply transf_function_no_overflow; eauto.
- eapply functions_transl; eauto.
-Qed.
-
-Lemma exec_straight_at:
- forall fb f c ep tf tc c' ep' tc' rs m rs' m',
- transl_code_at_pc ge (rs PC) fb f c ep tf tc ->
- transl_code f c' ep' = OK tc' ->
- exec_straight tge tf tc rs m tc' rs' m' ->
- transl_code_at_pc ge (rs' PC) fb f c' ep' tf tc'.
-Proof.
- intros. inv H.
- exploit exec_straight_steps_2; eauto.
- eapply transf_function_no_overflow; eauto.
- eapply functions_transl; eauto.
- intros [ofs' [PC' CT']].
- rewrite PC'. constructor; auto.
-Qed.
-
-(** The following lemmas show that the translation from Mach to Asm
- preserves labels, in the sense that the following diagram commutes:
-<<
- translation
- Mach code ------------------------ Asm instr sequence
- | |
- | Mach.find_label lbl find_label lbl |
- | |
- v v
- Mach code tail ------------------- Asm instr seq tail
- translation
->>
- The proof demands many boring lemmas showing that Asm constructor
- functions do not introduce new labels.
-*)
+Lemma unfold_car_cdr bb bbs tc:
+ unfold (bb :: bbs) = OK tc ->
+ exists tbb tc', unfold_bblock bb = OK tbb
+ /\ unfold bbs = OK tc'
+ /\ unfold (bb :: bbs) = OK (tbb ++ tc').
+Proof.
+ intros UNFOLD.
+ assert (UF := UNFOLD).
+ unfold unfold in UNFOLD.
+ apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UBB). destruct UBB as (UBB & REST).
+ apply bind_inversion in REST. destruct REST as (? & UNFOLD').
+ fold unfold in UNFOLD'. destruct UNFOLD' as (UNFOLD' & UNFOLD).
+ rewrite <- UNFOLD in UF.
+ eauto.
+Qed.
-Section TRANSL_LABEL.
+Lemma unfold_cdr bb bbs tc:
+ unfold (bb :: bbs) = OK tc ->
+ exists tc', unfold bbs = OK tc'.
+Proof.
+ intros; exploit unfold_car_cdr; eauto. intros (_ & ? & _ & ? & _).
+ eexists; eauto.
+Qed.
-Remark loadimm_z_label: forall sz rd l k, tail_nolabel k (loadimm_z sz rd l k).
-Proof.
- intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
- induction l as [ | [n p] l]; simpl; TailNoLabel.
+Lemma unfold_car bb bbs tc:
+ unfold (bb :: bbs) = OK tc ->
+ exists tbb, unfold_bblock bb = OK tbb.
+Proof.
+ intros; exploit unfold_car_cdr; eauto. intros (? & _ & ? & _ & _).
+ eexists; eauto.
Qed.
-Remark loadimm_n_label: forall sz rd l k, tail_nolabel k (loadimm_n sz rd l k).
-Proof.
- intros; destruct l as [ | [n1 p1] l]; simpl; TailNoLabel.
- induction l as [ | [n p] l]; simpl; TailNoLabel.
+Lemma all_blocks_translated:
+ forall bbs tc,
+ unfold bbs = OK tc ->
+ forall bb, In bb bbs ->
+ exists c, unfold_bblock bb = OK c.
+Proof.
+ induction bbs as [| bb bbs IHbbs].
+ - contradiction.
+ - intros ? UNFOLD ? IN.
+ (* unfold proceeds by unfolding the basic block at the head of the list and
+ * then recurring *)
+ exploit unfold_car_cdr; eauto. intros (? & ? & ? & ? & _).
+ (* basic block is either in head or tail *)
+ inversion IN as [EQ | NEQ].
+ + rewrite <- EQ; eexists; eauto.
+ + eapply IHbbs; eauto.
Qed.
-Remark loadimm_label: forall sz rd n k, tail_nolabel k (loadimm sz rd n k).
+Lemma entire_body_translated:
+ forall lbi tc,
+ unfold_body lbi = OK tc ->
+ forall bi, In bi lbi ->
+ exists bi', basic_to_instruction bi = OK bi'.
Proof.
- unfold loadimm; intros. destruct Nat.leb; [apply loadimm_z_label|apply loadimm_n_label].
+ induction lbi as [| a lbi IHlbi].
+ - intros. contradiction.
+ - intros tc UNFOLD_BODY bi IN.
+ unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
+ destruct UNFOLD_BODY as (? & TRANSbi & REST).
+ apply bind_inversion in REST. destruct REST as (? & UNFOLD_BODY' & ?).
+ fold unfold_body in UNFOLD_BODY'.
+
+ inversion IN as [EQ | NEQ].
+ + rewrite <- EQ; eauto.
+ + eapply IHlbi; eauto.
Qed.
-Hint Resolve loadimm_label: labels.
-Remark loadimm32_label: forall r n k, tail_nolabel k (loadimm32 r n k).
+Lemma bblock_in_bblocks bbs bb: forall
+ tc pos
+ (UNFOLD: unfold bbs = OK tc)
+ (FINDBB: find_bblock pos bbs = Some bb),
+ In bb bbs.
Proof.
- unfold loadimm32; intros. destruct (is_logical_imm32 n); TailNoLabel.
+ induction bbs as [| b bbs IH].
+ - intros. inversion FINDBB.
+ - destruct pos.
+ + intros. inversion FINDBB as (EQ). rewrite <- EQ. apply in_eq.
+ + intros.
+ exploit unfold_cdr; eauto. intros (tc' & UNFOLD').
+ unfold find_bblock in FINDBB. simpl in FINDBB.
+ fold find_bblock in FINDBB.
+ apply in_cons. eapply IH; eauto.
+ + intros. inversion FINDBB.
Qed.
-Hint Resolve loadimm32_label: labels.
-Remark loadimm64_label: forall r n k, tail_nolabel k (loadimm64 r n k).
+Lemma blocks_translated tc pos bbs bb: forall
+ (UNFOLD: unfold bbs = OK tc)
+ (FINDBB: find_bblock pos bbs = Some bb),
+ exists tbb, unfold_bblock bb = OK tbb.
Proof.
- unfold loadimm64; intros. destruct (is_logical_imm64 n); TailNoLabel.
+ intros; exploit bblock_in_bblocks; eauto; intros;
+ eapply all_blocks_translated; eauto.
Qed.
-Hint Resolve loadimm64_label: labels.
-Remark addimm_aux: forall insn rd r1 n k,
- (forall rd r1 n, nolabel (insn rd r1 n)) ->
- tail_nolabel k (addimm_aux insn rd r1 n k).
+Lemma size_header b pos f bb: forall
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock pos (fn_blocks f) = Some bb),
+ list_length_z (header bb) <= 1.
Proof.
- unfold addimm_aux; intros.
- destruct Z.eqb. TailNoLabel. destruct Z.eqb; TailNoLabel.
+ intros.
+ exploit internal_functions_unfold; eauto.
+ intros (tc & FINDtf & TRANStf & ?).
+ exploit blocks_translated; eauto. intros TBB.
+
+ unfold unfold_bblock in TBB.
+ destruct (zle (list_length_z (header bb)) 1).
+ - assumption.
+ - destruct TBB as (? & TBB). discriminate TBB.
Qed.
-Remark addimm32_label: forall rd r1 n k, tail_nolabel k (addimm32 rd r1 n k).
+Lemma list_nth_z_neg A (l: list A): forall n,
+ n < 0 -> list_nth_z l n = None.
Proof.
- unfold addimm32; intros.
- destruct Int.eq. apply addimm_aux; intros; red; auto.
- destruct Int.eq. apply addimm_aux; intros; red; auto.
- destruct Int.lt; eapply tail_nolabel_trans; TailNoLabel.
+ induction l; simpl; auto.
+ intros n H; destruct (zeq _ _); (try eapply IHl); lia.
Qed.
-Hint Resolve addimm32_label: labels.
-Remark addimm64_label: forall rd r1 n k, tail_nolabel k (addimm64 rd r1 n k).
+Lemma find_bblock_neg bbs: forall pos,
+ pos < 0 -> find_bblock pos bbs = None.
Proof.
- unfold addimm64; intros.
- destruct Int64.eq. apply addimm_aux; intros; red; auto.
- destruct Int64.eq. apply addimm_aux; intros; red; auto.
- destruct Int64.lt; eapply tail_nolabel_trans; TailNoLabel.
+ induction bbs; simpl; auto.
+ intros. destruct (zlt pos 0). { reflexivity. }
+ destruct (zeq pos 0); contradiction.
Qed.
-Hint Resolve addimm64_label: labels.
-Remark logicalimm32_label: forall insn1 insn2 rd r1 n k,
- (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
- (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
- tail_nolabel k (logicalimm32 insn1 insn2 rd r1 n k).
+Lemma equal_header_size bb:
+ length (header bb) = length (unfold_label (header bb)).
Proof.
- unfold logicalimm32; intros.
- destruct (is_logical_imm32 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+ induction (header bb); auto.
+ simpl. rewrite IHl. auto.
Qed.
-Remark logicalimm64_label: forall insn1 insn2 rd r1 n k,
- (forall rd r1 n, nolabel (insn1 rd r1 n)) ->
- (forall rd r1 r2 s, nolabel (insn2 rd r1 r2 s)) ->
- tail_nolabel k (logicalimm64 insn1 insn2 rd r1 n k).
+Lemma equal_body_size:
+ forall bb tb,
+ unfold_body (body bb) = OK tb ->
+ length (body bb) = length tb.
Proof.
- unfold logicalimm64; intros.
- destruct (is_logical_imm64 n). TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+ intros bb. induction (body bb).
+ - simpl. intros ? H. inversion H. auto.
+ - intros tb H. simpl in H. apply bind_inversion in H. destruct H as (? & BI & TAIL).
+ apply bind_inversion in TAIL. destruct TAIL as (tb' & BODY' & CONS). inv CONS.
+ simpl. specialize (IHl tb' BODY'). rewrite IHl. reflexivity.
Qed.
-Remark move_extended_label: forall rd r1 ex a k, tail_nolabel k (move_extended rd r1 ex a k).
+Lemma equal_exit_size bb:
+ length_opt (exit bb) = length (unfold_exit (exit bb)).
Proof.
- unfold move_extended, move_extended_base; intros. destruct Int.eq, ex; TailNoLabel.
+ destruct (exit bb); trivial.
Qed.
-Hint Resolve move_extended_label: labels.
-Remark arith_extended_label: forall insnX insnS rd r1 r2 ex a k,
- (forall rd r1 r2 x, nolabel (insnX rd r1 r2 x)) ->
- (forall rd r1 r2 s, nolabel (insnS rd r1 r2 s)) ->
- tail_nolabel k (arith_extended insnX insnS rd r1 r2 ex a k).
+Lemma bblock_size_preserved bb tb:
+ unfold_bblock bb = OK tb ->
+ size bb = list_length_z tb.
Proof.
- unfold arith_extended; intros. destruct Int.ltu.
- TailNoLabel.
- destruct ex; simpl; TailNoLabel.
+ unfold unfold_bblock. intros UNFOLD_BBLOCK.
+ destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
+ apply bind_inversion in UNFOLD_BBLOCK. destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & CONS).
+ inversion CONS.
+ unfold size.
+ rewrite equal_header_size, equal_exit_size.
+ erewrite equal_body_size; eauto.
+ rewrite list_length_z_nat.
+ repeat (rewrite app_length).
+ rewrite plus_assoc. auto.
Qed.
-Remark loadsymbol_label: forall r id ofs k, tail_nolabel k (loadsymbol r id ofs k).
+Lemma size_of_blocks_max_pos_aux:
+ forall bbs tbbs pos bb,
+ find_bblock pos bbs = Some bb ->
+ unfold bbs = OK tbbs ->
+ pos + size bb <= list_length_z tbbs.
Proof.
- intros; unfold loadsymbol.
- destruct (SelectOp.symbol_is_relocatable id); TailNoLabel. destruct Ptrofs.eq; TailNoLabel.
-Qed.
-Hint Resolve loadsymbol_label: labels.
+ induction bbs as [| bb ? IHbbs].
+ - intros tbbs ? ? FINDBB; inversion FINDBB.
+ - simpl; intros tbbs pos bb' FINDBB UNFOLD.
+ apply bind_inversion in UNFOLD; destruct UNFOLD as (tbb & UNFOLD_BBLOCK & H).
+ apply bind_inversion in H; destruct H as (tbbs' & UNFOLD & CONS).
+ inv CONS.
+ destruct (zlt pos 0). { discriminate FINDBB. }
+ destruct (zeq pos 0).
+ + inv FINDBB.
+ exploit bblock_size_preserved; eauto; intros SIZE; rewrite SIZE.
+ repeat (rewrite list_length_z_nat). rewrite app_length, Nat2Z.inj_add.
+ lia.
+ + generalize (IHbbs tbbs' (pos - size bb) bb' FINDBB UNFOLD). intros IH.
+ exploit bblock_size_preserved; eauto; intros SIZE.
+ repeat (rewrite list_length_z_nat); rewrite app_length.
+ rewrite Nat2Z.inj_add; repeat (rewrite <- list_length_z_nat).
+ lia.
+Qed.
-Remark transl_cond_label: forall cond args k c,
- transl_cond cond args k = OK c -> tail_nolabel k c.
+Lemma size_of_blocks_max_pos pos f tf bi:
+ find_bblock pos (fn_blocks f) = Some bi ->
+ transf_function f = OK tf ->
+ pos + size bi <= max_pos tf.
Proof.
- unfold transl_cond; intros; destruct cond; TailNoLabel.
-- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm32; TailNoLabel. destruct is_arith_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm32; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_arith_imm64; TailNoLabel. destruct is_arith_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
-- destruct is_logical_imm64; TailNoLabel. eapply tail_nolabel_trans; TailNoLabel.
+ unfold transf_function, max_pos.
+ intros FINDBB UNFOLD.
+ apply bind_inversion in UNFOLD. destruct UNFOLD as (? & UNFOLD & H).
+ destruct (zlt Ptrofs.max_unsigned (list_length_z x)). { discriminate H. }
+ inv H. simpl.
+ eapply size_of_blocks_max_pos_aux; eauto.
Qed.
-Remark transl_cond_branch_default_label: forall cond args lbl k c,
- transl_cond_branch_default cond args lbl k = OK c -> tail_nolabel k c.
+Lemma unfold_bblock_not_nil bb:
+ unfold_bblock bb = OK nil -> False.
Proof.
- unfold transl_cond_branch_default; intros.
- eapply tail_nolabel_trans; [eapply transl_cond_label;eauto|TailNoLabel].
+ intros.
+ exploit bblock_size_preserved; eauto. unfold list_length_z; simpl. intros SIZE.
+ generalize (bblock_size_pos bb). intros SIZE'. lia.
Qed.
-Hint Resolve transl_cond_branch_default_label: labels.
-Remark transl_cond_branch_label: forall cond args lbl k c,
- transl_cond_branch cond args lbl k = OK c -> tail_nolabel k c.
+(* same proof as list_nth_z_range (Coqlib) *)
+Lemma find_instr_range:
+ forall c n i,
+ Asm.find_instr n c = Some i -> 0 <= n < list_length_z c.
Proof.
- unfold transl_cond_branch; intros; destruct args; TailNoLabel; destruct cond; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct (Int.is_power2 n); TailNoLabel.
-- destruct (Int.is_power2 n); TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct c0; TailNoLabel.
-- destruct (Int64.is_power2' n); TailNoLabel.
-- destruct (Int64.is_power2' n); TailNoLabel.
+ induction c; simpl; intros.
+ discriminate.
+ rewrite list_length_z_cons. destruct (zeq n 0).
+ generalize (list_length_z_pos c); lia.
+ exploit IHc; eauto. lia.
Qed.
-Remark transl_op_label:
- forall op args r k c,
- transl_op op args r k = OK c -> tail_nolabel k c.
+Lemma find_instr_tail:
+ forall tbb pos c i,
+ Asm.find_instr pos c = Some i ->
+ Asm.find_instr (pos + list_length_z tbb) (tbb ++ c) = Some i.
Proof.
- unfold transl_op; intros; destruct op; TailNoLabel.
-- destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
-- destruct (Float.eq_dec n Float.zero); TailNoLabel.
-- destruct (Float32.eq_dec n Float32.zero); TailNoLabel.
-- apply logicalimm32_label; unfold nolabel; auto.
-- apply logicalimm32_label; unfold nolabel; auto.
-- apply logicalimm32_label; unfold nolabel; auto.
-- unfold shrx32. destruct Int.eq; TailNoLabel.
-- apply arith_extended_label; unfold nolabel; auto.
-- apply arith_extended_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- apply logicalimm64_label; unfold nolabel; auto.
-- unfold shrx64. destruct Int.eq; TailNoLabel.
-- eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
-- destruct (preg_of r); try discriminate; TailNoLabel;
- (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]).
+ induction tbb as [| ? ? IHtbb].
+ - intros. unfold list_length_z; simpl. rewrite Z.add_0_r. assumption.
+ - intros. rewrite list_length_z_cons. simpl.
+ destruct (zeq (pos + (list_length_z tbb + 1)) 0).
+ + exploit find_instr_range; eauto. intros POS_RANGE.
+ generalize (list_length_z_pos tbb). lia.
+ + replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
+ eapply IHtbb; eauto.
Qed.
-Remark transl_addressing_label:
- forall sz addr args insn k c,
- transl_addressing sz addr args insn k = OK c ->
- (forall ad, nolabel (insn ad)) ->
- tail_nolabel k c.
+Lemma size_of_blocks_bounds fb pos f bi:
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ find_bblock pos (fn_blocks f) = Some bi ->
+ pos + size bi <= Ptrofs.max_unsigned.
Proof.
- unfold transl_addressing; intros; destruct addr; TailNoLabel;
- eapply tail_nolabel_trans; TailNoLabel.
- eapply tail_nolabel_trans. apply arith_extended_label; unfold nolabel; auto. TailNoLabel.
+ intros; exploit internal_functions_translated; eauto.
+ intros (tf & _ & TRANSf).
+ assert (pos + size bi <= max_pos tf). { eapply size_of_blocks_max_pos; eauto. }
+ assert (max_pos tf <= Ptrofs.max_unsigned). { eapply functions_bound_max_pos; eauto. }
+ lia.
Qed.
-Remark transl_load_label:
- forall chunk addr args dst k c,
- transl_load chunk addr args dst k = OK c -> tail_nolabel k c.
+Lemma find_instr_bblock_tail:
+ forall tbb bb pos c i,
+ Asm.find_instr pos c = Some i ->
+ unfold_bblock bb = OK tbb ->
+ Asm.find_instr (pos + size bb ) (tbb ++ c) = Some i.
Proof.
- unfold transl_load; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+ induction tbb.
+ - intros. exploit unfold_bblock_not_nil; eauto. intros. contradiction.
+ - intros. simpl.
+ destruct (zeq (pos + size bb) 0).
+ + (* absurd *)
+ exploit find_instr_range; eauto. intros POS_RANGE.
+ generalize (bblock_size_pos bb). intros SIZE. lia.
+ + erewrite bblock_size_preserved; eauto.
+ rewrite list_length_z_cons.
+ replace (pos + (list_length_z tbb + 1) - 1) with (pos + list_length_z tbb) by lia.
+ apply find_instr_tail; auto.
Qed.
-Remark transl_store_label:
- forall chunk addr args src k c,
- transl_store chunk addr args src k = OK c -> tail_nolabel k c.
+Lemma list_nth_z_find_label:
+ forall (ll : list label) il n l,
+ list_nth_z ll n = Some l ->
+ Asm.find_instr n ((unfold_label ll) ++ il) = Some (Asm.Plabel l).
Proof.
- unfold transl_store; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto.
+ induction ll.
+ - intros. inversion H.
+ - intros. simpl.
+ destruct (zeq n 0) as [Z | NZ].
+ + inversion H as (H'). rewrite Z in H'. simpl in H'. inv H'. reflexivity.
+ + simpl in H. destruct (zeq n 0). { contradiction. }
+ apply IHll; auto.
Qed.
-Remark indexed_memory_access_label:
- forall insn sz base ofs k,
- (forall ad, nolabel (insn ad)) ->
- tail_nolabel k (indexed_memory_access insn sz base ofs k).
+Lemma list_nth_z_find_bi:
+ forall lbi bi tlbi n bi' exit,
+ list_nth_z lbi n = Some bi ->
+ unfold_body lbi = OK tlbi ->
+ basic_to_instruction bi = OK bi' ->
+ Asm.find_instr n (tlbi ++ exit) = Some bi'.
Proof.
- unfold indexed_memory_access; intros. destruct offset_representable.
- TailNoLabel.
- eapply tail_nolabel_trans; TailNoLabel.
+ induction lbi.
+ - intros. inversion H.
+ - simpl. intros.
+ apply bind_inversion in H0. destruct H0 as (? & ? & ?).
+ apply bind_inversion in H2. destruct H2 as (? & ? & ?).
+ destruct (zeq n 0) as [Z | NZ].
+ + destruct n.
+ * inversion H as (BI). rewrite BI in *.
+ inversion H3. simpl. congruence.
+ * (* absurd *) congruence.
+ * (* absurd *) congruence.
+ + inv H3. simpl. destruct (zeq n 0). { contradiction. }
+ eapply IHlbi; eauto.
Qed.
-Remark loadind_label:
- forall base ofs ty dst k c,
- loadind base ofs ty dst k = OK c -> tail_nolabel k c.
+Lemma list_nth_z_find_bi_with_header:
+ forall ll lbi bi tlbi n bi' (rest : list Asm.instruction),
+ list_nth_z lbi (n - list_length_z ll) = Some bi ->
+ unfold_body lbi = OK tlbi ->
+ basic_to_instruction bi = OK bi' ->
+ Asm.find_instr n ((unfold_label ll) ++ (tlbi) ++ (rest)) = Some bi'.
Proof.
- unfold loadind; intros.
- destruct ty, (preg_of dst); inv H; apply indexed_memory_access_label; intros; exact I.
+ induction ll.
+ - unfold list_length_z. simpl. intros.
+ replace (n - 0) with n in H by lia. eapply list_nth_z_find_bi; eauto.
+ - intros. simpl. destruct (zeq n 0).
+ + rewrite list_length_z_cons in H. rewrite e in H.
+ replace (0 - (list_length_z ll + 1)) with (-1 - (list_length_z ll)) in H by lia.
+ generalize (list_length_z_pos ll). intros.
+ rewrite list_nth_z_neg in H; try lia. inversion H.
+ + rewrite list_length_z_cons in H.
+ replace (n - (list_length_z ll + 1)) with (n -1 - (list_length_z ll)) in H by lia.
+ eapply IHll; eauto.
Qed.
-Remark storeind_label:
- forall src base ofs ty k c,
- storeind src base ofs ty k = OK c -> tail_nolabel k c.
+(* XXX unused *)
+Lemma range_list_nth_z:
+ forall (A: Type) (l: list A) n,
+ 0 <= n < list_length_z l ->
+ exists x, list_nth_z l n = Some x.
Proof.
- unfold storeind; intros.
- destruct ty, (preg_of src); inv H; apply indexed_memory_access_label; intros; exact I.
+ induction l.
+ - intros. unfold list_length_z in H. simpl in H. lia.
+ - intros n. destruct (zeq n 0).
+ + intros. simpl. destruct (zeq n 0). { eauto. } contradiction.
+ + intros H. rewrite list_length_z_cons in H.
+ simpl. destruct (zeq n 0). { contradiction. }
+ replace (Z.pred n) with (n - 1) by lia.
+ eapply IHl; lia.
Qed.
-Remark loadptr_label:
- forall base ofs dst k, tail_nolabel k (loadptr base ofs dst k).
+Lemma list_nth_z_n_too_big:
+ forall (A: Type) (l: list A) n,
+ 0 <= n ->
+ list_nth_z l n = None ->
+ n >= list_length_z l.
Proof.
- intros. apply indexed_memory_access_label. unfold nolabel; auto.
+ induction l.
+ - intros. unfold list_length_z. simpl. lia.
+ - intros. rewrite list_length_z_cons.
+ simpl in H0.
+ destruct (zeq n 0) as [N | N].
+ + inversion H0.
+ + (* XXX there must be a more elegant way to prove this simple fact *)
+ assert (n > 0). { lia. }
+ assert (0 <= n - 1). { lia. }
+ generalize (IHl (n - 1)). intros IH.
+ assert (n - 1 >= list_length_z l). { auto. }
+ assert (n > list_length_z l); lia.
Qed.
-Remark storeptr_label:
- forall src base ofs k, tail_nolabel k (storeptr src base ofs k).
+Lemma find_instr_past_header:
+ forall labels n rest,
+ list_nth_z labels n = None ->
+ Asm.find_instr n (unfold_label labels ++ rest) =
+ Asm.find_instr (n - list_length_z labels) rest.
Proof.
- intros. apply indexed_memory_access_label. unfold nolabel; auto.
+ induction labels as [| label labels' IH].
+ - unfold list_length_z; simpl; intros; rewrite Z.sub_0_r; reflexivity.
+ - intros. simpl. destruct (zeq n 0) as [N | N].
+ + rewrite N in H. inversion H.
+ + rewrite list_length_z_cons.
+ replace (n - (list_length_z labels' + 1)) with (n - 1 - list_length_z labels') by lia.
+ simpl in H. destruct (zeq n 0). { contradiction. }
+ replace (Z.pred n) with (n - 1) in H by lia.
+ apply IH; auto.
Qed.
-Remark make_epilogue_label:
- forall f k, tail_nolabel k (make_epilogue f k).
+(* very similar to find_instr_past_header *)
+Lemma find_instr_past_body:
+ forall lbi n tlbi rest,
+ list_nth_z lbi n = None ->
+ unfold_body lbi = OK tlbi ->
+ Asm.find_instr n (tlbi ++ rest) =
+ Asm.find_instr (n - list_length_z lbi) rest.
Proof.
- unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadptr_label. TailNoLabel.
+ induction lbi.
+ - unfold list_length_z; simpl; intros ? ? ? ? H. inv H; rewrite Z.sub_0_r; reflexivity.
+ - intros n tlib ? NTH UNFOLD_BODY.
+ unfold unfold_body in UNFOLD_BODY. apply bind_inversion in UNFOLD_BODY.
+ destruct UNFOLD_BODY as (? & BI & H).
+ apply bind_inversion in H. destruct H as (? & UNFOLD_BODY' & CONS).
+ fold unfold_body in UNFOLD_BODY'. inv CONS.
+ simpl; destruct (zeq n 0) as [N|N].
+ + rewrite N in NTH; inversion NTH.
+ + rewrite list_length_z_cons.
+ replace (n - (list_length_z lbi + 1)) with (n - 1 - list_length_z lbi) by lia.
+ simpl in NTH. destruct (zeq n 0). { contradiction. }
+ replace (Z.pred n) with (n - 1) in NTH by lia.
+ apply IHlbi; auto.
Qed.
-Lemma transl_instr_label:
- forall f i ep k c,
- transl_instr f i ep k = OK c ->
- match i with Mlabel lbl => c = Plabel lbl :: k | _ => tail_nolabel k c end.
+Lemma n_beyond_body:
+ forall bb n,
+ 0 <= n < size bb ->
+ list_nth_z (header bb) n = None ->
+ list_nth_z (body bb) (n - list_length_z (header bb)) = None ->
+ n >= Z.of_nat (length (header bb) + length (body bb)).
Proof.
- unfold transl_instr; intros; destruct i; TailNoLabel.
-- eapply loadind_label; eauto.
-- eapply storeind_label; eauto.
-- destruct ep. eapply loadind_label; eauto.
- eapply tail_nolabel_trans. apply loadptr_label. eapply loadind_label; eauto.
-- eapply transl_op_label; eauto.
-- eapply transl_load_label; eauto.
-- eapply transl_store_label; eauto.
-- destruct s0; monadInv H; TailNoLabel.
-- destruct s0; monadInv H; (eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel]).
-- eapply transl_cond_branch_label; eauto.
-- eapply tail_nolabel_trans; [eapply make_epilogue_label|TailNoLabel].
+ intros.
+ assert (0 <= n). { lia. }
+ generalize (list_nth_z_n_too_big label (header bb) n H2 H0). intros.
+ generalize (list_nth_z_n_too_big _ (body bb) (n - list_length_z (header bb))). intros.
+ unfold size in H.
+
+ assert (0 <= n - list_length_z (header bb)). { lia. }
+ assert (n - list_length_z (header bb) >= list_length_z (body bb)). { apply H4; auto. }
+
+ assert (n >= list_length_z (header bb) + list_length_z (body bb)). { lia. }
+ rewrite Nat2Z.inj_add.
+ repeat (rewrite <- list_length_z_nat). assumption.
Qed.
-Lemma transl_instr_label':
- forall lbl f i ep k c,
- transl_instr f i ep k = OK c ->
- find_label lbl c = if Mach.is_label lbl i then Some k else find_label lbl k.
+Lemma exec_arith_instr_dont_move_PC ai rs rs': forall
+ (BASIC: exec_arith_instr lk ai rs = rs'),
+ rs PC = rs' PC.
Proof.
- intros. exploit transl_instr_label; eauto.
- destruct i; try (intros [A B]; apply B).
- intros. subst c. simpl. auto.
+ destruct ai; simpl; intros;
+ try (rewrite <- BASIC; rewrite Pregmap.gso; auto; discriminate).
+ - destruct i; simpl in BASIC;
+ try destruct (negb _); rewrite <- BASIC;
+ repeat rewrite Pregmap.gso; try discriminate; reflexivity.
+ - destruct i; simpl in BASIC.
+ 1,2: rewrite <- BASIC; repeat rewrite Pregmap.gso; try discriminate; reflexivity.
+ destruct sz;
+ try (unfold compare_single in BASIC || unfold compare_float in BASIC);
+ destruct (rs r1), (rs r2);
+ try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+ - destruct i; simpl in BASIC;
+ destruct is;
+ try (unfold compare_int in BASIC || unfold compare_long in BASIC);
+ try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+ - destruct i; simpl in BASIC; destruct sz;
+ try (unfold compare_single in BASIC || unfold compare_float in BASIC);
+ destruct (rs r1);
+ try (rewrite <- BASIC; repeat rewrite Pregmap.gso; try (discriminate || reflexivity)).
+ - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity).
+ - destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity).
Qed.
-Lemma transl_code_label:
- forall lbl f c ep tc,
- transl_code f c ep = OK tc ->
- match Mach.find_label lbl c with
- | None => find_label lbl tc = None
- | Some c' => exists tc', find_label lbl tc = Some tc' /\ transl_code f c' false = OK tc'
- end.
+Lemma exec_basic_dont_move_PC bi rs m rs' m': forall
+ (BASIC: exec_basic lk ge bi rs m = Next rs' m'),
+ rs PC = rs' PC.
Proof.
- induction c; simpl; intros.
- inv H. auto.
- monadInv H. rewrite (transl_instr_label' lbl _ _ _ _ _ EQ0).
- generalize (Mach.is_label_correct lbl a).
- destruct (Mach.is_label lbl a); intros.
- subst a. simpl in EQ. exists x; auto.
- eapply IHc; eauto.
+ destruct bi; simpl; intros.
+ - inv BASIC. exploit exec_arith_instr_dont_move_PC; eauto.
+ - unfold exec_load in BASIC.
+ destruct ld.
+ + unfold exec_load_rd_a in BASIC.
+ destruct Mem.loadv. 2: { discriminate BASIC. }
+ inv BASIC. rewrite Pregmap.gso; try discriminate; auto.
+ + unfold exec_load_double, is_pair_addressing_mode_correct in BASIC.
+ destruct a; try discriminate BASIC.
+ do 2 (destruct Mem.loadv; try discriminate BASIC).
+ inv BASIC. rewrite Pregmap.gso; try discriminate; auto.
+ - unfold exec_store in BASIC.
+ destruct st.
+ + unfold exec_store_rs_a in BASIC.
+ destruct Mem.storev. 2: { discriminate BASIC. }
+ inv BASIC; reflexivity.
+ + unfold exec_store_double in BASIC.
+ destruct a; try discriminate BASIC.
+ do 2 (destruct Mem.storev; try discriminate BASIC).
+ inv BASIC; reflexivity.
+ - destruct Mem.alloc, Mem.store. 2: { discriminate BASIC. }
+ inv BASIC. repeat (rewrite Pregmap.gso; try discriminate). reflexivity.
+ - destruct Mem.loadv. 2: { discriminate BASIC. }
+ destruct rs, Mem.free; try discriminate BASIC.
+ inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+ - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+ - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+ - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+ - inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
+ - inv BASIC; auto.
Qed.
-Lemma transl_find_label:
- forall lbl f tf,
- transf_function f = OK tf ->
- match Mach.find_label lbl f.(Mach.fn_code) with
- | None => find_label lbl tf.(fn_code) = None
- | Some c => exists tc, find_label lbl tf.(fn_code) = Some tc /\ transl_code f c false = OK tc
+Lemma exec_body_dont_move_PC_aux:
+ forall bis rs m rs' m'
+ (BODY: exec_body lk ge bis rs m = Next rs' m'),
+ rs PC = rs' PC.
+Proof.
+ induction bis.
+ - intros; inv BODY; reflexivity.
+ - simpl; intros.
+ remember (exec_basic lk ge a rs m) as bi eqn:BI; destruct bi. 2: { discriminate BODY. }
+ symmetry in BI; destruct s in BODY, BI; simpl in BODY, BI.
+ exploit exec_basic_dont_move_PC; eauto; intros AGPC; rewrite AGPC.
+ eapply IHbis; eauto.
+Qed.
+
+Lemma exec_body_dont_move_PC bb rs m rs' m': forall
+ (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+ rs PC = rs' PC.
+Proof. apply exec_body_dont_move_PC_aux. Qed.
+
+Lemma find_instr_bblock:
+ forall n lb pos bb tlb
+ (FINDBB: find_bblock pos lb = Some bb)
+ (UNFOLD: unfold lb = OK tlb)
+ (SIZE: 0 <= n < size bb),
+ exists i, is_nth_inst bb n i /\ Asm.find_instr (pos+n) tlb = Some i.
+Proof.
+ induction lb as [| b lb IHlb].
+ - intros. inversion FINDBB.
+ - intros pos bb tlb FINDBB UNFOLD SIZE.
+ destruct pos.
+ + inv FINDBB. simpl.
+ exploit unfold_car_cdr; eauto. intros (tbb & tlb' & UNFOLD_BBLOCK & UNFOLD' & UNFOLD_cons).
+ rewrite UNFOLD in UNFOLD_cons. inversion UNFOLD_cons.
+ unfold unfold_bblock in UNFOLD_BBLOCK.
+ destruct (zle (list_length_z (header bb)) 1). 2: { inversion UNFOLD_BBLOCK. }
+ apply bind_inversion in UNFOLD_BBLOCK.
+ destruct UNFOLD_BBLOCK as (? & UNFOLD_BODY & H).
+ inversion H as (UNFOLD_BBLOCK).
+ remember (list_nth_z (header bb) n) as label_opt eqn:LBL. destruct label_opt.
+ * (* nth instruction is a label *)
+ eexists; split. { eapply is_nth_label; eauto. }
+ inversion UNFOLD_cons.
+ symmetry in LBL.
+ rewrite <- app_assoc.
+ apply list_nth_z_find_label; auto.
+ * remember (list_nth_z (body bb) (n - list_length_z (header bb))) as bi_opt eqn:BI.
+ destruct bi_opt.
+ -- (* nth instruction is a basic instruction *)
+ exploit list_nth_z_in; eauto. intros INBB.
+ exploit entire_body_translated; eauto. intros BI'.
+ destruct BI'.
+ eexists; split.
+ ++ eapply is_nth_basic; eauto.
+ ++ repeat (rewrite <- app_assoc). eapply list_nth_z_find_bi_with_header; eauto.
+ -- (* nth instruction is the exit instruction *)
+ generalize n_beyond_body. intros TEMP.
+ assert (n >= Z.of_nat (Datatypes.length (header bb)
+ + Datatypes.length (body bb))) as NGE. { auto. } clear TEMP.
+ remember (exit bb) as exit_opt eqn:EXIT. destruct exit_opt.
+ ++ rewrite <- app_assoc. rewrite find_instr_past_header; auto.
+ rewrite <- app_assoc. erewrite find_instr_past_body; eauto.
+ assert (SIZE' := SIZE).
+ unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
+ destruct SIZE as (LOWER & UPPER).
+ repeat (rewrite Nat2Z.inj_add in UPPER).
+ repeat (rewrite <- list_length_z_nat in UPPER). repeat (rewrite Nat2Z.inj_add in NGE).
+ repeat (rewrite <- list_length_z_nat in NGE). simpl in UPPER.
+ assert (n = list_length_z (header bb) + list_length_z (body bb)). { lia. }
+ assert (n = size bb - 1). {
+ unfold size. rewrite <- EXIT. simpl.
+ repeat (rewrite Nat2Z.inj_add). repeat (rewrite <- list_length_z_nat). simpl. lia.
+ }
+ symmetry in EXIT.
+ eexists; split.
+ ** eapply is_nth_ctlflow; eauto.
+ ** simpl.
+ destruct (zeq (n - list_length_z (header bb) - list_length_z (body bb)) 0). { reflexivity. }
+ (* absurd *) lia.
+ ++ (* absurd *)
+ unfold size in SIZE. rewrite <- EXIT in SIZE. simpl in SIZE.
+ destruct SIZE as (? & SIZE'). rewrite Nat.add_0_r in SIZE'. lia.
+ + unfold find_bblock in FINDBB; simpl in FINDBB; fold find_bblock in FINDBB.
+ inversion UNFOLD as (UNFOLD').
+ apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD_BBLOCK' & UNFOLD')).
+ apply bind_inversion in UNFOLD'. destruct UNFOLD' as (? & (UNFOLD' & TLB)).
+ inversion TLB.
+ generalize (IHlb _ _ _ FINDBB UNFOLD'). intros IH.
+ destruct IH as (? & (IH_is_nth & IH_find_instr)); eauto.
+ eexists; split.
+ * apply IH_is_nth.
+ * replace (Z.pos p + n) with (Z.pos p + n - size b + size b) by lia.
+ eapply find_instr_bblock_tail; try assumption.
+ replace (Z.pos p + n - size b) with (Z.pos p - size b + n) by lia.
+ apply IH_find_instr.
+ + (* absurd *)
+ generalize (Pos2Z.neg_is_neg p). intros. exploit (find_bblock_neg (b :: lb)); eauto.
+ rewrite FINDBB. intros CONTRA. inversion CONTRA.
+Qed.
+
+Lemma exec_header_simulation b ofs f bb rs m: forall
+ (ATPC: rs PC = Vptr b ofs)
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb),
+ exists s', star Asm.step tge (State rs m) E0 s'
+ /\ match_internal (list_length_z (header bb)) (State rs m) s'.
+Proof.
+ intros.
+ exploit internal_functions_unfold; eauto.
+ intros (tc & FINDtf & TRANStf & _).
+ assert (BNDhead: list_length_z (header bb) <= 1). { eapply size_header; eauto. }
+ destruct (header bb) as [|l[|]] eqn: EQhead.
+ + (* header nil *)
+ eexists; split.
+ - eapply star_refl.
+ - split; eauto.
+ unfold list_length_z; rewrite !ATPC; simpl.
+ rewrite Ptrofs.add_zero; auto.
+ + (* header one *)
+ assert (Lhead: list_length_z (header bb) = 1). { rewrite EQhead; unfold list_length_z; simpl. auto. }
+ exploit (find_instr_bblock 0); eauto.
+ { generalize (bblock_size_pos bb). lia. }
+ intros (i & NTH & FIND_INSTR).
+ inv NTH.
+ * rewrite EQhead in H; simpl in H. inv H.
+ replace (Ptrofs.unsigned ofs + 0) with (Ptrofs.unsigned ofs) in FIND_INSTR by lia.
+ eexists. split.
+ - eapply star_one.
+ eapply Asm.exec_step_internal; eauto.
+ simpl; eauto.
+ - unfold list_length_z; simpl. split; eauto.
+ intros r; destruct r; simpl; congruence || auto.
+ * (* absurd case *)
+ erewrite list_nth_z_neg in * |-; [ congruence | rewrite Lhead; lia].
+ * (* absurd case *)
+ rewrite bblock_size_aux, Lhead in *. generalize (bblock_size_aux_pos bb). lia.
+ + (* absurd case *)
+ unfold list_length_z in BNDhead. simpl in *.
+ generalize (list_length_z_aux_increase _ l1 2); lia.
+Qed.
+
+Lemma eval_addressing_preserved a rs1 rs2:
+ (forall r : preg, r <> PC -> rs1 r = rs2 r) ->
+ eval_addressing lk a rs1 = Asm.eval_addressing tge a rs2.
+Proof.
+ intros EQ.
+ destruct a; simpl; try (rewrite !EQ; congruence).
+ auto.
+Qed.
+
+Ltac next_stuck_cong := try (unfold Next, Stuck in *; congruence).
+
+Ltac inv_ok_eq :=
+ repeat match goal with
+ | [EQ: OK ?x = OK ?y |- _ ]
+ => inversion EQ; clear EQ; subst
end.
+
+Ltac reg_rwrt :=
+ match goal with
+ | [e: DR _ = DR _ |- _ ]
+ => rewrite e in *
+ end.
+
+Ltac destruct_reg_inv :=
+ repeat match goal with
+ | [ H : match ?reg with _ => _ end = _ |- _ ]
+ => simpl in *; destruct reg; try congruence; try inv_ok_eq; try reg_rwrt
+ end.
+
+Ltac destruct_ireg_inv :=
+ repeat match goal with
+ | [ H : match ?reg with _ => _ end = _ |- _ ]
+ => destruct reg as [[r|]|]; try congruence; try inv_ok_eq; subst
+ end.
+
+Ltac destruct_reg_size :=
+ simpl in *;
+ match goal with
+ | [ |- context [ match ?reg with _ => _ end ] ]
+ => destruct reg; try congruence
+ end.
+
+Ltac find_rwrt_ag :=
+ simpl in *;
+ match goal with
+ | [ AG: forall r, r <> ?PC -> _ r = _ r |- _ ]
+ => repeat rewrite <- AG; try congruence
+ end.
+
+Ltac inv_matchi :=
+ match goal with
+ | [ MATCHI : match_internal _ _ _ |- _ ]
+ => inversion MATCHI; subst; find_rwrt_ag
+ end.
+
+Ltac destruct_ir0_reg :=
+ match goal with
+ | [ |- context [ ir0 _ _ ?r ] ]
+ => unfold ir0 in *; destruct r; find_rwrt_ag; eauto
+ end.
+
+Ltac pc_not_sp :=
+ match goal with
+ | [ |- ?PC <> ?SP ]
+ => destruct (PregEq.eq SP PC); repeat congruence; discriminate
+ end.
+
+Ltac update_x_access_x :=
+ subst; rewrite !Pregmap.gss; auto.
+
+Ltac update_x_access_r :=
+ rewrite !Pregmap.gso; auto.
+
+Lemma nextinstr_agree_but_pc rs1 rs2: forall
+ (AG: forall r, r <> PC -> rs1 r = rs2 r),
+ forall r, r <> PC -> rs1 r = Asm.nextinstr rs2 r.
Proof.
- intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0.
- monadInv EQ. rewrite transl_code'_transl_code in EQ0. unfold fn_code.
- simpl. destruct (storeptr_label X30 XSP (fn_retaddr_ofs f) x) as [A B]; rewrite B.
- eapply transl_code_label; eauto.
+ intros; unfold Asm.nextinstr in *; rewrite Pregmap.gso in *; eauto.
Qed.
-End TRANSL_LABEL.
+Lemma ptrofs_nextinstr_agree rs1 rs2 n: forall
+ (BOUNDED : 0 <= n <= Ptrofs.max_unsigned)
+ (AGPC : Val.offset_ptr (rs1 PC) (Ptrofs.repr n) = rs2 PC),
+ Val.offset_ptr (rs1 PC) (Ptrofs.repr (n + 1)) = Asm.nextinstr rs2 PC.
+Proof.
+ intros; unfold Asm.nextinstr; rewrite Pregmap.gss.
+ rewrite <- Ptrofs.unsigned_one; rewrite <- (Ptrofs.unsigned_repr n); eauto;
+ rewrite <- Ptrofs.add_unsigned; rewrite <- Val.offset_ptr_assoc; rewrite AGPC; eauto.
+Qed.
-(** A valid branch in a piece of Mach code translates to a valid ``go to''
- transition in the generated Asm code. *)
+Lemma load_rd_a_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk f a: forall
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (HLOAD: exec_load_rd_a lk chk f a rd rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_load tge chk f a rd rs2 m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ unfold exec_load_rd_a, Asm.exec_load in *.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ rewrite <- (eval_addressing_preserved a rs1 rs2); auto.
+ destruct (Mem.loadv _ _ _).
+ + inversion HLOAD; auto. repeat (econstructor; eauto).
+ * eapply nextinstr_agree_but_pc; intros.
+ destruct (PregEq.eq r rd); try update_x_access_x; try update_x_access_r.
+ * eapply ptrofs_nextinstr_agree; eauto.
+ + next_stuck_cong.
+Qed.
+
+Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk1 chk2 f a: forall
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (HLOAD: exec_load_double lk chk1 chk2 f a rd1 rd2 rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk1 chk2 f a rd1 rd2 rs2 m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ unfold exec_load_double, Asm.exec_load_double in *.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ erewrite <- !eval_addressing_preserved; eauto.
+ destruct (is_pair_addressing_mode_correct a); try discriminate.
+ destruct (Mem.loadv _ _ _);
+ destruct (Mem.loadv chk2 m2
+ (eval_addressing lk
+ (get_offset_addr a match chk1 with
+ | Mint32 | Mfloat32| Many32 => 4
+ | _ => 8
+ end) rs1));
+ inversion HLOAD; auto.
+ repeat (econstructor; eauto).
+ * eapply nextinstr_agree_but_pc; intros.
+ destruct (PregEq.eq r rd2); destruct (PregEq.eq r rd1).
+ - try update_x_access_x.
+ - try update_x_access_x.
+ - subst; repeat rewrite Pregmap.gso, Pregmap.gss; auto.
+ - try update_x_access_r.
+ * eapply ptrofs_nextinstr_agree; eauto.
+Qed.
-Lemma find_label_goto_label:
- forall f tf lbl rs m c' b ofs,
- Genv.find_funct_ptr ge b = Some (Internal f) ->
- transf_function f = OK tf ->
- rs PC = Vptr b ofs ->
- Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
- exists tc', exists rs',
- goto_label tf lbl rs m = Next rs' m
- /\ transl_code_at_pc ge (rs' PC) b f c' false tf tc'
- /\ forall r, r <> PC -> rs'#r = rs#r.
-Proof.
- intros. exploit (transl_find_label lbl f tf); eauto. rewrite H2.
- intros [tc [A B]].
- exploit label_pos_code_tail; eauto. instantiate (1 := 0).
- intros [pos' [P [Q R]]].
- exists tc; exists (rs#PC <- (Vptr b (Ptrofs.repr pos'))).
- split. unfold goto_label. rewrite P. rewrite H1. auto.
- split. rewrite Pregmap.gss. constructor; auto.
- rewrite Ptrofs.unsigned_repr. replace (pos' - 0) with pos' in Q.
- auto. lia.
- generalize (transf_function_no_overflow _ _ H0). lia.
- intros. apply Pregmap.gso; auto.
-Qed.
-
-(** Existence of return addresses *)
+Lemma store_rs_a_preserved n rs1 m1 rs1' m1' rs2 m2 v chk a: forall
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (HSTORE: exec_store_rs_a lk chk a v rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_store tge chk a v rs2 m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ unfold exec_store_rs_a, Asm.exec_store in *.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ rewrite <- (eval_addressing_preserved a rs1 rs2); auto.
+ destruct (Mem.storev _ _ _ _).
+ + inversion HSTORE; auto. repeat (econstructor; eauto).
+ * eapply nextinstr_agree_but_pc; intros.
+ subst. apply EQR. auto.
+ * eapply ptrofs_nextinstr_agree; subst; eauto.
+ + next_stuck_cong.
+Qed.
-Lemma return_address_exists:
- forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) ->
- exists ra, return_address_offset f c ra.
+Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (HSTORE: exec_store_double lk chk1 chk2 a v1 v2 rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ unfold exec_store_double, Asm.exec_store_double in *.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ erewrite <- !eval_addressing_preserved; eauto.
+ destruct (is_pair_addressing_mode_correct a); try discriminate.
+ destruct (Mem.storev _ _ _ _);
+ try destruct (Mem.storev chk2 m
+ (eval_addressing lk
+ (get_offset_addr a
+ match chk1 with
+ | Mint32 | Mfloat32 | Many32 => 4
+ | _ => 8
+ end) rs1) v2);
+ inversion HSTORE; auto.
+ repeat (econstructor; eauto).
+ * eapply nextinstr_agree_but_pc; intros.
+ subst. apply EQR. auto.
+ * eapply ptrofs_nextinstr_agree; subst; eauto.
+Qed.
+
+Lemma next_inst_preserved n rs1 m1 rs1' m1' rs2 m2 (x: dreg) v: forall
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (NEXTI: Next rs1 # x <- v m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem),
+ Next (Asm.nextinstr rs2 # x <- v) m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ inversion NEXTI. repeat (econstructor; eauto).
+ * eapply nextinstr_agree_but_pc; intros.
+ destruct (PregEq.eq r x); try update_x_access_x; try update_x_access_r.
+ * eapply ptrofs_nextinstr_agree; eauto.
+Qed.
+
+Lemma match_internal_nextinstr_switch:
+ forall n s rs2 m2 r v,
+ r <> PC ->
+ match_internal n s (State ((Asm.nextinstr rs2)#r <- v) m2) ->
+ match_internal n s (State (Asm.nextinstr (rs2#r <- v)) m2).
Proof.
- intros. eapply Asmgenproof0.return_address_exists; eauto.
-- intros. exploit transl_instr_label; eauto.
- destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor.
-- intros. monadInv H0.
- destruct (zlt Ptrofs.max_unsigned (list_length_z x.(fn_code))); inv EQ0. monadInv EQ.
- rewrite transl_code'_transl_code in EQ0.
- exists x; exists true; split; auto. unfold fn_code.
- constructor. apply (storeptr_label X30 XSP (fn_retaddr_ofs f0) x).
-- exact transf_function_no_overflow.
-Qed.
-
-(** * Proof of semantic preservation *)
-
-(** Semantic preservation is proved using simulation diagrams
- of the following form.
-<<
- st1 --------------- st2
- | |
- t| *|t
- | |
- v v
- st1'--------------- st2'
->>
- The invariant is the [match_states] predicate below, which includes:
-- The Asm code pointed by the PC register is the translation of
- the current Mach code sequence.
-- Mach register values and Asm register values agree.
+ unfold Asm.nextinstr; intros n s rs2 m2 r v NOTPC1 MI.
+ inversion MI; subst; constructor; auto.
+ - eapply nextinstr_agree_but_pc; intros.
+ rewrite AG; try congruence.
+ destruct (PregEq.eq r r0); try update_x_access_x; try update_x_access_r.
+ - rewrite !Pregmap.gss, !Pregmap.gso; try congruence.
+ rewrite AGPC.
+ rewrite Pregmap.gso, Pregmap.gss; try congruence.
+Qed.
+
+Lemma match_internal_nextinstr_set_parallel:
+ forall n rs1 m1 rs2 m2 r v1 v2,
+ r <> PC ->
+ match_internal n (State rs1 m1) (State (Asm.nextinstr rs2) m2) ->
+ v1 = v2 ->
+ match_internal n (State (rs1#r <- v1) m1) (State (Asm.nextinstr (rs2#r <- v2)) m2).
+Proof.
+ intros; subst; eapply match_internal_nextinstr_switch; eauto.
+ intros; eapply match_internal_set_parallel; eauto.
+Qed.
+
+Lemma exec_basic_simulation:
+ forall tf n rs1 m1 rs1' m1' rs2 m2 bi tbi
+ (BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
+ (BASIC: exec_basic lk ge bi rs1 m1 = Next rs1' m1')
+ (MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
+ (TRANSBI: basic_to_instruction bi = OK tbi),
+ exists rs2' m2', Asm.exec_instr tge tf tbi
+ rs2 m2 = Next rs2' m2'
+ /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ destruct bi.
+ { (* PArith *)
+ simpl in *; destruct i.
+ 1: {
+ destruct i.
+ 1,2,3:
+ try (destruct sumbool_rec; try congruence);
+ try (monadInv TRANSBI);
+ try (destruct_reg_inv);
+ try (inv_matchi);
+ try (exploit next_inst_preserved; eauto);
+ try (repeat destruct_reg_size);
+ try (destruct_ir0_reg).
+ 1,2: (* Special case for Pfmovimmd / Pfmovimms *)
+ try (monadInv TRANSBI);
+ try (destruct_reg_inv);
+ try (inv_matchi);
+ inversion BASIC; clear BASIC; subst;
+ try (destruct (is_immediate_float64 _));
+ try (destruct (is_immediate_float32 _));
+ eexists; eexists; split; eauto;
+ repeat (eapply match_internal_nextinstr_set_parallel; try congruence);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto).
+ }
+ 1,2,3,4,5: (* PArithP, PArithPP, PArithPPP, PArithRR0R, PArithRR0, PArithARRRR0 *)
+ destruct i;
+ try (destruct sumbool_rec; try congruence);
+ try (monadInv TRANSBI);
+ try (destruct_reg_inv);
+ try (inv_matchi);
+ try (exploit next_inst_preserved; eauto);
+ try (repeat destruct_reg_size);
+ try (destruct_ir0_reg).
+ { (* PArithComparisonPP *)
+ destruct i;
+ try (monadInv TRANSBI);
+ try (inv_matchi);
+ try (destruct_reg_inv);
+ simpl in *.
+ 1,2: (* compare_long *)
+ inversion BASIC; clear BASIC; subst;
+ eexists; eexists; split; eauto;
+ unfold compare_long;
+ repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto).
+
+ destruct sz.
+ - (* compare_single *)
+ unfold compare_single in BASIC.
+ destruct (rs1 x), (rs1 x0);
+ inversion BASIC;
+ eexists; eexists; split; eauto;
+ repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto).
+ - (* compare_float *)
+ unfold compare_float in BASIC.
+ destruct (rs1 x), (rs1 x0);
+ inversion BASIC;
+ eexists; eexists; split; eauto;
+ repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto). }
+ 1,2: (* PArithComparisonR0R, PArithComparisonP *)
+ destruct i;
+ try (monadInv TRANSBI);
+ try (inv_matchi);
+ try (destruct_reg_inv);
+ try (destruct_reg_size);
+ simpl in *;
+ inversion BASIC; clear BASIC; subst;
+ eexists; eexists; split; eauto;
+ unfold compare_long, compare_int, compare_float, compare_single;
+ try (destruct_reg_size);
+ repeat (eapply match_internal_nextinstr_set_parallel; [ congruence | idtac | try (rewrite !AG; congruence)]);
+ try (econstructor; eauto);
+ try (destruct_ir0_reg);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto).
+ { (* Pcset *)
+ try (monadInv TRANSBI);
+ try (inv_matchi).
+ try (exploit next_inst_preserved; eauto);
+ try (simpl in *; intros;
+ unfold if_opt_bool_val in *; unfold eval_testcond in *;
+ rewrite <- !AG; try congruence; eauto). }
+ { (* Pfmovi *)
+ try (monadInv TRANSBI);
+ try (inv_matchi);
+ try (destruct_reg_size);
+ try (destruct_ir0_reg);
+ try (exploit next_inst_preserved; eauto). }
+ { (* Pcsel *)
+ try (destruct_reg_inv);
+ try (monadInv TRANSBI);
+ try (destruct_reg_inv);
+ try (inv_matchi);
+ try (exploit next_inst_preserved; eauto);
+ simpl in *; intros;
+ unfold if_opt_bool_val in *; unfold eval_testcond in *;
+ rewrite <- !AG; try congruence; eauto. }
+ { (* Pfnmul *)
+ try (monadInv TRANSBI);
+ try (inv_matchi);
+ try (destruct_reg_size);
+ try (exploit next_inst_preserved; eauto);
+ try (find_rwrt_ag). } }
+ { (* PLoad *)
+ destruct ld.
+ - destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_rd_a_preserved; eauto;
+ intros; simpl in *; destruct sz; eauto.
+ - destruct ld; monadInv TRANSBI; destruct rd1 as [[rd1'|]|]; destruct rd2 as [[rd2'|]|];
+ inv EQ; inv EQ1; exploit load_double_preserved; eauto. }
+ { (* PStore *)
+ destruct st.
+ - destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_rs_a_preserved; eauto;
+ simpl in *; inv_matchi; find_rwrt_ag.
+ - destruct st; monadInv TRANSBI; destruct rs0 as [[rs0'|]|]; destruct rs3 as [[rs3'|]|];
+ inv EQ; inv EQ1; exploit store_double_preserved; eauto;
+ simpl in *; inv_matchi; find_rwrt_ag. }
+ { (* Pallocframe *)
+ monadInv TRANSBI;
+ inv_matchi; try pc_not_sp;
+ destruct sz eqn:EQSZ;
+ destruct Mem.alloc eqn:EQALLOC;
+ destruct Mem.store eqn:EQSTORE; inversion BASIC; try pc_not_sp;
+ eexists; eexists; split; eauto;
+ repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto). }
+ { (* Pfreeframe *)
+ monadInv TRANSBI;
+ inv_matchi; try pc_not_sp;
+ destruct sz eqn:EQSZ;
+ destruct Mem.loadv eqn:EQLOAD;
+ destruct (rs1 SP) eqn:EQRS1SP;
+ try (destruct Mem.free eqn:EQFREE);
+ inversion BASIC; try pc_not_sp;
+ eexists; eexists; split; eauto;
+ repeat (eapply match_internal_nextinstr_set_parallel; [ try (pc_not_sp; congruence) | idtac | try (reflexivity)]);
+ try (econstructor; eauto);
+ try (eapply nextinstr_agree_but_pc; eauto);
+ try (eapply ptrofs_nextinstr_agree; eauto). }
+ 1,2,3,4: (* Ploadsymbol, Pcvtsw2x, Pcvtuw2x, Pcvtx2w *)
+ try (monadInv TRANSBI);
+ try (inv_matchi);
+ try (exploit next_inst_preserved; eauto);
+ rewrite symbol_addresses_preserved; eauto;
+ try (find_rwrt_ag).
+ { (* Pnop *)
+ monadInv TRANSBI; inv_matchi.
+ inversion BASIC.
+ repeat (econstructor; eauto).
+ eapply nextinstr_agree_but_pc; intros;
+ try rewrite <- H0, AG; auto.
+ try eapply ptrofs_nextinstr_agree; auto; rewrite <- H0;
+ assumption. }
+Qed.
+
+Lemma find_basic_instructions b ofs f bb tc: forall
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (UNFOLD: unfold (fn_blocks f) = OK tc),
+ forall n,
+ (n < length (body bb))%nat ->
+ exists (i : Asm.instruction) (bi : basic),
+ list_nth_z (body bb) (Z.of_nat n) = Some bi
+ /\ basic_to_instruction bi = OK i
+ /\ Asm.find_instr (Ptrofs.unsigned ofs
+ + (list_length_z (header bb))
+ + Z.of_nat n) tc
+ = Some i.
+Proof.
+ intros until n; intros NLT.
+ exploit internal_functions_unfold; eauto.
+ intros (tc' & FINDtf & TRANStf & _).
+ assert (tc' = tc) by congruence; subst.
+ exploit (find_instr_bblock (list_length_z (header bb) + Z.of_nat n)); eauto.
+ { unfold size; split.
+ - rewrite list_length_z_nat; lia.
+ - repeat (rewrite list_length_z_nat). repeat (rewrite Nat2Z.inj_add). lia. }
+ intros (i & NTH & FIND_INSTR).
+ exists i; intros.
+ inv NTH.
+ - (* absurd *) apply list_nth_z_range in H; lia.
+ - exists bi;
+ rewrite Z.add_simpl_l in H;
+ rewrite Z.add_assoc in FIND_INSTR;
+ intuition.
+ - (* absurd *) rewrite bblock_size_aux in H0;
+ rewrite H in H0; simpl in H0; repeat rewrite list_length_z_nat in H0; lia.
+Qed.
+
+(* TODO: remplacer find_basic_instructions directement par ce lemme ? *)
+Lemma find_basic_instructions_alt b ofs f bb tc n: forall
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (UNFOLD: unfold (fn_blocks f) = OK tc)
+ (BOUND: 0 <= n < list_length_z (body bb)),
+ exists (i : Asm.instruction) (bi : basic),
+ list_nth_z (body bb) n = Some bi
+ /\ basic_to_instruction bi = OK i
+ /\ Asm.find_instr (Ptrofs.unsigned ofs
+ + (list_length_z (header bb))
+ + n) tc
+ = Some i.
+Proof.
+ intros; assert ((Z.to_nat n) < length (body bb))%nat.
+ { rewrite Nat2Z.inj_lt, <- list_length_z_nat, Z2Nat.id; try lia. }
+ exploit find_basic_instructions; eauto.
+ rewrite Z2Nat.id; try lia. intros (i & bi & X).
+ eexists; eexists; intuition eauto.
+Qed.
+
+Lemma header_body_tail_bound: forall (a: basic) (li: list basic) bb ofs
+ (BOUNDBB : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+ (BDYLENPOS : 0 <= list_length_z (body bb) - list_length_z (a :: li) <
+ list_length_z (body bb)),
+0 <= list_length_z (header bb) + list_length_z (body bb) - list_length_z (a :: li) <=
+Ptrofs.max_unsigned.
+Proof.
+ intros.
+ assert (HBBPOS: list_length_z (header bb) >= 0) by eapply list_length_z_pos.
+ assert (HBBSIZE: list_length_z (header bb) < size bb) by eapply header_size_lt_block_size.
+ assert (OFSBOUND: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2.
+ assert (BBSIZE: size bb <= Ptrofs.max_unsigned) by lia.
+ unfold size in BBSIZE.
+ rewrite !Nat2Z.inj_add in BBSIZE.
+ rewrite <- !list_length_z_nat in BBSIZE.
+ lia.
+Qed.
+
+(* A more general version of the exec_body_simulation_plus lemma below.
+ This generalization is necessary for the induction proof inside the body.
*)
+Lemma exec_body_simulation_plus_gen li: forall b ofs f bb rs m s2 rs' m'
+ (BLI: is_tail li (body bb))
+ (ATPC: rs PC = Vptr b ofs)
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (NEMPTY_BODY: li <> nil)
+ (MATCHI: match_internal ((list_length_z (header bb)) + (list_length_z (body bb)) - (list_length_z li)) (State rs m) s2)
+ (BODY: exec_body lk ge li rs m = Next rs' m'),
+ exists s2', plus Asm.step tge s2 E0 s2'
+ /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+ induction li as [|a li]; simpl; try congruence.
+ intros.
+ assert (BDYLENPOS: 0 <= (list_length_z (body bb) - list_length_z (a::li)) < list_length_z (body bb)). {
+ assert (Z.of_nat O < list_length_z (a::li) <= list_length_z (body bb)); try lia.
+ rewrite !list_length_z_nat; split.
+ - rewrite <- Nat2Z.inj_lt. simpl. lia.
+ - rewrite <- Nat2Z.inj_le; eapply is_tail_bound; eauto.
+ }
+ exploit internal_functions_unfold; eauto.
+ intros (tc & FINDtf & TRANStf & _).
+ exploit find_basic_instructions_alt; eauto.
+ intros (tbi & (bi & (NTHBI & TRANSBI & FIND_INSTR))).
+ exploit is_tail_list_nth_z; eauto.
+ rewrite NTHBI; simpl.
+ intros X; inversion X; subst; clear X NTHBI.
+ destruct (exec_basic _ _ _ _ _) eqn:EXEC_BASIC; next_stuck_cong.
+ destruct s as (rs1 & m1); simpl in *.
+ destruct s2 as (rs2 & m2); simpl in *.
+ assert (BOUNDBBMAX: Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+ by (eapply size_of_blocks_bounds; eauto).
+ exploit header_body_tail_bound; eauto. intros BDYTAIL.
+ exploit exec_basic_simulation; eauto.
+ intros (rs_next' & m_next' & EXEC_INSTR & MI_NEXT).
+ exploit exec_basic_dont_move_PC; eauto. intros AGPC.
+ inversion MI_NEXT as [A B C D E M_NEXT_AGREE RS_NEXT_AGREE ATPC_NEXT PC_OFS_NEXT RS RS'].
+ subst A. subst B. subst C. subst D. subst E.
+ rewrite ATPC in AGPC. symmetry in AGPC, ATPC_NEXT.
+
+ inv MATCHI. symmetry in AGPC0.
+ rewrite ATPC in AGPC0.
+ unfold Val.offset_ptr in AGPC0.
+
+ simpl in FIND_INSTR.
+ (* Execute internal step. *)
+ exploit (Asm.exec_step_internal tge b); eauto.
+ {
+ rewrite Ptrofs.add_unsigned.
+ repeat (rewrite Ptrofs.unsigned_repr); try lia.
+ 2: {
+ assert (BOUNDOFS: 0 <= Ptrofs.unsigned ofs <= Ptrofs.max_unsigned) by eapply Ptrofs.unsigned_range_2.
+ assert (list_length_z (body bb) <= size bb) by eapply body_size_le_block_size.
+ assert (list_length_z (header bb) <= 1). { eapply size_header; eauto. }
+ lia. }
+ try rewrite list_length_z_nat; try split;
+ simpl; rewrite <- !list_length_z_nat;
+ replace (Ptrofs.unsigned ofs + (list_length_z (header bb) + list_length_z (body bb) -
+ list_length_z (a :: li))) with (Ptrofs.unsigned ofs + list_length_z (header bb) +
+ (list_length_z (body bb) - list_length_z (a :: li))) by lia;
+ try assumption; try lia. }
+
+ (* This is our STEP hypothesis. *)
+ intros STEP_NEXT.
+ destruct li as [|a' li]; simpl in *.
+ - (* case of a single instruction in li: this our base case in the induction *)
+ inversion BODY; subst.
+ eexists; split.
+ + apply plus_one. eauto.
+ + constructor; auto.
+ rewrite ATPC_NEXT.
+ apply f_equal.
+ apply f_equal.
+ rewrite bblock_size_aux, list_length_z_cons; simpl.
+ lia.
+ - exploit (IHli b ofs f bb rs1 m_next' (State rs_next' m_next')); congruence || eauto.
+ + exploit is_tail_app_def; eauto.
+ intros (l3 & EQ); rewrite EQ.
+ exploit (is_tail_app_right (l3 ++ a::nil)).
+ rewrite <- app_assoc; simpl; eauto.
+ + constructor; auto.
+ rewrite ATPC_NEXT.
+ apply f_equal.
+ apply f_equal.
+ rewrite! list_length_z_cons; simpl.
+ lia.
+ + intros (s2' & LAST_STEPS & LAST_MATCHS).
+ eexists. split; eauto.
+ eapply plus_left'; eauto.
+Qed.
-Inductive match_states: Mach.state -> Asm.state -> Prop :=
- | match_states_intro:
- forall s fb sp c ep ms m m' rs f tf tc
- (STACKS: match_stack ge s)
- (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
- (MEXT: Mem.extends m m')
- (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
- (AG: agree ms sp rs)
- (DXP: ep = true -> rs#X29 = parent_sp s),
- match_states (Mach.State s fb sp c ms m)
- (Asm.State rs m')
- | match_states_call:
- forall s fb ms m m' rs
- (STACKS: match_stack ge s)
- (MEXT: Mem.extends m m')
- (AG: agree ms (parent_sp s) rs)
- (ATPC: rs PC = Vptr fb Ptrofs.zero)
- (ATLR: rs RA = parent_ra s),
- match_states (Mach.Callstate s fb ms m)
- (Asm.State rs m')
- | match_states_return:
- forall s ms m m' rs
- (STACKS: match_stack ge s)
- (MEXT: Mem.extends m m')
- (AG: agree ms (parent_sp s) rs)
- (ATPC: rs PC = parent_ra s),
- match_states (Mach.Returnstate s ms m)
- (Asm.State rs m').
-
-Lemma exec_straight_steps:
- forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2,
- match_stack ge s ->
- Mem.extends m2 m2' ->
- Genv.find_funct_ptr ge fb = Some (Internal f) ->
- transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
- (forall k c (TR: transl_instr f i ep k = OK c),
- exists rs2,
- exec_straight tge tf c rs1 m1' k rs2 m2'
- /\ agree ms2 sp rs2
- /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)) ->
- exists st',
- plus step tge (State rs1 m1') E0 st' /\
- match_states (Mach.State s fb sp c ms2 m2) st'.
-Proof.
- intros. inversion H2. subst. monadInv H7.
- exploit H3; eauto. intros [rs2 [A [B C]]].
- exists (State rs2 m2'); split.
- eapply exec_straight_exec; eauto.
- econstructor; eauto. eapply exec_straight_at; eauto.
-Qed.
-
-Lemma exec_straight_steps_goto:
- forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
- match_stack ge s ->
- Mem.extends m2 m2' ->
- Genv.find_funct_ptr ge fb = Some (Internal f) ->
- Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
- transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
- it1_is_parent ep i = false ->
- (forall k c (TR: transl_instr f i ep k = OK c),
- exists jmp, exists k', exists rs2,
- exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2'
- /\ agree ms2 sp rs2
- /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') ->
- exists st',
- plus step tge (State rs1 m1') E0 st' /\
- match_states (Mach.State s fb sp c' ms2 m2) st'.
-Proof.
- intros. inversion H3. subst. monadInv H9.
- exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]].
- generalize (functions_transl _ _ _ H7 H8); intro FN.
- generalize (transf_function_no_overflow _ _ H8); intro NOOV.
- exploit exec_straight_steps_2; eauto.
- intros [ofs' [PC2 CT2]].
- exploit find_label_goto_label; eauto.
- intros [tc' [rs3 [GOTO [AT' OTH]]]].
- exists (State rs3 m2'); split.
- eapply plus_right'.
- eapply exec_straight_steps_1; eauto.
- econstructor; eauto.
- eapply find_instr_tail. eauto.
- rewrite C. eexact GOTO.
- traceEq.
- econstructor; eauto.
- apply agree_exten with rs2; auto with asmgen.
- congruence.
-Qed.
-
-Lemma exec_straight_opt_steps_goto:
- forall s fb f rs1 i c ep tf tc m1' m2 m2' sp ms2 lbl c',
- match_stack ge s ->
- Mem.extends m2 m2' ->
- Genv.find_funct_ptr ge fb = Some (Internal f) ->
- Mach.find_label lbl f.(Mach.fn_code) = Some c' ->
- transl_code_at_pc ge (rs1 PC) fb f (i :: c) ep tf tc ->
- it1_is_parent ep i = false ->
- (forall k c (TR: transl_instr f i ep k = OK c),
- exists jmp, exists k', exists rs2,
- exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2'
- /\ agree ms2 sp rs2
- /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') ->
- exists st',
- plus step tge (State rs1 m1') E0 st' /\
- match_states (Mach.State s fb sp c' ms2 m2) st'.
-Proof.
- intros. inversion H3. subst. monadInv H9.
- exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]].
- generalize (functions_transl _ _ _ H7 H8); intro FN.
- generalize (transf_function_no_overflow _ _ H8); intro NOOV.
- inv A.
-- exploit find_label_goto_label; eauto.
- intros [tc' [rs3 [GOTO [AT' OTH]]]].
- exists (State rs3 m2'); split.
- apply plus_one. econstructor; eauto.
- eapply find_instr_tail. eauto.
- rewrite C. eexact GOTO.
- econstructor; eauto.
- apply agree_exten with rs2; auto with asmgen.
- congruence.
-- exploit exec_straight_steps_2; eauto.
- intros [ofs' [PC2 CT2]].
- exploit find_label_goto_label; eauto.
- intros [tc' [rs3 [GOTO [AT' OTH]]]].
- exists (State rs3 m2'); split.
- eapply plus_right'.
- eapply exec_straight_steps_1; eauto.
- econstructor; eauto.
- eapply find_instr_tail. eauto.
- rewrite C. eexact GOTO.
- traceEq.
- econstructor; eauto.
- apply agree_exten with rs2; auto with asmgen.
- congruence.
-Qed.
-
-(** We need to show that, in the simulation diagram, we cannot
- take infinitely many Mach transitions that correspond to zero
- transitions on the Asm side. Actually, all Mach transitions
- correspond to at least one Asm transition, except the
- transition from [Machsem.Returnstate] to [Machsem.State].
- So, the following integer measure will suffice to rule out
- the unwanted behaviour. *)
-
-Definition measure (s: Mach.state) : nat :=
- match s with
- | Mach.State _ _ _ _ _ _ => 0%nat
- | Mach.Callstate _ _ _ _ => 0%nat
- | Mach.Returnstate _ _ _ => 1%nat
+Lemma exec_body_simulation_plus b ofs f bb rs m s2 rs' m': forall
+ (ATPC: rs PC = Vptr b ofs)
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (NEMPTY_BODY: body bb <> nil)
+ (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
+ (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+ exists s2', plus Asm.step tge s2 E0 s2'
+ /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+ intros.
+ exploit exec_body_simulation_plus_gen; eauto.
+ - constructor.
+ - replace (list_length_z (header bb) + list_length_z (body bb) - list_length_z (body bb)) with (list_length_z (header bb)); auto.
+ lia.
+Qed.
+
+Lemma exec_body_simulation_star b ofs f bb rs m s2 rs' m': forall
+ (ATPC: rs PC = Vptr b ofs)
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (MATCHI: match_internal (list_length_z (header bb)) (State rs m) s2)
+ (BODY: exec_body lk ge (body bb) rs m = Next rs' m'),
+ exists s2', star Asm.step tge s2 E0 s2'
+ /\ match_internal (size bb - (Z.of_nat (length_opt (exit bb)))) (State rs' m') s2'.
+Proof.
+ intros.
+ destruct (body bb) eqn: Hbb.
+ - simpl in BODY. inv BODY.
+ eexists. split.
+ eapply star_refl; eauto.
+ assert (EQ: (size bb - Z.of_nat (length_opt (exit bb))) = list_length_z (header bb)).
+ { rewrite bblock_size_aux. rewrite Hbb; unfold list_length_z; simpl. lia. }
+ rewrite EQ; eauto.
+ - exploit exec_body_simulation_plus; congruence || eauto.
+ { rewrite Hbb; eauto. }
+ intros (s2' & PLUS & MATCHI').
+ eexists; split; eauto.
+ eapply plus_star; eauto.
+Qed.
+
+Lemma list_nth_z_range_exceeded A (l : list A) n:
+ n >= list_length_z l ->
+ list_nth_z l n = None.
+Proof.
+ intros N.
+ remember (list_nth_z l n) as opt eqn:H. symmetry in H.
+ destruct opt; auto.
+ exploit list_nth_z_range; eauto. lia.
+Qed.
+
+Lemma label_in_header_list lbl a:
+ is_label lbl a = true -> list_length_z (header a) <= 1 -> header a = lbl :: nil.
+Proof.
+ intros.
+ eapply is_label_correct_true in H.
+ destruct (header a).
+ - eapply in_nil in H. contradiction.
+ - rewrite list_length_z_cons in H0.
+ assert (list_length_z l0 >= 0) by eapply list_length_z_pos.
+ assert (list_length_z l0 = 0) by lia.
+ rewrite list_length_z_nat in H2.
+ assert (Datatypes.length l0 = 0%nat) by lia.
+ eapply length_zero_iff_nil in H3. subst.
+ unfold In in H. destruct H.
+ + subst; eauto.
+ + destruct H.
+Qed.
+
+Lemma no_label_in_basic_inst: forall a lbl x,
+ basic_to_instruction a = OK x -> Asm.is_label lbl x = false.
+Proof.
+ intros.
+ destruct a; simpl in *;
+ repeat destruct i;
+ repeat destruct ld; repeat destruct st;
+ simpl in *;
+ try (try destruct_reg_inv; monadInv H; simpl in *; reflexivity).
+Qed.
+
+Lemma label_pos_body bdy: forall c1 c2 z ex lbl
+ (HUNF : unfold_body bdy = OK c2),
+ Asm.label_pos lbl (z + Z.of_nat ((Datatypes.length bdy) + length_opt ex)) c1 = Asm.label_pos lbl (z) ((c2 ++ unfold_exit ex) ++ c1).
+Proof.
+ induction bdy.
+ - intros. inversion HUNF. simpl in *.
+ destruct ex eqn:EQEX.
+ + simpl in *. unfold Asm.is_label. destruct c; simpl; try congruence.
+ destruct i; simpl; try congruence.
+ + simpl in *. ring_simplify (z + 0). auto.
+ - intros. inversion HUNF; clear HUNF. monadInv H0. simpl in *.
+ erewrite no_label_in_basic_inst; eauto. rewrite <- IHbdy; eauto.
+ erewrite Zpos_P_of_succ_nat.
+ apply f_equal2; auto. lia.
+Qed.
+
+Lemma asm_label_pos_header: forall z a x0 x1 lbl
+ (HUNF: unfold_body (body a) = OK x1),
+ Asm.label_pos lbl (z + size a) x0 =
+ Asm.label_pos lbl (z + list_length_z (header a)) ((x1 ++ unfold_exit (exit a)) ++ x0).
+Proof.
+ intros.
+ unfold size.
+ rewrite <- plus_assoc. rewrite Nat2Z.inj_add.
+ rewrite list_length_z_nat.
+ replace (z + (Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a)))) with (z + Z.of_nat (Datatypes.length (header a)) + Z.of_nat (Datatypes.length (body a) + length_opt (exit a))) by lia.
+ eapply (label_pos_body (body a) x0 x1 (z + Z.of_nat (Datatypes.length (header a))) (exit a) lbl). auto.
+Qed.
+
+Lemma header_size_cons_nil: forall (l0: label) (l1: list label)
+ (HSIZE: list_length_z (l0 :: l1) <= 1),
+ l1 = nil.
+Proof.
+ intros.
+ destruct l1; try congruence. rewrite !list_length_z_cons in HSIZE.
+ assert (list_length_z l1 >= 0) by eapply list_length_z_pos.
+ assert (list_length_z l1 + 1 + 1 >= 2) by lia.
+ assert (2 <= 1) by lia. contradiction H1. lia.
+Qed.
+
+Lemma label_pos_preserved_gen bbs: forall lbl c z
+ (HUNF: unfold bbs = OK c),
+ label_pos lbl z bbs = Asm.label_pos lbl z c.
+Proof.
+ induction bbs.
+ - intros. simpl in *. inversion HUNF. simpl. reflexivity.
+ - intros. simpl in *. monadInv HUNF. unfold unfold_bblock in EQ.
+ destruct (zle _ _); try congruence. monadInv EQ.
+ destruct (is_label _ _) eqn:EQLBL.
+ + erewrite label_in_header_list; eauto.
+ simpl in *. destruct (peq lbl lbl); try congruence.
+ + erewrite IHbbs; eauto.
+ rewrite (asm_label_pos_header z a x0 x1 lbl); auto.
+ unfold is_label in *.
+ destruct (header a).
+ * replace (z + list_length_z (@nil label)) with (z); eauto.
+ unfold list_length_z. simpl. lia.
+ * eapply header_size_cons_nil in l as HL1.
+ subst. simpl in *. destruct (in_dec _ _); try congruence.
+ simpl in *.
+ destruct (peq _ _); try intuition congruence.
+Qed.
+
+Lemma label_pos_preserved f lbl z tf: forall
+ (FINDF: transf_function f = OK tf),
+ label_pos lbl z (fn_blocks f) = Asm.label_pos lbl z (Asm.fn_code tf).
+Proof.
+ intros.
+ eapply label_pos_preserved_gen.
+ unfold transf_function in FINDF. monadInv FINDF.
+ destruct zlt; try congruence. inversion EQ0. eauto.
+Qed.
+
+Lemma goto_label_preserved bb rs1 m1 rs1' m1' rs2 m2 lbl f tf v: forall
+ (FINDF: transf_function f = OK tf)
+ (BOUNDED: size bb <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
+ (HGOTO: goto_label f lbl (incrPC v rs1) m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.goto_label tf lbl rs2 m2 = Next rs2' m2'
+ /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ unfold goto_label, Asm.goto_label in *.
+ rewrite <- (label_pos_preserved f); auto.
+ inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.
+ destruct label_pos; next_stuck_cong.
+ destruct (incrPC v rs1 PC) eqn:INCRPC; next_stuck_cong.
+ inversion HGOTO; auto. repeat (econstructor; eauto).
+ rewrite <- EQPC.
+ unfold incrPC in *.
+ rewrite !Pregmap.gss in *.
+ destruct (rs1 PC) eqn:EQRS1; simpl in *; try congruence.
+ replace (rs2 # PC <- (Vptr b0 (Ptrofs.repr z))) with ((rs1 # PC <- (Vptr b0 (Ptrofs.add i0 v))) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+ eapply functional_extensionality. intros.
+ destruct (PregEq.eq x PC); subst.
+ rewrite !Pregmap.gss. congruence.
+ rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma next_inst_incr_pc_preserved bb rs1 m1 rs1' m1' rs2 m2 f tf: forall
+ (FINDF: transf_function f = OK tf)
+ (BOUNDED: size bb <= Ptrofs.max_unsigned)
+ (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2))
+ (NEXT: Next (incrPC (Ptrofs.repr (size bb)) rs1) m2 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem),
+ Next (Asm.nextinstr rs2) m2 = Next rs2' m2'
+ /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+ intros; simpl in *; unfold incrPC in NEXT;
+ inv_matchi;
+ assert (size bb >= 1) by eapply bblock_size_pos;
+ assert (0 <= size bb - 1 <= Ptrofs.max_unsigned) by lia;
+ inversion NEXT; subst;
+ eexists; eexists; split; eauto.
+ assert (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))) = Asm.nextinstr rs2). {
+ unfold Pregmap.set. apply functional_extensionality.
+ intros x. destruct (PregEq.eq x PC).
+ -- unfold Asm.nextinstr. rewrite <- AGPC.
+ rewrite Val.offset_ptr_assoc. rewrite Ptrofs.add_unsigned.
+ rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
+ rewrite Ptrofs.unsigned_one.
+ replace (size bb - 1 + 1) with (size bb) by lia.
+ rewrite e. rewrite Pregmap.gss.
+ reflexivity.
+ -- eapply nextinstr_agree_but_pc; eauto. }
+ rewrite H1. econstructor.
+Qed.
+
+Lemma pc_reg_overwrite: forall (r: ireg) rs1 m1 rs2 m2 bb
+ (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
+ rs2 # PC <- (rs2 r) =
+ (rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb)))) # PC <-
+ (rs1 r).
+Proof.
+ intros.
+ unfold Pregmap.set; apply functional_extensionality.
+ intros x; destruct (PregEq.eq x PC) as [X | X]; try discriminate; inv_matchi.
+Qed.
+
+Lemma exec_cfi_simulation:
+ forall bb f tf rs1 m1 rs1' m1' rs2 m2 cfi
+ (SIZE: size bb <= Ptrofs.max_unsigned)
+ (FINDF: transf_function f = OK tf)
+ (* Warning: Asmblock's PC is assumed to be already pointing on the next instruction ! *)
+ (CFI: exec_cfi ge f cfi (incrPC (Ptrofs.repr (size bb)) rs1) m1 = Next rs1' m1')
+ (MATCHI: match_internal (size bb - 1) (State rs1 m1) (State rs2 m2)),
+ exists rs2' m2', Asm.exec_instr tge tf (cf_instruction_to_instruction cfi)
+ rs2 m2 = Next rs2' m2'
+ /\ match_states (State rs1' m1') (State rs2' m2').
+Proof.
+ intros.
+ assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
+ destruct cfi; inv CFI; simpl.
+ - (* Pb *)
+ exploit goto_label_preserved; eauto.
+ - (* Pbc *)
+ inv_matchi.
+ unfold eval_testcond in *. destruct c;
+ erewrite !incrPC_agree_but_pc in H0; try rewrite <- !AG; try congruence.
+ all:
+ destruct_reg_size;
+ try destruct b eqn:EQB.
+ 1,4,7,10,13,16,19,22,25,28,31,34:
+ exploit goto_label_preserved; eauto.
+ 1,3,5,7,9,11,13,15,17,19,21,23:
+ exploit next_inst_incr_pc_preserved; eauto.
+ all: repeat (econstructor; eauto).
+ - (* Pbl *)
+ eexists; eexists; split; eauto.
+ assert ( ((incrPC (Ptrofs.repr (size bb)) rs1) # X30 <- (incrPC (Ptrofs.repr (size bb)) rs1 PC))
+ # PC <- (Genv.symbol_address ge id Ptrofs.zero)
+ = (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one))
+ # PC <- (Genv.symbol_address tge id Ptrofs.zero)
+ ) as EQRS. {
+ unfold incrPC. unfold Pregmap.set. simpl. apply functional_extensionality.
+ intros x. destruct (PregEq.eq x PC).
+ * rewrite symbol_addresses_preserved. reflexivity.
+ * destruct (PregEq.eq x X30).
+ -- inv MATCHI. rewrite <- AGPC. rewrite Val.offset_ptr_assoc.
+ unfold Ptrofs.add, Ptrofs.one. repeat (rewrite Ptrofs.unsigned_repr); try lia.
+ replace (size bb - 1 + 1) with (size bb) by lia. reflexivity.
+ -- inv MATCHI; rewrite AG; try assumption; reflexivity.
+ } rewrite EQRS; inv MATCHI; reflexivity.
+ - (* Pbs *)
+ eexists; eexists; split; eauto.
+ assert ( (incrPC (Ptrofs.repr (size bb)) rs1) # PC <-
+ (Genv.symbol_address ge id Ptrofs.zero)
+ = rs2 # PC <- (Genv.symbol_address tge id Ptrofs.zero)
+ ) as EQRS. {
+ unfold incrPC, Pregmap.set. rewrite symbol_addresses_preserved. inv MATCHI.
+ apply functional_extensionality. intros x. destruct (PregEq.eq x PC); auto.
+ } rewrite EQRS; inv MATCHI; reflexivity.
+ - (* Pblr *)
+ eexists; eexists; split; eauto.
+ unfold incrPC. rewrite Pregmap.gss. rewrite Pregmap.gso; try discriminate.
+ assert ( (rs2 # X30 <- (Val.offset_ptr (rs2 PC) Ptrofs.one)) # PC <- (rs2 r)
+ = ((rs1 # PC <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))))
+ # X30 <- (Val.offset_ptr (rs1 PC) (Ptrofs.repr (size bb))))
+ # PC <- (rs1 r)
+ ) as EQRS. {
+ unfold Pregmap.set. apply functional_extensionality.
+ intros x; destruct (PregEq.eq x PC) as [X | X].
+ - inv_matchi; rewrite AG; auto.
+ - destruct (PregEq.eq x X30) as [X' | X'].
+ + inversion MATCHI; subst. rewrite <- AGPC.
+ rewrite Val.offset_ptr_assoc. unfold Ptrofs.one.
+ rewrite Ptrofs.add_unsigned. rewrite Ptrofs.unsigned_repr; try lia. rewrite Ptrofs.unsigned_repr; try lia.
+ rewrite Z.sub_add; reflexivity.
+ + inv_matchi.
+ } rewrite EQRS. inv_matchi.
+ - (* Pbr *)
+ eexists; eexists; split; eauto.
+ unfold incrPC. rewrite Pregmap.gso; try discriminate.
+ rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto.
+ inv_matchi.
+ - (* Pret *)
+ eexists; eexists; split; eauto.
+ unfold incrPC. rewrite Pregmap.gso; try discriminate.
+ rewrite (pc_reg_overwrite r rs1 m1' rs2 m2 bb); auto.
+ inv_matchi.
+ - (* Pcbnz *)
+ inv_matchi.
+ unfold eval_neg_branch in *.
+ erewrite incrPC_agree_but_pc in H0; try congruence.
+ destruct eval_testzero; next_stuck_cong.
+ destruct b.
+ * exploit next_inst_incr_pc_preserved; eauto.
+ * exploit goto_label_preserved; eauto.
+ - (* Pcbz *)
+ inv_matchi.
+ unfold eval_branch in *.
+ erewrite incrPC_agree_but_pc in H0; try congruence.
+ destruct eval_testzero; next_stuck_cong.
+ destruct b.
+ * exploit goto_label_preserved; eauto.
+ * exploit next_inst_incr_pc_preserved; eauto.
+ - (* Ptbnbz *)
+ inv_matchi.
+ unfold eval_branch in *.
+ erewrite incrPC_agree_but_pc in H0; try congruence.
+ destruct eval_testbit; next_stuck_cong.
+ destruct b.
+ * exploit goto_label_preserved; eauto.
+ * exploit next_inst_incr_pc_preserved; eauto.
+ - (* Ptbz *)
+ inv_matchi.
+ unfold eval_neg_branch in *.
+ erewrite incrPC_agree_but_pc in H0; try congruence.
+ destruct eval_testbit; next_stuck_cong.
+ destruct b.
+ * exploit next_inst_incr_pc_preserved; eauto.
+ * exploit goto_label_preserved; eauto.
+ - (* Pbtbl *)
+ assert (rs2 # X16 <- Vundef r1 = (incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef r1)
+ as EQUNDEFX16. {
+ unfold incrPC, Pregmap.set.
+ destruct (PregEq.eq r1 X16) as [X16 | X16]; auto.
+ destruct (PregEq.eq r1 PC) as [PC' | PC']; try discriminate.
+ inv MATCHI; rewrite AG; auto.
+ } rewrite <- EQUNDEFX16 in H0.
+ destruct_reg_inv; next_stuck_cong.
+ unfold goto_label, Asm.goto_label in *.
+ rewrite <- (label_pos_preserved f); auto.
+ inversion MATCHI; subst.
+ destruct label_pos; next_stuck_cong.
+ destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
+ inversion H0; auto. repeat (econstructor; eauto).
+ rewrite !Pregmap.gso; try congruence.
+ rewrite <- AGPC.
+ unfold incrPC in *.
+ destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate.
+ replace ((rs2 # X16 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
+ (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
+ Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+ eapply functional_extensionality; intros x.
+ destruct (PregEq.eq x PC); subst.
+ + rewrite Pregmap.gso in INCRPC; try congruence.
+ rewrite Pregmap.gss in INCRPC.
+ rewrite !Pregmap.gss in *; congruence.
+ + rewrite Pregmap.gso; auto.
+ rewrite (Pregmap.gso (i := x) (j := PC)); auto.
+ destruct (PregEq.eq x X16); subst.
+ * rewrite !Pregmap.gss; auto.
+ * rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma last_instruction_cannot_be_label bb:
+ list_nth_z (header bb) (size bb - 1) = None.
+Proof.
+ assert (list_length_z (header bb) <= size bb - 1). {
+ rewrite bblock_size_aux. generalize (bblock_size_aux_pos bb). lia.
+ }
+ remember (list_nth_z (header bb) (size bb - 1)) as label_opt; destruct label_opt; auto;
+ exploit list_nth_z_range; eauto; lia.
+Qed.
+
+Lemma pc_ptr_exec_step: forall ofs bb b rs m _rs _m
+ (ATPC : rs PC = Vptr b ofs)
+ (MATCHI : match_internal (size bb - 1)
+ {| _rs := rs; _m := m |}
+ {| _rs := _rs; _m := _m |}),
+ _rs PC = Vptr b (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))).
+Proof.
+ intros; inv MATCHI. rewrite <- AGPC; rewrite ATPC; unfold Val.offset_ptr; eauto.
+Qed.
+
+Lemma find_instr_ofs_somei: forall ofs bb f tc asmi rs m _rs _m
+ (BOUNDOFS : Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned)
+ (FIND_INSTR : Asm.find_instr (Ptrofs.unsigned ofs + (size bb - 1)) tc =
+ Some (asmi))
+ (MATCHI : match_internal (size bb - 1)
+ {| _rs := rs; _m := m |}
+ {| _rs := _rs; _m := _m |}),
+ Asm.find_instr (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
+ (Asm.fn_code {| Asm.fn_sig := fn_sig f; Asm.fn_code := tc |}) =
+ Some (asmi).
+Proof.
+ intros; simpl.
+ replace (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bb - 1))))
+ with (Ptrofs.unsigned ofs + (size bb - 1)); try assumption.
+ generalize (bblock_size_pos bb); generalize (Ptrofs.unsigned_range_2 ofs); intros.
+ unfold Ptrofs.add.
+ rewrite Ptrofs.unsigned_repr. rewrite Ptrofs.unsigned_repr; try lia.
+ rewrite Ptrofs.unsigned_repr; lia.
+Qed.
+
+Lemma eval_builtin_arg_match: forall rs _m _rs a1 b1
+ (AG : forall r : preg, r <> PC -> rs r = _rs r)
+ (EVAL : eval_builtin_arg tge (fun r : dreg => rs r) (rs SP) _m a1 b1),
+ eval_builtin_arg tge _rs (_rs SP) _m (map_builtin_arg DR a1) b1.
+Proof.
+ intros; induction EVAL; simpl in *; try rewrite AG; try rewrite AG in EVAL; try discriminate; try congruence; eauto with barg.
+ econstructor. rewrite <- AG; try discriminate; auto.
+Qed.
+
+Lemma eval_builtin_args_match: forall bb rs m _rs _m args vargs
+ (MATCHI : match_internal (size bb - 1)
+ {| _rs := rs; _m := m |}
+ {| _rs := _rs; _m := _m |})
+ (EVAL : eval_builtin_args tge (fun r : dreg => rs r) (rs SP) m args vargs),
+ eval_builtin_args tge _rs (_rs SP) _m (map (map_builtin_arg DR) args) vargs.
+Proof.
+ intros; inv MATCHI.
+ induction EVAL; subst.
+ - econstructor.
+ - econstructor.
+ + eapply eval_builtin_arg_match; eauto.
+ + eauto.
+Qed.
+
+Lemma pc_both_sides: forall (rs _rs: regset) v
+ (AG : forall r : preg, r <> PC -> rs r = _rs r),
+ rs # PC <- v = _rs # PC <- v.
+Proof.
+ intros; unfold Pregmap.set; apply functional_extensionality; intros y.
+ destruct (PregEq.eq y PC); try rewrite AG; eauto.
+Qed.
+
+Lemma set_buitin_res_sym res: forall vres rs _rs r
+ (NPC: r <> PC)
+ (AG : forall r : preg, r <> PC -> rs r = _rs r),
+ set_res res vres rs r = set_res res vres _rs r.
+Proof.
+ induction res; simpl; intros; unfold Pregmap.set; try rewrite AG; eauto.
+Qed.
+
+Lemma set_builtin_res_dont_move_pc_gen res: forall vres rs _rs v1 v2
+ (HV: v1 = v2)
+ (AG : forall r : preg, r <> PC -> rs r = _rs r),
+ (set_res res vres rs) # PC <- v1 =
+ (set_res res vres _rs) # PC <- v2.
+Proof.
+ intros. rewrite HV. generalize res vres rs _rs AG v2.
+ clear res vres rs _rs AG v1 v2 HV.
+ induction res.
+ - simpl; intros. apply pc_both_sides; intros.
+ unfold Pregmap.set; try rewrite AG; eauto.
+ - simpl; intros; apply pc_both_sides; eauto.
+ - simpl; intros.
+ erewrite IHres2; eauto; intros.
+ eapply set_buitin_res_sym; eauto.
+Qed.
+
+Lemma set_builtin_map_not_pc (res: builtin_res dreg): forall vres rs,
+ set_res (map_builtin_res DR res) vres rs PC = rs PC.
+Proof.
+ induction res.
+ - intros; simpl. unfold Pregmap.set. destruct (PregEq.eq PC x); try congruence.
+ - intros; simpl; congruence.
+ - intros; simpl in *. rewrite IHres2. rewrite IHres1. reflexivity.
+Qed.
+
+Lemma undef_reg_preserved (rl: list mreg): forall rs _rs r
+ (NPC: r <> PC)
+ (AG : forall r : preg, r <> PC -> rs r = _rs r),
+ undef_regs (map preg_of rl) rs r = undef_regs (map preg_of rl) _rs r.
+Proof.
+ induction rl.
+ - simpl; auto.
+ - simpl; intros. erewrite IHrl; eauto.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r0 (preg_of a)); try rewrite AG; eauto.
+Qed.
+
+Lemma undef_regs_other:
+ forall r rl rs,
+ (forall r', In r' rl -> r <> r') ->
+ undef_regs rl rs r = rs r.
+Proof.
+ induction rl; simpl; intros. auto.
+ rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+ match rl with
+ | nil => True
+ | r1 :: nil => r <> preg_of r1
+ | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
end.
-Remark preg_of_not_X29: forall r, negb (mreg_eq r R29) = true -> IR X29 <> preg_of r.
-Proof.
- intros. change (IR X29) with (preg_of R29). red; intros.
- exploit preg_of_injective; eauto. intros; subst r; discriminate.
-Qed.
-
-Lemma sp_val': forall ms sp rs, agree ms sp rs -> sp = rs XSP.
-Proof.
- intros. eapply sp_val; eauto.
-Qed.
-
-(** This is the simulation diagram. We prove it by case analysis on the Mach transition. *)
-
-Theorem step_simulation:
- forall S1 t S2, Mach.step return_address_offset ge S1 t S2 ->
- forall S1' (MS: match_states S1 S1'),
- (exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
- \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
-Proof.
- induction 1; intros; inv MS.
-
-- (* Mlabel *)
- left; eapply exec_straight_steps; eauto; intros.
- monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split. apply agree_nextinstr; auto. simpl; congruence.
-
-- (* Mgetstack *)
- unfold load_stack in H.
- exploit Mem.loadv_extends; eauto. intros [v' [A B]].
- rewrite (sp_val _ _ _ AG) in A.
- left; eapply exec_straight_steps; eauto. intros. simpl in TR.
- exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
- exists rs'; split. eauto.
- split. eapply agree_set_mreg; eauto with asmgen. congruence.
- simpl; congruence.
-
-- (* Msetstack *)
- unfold store_stack in H.
- assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
- exploit Mem.storev_extends; eauto. intros [m2' [A B]].
- left; eapply exec_straight_steps; eauto.
- rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
- exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
- exists rs'; split. eauto.
- split. eapply agree_undef_regs; eauto with asmgen.
- simpl; intros. rewrite Q; auto with asmgen.
-
-- (* Mgetparam *)
- assert (f0 = f) by congruence; subst f0.
- unfold load_stack in *.
- exploit Mem.loadv_extends. eauto. eexact H0. auto.
- intros [parent' [A B]]. rewrite (sp_val' _ _ _ AG) in A.
- exploit lessdef_parent_sp; eauto. clear B; intros B; subst parent'.
- exploit Mem.loadv_extends. eauto. eexact H1. auto.
- intros [v' [C D]].
-Opaque loadind.
- left; eapply exec_straight_steps; eauto; intros. monadInv TR.
- destruct ep.
-(* X30 contains parent *)
- exploit loadind_correct. eexact EQ.
- instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence.
- intros [rs1 [P [Q R]]].
- exists rs1; split. eauto.
- split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
- simpl; intros. rewrite R; auto with asmgen.
- apply preg_of_not_X29; auto.
-(* X30 does not contain parent *)
- exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]].
- exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence.
- intros [rs2 [S [T U]]].
- exists rs2; split. eapply exec_straight_trans; eauto.
- split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
- instantiate (1 := rs1#X29 <- (rs2#X29)). intros.
- rewrite Pregmap.gso; auto with asmgen.
- congruence.
- intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
- simpl; intros. rewrite U; auto with asmgen.
- apply preg_of_not_X29; auto.
-
-- (* Mop *)
- assert (eval_operation tge sp op (map rs args) m = Some v).
- { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. }
- exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
- intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
- left; eapply exec_straight_steps; eauto; intros. simpl in TR.
- exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
- exists rs2; split. eauto. split.
- apply agree_set_undef_mreg with rs0; auto.
- apply Val.lessdef_trans with v'; auto.
- simpl; intros. InvBooleans.
- rewrite R; auto. apply preg_of_not_X29; auto.
-Local Transparent destroyed_by_op.
- destruct op; try exact I; simpl; congruence.
-
-- (* Mload *)
- assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
- { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
- exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
- intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
- exploit Mem.loadv_extends; eauto. intros [v' [C D]].
- left; eapply exec_straight_steps; eauto; intros. simpl in TR.
- exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]].
- exists rs2; split. eauto.
- split. eapply agree_set_undef_mreg; eauto. congruence.
- simpl; congruence.
-
-- (* Mstore *)
- assert (Op.eval_addressing tge sp addr (map rs args) = Some a).
- { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. }
- exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1.
- intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
- assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
- exploit Mem.storev_extends; eauto. intros [m2' [C D]].
- left; eapply exec_straight_steps; eauto.
- intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]].
- exists rs2; split. eauto.
- split. eapply agree_undef_regs; eauto with asmgen.
- simpl; congruence.
-
-- (* Mcall *)
- assert (f0 = f) by congruence. subst f0.
- inv AT.
- assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
- { eapply transf_function_no_overflow; eauto. }
- destruct ros as [rf|fid]; simpl in H; monadInv H5.
-+ (* Indirect call *)
- assert (rs rf = Vptr f' Ptrofs.zero).
- { destruct (rs rf); try discriminate.
- revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
- assert (rs0 x0 = Vptr f' Ptrofs.zero).
- { exploit ireg_val; eauto. rewrite H5; intros LD; inv LD; auto. }
- generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
- assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
- { econstructor; eauto. }
- exploit return_address_offset_correct; eauto. intros; subst ra.
- left; econstructor; split.
- apply plus_one. eapply exec_step_internal. Simpl. rewrite <- H2; simpl; eauto.
- eapply functions_transl; eauto. eapply find_instr_tail; eauto.
- simpl. eauto.
- econstructor; eauto.
- econstructor; eauto.
- eapply agree_sp_def; eauto.
- simpl. eapply agree_exten; eauto. intros. Simpl.
- Simpl. rewrite <- H2. auto.
-+ (* Direct call *)
- generalize (code_tail_next_int _ _ _ _ NOOV H6). intro CT1.
- assert (TCA: transl_code_at_pc ge (Vptr fb (Ptrofs.add ofs Ptrofs.one)) fb f c false tf x).
- econstructor; eauto.
- exploit return_address_offset_correct; eauto. intros; subst ra.
- left; econstructor; split.
- apply plus_one. eapply exec_step_internal. eauto.
- eapply functions_transl; eauto. eapply find_instr_tail; eauto.
- simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. eauto.
- econstructor; eauto.
- econstructor; eauto.
- eapply agree_sp_def; eauto.
- simpl. eapply agree_exten; eauto. intros. Simpl.
- Simpl. rewrite <- H2. auto.
-
-- (* Mtailcall *)
- assert (f0 = f) by congruence. subst f0.
- inversion AT; subst.
- assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
- { eapply transf_function_no_overflow; eauto. }
- exploit Mem.loadv_extends. eauto. eexact H1. auto. simpl. intros [parent' [A B]].
- destruct ros as [rf|fid]; simpl in H; monadInv H7.
-+ (* Indirect call *)
- assert (rs rf = Vptr f' Ptrofs.zero).
- { destruct (rs rf); try discriminate.
- revert H; predSpec Ptrofs.eq Ptrofs.eq_spec i Ptrofs.zero; intros; congruence. }
- assert (rs0 x0 = Vptr f' Ptrofs.zero).
- { exploit ireg_val; eauto. rewrite H7; intros LD; inv LD; auto. }
- exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
- exploit exec_straight_steps_2; eauto using functions_transl.
- intros (ofs' & P & Q).
- left; econstructor; split.
- (* execution *)
- eapply plus_right'. eapply exec_straight_exec; eauto.
- econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
- simpl. reflexivity.
- traceEq.
- (* match states *)
- econstructor; eauto.
- apply agree_set_other; auto with asmgen.
- Simpl. rewrite Z by (rewrite <- (ireg_of_eq _ _ EQ1); eauto with asmgen). assumption.
-+ (* Direct call *)
- exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
- exploit exec_straight_steps_2; eauto using functions_transl.
- intros (ofs' & P & Q).
- left; econstructor; split.
- (* execution *)
- eapply plus_right'. eapply exec_straight_exec; eauto.
- econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
- simpl. reflexivity.
- traceEq.
- (* match states *)
- econstructor; eauto.
- apply agree_set_other; auto with asmgen.
- Simpl. unfold Genv.symbol_address. rewrite symbols_preserved. rewrite H. auto.
-
-- (* Mbuiltin *)
- inv AT. monadInv H4.
- exploit functions_transl; eauto. intro FN.
- generalize (transf_function_no_overflow _ _ H3); intro NOOV.
- exploit builtin_args_match; eauto. intros [vargs' [P Q]].
- exploit external_call_mem_extends; eauto.
- intros [vres' [m2' [A [B [C D]]]]].
- left. econstructor; split. apply plus_one.
- eapply exec_step_builtin. eauto. eauto.
- eapply find_instr_tail; eauto.
- erewrite <- sp_val by eauto.
- eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
- eapply external_call_symbols_preserved; eauto. apply senv_preserved.
- eauto.
- econstructor; eauto.
- instantiate (2 := tf); instantiate (1 := x).
- unfold nextinstr. rewrite Pregmap.gss.
- rewrite set_res_other. rewrite undef_regs_other.
- rewrite <- H1. simpl. econstructor; eauto.
- eapply code_tail_next_int; eauto.
- simpl; intros. destruct H4. congruence. destruct H4. congruence.
- exploit list_in_map_inv; eauto. intros (mr & U & V). subst.
- auto with asmgen.
- auto with asmgen.
- apply agree_nextinstr. eapply agree_set_res; auto.
- eapply agree_undef_regs; eauto. intros.
- simpl. rewrite undef_regs_other_2; auto. Simpl.
- congruence.
-
-- (* Mgoto *)
- assert (f0 = f) by congruence. subst f0.
- inv AT. monadInv H4.
- exploit find_label_goto_label; eauto. intros [tc' [rs' [GOTO [AT2 INV]]]].
- left; exists (State rs' m'); split.
- apply plus_one. econstructor; eauto.
- eapply functions_transl; eauto.
- eapply find_instr_tail; eauto.
- simpl; eauto.
- econstructor; eauto.
- eapply agree_exten; eauto with asmgen.
- congruence.
-
-- (* Mcond true *)
- assert (f0 = f) by congruence. subst f0.
- exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
- left; eapply exec_straight_opt_steps_goto; eauto.
- intros. simpl in TR.
- exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
- exists jmp; exists k; exists rs'.
- split. eexact A.
- split. apply agree_exten with rs0; auto with asmgen.
- exact B.
-
-- (* Mcond false *)
- exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
- left; eapply exec_straight_steps; eauto. intros. simpl in TR.
- exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
- econstructor; split.
- eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto.
- split. apply agree_exten with rs0; auto. intros. Simpl.
- simpl; congruence.
-
-- (* Mjumptable *)
- assert (f0 = f) by congruence. subst f0.
- inv AT. monadInv H6.
- exploit functions_transl; eauto. intro FN.
- generalize (transf_function_no_overflow _ _ H5); intro NOOV.
- exploit find_label_goto_label. eauto. eauto.
- instantiate (2 := rs0#X16 <- Vundef).
- Simpl. eauto.
- eauto.
- intros [tc' [rs' [A [B C]]]].
- exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
- left; econstructor; split.
- apply plus_one. econstructor; eauto.
- eapply find_instr_tail; eauto.
- simpl. Simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
- econstructor; eauto.
- eapply agree_undef_regs; eauto.
- simpl. intros. rewrite C; auto with asmgen. Simpl.
- congruence.
-
-- (* Mreturn *)
- assert (f0 = f) by congruence. subst f0.
- inversion AT; subst. simpl in H6; monadInv H6.
- assert (NOOV: list_length_z tf.(fn_code) <= Ptrofs.max_unsigned).
- eapply transf_function_no_overflow; eauto.
- exploit make_epilogue_correct; eauto. intros (rs1 & m1 & U & V & W & X & Y & Z).
- exploit exec_straight_steps_2; eauto using functions_transl.
- intros (ofs' & P & Q).
- left; econstructor; split.
- (* execution *)
- eapply plus_right'. eapply exec_straight_exec; eauto.
- econstructor. eexact P. eapply functions_transl; eauto. eapply find_instr_tail. eexact Q.
- simpl. reflexivity.
- traceEq.
- (* match states *)
- econstructor; eauto.
- apply agree_set_other; auto with asmgen.
-
-- (* internal function *)
-
- exploit functions_translated; eauto. intros [tf [A B]]. monadInv B.
- generalize EQ; intros EQ'. monadInv EQ'.
- destruct (zlt Ptrofs.max_unsigned (list_length_z x0.(fn_code))); inversion EQ1. clear EQ1. subst x0.
- unfold store_stack in *.
- exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
- intros [m1' [C D]].
- exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
- intros [m2' [F G]].
- simpl chunk_of_type in F.
- exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
- intros [m3' [P Q]].
- change (chunk_of_type Tptr) with Mint64 in *.
- (* Execution of function prologue *)
- monadInv EQ0. rewrite transl_code'_transl_code in EQ1.
- set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
- storeptr RA XSP (fn_retaddr_ofs f) x0) in *.
- set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
- set (rs2 := nextinstr (rs0#X29 <- (parent_sp s) #SP <- sp #X16 <- Vundef)).
- exploit (storeptr_correct tge tf XSP (fn_retaddr_ofs f) RA x0 m2' m3' rs2).
- simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR.
- change (rs2 X2) with sp. eexact P.
- simpl; congruence. congruence.
- intros (rs3 & U & V).
- assert (EXEC_PROLOGUE:
- exec_straight tge tf
- tf.(fn_code) rs0 m'
- x0 rs3 m3').
- { change (fn_code tf) with tfbody; unfold tfbody.
- apply exec_straight_step with rs2 m2'.
- unfold exec_instr. rewrite C. fold sp.
- rewrite <- (sp_val _ _ _ AG). rewrite F. reflexivity.
- reflexivity.
- eexact U. }
- exploit exec_straight_steps_2; eauto using functions_transl. lia. constructor.
- intros (ofs' & X & Y).
- left; exists (State rs3 m3'); split.
- eapply exec_straight_steps_1; eauto. lia. constructor.
- econstructor; eauto.
- rewrite X; econstructor; eauto.
- apply agree_exten with rs2; eauto with asmgen.
- unfold rs2.
- apply agree_nextinstr. apply agree_set_other; auto with asmgen.
- apply agree_change_sp with (parent_sp s).
- apply agree_undef_regs with rs0. auto.
-Local Transparent destroyed_at_function_entry. simpl.
- simpl; intros; Simpl.
- unfold sp; congruence.
- intros. rewrite V by auto with asmgen. reflexivity.
-
-- (* external function *)
- exploit functions_translated; eauto.
- intros [tf [A B]]. simpl in B. inv B.
- exploit extcall_arguments_match; eauto.
- intros [args' [C D]].
- exploit external_call_mem_extends; eauto.
- intros [res' [m2' [P [Q [R S]]]]].
- left; econstructor; split.
- apply plus_one. eapply exec_step_external; eauto.
- eapply external_call_symbols_preserved; eauto. apply senv_preserved.
- econstructor; eauto.
- unfold loc_external_result. apply agree_set_other; auto. apply agree_set_pair; auto.
- apply agree_undef_caller_save_regs; auto.
-
-- (* return *)
- inv STACKS. simpl in *.
- right. split. lia. split. auto.
- rewrite <- ATPC in H5.
- econstructor; eauto. congruence.
+Remark preg_notin_charact:
+ forall r rl,
+ preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+ induction rl; simpl; intros.
+ tauto.
+ destruct rl.
+ simpl. split. intros. intuition congruence. auto.
+ rewrite IHrl. split.
+ intros [A B]. intros. destruct H. congruence. auto.
+ auto.
Qed.
-Lemma transf_initial_states:
- forall st1, Mach.initial_state prog st1 ->
- exists st2, Asm.initial_state tprog st2 /\ match_states st1 st2.
+Lemma undef_regs_other_2:
+ forall r rl rs,
+ preg_notin r rl ->
+ undef_regs (map preg_of rl) rs r = rs r.
Proof.
- intros. inversion H. unfold ge0 in *.
- econstructor; split.
- econstructor.
- eapply (Genv.init_mem_transf_partial TRANSF); eauto.
- replace (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero)
- with (Vptr fb Ptrofs.zero).
- econstructor; eauto.
- constructor.
- apply Mem.extends_refl.
- split. auto. simpl. unfold Vnullptr; destruct Archi.ptr64; congruence.
- intros. rewrite Regmap.gi. auto.
- unfold Genv.symbol_address.
- rewrite (match_program_main TRANSF).
- rewrite symbols_preserved.
- unfold ge; rewrite H1. auto.
+ intros. apply undef_regs_other. intros.
+ exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+ rewrite preg_notin_charact in H. auto.
Qed.
-Lemma transf_final_states:
- forall st1 st2 r,
- match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
+Lemma exec_exit_simulation_plus b ofs f bb s2 t rs m rs' m': forall
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (NEMPTY_EXIT: exit bb <> None)
+ (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
+ (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
+ (ATPC: rs PC = Vptr b ofs),
+ plus Asm.step tge s2 t (State rs' m').
+Proof.
+ intros.
+ exploit internal_functions_unfold; eauto.
+ intros (tc & FINDtf & TRANStf & _).
+
+ exploit (find_instr_bblock (size bb - 1)); eauto.
+ { generalize (bblock_size_pos bb). lia. }
+ intros (i' & NTH & FIND_INSTR).
+
+ inv NTH.
+ + rewrite last_instruction_cannot_be_label in *. discriminate.
+ + destruct (exit bb) as [ctrl |] eqn:NEMPTY_EXIT'. 2: { contradiction. }
+ rewrite bblock_size_aux in *. rewrite NEMPTY_EXIT' in *. simpl in *.
+ (* XXX: Is there a better way to simplify this expression i.e. automatically? *)
+ replace (list_length_z (header bb) + list_length_z (body bb) + 1 - 1 -
+ list_length_z (header bb)) with (list_length_z (body bb)) in H by lia.
+ rewrite list_nth_z_range_exceeded in H; try lia. discriminate.
+ + assert (Ptrofs.unsigned ofs + size bb <= Ptrofs.max_unsigned). {
+ eapply size_of_blocks_bounds; eauto.
+ }
+ assert (size bb <= Ptrofs.max_unsigned). { generalize (Ptrofs.unsigned_range_2 ofs); lia. }
+ destruct cfi.
+ * (* control flow instruction *)
+ destruct s2.
+ rewrite H in EXIT. (* exit bb is a cfi *)
+ inv EXIT.
+ rewrite H in MATCHI. simpl in MATCHI.
+ exploit internal_functions_translated; eauto.
+ rewrite FINDtf.
+ intros (tf & FINDtf' & TRANSf). inversion FINDtf'; subst; clear FINDtf'.
+ exploit exec_cfi_simulation; eauto.
+ (* extract exec_cfi_simulation's conclusion as separate hypotheses *)
+ intros (rs2' & m2' & EXECI & MATCHS); rewrite MATCHS.
+ apply plus_one.
+ eapply Asm.exec_step_internal; eauto.
+ - eapply pc_ptr_exec_step; eauto.
+ - eapply find_instr_ofs_somei; eauto.
+ * (* builtin *)
+ destruct s2.
+ rewrite H in EXIT.
+ rewrite H in MATCHI. simpl in MATCHI.
+ simpl in FIND_INSTR.
+ inversion EXIT.
+ apply plus_one.
+ eapply external_call_symbols_preserved in H10; try (apply senv_preserved).
+ eapply eval_builtin_args_preserved in H6; try (apply symbols_preserved).
+ eapply Asm.exec_step_builtin; eauto.
+ - eapply pc_ptr_exec_step; eauto.
+ - eapply find_instr_ofs_somei; eauto.
+ - eapply eval_builtin_args_match; eauto.
+ - inv MATCHI; eauto.
+ - inv MATCHI.
+ unfold Asm.nextinstr, incrPC.
+ assert (HPC: Val.offset_ptr (rs PC) (Ptrofs.repr (size bb))
+ = Val.offset_ptr (_rs PC) Ptrofs.one).
+ { rewrite <- AGPC. rewrite ATPC. unfold Val.offset_ptr.
+ rewrite Ptrofs.add_assoc. unfold Ptrofs.add.
+ assert (BBPOS: size bb >= 1) by eapply bblock_size_pos.
+ rewrite (Ptrofs.unsigned_repr (size bb - 1)); try lia.
+ rewrite Ptrofs.unsigned_one.
+ replace (size bb - 1 + 1) with (size bb) by lia.
+ reflexivity. }
+ apply set_builtin_res_dont_move_pc_gen.
+ -- erewrite !set_builtin_map_not_pc.
+ erewrite !undef_regs_other.
+ rewrite HPC; auto.
+ all: intros; simpl in *; destruct H3 as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+ exploit list_in_map_inv; eauto; intros [mr [A B]]; subst; discriminate.
+ -- intros. eapply undef_reg_preserved; eauto.
+ intros. destruct (PregEq.eq X16 r0); destruct (PregEq.eq X30 r0); subst.
+ rewrite Pregmap.gso, Pregmap.gss; try congruence.
+ do 2 (rewrite Pregmap.gso, Pregmap.gss; try discriminate; auto).
+ rewrite 2Pregmap.gss; auto.
+ rewrite !Pregmap.gso; auto.
+Qed.
+
+Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (MATCHI: match_internal (size bb - Z.of_nat (length_opt (exit bb))) (State rs m) s2)
+ (EXIT: exec_exit ge f (Ptrofs.repr (size bb)) rs m (exit bb) t rs' m')
+ (ATPC: rs PC = Vptr b ofs),
+ star Asm.step tge s2 t (State rs' m').
+Proof.
+ intros.
+ destruct (exit bb) eqn: Hex.
+ - eapply plus_star.
+ eapply exec_exit_simulation_plus; try rewrite Hex; congruence || eauto.
+ - inv MATCHI.
+ inv EXIT.
+ assert (X: rs2 = incrPC (Ptrofs.repr (size bb)) rs). {
+ unfold incrPC. unfold Pregmap.set.
+ apply functional_extensionality. intros x.
+ destruct (PregEq.eq x PC) as [X|].
+ - rewrite X. rewrite <- AGPC. simpl.
+ replace (size bb - 0) with (size bb) by lia. reflexivity.
+ - rewrite AG; try assumption. reflexivity.
+ }
+ destruct X.
+ subst; eapply star_refl; eauto.
+Qed.
+
+Lemma exec_bblock_simulation b ofs f bb t rs m rs' m': forall
+ (ATPC: rs PC = Vptr b ofs)
+ (FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
+ (FINDBB: find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb)
+ (EXECBB: exec_bblock lk ge f bb rs m t rs' m'),
+ plus Asm.step tge (State rs m) t (State rs' m').
Proof.
- intros. inv H0. inv H. constructor. assumption.
- compute in H1. inv H1.
- generalize (preg_val _ _ _ R0 AG). rewrite H2. intros LD; inv LD. auto.
+ intros; destruct EXECBB as (rs1 & m1 & BODY & CTL).
+ exploit exec_header_simulation; eauto.
+ intros (s0 & STAR & MATCH0).
+ eapply star_plus_trans; traceEq || eauto.
+ destruct (bblock_non_empty bb).
+ - (* body bb <> nil *)
+ exploit exec_body_simulation_plus; eauto.
+ intros (s1 & PLUS & MATCH1).
+ eapply plus_star_trans; traceEq || eauto.
+ eapply exec_exit_simulation_star; eauto.
+ erewrite <- exec_body_dont_move_PC; eauto.
+ - (* exit bb <> None *)
+ exploit exec_body_simulation_star; eauto.
+ intros (s1 & STAR1 & MATCH1).
+ eapply star_plus_trans; traceEq || eauto.
+ eapply exec_exit_simulation_plus; eauto.
+ erewrite <- exec_body_dont_move_PC; eauto.
Qed.
+Lemma step_simulation s t s':
+ Asmblock.step lk ge s t s' -> plus Asm.step tge s t s'.
+Proof.
+ intros STEP.
+ inv STEP; simpl; exploit functions_translated; eauto;
+ intros (tf0 & FINDtf & TRANSf);
+ monadInv TRANSf.
+ - (* internal step *) eapply exec_bblock_simulation; eauto.
+ - (* external step *)
+ apply plus_one.
+ exploit external_call_symbols_preserved; eauto. apply senv_preserved.
+ intros ?.
+ eapply Asm.exec_step_external; eauto.
+Qed.
+
+Lemma transf_program_correct:
+ forward_simulation (Asmblock.semantics lk prog) (Asm.semantics tprog).
+Proof.
+ eapply forward_simulation_plus.
+ - apply senv_preserved.
+ - eexact transf_initial_states.
+ - eexact transf_final_states.
+ - unfold match_states.
+ simpl; intros; subst; eexists; split; eauto.
+ eapply step_simulation; eauto.
+Qed.
+
+End PRESERVATION.
+
+End Asmblock_PRESERVATION.
+
+
+Local Open Scope linking_scope.
+
+Definition block_passes :=
+ mkpass Machblockgenproof.match_prog
+ ::: mkpass Asmblockgenproof.match_prog
+ ::: mkpass PostpassSchedulingproof.match_prog
+ ::: mkpass Asmblock_PRESERVATION.match_prog
+ ::: pass_nil _.
+
+Definition match_prog := pass_match (compose_passes block_passes).
+
+Lemma transf_program_match:
+ forall p tp, Asmgen.transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros p tp H.
+ unfold Asmgen.transf_program in H. apply bind_inversion in H. destruct H.
+ inversion_clear H. apply bind_inversion in H1. destruct H1.
+ inversion_clear H.
+ unfold Compopts.time in *. remember (Machblockgen.transf_program p) as mbp.
+ unfold match_prog; simpl.
+ exists mbp; split. apply Machblockgenproof.transf_program_match; auto.
+ exists x; split. apply Asmblockgenproof.transf_program_match; auto.
+ exists x0; split. apply PostpassSchedulingproof.transf_program_match; auto.
+ exists tp; split. apply Asmblock_PRESERVATION.transf_program_match; auto. auto.
+Qed.
+
+(** Return Address Offset *)
+
+Definition return_address_offset: Mach.function -> Mach.code -> ptrofs -> Prop :=
+ Machblockgenproof.Mach_return_address_offset (Asmblockgenproof.return_address_offset).
+
+Lemma return_address_exists:
+ forall f sg ros c, is_tail (Mach.Mcall sg ros :: c) f.(Mach.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros; unfold return_address_offset; eapply Machblockgenproof.Mach_return_address_exists; eauto.
+ intros; eapply Asmblockgenproof.return_address_exists; eauto.
+Qed.
+
+Section PRESERVATION.
+
+Variable prog: Mach.program.
+Variable tprog: Asm.program.
+Hypothesis TRANSF: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
Theorem transf_program_correct:
forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
Proof.
- eapply forward_simulation_star with (measure := measure).
- apply senv_preserved.
- eexact transf_initial_states.
- eexact transf_final_states.
- exact step_simulation.
+ unfold match_prog in TRANSF. simpl in TRANSF.
+ inv TRANSF. inv H. inv H1. inv H. inv H2. inv H. inv H3. inv H.
+ eapply compose_forward_simulations.
+ { exploit Machblockgenproof.transf_program_correct; eauto. }
+
+ eapply compose_forward_simulations.
+ + apply Asmblockgenproof.transf_program_correct; eauto.
+ { intros.
+ unfold Genv.symbol_address.
+ erewrite <- PostpassSchedulingproof.symbols_preserved; eauto.
+ erewrite Asmblock_PRESERVATION.symbol_high_low; eauto.
+ reflexivity.
+ }
+ + eapply compose_forward_simulations.
+ - apply PostpassSchedulingproof.transf_program_correct; eauto.
+ - apply Asmblock_PRESERVATION.transf_program_correct; eauto.
Qed.
End PRESERVATION.
+
+Instance TransfAsm: TransfLink match_prog := pass_match_link (compose_passes block_passes).
+
+(*******************************************)
+(* Stub actually needed by driver/Compiler *)
+
+Module Asmgenproof0.
+
+Definition return_address_offset := return_address_offset.
+
+End Asmgenproof0.
diff --git a/aarch64/BTL_SEsimplify.v b/aarch64/BTL_SEsimplify.v
new file mode 100644
index 00000000..3e930439
--- /dev/null
+++ b/aarch64/BTL_SEsimplify.v
@@ -0,0 +1,41 @@
+Require Import Coqlib Floats Values Memory.
+Require Import Integers.
+Require Import Op Registers.
+Require Import BTL_SEtheory.
+Require Import BTL_SEsimuref.
+
+(** Target op simplifications using "fake" values *)
+
+Definition target_op_simplify (op: operation) (lr: list reg) (hrs: ristate): option sval :=
+ None.
+
+Definition target_cbranch_expanse (prev: ristate) (cond: condition) (args: list reg) : option (condition * list_sval) :=
+ None.
+
+(* Main proof of simplification *)
+
+Lemma target_op_simplify_correct ctx op lr hrs fsv st args: forall
+ (H: target_op_simplify op lr hrs = Some fsv)
+ (REF: ris_refines ctx hrs st)
+ (OK0: ris_ok ctx hrs)
+ (OK1: eval_list_sval ctx (list_sval_inj (map (si_sreg st) lr)) = Some args),
+ eval_sval ctx fsv = eval_operation (cge ctx) (csp ctx) op args (cm0 ctx).
+Proof.
+ unfold target_op_simplify; simpl.
+ intros H ? ? ?.
+ congruence.
+Qed.
+
+Lemma target_cbranch_expanse_correct hrs c l ctx st c' l': forall
+ (TARGET: target_cbranch_expanse hrs c l = Some (c', l'))
+ (REF: ris_refines ctx hrs st)
+ (OK: ris_ok ctx hrs),
+ eval_scondition ctx c' l' =
+ eval_scondition ctx c (list_sval_inj (map (si_sreg st) l)).
+Proof.
+ unfold target_cbranch_expanse; simpl.
+ intros H ? ?.
+ congruence.
+Qed.
+Global Opaque target_op_simplify.
+Global Opaque target_cbranch_expanse.
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
new file mode 100644
index 00000000..d5c7ee0f
--- /dev/null
+++ b/aarch64/CSE2deps.v
@@ -0,0 +1,35 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* David Monniaux CNRS, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
+ else true
+ | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+ negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ | _, _, _, _ => true
+ end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
new file mode 100644
index 00000000..653c88f4
--- /dev/null
+++ b/aarch64/CSE2depsproof.v
@@ -0,0 +1,209 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* David Monniaux CNRS, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = 64%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = 18446744073709551616.
+Proof.
+ reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : int64.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr
+ \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: unfold Ptrofs.of_int64.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
+
+ all: try rewrite ptrofs_modulus in *.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+
+
+Section STACK_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Ainstack ofsw) nil = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Ainstack ofsr) nil = Some addrr.
+
+ Lemma stack_load_store_away1 :
+ forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+ \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+
+ destruct sp; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+ rewrite OFSW).
+
+ all: try rewrite ptrofs_modulus in *.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem stack_load_store_away :
+ can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply stack_load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End STACK_WRITE.
+End SOUNDNESS.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ - (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+- (* Ainstack / Ainstack *)
+ destruct args. 2: discriminate.
+ destruct args'. 2: discriminate.
+ cbn in OVERLAP.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP.
+ 2: discriminate.
+ cbn in *.
+ eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+Qed.
+
+End SOUNDNESS.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index 7f5f1e06..24498aa4 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
Lemma make_divimm_correct:
forall n r1 r2 v,
- Val.divs e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divs e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- destruct (Int.ltu i (Int.repr 31)) eqn:?.
- exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
- exists v; auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divs_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+ destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+ rewrite Val.divs_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
+
Lemma make_divuimm_correct:
forall n r1 r2 v,
- Val.divu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divu e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divuimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divuimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- econstructor; split. simpl; eauto.
- rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
- rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divu_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ cbn.
+ rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+ exists (Val.shru e # r1 (Vint i)); split; auto.
+ destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+ rewrite Val.divu_pow2 with (y:=v) (n:=n).
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
Lemma make_divlimm_correct:
forall n r1 r2 v,
- Val.divls e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divls e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divlimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divlimm.
- destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
- rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
- exists v; auto.
- exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+ rewrite H0 in H.
+ destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+ {
+ subst v0.
+ rewrite Val.divls_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ subst. auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
Qed.
+
Lemma make_divluimm_correct:
forall n r1 r2 v,
- Val.divlu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divlu e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divluimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divluimm.
destruct (Int64.is_power2' n) eqn:?.
+ {
econstructor; split. simpl; eauto.
- rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
- rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
- simpl.
- erewrite Int64.is_power2'_range by eauto.
- erewrite Int64.divu_pow2' by eauto. auto.
- exists v; auto.
+ rewrite H0 in H. destruct (e#r1); inv H.
+ all: cbn; auto.
+ {
+ rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+ destruct (Int64.eq n Int64.zero); cbn; auto.
+ erewrite Int64.is_power2'_range by eauto.
+ erewrite Int64.divu_pow2' by eauto. auto.
+ }
+ }
+ exists v; split; auto.
+ cbn.
+ rewrite H.
+ reflexivity.
Qed.
Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
- (* divs *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divimm_correct; auto.
+ apply make_divimm_correct; auto. congruence.
- (* divu *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divuimm_correct; auto.
+ apply make_divuimm_correct; auto. congruence.
- (* and 1 *)
rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
- (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
- (* divl *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divlimm_correct; auto.
+ apply make_divlimm_correct; auto. congruence.
- (* divlu *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divluimm_correct; auto.
+ apply make_divluimm_correct; auto. congruence.
- (* andl 1 *)
rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
- (* andl 2 *)
diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..3a3b87fc
--- /dev/null
+++ b/aarch64/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,41 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
+
diff --git a/aarch64/ExpansionOracle.ml b/aarch64/ExpansionOracle.ml
new file mode 100644
index 00000000..afcb29c2
--- /dev/null
+++ b/aarch64/ExpansionOracle.ml
@@ -0,0 +1,15 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+let expanse n ibf btl = btl
+
+let find_last_reg c = ()
diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v
index 704c392e..454a7cbe 100644
--- a/aarch64/Machregs.v
+++ b/aarch64/Machregs.v
@@ -158,6 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
match ef with
| EF_memcpy sz al => R15 :: R17 :: R29 :: nil
| EF_inline_asm txt sg clob => destroyed_by_clobber clob
+ | EF_profiling _ _ => R15 :: R17 :: R29 :: nil
| _ => nil
end.
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index 5f3c1a8d..98e461eb 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -14,3 +14,11 @@
let is_scratch_register s =
s = "X16" || s = "x16" || s = "X30" || s = "x30"
+
+let class_of_type = function
+ | AST.Tint | AST.Tlong -> 0
+ | AST.Tfloat | AST.Tsingle -> 1
+ | AST.Tany32 | AST.Tany64 -> assert false
+
+(* number of available registers per class *)
+let nr_regs = [| 29; 32 |]
diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli
new file mode 100644
index 00000000..23ac1c9a
--- /dev/null
+++ b/aarch64/Machregsaux.mli
@@ -0,0 +1,20 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(** Auxiliary functions on machine registers *)
+
+val is_scratch_register: string -> bool
+
+val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
diff --git a/aarch64/Op.v b/aarch64/Op.v
index f8d2510e..4c0dfb72 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
| Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
| Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
| Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
- | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
- | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+ | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+ | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
| Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
| Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
| Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
| Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
| Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
| Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
- | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+ | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
| Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
| Osext s, v1 :: nil => Some (Val.sign_ext s v1)
| Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
| Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
| Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
| Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
- | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
- | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+ | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+ | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
| Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
| Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
| Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
| Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
| Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
| Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
- | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+ | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
| Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
| Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
| Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
| Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
| Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
- | Ointoffloat, v1::nil => Val.intoffloat v1
- | Ointuoffloat, v1::nil => Val.intuoffloat v1
- | Ofloatofint, v1::nil => Val.floatofint v1
- | Ofloatofintu, v1::nil => Val.floatofintu v1
- | Ointofsingle, v1::nil => Val.intofsingle v1
- | Ointuofsingle, v1::nil => Val.intuofsingle v1
- | Osingleofint, v1::nil => Val.singleofint v1
- | Osingleofintu, v1::nil => Val.singleofintu v1
- | Olongoffloat, v1::nil => Val.longoffloat v1
- | Olonguoffloat, v1::nil => Val.longuoffloat v1
- | Ofloatoflong, v1::nil => Val.floatoflong v1
- | Ofloatoflongu, v1::nil => Val.floatoflongu v1
- | Olongofsingle, v1::nil => Val.longofsingle v1
- | Olonguofsingle, v1::nil => Val.longuofsingle v1
- | Osingleoflong, v1::nil => Val.singleoflong v1
- | Osingleoflongu, v1::nil => Val.singleoflongu v1
+ | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+ | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+ | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+ | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+ | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+ | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+ | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+ | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+ | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+ | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+ | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+ | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+ | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+ | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+ | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+ | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
| Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
| Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0... destruct v1...
- apply type_add.
- apply type_sub.
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero); inv H2...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (_ || _); trivial...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (Int.eq i0 Int.zero); constructor.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shift s v1 a)...
- destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext s v0)... simpl; rewrite a32_range...
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- apply type_subl.
- destruct v0... destruct v1...
- destruct v0... destruct v1...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero); inv H2...
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shiftl s v1 a)...
- destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range...
@@ -893,34 +895,60 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0...
- destruct v0...
(* intoffloat, intuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
(* floatofint, floatofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* intofsingle, intuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
(* singleofint, singleofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longoffloat, longuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
(* floatoflong, floatoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longofsingle, longuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
(* singleoflong, singleoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* cmp *)
- destruct (eval_condition cond vl m) as [[]|]...
- unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
Qed.
+
+Definition is_trapping_op (op : operation) :=
+ match op with
+ | Omove => false
+ | _ => false
+ end.
+
+
+Definition args_of_operation op :=
+ if eq_operation op Omove
+ then 1%nat
+ else List.length (fst (type_of_operation op)).
+
+Lemma is_trapping_op_sound:
+ forall op vl sp m,
+ is_trapping_op op = false ->
+ (List.length vl) = args_of_operation op ->
+ eval_operation genv sp op vl m <> None.
+Proof.
+ unfold args_of_operation.
+ destruct op; destruct eq_operation; intros; simpl in *; try congruence.
+ all: try (destruct vl as [ | vh1 vl1]; try discriminate).
+ all: try (destruct vl1 as [ | vh2 vl2]; try discriminate).
+ all: try (destruct vl2 as [ | vh3 vl3]; try discriminate).
+ all: try (destruct vl3 as [ | vh4 vl4]; try discriminate).
+Qed.
End SOUNDNESS.
(** * Manipulating and transforming operations *)
@@ -1174,6 +1202,28 @@ Proof.
rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
Qed.
+Lemma cond_valid_pointer_eq:
+ forall cond args m1 m2,
+ (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+ eval_condition cond args m1 = eval_condition cond args m2.
+Proof.
+ intros until m2. intro MEM. destruct cond eqn:COND; simpl; try congruence.
+ all: repeat (destruct args; simpl; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
+Lemma op_valid_pointer_eq:
+ forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+ (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+ eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+ intros until m2. intro MEM. destruct op eqn:OP; simpl; try congruence.
+ - f_equal; f_equal; auto using cond_valid_pointer_eq.
+ - destruct cond; simpl; try congruence;
+ repeat (destruct args; simpl; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
(** Global variables mentioned in an operation or addressing mode *)
Definition globals_addressing (addr: addressing) : list ident :=
@@ -1374,12 +1424,12 @@ Proof.
- apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
- apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
(* div, divu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero
- || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (_ || _); cbn;
+ constructor.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (Int.eq i0 Int.zero); cbn;
+ constructor.
(* and*)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1411,8 +1461,8 @@ Proof.
(* shru *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
(* shrx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1447,12 +1497,10 @@ Proof.
- inv H4; inv H2; simpl; auto.
- inv H4; inv H2; simpl; auto.
(* divl, divlu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero
- || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
(* andl *)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1484,8 +1532,8 @@ Proof.
(* shrlu *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
(* shrlx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1516,37 +1564,29 @@ Proof.
- inv H4; simpl; auto.
- inv H4; simpl; auto.
(* intoffloat, intuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
(* floatofint, floatofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* intofsingle, intuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
(* singleofint, singleofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longoffloat, longuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
(* floatoflong, floatoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longofsingle, longuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
(* singleoflong, singleoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* cmp, sel *)
- subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
@@ -1576,6 +1616,21 @@ Proof.
- apply Val.offset_ptr_inject; auto.
Qed.
+
+Lemma eval_addressing_inj_none:
+ forall addr sp1 vl1 sp2 vl2,
+ (forall id ofs,
+ In id (globals_addressing addr) ->
+ Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) ->
+ Val.inject f sp1 sp2 ->
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing ge1 sp1 addr vl1 = None ->
+ eval_addressing ge2 sp2 addr vl2 = None.
+Proof.
+ intros until vl2. intros Hglobal Hinjsp Hinjvl.
+ destruct addr; simpl in *;
+ inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate.
+Qed.
End EVAL_COMPAT.
(** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *)
@@ -1682,6 +1737,18 @@ Proof.
destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto.
Qed.
+Lemma eval_addressing_lessdef_none:
+ forall sp addr vl1 vl2,
+ Val.lessdef_list vl1 vl2 ->
+ eval_addressing genv sp addr vl1 = None ->
+ eval_addressing genv sp addr vl2 = None.
+Proof.
+ intros. rewrite val_inject_list_lessdef in H.
+ eapply eval_addressing_inj_none with (sp1 := sp).
+ intros. rewrite <- val_inject_lessdef; auto.
+ rewrite <- val_inject_lessdef; auto.
+ eauto. auto.
+Qed.
End EVAL_LESSDEF.
(** Compatibility of the evaluation functions with memory injections. *)
@@ -1734,6 +1801,19 @@ Proof.
econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
Qed.
+Lemma eval_addressing_inject_none:
+ forall addr vl1 vl2,
+ Val.inject_list f vl1 vl2 ->
+ eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None ->
+ eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None.
+Proof.
+ intros.
+ rewrite eval_shift_stack_addressing.
+ eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto.
+ intros. apply symbol_address_inject.
+ econstructor; eauto. rewrite Ptrofs.add_zero_l; auto.
+Qed.
+
Lemma eval_operation_inject:
forall op vl1 vl2 v1 m1 m2,
Val.inject_list f vl1 vl2 ->
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+ struct
+ let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+ let nr_non_pipelined_units = 1;;
+
+ let latency_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _ -> 1
+ | Oshift _ -> 2
+ | Oadd -> 1
+ | Oaddshift _ -> 2
+ | Oaddimm _
+ | Oneg -> 1
+ | Onegshift _ -> 2
+ | Osub -> 1
+ | Osubshift _ -> 2
+ | Omul
+ | Omuladd
+ | Omulsub -> 4
+ | Odiv
+ | Odivu -> 29
+ | Oand -> 1
+ | Oandshift _ -> 2
+ | Oandimm _ -> 1
+ | Oor -> 1
+ | Oorshift _ -> 2
+ | Oorimm _ -> 1
+ | Oxor -> 1
+ | Oxorshift _ -> 2
+ | Oxorimm _ -> 1
+ | Onot -> 1
+ | Onotshift _ -> 2
+ | Obic -> 1
+ | Obicshift _ -> 2
+ | Oorn -> 1
+ | Oornshift _ -> 2
+ | Oeqv -> 1
+ | Oeqvshift _ -> 2
+ | Oshl
+ | Oshr
+ | Oshru -> 2
+ | Oshrximm _ -> 6
+ | Ozext _
+ | Osext _ -> 1
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _ -> 2
+
+ (* 64-bit integer arithmetic *)
+ | Oshiftl _ -> 2
+ | Oextend _ -> 1
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl -> 1
+ | Oaddlshift _
+ | Oaddlext _ -> 2
+ | Oaddlimm _
+ | Onegl -> 1
+ | Oneglshift _ -> 2
+ | Osubl -> 1
+ | Osublshift _
+ | Osublext _ -> 2
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> 4
+ | Odivl -> 50
+ | Odivlu -> 50
+ | Oandl -> 1
+ | Oandlshift _ -> 2
+ | Oandlimm _
+ | Oorl -> 1
+ | Oorlshift _ -> 2
+ | Oorlimm _
+ | Oxorl -> 1
+ | Oxorlshift _ -> 2
+ | Oxorlimm _
+ | Onotl -> 1
+ | Onotlshift _ -> 2
+ | Obicl -> 1
+ | Obiclshift _ -> 2
+ | Oornl -> 1
+ | Oornlshift _ -> 2
+ | Oeqvl -> 1
+ | Oeqvlshift _ -> 2
+ | Oshll
+ | Oshrl
+ | Oshrlu -> 2
+ | Oshrlximm _ -> 6
+ | Ozextl _
+ | Osextl _ -> 1
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> 2
+
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> 6
+ | Odivf -> 50 (* r [rd = r1 / r2] *)
+ | Odivfs -> 20
+ (* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6
+ | _ -> 1);;
+
+ let resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _
+ (* 32-bit integer arithmetic *)
+ | Oshift _
+ | Oadd
+ | Oaddshift _
+ | Oaddimm _
+ | Oneg
+ | Onegshift _
+ | Osub
+ | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+ | Omul
+ | Omuladd
+ | Omulsub -> [| 1; 1; 1; 0 |]
+ | Odiv
+ | Odivu -> [| 1; 0; 0; 0 |]
+ | Oand
+ | Oandshift _
+ | Oandimm _
+ | Oor
+ | Oorshift _
+ | Oorimm _
+ | Oxor
+ | Oxorshift _
+ | Oxorimm _
+ | Onot
+ | Onotshift _
+ | Obic
+ | Obicshift _
+ | Oorn
+ | Oornshift _
+ | Oeqv
+ | Oeqvshift _
+ | Oshl
+ | Oshr
+ | Oshru
+ | Oshrximm _
+ | Ozext _
+ | Osext _
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _
+
+(* 64-bit integer arithmetic *)
+ | Oshiftl _
+ | Oextend _
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl
+ | Oaddlshift _
+ | Oaddlext _
+ | Oaddlimm _
+ | Onegl
+ | Oneglshift _
+ | Osubl
+ | Osublshift _
+ | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+ | Odivl
+ | Odivlu -> [| 1; 0; 0; 0 |]
+ | Oandl
+ | Oandlshift _
+ | Oandlimm _
+ | Oorl
+ | Oorlshift _
+ | Oorlimm _
+ | Oxorl
+ | Oxorlshift _
+ | Oxorlimm _
+ | Onotl
+ | Onotlshift _
+ | Obicl
+ | Obiclshift _
+ | Oornl
+ | Oornlshift _
+ | Oeqvl
+ | Oeqvlshift _
+ | Oshll
+ | Oshrl
+ | Oshrlu
+ | Oshrlximm _
+ | Ozextl _
+ | Osextl _
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> [| 1; 1; 0; 0 |]
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+ | Odivf
+ (* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Odivfs (* r [rd = r1 / r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> [| 1 ; 1; 1; 0 |]
+
+(* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Odiv | Odivu -> [| 29 |]
+ | Odivfs -> [| 20 |]
+ | Odivl | Odivlu | Odivf -> [| 50 |]
+ | _ -> [| -1 |];;
+
+ let resources_of_cond (cmp : condition) (nargs : int) =
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+ let latency_of_call _ _ = 6;;
+
+ let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_call _ _ = resource_bounds;;
+ let resources_of_builtin _ = resource_bounds;;
+ end;;
+
+let get_opweights () : opweights =
+ match !Clflags.option_mtune with
+ | "cortex-a53" | "cortex-a35" | "" ->
+ {
+ pipelined_resource_bounds = Cortex_A53.resource_bounds;
+ nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+ latency_of_op = Cortex_A53.latency_of_op;
+ resources_of_op = Cortex_A53.resources_of_op;
+ non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+ latency_of_load = Cortex_A53.latency_of_load;
+ resources_of_load = Cortex_A53.resources_of_load;
+ resources_of_store = Cortex_A53.resources_of_store;
+ resources_of_cond = Cortex_A53.resources_of_cond;
+ latency_of_call = Cortex_A53.latency_of_call;
+ resources_of_call = Cortex_A53.resources_of_call;
+ resources_of_builtin = Cortex_A53.resources_of_builtin
+ }
+ | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml
new file mode 100644
index 00000000..44c6f16b
--- /dev/null
+++ b/aarch64/OpWeightsAsm.ml
@@ -0,0 +1,165 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Asmblock
+
+(*type called_function = (Registers.reg, AST.ident) Datatypes.sum*)
+
+type instruction = PBasic of Asmblock.basic | PControl of Asmblock.control
+
+type opweights = {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : instruction -> int -> int;
+ resources_of_op : instruction -> int -> int array;
+ (*non_pipelined_resources_of_op : Op.operation -> int -> int array;*)
+ (*latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;*)
+ (*resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;*)
+ (*resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;*)
+ (*resources_of_cond : Op.condition -> int -> int array;*)
+ (*latency_of_call : AST.signature -> called_function -> int;*)
+ (*resources_of_call : AST.signature -> called_function -> int array;*)
+ (*resources_of_builtin : AST.external_function -> int array*)
+}
+
+module Cortex_A53 = struct
+ let resource_bounds = [| 2; 2; 1; 1 |]
+
+ (* instr ; ALU ; MAC; LSU *)
+ let nr_non_pipelined_units = 1
+
+ let latency_of_op (i : instruction) (nargs : int) =
+ match i with
+ | PBasic (PArith (PArithP (_, _))) -> 1
+ | PBasic (PArith (PArithPP (i', _, _))) -> (
+ match i' with
+ | Psbfiz (_, _, _) | Psbfx (_, _, _) | Pubfiz (_, _, _) | Pubfx (_, _, _)
+ ->
+ 2
+ | Pfcvtds | Pfcvtsd
+ | Pfcvtzs (_, _)
+ | Pfcvtzu (_, _)
+ | Pfabs _ | Pfneg _
+ | Pscvtf (_, _)
+ | Pucvtf (_, _) ->
+ 6
+ | _ -> 1)
+ | PBasic (PArith (PArithPPP (i', _, _, _))) -> (
+ match i' with
+ | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> 2
+ | Psmulh | Pumulh -> 4
+ | Pfdiv _ | Psdiv _ | Pudiv _ -> 50
+ | _ -> 6)
+ | PBasic (PArith (PArithRR0R (_, _, _, _))) -> 2
+ | PBasic (PArith (PArithRR0 (_, _, _))) -> 1
+ | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> 4
+ | PBasic (PArith (PArithComparisonPP (_, _, _))) -> 6
+ | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> 1
+ | PBasic (PArith (PArithComparisonP (i', _))) -> (
+ match i' with Pfcmp0 _ -> 6 | _ -> 1)
+ | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> 6
+ | PBasic (PArith _) -> 1
+ | PBasic (PLoad _) -> 3
+ | PBasic (PStore _) -> 3
+ | PBasic (Pallocframe (_, _)) -> 3
+ | PBasic (Pfreeframe (_, _)) -> 1
+ | PBasic (Ploadsymbol (_, _)) -> 1
+ | PBasic (Pcvtsw2x (_, _)) -> 2
+ | PBasic (Pcvtuw2x (_, _)) -> 2
+ | PBasic (Pcvtx2w _) -> 1
+ | PBasic (Pnop) -> 0
+ | PControl _ -> 6
+
+ let resources_of_op (i : instruction) (nargs : int) =
+ match i with
+ | PBasic (PArith (PArithP (i', _))) -> (
+ match i' with
+ | Pfmovimmd _ | Pfmovimms _ -> [| 1; 1; 0; 1 |]
+ | _ -> [| 1; 1; 0; 0 |])
+ | PBasic (PArith (PArithPP (i', _, _))) -> (
+ match i' with
+ | Pfcvtds | Pfcvtsd
+ | Pfcvtzs (_, _)
+ | Pfcvtzu (_, _)
+ | Pfabs _ | Pfneg _
+ | Pscvtf (_, _)
+ | Pucvtf (_, _) ->
+ [| 1 ; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |])
+ | PBasic (PArith (PArithPPP (i', _, _, _))) -> (
+ match i' with
+ | Pasrv _ | Plslv _ | Plsrv _ | Prorv _ | Paddext _ | Psubext _ -> [| 1; 1; 0; 0 |]
+ | Pfdiv _ | Psdiv _ | Pudiv _ -> [| 1; 0; 0; 0 |]
+ | _ -> [| 1; 1; 1; 0 |])
+ | PBasic (PArith (PArithRR0R (_, _, _, _))) -> [| 1; 1; 0; 0 |]
+ | PBasic (PArith (PArithRR0 (_, _, _))) -> [| 1; 1; 0; 0 |]
+ | PBasic (PArith (PArithARRRR0 (_, _, _, _, _))) -> [| 1; 1; 1; 0 |]
+ | PBasic (PArith (PArithComparisonPP (_, _, _))) -> [| 1; 1; 1; 0 |]
+ | PBasic (PArith (PArithComparisonR0R (_, _, _))) -> [| 1; 1; 0; 0 |]
+ | PBasic (PArith (PArithComparisonP (i', _))) -> (
+ match i' with Pfcmp0 _ -> [| 1; 1; 1; 0 |] | _ -> [| 1; 1; 0; 0 |])
+ | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> [| 1; 1; 1; 0 |]
+ | PBasic (PArith _) -> [| 1; 1; 0; 0 |]
+ | PBasic (PLoad _) -> [| 1; 0; 0; 1 |]
+ | PBasic (PStore _) -> [| 1; 0; 0; 1 |]
+ | PBasic (Pallocframe (_, _)) -> [| 1; 0; 0; 1 |]
+ | PBasic (Pfreeframe (_, _)) -> [| 1; 1; 0; 0 |]
+ | PBasic (Ploadsymbol (_, _)) -> [| 1; 1; 0; 0 |]
+ | PBasic (Pcvtsw2x (_, _)) -> [| 1; 1; 0; 0 |]
+ | PBasic (Pcvtuw2x (_, _)) -> [| 1; 1; 0; 0 |]
+ | PBasic (Pcvtx2w _) -> [| 1; 1; 0; 0 |]
+ | PBasic (Pnop) -> [| 0; 0; 0; 0 |]
+ | PControl _ -> resource_bounds
+
+ (*let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Odiv | Odivu -> [| 29 |]
+ | Odivfs -> [| 20 |]
+ | Odivl | Odivlu | Odivf -> [| 50 |]
+ | _ -> [| -1 |];;
+
+ let resources_of_cond (cmp : condition) (nargs : int) =
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] )*)
+
+end
+
+let get_opweights () : opweights =
+ (*match !Clflags.option_mtune with*)
+ (*| "cortex-a53" | "cortex-a35" | "" ->*)
+ {
+ pipelined_resource_bounds = Cortex_A53.resource_bounds;
+ nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+ latency_of_op = Cortex_A53.latency_of_op;
+ resources_of_op =
+ Cortex_A53.resources_of_op
+ (*non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;*)
+ (*latency_of_load = Cortex_A53.latency_of_load;*)
+ (*resources_of_load = Cortex_A53.resources_of_load;*)
+ (*resources_of_store = Cortex_A53.resources_of_store;*)
+ (*resources_of_cond = Cortex_A53.resources_of_cond;*)
+ (*latency_of_call = Cortex_A53.latency_of_call;*)
+ (*resources_of_call = Cortex_A53.resources_of_call;*)
+ (*resources_of_builtin = Cortex_A53.resources_of_builtin*);
+ }
+
+(*| xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;*)
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
new file mode 100644
index 00000000..1d890f2c
--- /dev/null
+++ b/aarch64/PeepholeOracle.ml
@@ -0,0 +1,624 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Camlcoq
+open Asmblock
+open Asm
+open Int64
+open Printf
+
+(* If true, the oracle will print a msg for each applied peephole *)
+let debug = false
+
+(* Functions to verify the immediate offset range for ldp/stp *)
+let is_valid_immofs_32 z =
+ if z <= 252 && z >= -256 && z mod 4 = 0 then true else false
+
+let is_valid_immofs_64 z =
+ if z <= 504 && z >= -512 && z mod 8 = 0 then true else false
+
+(* Functions to check if a ldp/stp replacement is valid according to args *)
+let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if
+ (not (dreg_eq rd1 rd2))
+ && iregsp_eq b1 b2
+ && (not (dreg_eq rd1 (IR b2)))
+ && (z2 = z1 + 4 || z2 = z1 - 4)
+ && is_valid_immofs_32 z1
+ then true
+ else false
+
+let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if
+ (not (dreg_eq rd1 rd2))
+ && iregsp_eq b1 b2
+ && (not (dreg_eq rd1 (IR b2)))
+ && (z2 = z1 + 8 || z2 = z1 - 8)
+ && is_valid_immofs_64 z1
+ then true
+ else false
+
+let is_valid_str32 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true
+ else false
+
+let is_valid_str64 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true
+ else false
+
+let dreg_of_ireg r = IR (RR1 r)
+
+let dreg_of_freg r = FR r
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_load_affect reg rd b rev =
+ let b = IR b in
+ if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
+
+(* Return true if an intermediate
+ * read eliminates the potential
+ * candidate *)
+let verify_load_read reg rd b rev = dreg_eq reg rd
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_store_affect reg rs b rev =
+ let b = IR b in
+ dreg_eq reg b || dreg_eq reg rs
+
+type ph_type = P32 | P32f | P64 | P64f
+
+type inst_type = Ldr of ph_type | Str of ph_type
+
+let ph_ty_to_string = function
+ | Ldr P32 -> "ldr32"
+ | Ldr P32f -> "ldr32f"
+ | Ldr P64 -> "ldr64"
+ | Ldr P64f -> "ldr64f"
+ | Str P32 -> "str32"
+ | Str P32f -> "str32f"
+ | Str P64 -> "str64"
+ | Str P64f -> "str64f"
+
+let print_ph_ty chan v = output_string chan (ph_ty_to_string v)
+
+let symb_mem = Hashtbl.create 9
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given write in reg *)
+let rec affect_symb_mem reg insta pot_rep stype rev =
+ match pot_rep with
+ | [] -> []
+ | h0 :: t0 -> (
+ match (insta.(h0), stype) with
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+ if verify_load_affect reg rd b rev then
+ affect_symb_mem reg insta t0 stype rev
+ else h0 :: affect_symb_mem reg insta t0 stype rev
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+ if verify_store_affect reg rs b rev then
+ affect_symb_mem reg insta t0 stype rev
+ else h0 :: affect_symb_mem reg insta t0 stype rev
+ | _, _ ->
+ failwith "affect_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given read in reg *)
+let rec read_symb_mem reg insta pot_rep stype rev =
+ match pot_rep with
+ | [] -> []
+ | h0 :: t0 -> (
+ match (insta.(h0), stype) with
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+ if verify_load_read reg rd b rev then
+ read_symb_mem reg insta t0 stype rev
+ else h0 :: read_symb_mem reg insta t0 stype rev
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+ h0 :: read_symb_mem reg insta t0 stype rev
+ | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Update a symbolic memory list of potential replacements
+ * for any addressing *)
+let update_pot_rep_addressing a insta pot_rep stype rev =
+ match a with
+ | ADimm (base, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+ | ADreg (base, r) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADlsl (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADsxt (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADuxt (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADadr (base, _, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+ | ADpostincr (base, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+
+(* Update a symbolic memory list of potential replacements
+ * for any basic instruction *)
+let update_pot_rep_basic inst insta stype rev =
+ let pot_rep = Hashtbl.find symb_mem stype in
+ (match inst with
+ | PArith i -> (
+ match i with
+ | PArithP (_, rd) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev
+ | PArithPP (_, rd, rs) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs insta !pot_rep stype rev
+ | PArithPPP (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | PArithRR0R (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ (match rs1 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ());
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+ | PArithRR0 (_, rd, rs) -> (
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ match rs with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ())
+ | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> (
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev;
+ match rs3 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ())
+ | PArithComparisonPP (_, rs1, rs2) ->
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | PArithComparisonR0R (_, rs1, rs2) ->
+ (match rs1 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ());
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+ | PArithComparisonP (_, rs1) ->
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev
+ | Pcset (rd, _) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pfmovi (_, rd, rs) -> (
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+ match rs with
+ | RR0 rs ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | _ -> ())
+ | Pcsel (rd, rs1, rs2, _) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | Pfnmul (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev)
+ | PLoad i -> (
+ (* Here, we consider a different behavior for load and store potential candidates:
+ * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *)
+ match stype with
+ | Ldr _ -> (
+ match i with
+ | PLd_rd_a (_, rd, a) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ update_pot_rep_addressing a insta pot_rep stype rev
+ | Pldp (_, rd1, rd2, _, _, a) ->
+ pot_rep := affect_symb_mem rd1 insta !pot_rep stype rev;
+ pot_rep := affect_symb_mem rd2 insta !pot_rep stype rev;
+ update_pot_rep_addressing a insta pot_rep stype rev)
+ | _ -> pot_rep := [])
+ | PStore _ -> (
+ (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones :
+ * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole.
+ * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *)
+ match stype with Ldr _ -> pot_rep := [] | _ -> ())
+ | Pallocframe (_, _) -> pot_rep := []
+ | Pfreeframe (_, _) -> pot_rep := []
+ | Ploadsymbol (rd, _) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pcvtsw2x (rd, rs) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | Pcvtuw2x (rd, rs) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | Pcvtx2w rd ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pnop -> ());
+ Hashtbl.replace symb_mem stype pot_rep
+
+(* This is useful to manage the case were the immofs
+ * of the first ldr/str is greater than the second one *)
+let min_is_rev n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if z1 < z2 then true else false
+
+(* Below functions were added to merge pattern matching cases in peephole,
+ * thanks to this, we can make the chunk difference (int/any) compatible. *)
+let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
+ match ldi with
+ | Pldrw | Pldrw_a -> Pldpw
+ | Pldrx | Pldrx_a -> Pldpx
+ | Pldrs -> Pldps
+ | Pldrd | Pldrd_a -> Pldpd
+ | _ -> failwith "trans_ldi: Found a non compatible load to translate"
+
+let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
+ match sti with
+ | Pstrw | Pstrw_a -> Pstpw
+ | Pstrx | Pstrx_a -> Pstpx
+ | Pstrs -> Pstps
+ | Pstrd | Pstrd_a -> Pstpd
+ | _ -> failwith "trans_sti: Found a non compatible store to translate"
+
+let is_compat_load (ldi : load_rd_a) =
+ match ldi with
+ | Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a -> true
+ | _ -> false
+
+let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
+ match ldi1 with
+ | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
+ | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+ | Pldrs -> ( match ldi2 with Pldrs -> true | _ -> false)
+ | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false)
+ | _ -> false
+
+let is_compat_store (sti : store_rs_a) =
+ match sti with
+ | Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true
+ | _ -> false
+
+let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
+ match sti1 with
+ | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false)
+ | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false)
+ | Pstrs -> ( match sti2 with Pstrs -> true | _ -> false)
+ | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false)
+ | _ -> false
+
+let get_load_pht (ldi : load_rd_a) =
+ match ldi with
+ | Pldrw | Pldrw_a -> Ldr P32
+ | Pldrs -> Ldr P32f
+ | Pldrx | Pldrx_a -> Ldr P64
+ | Pldrd | Pldrd_a -> Ldr P64f
+ | _ -> failwith "get_load_string: Found a non compatible load to translate"
+
+let get_store_pht (sti : store_rs_a) =
+ match sti with
+ | Pstrw | Pstrw_a -> Str P32
+ | Pstrs -> Str P32f
+ | Pstrx | Pstrx_a -> Str P64
+ | Pstrd | Pstrd_a -> Str P64f
+ | _ -> failwith "get_store_string: Found a non compatible store to translate"
+
+let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
+ match stype with
+ | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2
+ | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2
+
+let is_valid_str b1 b2 n1 n2 stype =
+ match stype with
+ | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2
+ | _ -> is_valid_str64 b1 b2 n1 n2
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory *)
+let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b1 b2 n1 n2 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep"
+ )
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory (when iterating in the reversed list) *)
+let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b2 b1 n2 n1 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith
+ "search_compat_rep_ldst_inv: Found an inconsistent inst in pot_rep")
+
+let init_symb_mem () =
+ Hashtbl.clear symb_mem;
+ Hashtbl.add symb_mem (Ldr P32) (ref []);
+ Hashtbl.add symb_mem (Ldr P64) (ref []);
+ Hashtbl.add symb_mem (Ldr P32f) (ref []);
+ Hashtbl.add symb_mem (Ldr P64f) (ref []);
+ Hashtbl.add symb_mem (Str P32) (ref []);
+ Hashtbl.add symb_mem (Str P64) (ref []);
+ Hashtbl.add symb_mem (Str P32f) (ref []);
+ Hashtbl.add symb_mem (Str P64f) (ref [])
+
+let reset_str_symb_mem () =
+ Hashtbl.replace symb_mem (Str P32) (ref []);
+ Hashtbl.replace symb_mem (Str P64) (ref []);
+ Hashtbl.replace symb_mem (Str P32f) (ref []);
+ Hashtbl.replace symb_mem (Str P64f) (ref [])
+
+(* Main peephole function in backward style *)
+let pair_rep_inv insta =
+ (* Each list below is a symbolic mem representation
+ * for one type of inst. Lists contains integers which
+ * are the indices of insts in the main array "insta". *)
+ init_symb_mem ();
+ for i = Array.length insta - 1 downto 0 do
+ let h0 = insta.(i) in
+ (* Here we need to update every symbolic memory according to the matched inst *)
+ update_pot_rep_basic h0 insta (Ldr P32) true;
+ update_pot_rep_basic h0 insta (Ldr P64) true;
+ update_pot_rep_basic h0 insta (Ldr P32f) true;
+ update_pot_rep_basic h0 insta (Ldr P64f) true;
+ update_pot_rep_basic h0 insta (Str P32) true;
+ update_pot_rep_basic h0 insta (Str P64) true;
+ update_pot_rep_basic h0 insta (Str P32f) true;
+ update_pot_rep_basic h0 insta (Str P64f) true;
+ match h0 with
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))) ->
+ if is_compat_load ldi then (
+ (* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
+ let pot_rep = Hashtbl.find symb_mem ld_t in
+ (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then
+ eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then
+ eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+ Hashtbl.replace symb_mem ld_t pot_rep)
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))) ->
+ if is_compat_store sti then (
+ (* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
+ let pot_rep = Hashtbl.find symb_mem st_t in
+ (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ reset_str_symb_mem ();
+ pot_rep := [ i ]
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then
+ eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp
+ (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1))));
+ Hashtbl.replace symb_mem st_t pot_rep
+ (* Any other inst *))
+ else reset_str_symb_mem ()
+ | i -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+ done
+
+(* Main peephole function in forward style *)
+let pair_rep insta =
+ (* Each list below is a symbolic mem representation
+ * for one type of inst. Lists contains integers which
+ * are the indices of insts in the main array "insta". *)
+ init_symb_mem ();
+ for i = 0 to Array.length insta - 2 do
+ let h0 = insta.(i) in
+ let h1 = insta.(i + 1) in
+ (* Here we need to update every symbolic memory according to the matched inst *)
+ update_pot_rep_basic h0 insta (Ldr P32) false;
+ update_pot_rep_basic h0 insta (Ldr P64) false;
+ update_pot_rep_basic h0 insta (Ldr P32f) false;
+ update_pot_rep_basic h0 insta (Ldr P64f) false;
+ update_pot_rep_basic h0 insta (Str P32) false;
+ update_pot_rep_basic h0 insta (Str P64) false;
+ update_pot_rep_basic h0 insta (Str P32f) false;
+ update_pot_rep_basic h0 insta (Str P64f) false;
+ match (h0, h1) with
+ (* Consecutive ldr *)
+ | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))),
+ PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) ->
+ if are_compat_load ldi1 ldi2 then
+ let ld_t = get_load_pht ldi1 in
+ if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then (
+ if min_is_rev n1 n2 then (
+ if debug then
+ eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd1,
+ rd2,
+ chunk_load ldi1,
+ chunk_load ldi2,
+ ADimm (b1, n1) )))
+ else (
+ if debug then
+ eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd2,
+ rd1,
+ chunk_load ldi2,
+ chunk_load ldi1,
+ ADimm (b1, n2) )));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ ->
+ if is_compat_load ldi then (
+ (* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
+ let pot_rep = Hashtbl.find symb_mem ld_t in
+ (match search_compat_rep rd1 b1 n1 insta !pot_rep ld_t with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then
+ eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then
+ eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+ Hashtbl.replace symb_mem ld_t pot_rep)
+ (* Consecutive str *)
+ | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))),
+ PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) ->
+ (* Regardless of whether we can perform the peephole or not,
+ * we have to clean the potential candidates for stp now as we are
+ * looking at two new store instructions. *)
+ reset_str_symb_mem ();
+ if are_compat_store sti1 sti2 then
+ let st_t = get_store_pht sti1 in
+ if is_valid_str b1 b2 n1 n2 st_t then (
+ if debug then
+ eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp
+ ( trans_sti sti1,
+ rd1,
+ rd2,
+ chunk_store sti1,
+ chunk_store sti2,
+ ADimm (b1, n1) ));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ ->
+ if is_compat_store sti then (
+ (* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
+ let pot_rep = Hashtbl.find symb_mem st_t in
+ (match search_compat_rep rd1 b1 n1 insta !pot_rep st_t with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ reset_str_symb_mem ();
+ pot_rep := [ i ]
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then
+ eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n))));
+ Hashtbl.replace symb_mem st_t pot_rep)
+ else reset_str_symb_mem ()
+ (* Any other inst *)
+ | i, _ -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+ done
+
+(* Calling peephole if flag is set *)
+let optimize_bdy (bdy : basic list) : basic list =
+ if !Clflags.option_fcoalesce_mem then (
+ let insta = Array.of_list bdy in
+ pair_rep insta;
+ pair_rep_inv insta;
+ Array.to_list insta)
+ else bdy
+
+(* Called peephole function from Coq *)
+let peephole_opt bdy =
+ Timing.time_coq
+ [
+ 'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e';
+ ]
+ optimize_bdy bdy
diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v
new file mode 100644
index 00000000..f826632b
--- /dev/null
+++ b/aarch64/PostpassScheduling.v
@@ -0,0 +1,146 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Implementation (and basic properties) of the verified postpass scheduler *)
+
+Require Import Coqlib Errors AST Integers.
+Require Import Asmblock Axioms Memory Globalenvs.
+Require Import Asmblockdeps Asmblockprops.
+Require Import IterList.
+
+Local Open Scope error_monad_scope.
+
+(** * Oracle taking as input a basic block,
+ returns a scheduled basic block *)
+Axiom schedule: bblock -> (list basic) * option control.
+
+Axiom peephole_opt: (list basic) -> list basic.
+
+Extract Constant schedule => "PostpassSchedulingOracle.schedule".
+
+Extract Constant peephole_opt => "PeepholeOracle.peephole_opt".
+
+Section verify_schedule.
+
+Variable lk: aarch64_linker.
+
+Definition verify_schedule (bb bb' : bblock) : res unit :=
+ match bblock_simub bb bb' with
+ | true => OK tt
+ | false => Error (msg "PostpassScheduling.verify_schedule")
+ end.
+
+Definition verify_size bb bb' := if (Z.eqb (size bb) (size bb')) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size").
+
+Lemma verify_size_size:
+ forall bb bb', verify_size bb bb' = OK tt -> size bb = size bb'.
+Proof.
+ intros. unfold verify_size in H. destruct (size bb =? size bb') eqn:SIZE; try discriminate.
+ apply Z.eqb_eq. assumption.
+Qed.
+
+Program Definition make_bblock_from_basics lb :=
+ match lb with
+ | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+ | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+ end.
+
+Program Definition schedule_to_bblock (lb: list basic) (oc: option control) : res bblock :=
+ match oc with
+ | None => make_bblock_from_basics lb
+ | Some c => OK {| header := nil; body := lb; exit := Some c |}
+ end.
+Next Obligation.
+ unfold Is_true, non_empty_bblockb.
+ unfold non_empty_exit. rewrite orb_true_r. reflexivity.
+Qed.
+
+Definition do_schedule (bb: bblock) : res bblock :=
+ if (Z.eqb (size bb) 1) then OK (bb)
+ else match (schedule bb) with (lb, oc) => schedule_to_bblock lb oc end.
+
+(*Definition do_peephole (bb: bblock) : bblock :=*)
+ (*let res := Peephole.optimize_bblock bb in*)
+ (*if (size res =? size bb) then res else bb.*)
+
+Program Definition do_peephole (bb : bblock) :=
+ let optimized := peephole_opt (body bb) in
+ let wf_ok := non_empty_bblockb optimized (exit bb) in
+ {| header := header bb;
+ body := if wf_ok then optimized else (body bb);
+ exit := exit bb |}.
+Next Obligation.
+ destruct (non_empty_bblockb (peephole_opt (body bb))) eqn:Rwf.
+ - rewrite Rwf. cbn. trivial.
+ - exact (correct bb).
+Qed.
+
+Definition verified_schedule (bb : bblock) : res bblock :=
+ let nhbb := no_header bb in
+ let phbb := do_peephole nhbb in
+ do schbb <- do_schedule phbb;
+ let bb' := stick_header (header bb) schbb in
+ do sizecheck <- verify_size bb bb';
+ do schedcheck <- verify_schedule bb bb';
+ OK (bb').
+
+Lemma verified_schedule_size:
+ forall bb bb', verified_schedule bb = OK bb' -> size bb = size bb'.
+Proof.
+ intros. unfold verified_schedule in H.
+ monadInv H.
+ unfold verify_size in EQ1.
+ destruct (size _ =? size _) eqn:ESIZE_H in EQ1; try discriminate.
+ rewrite Z.eqb_eq in ESIZE_H; rewrite ESIZE_H; reflexivity.
+Qed.
+
+Theorem verified_schedule_correct:
+ forall ge f bb bb',
+ verified_schedule bb = OK bb' ->
+ bblock_simu lk ge f bb bb'.
+Proof.
+ intros.
+ monadInv H.
+ eapply bblock_simub_correct.
+ unfold verify_schedule in EQ0.
+ destruct (bblock_simub _ _) in *; try discriminate; auto.
+Qed.
+
+End verify_schedule.
+
+Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) :=
+ match lbb with
+ | nil => OK nil
+ | bb :: lbb =>
+ do tlbb <- transf_blocks lbb;
+ do tbb <- verified_schedule bb;
+ OK (tbb :: tlbb)
+ end.
+
+Definition transl_function (f: function) : res function :=
+ do lb <- transf_blocks (fn_blocks f);
+ OK (mkfunction (fn_sig f) lb).
+
+Definition transf_function (f: function) : res function :=
+ do tf <- transl_function f;
+ if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))
+ then Error (msg "code size exceeded")
+ else OK tf.
+
+Definition transf_fundef (f: fundef) : res fundef :=
+ transf_partial_fundef transf_function f.
+
+Definition transf_program (p: program) : res program :=
+ transform_partial_program transf_fundef p.
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
new file mode 100644
index 00000000..6f784238
--- /dev/null
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -0,0 +1,674 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Asmblock
+open OpWeightsAsm
+open InstructionScheduler
+
+let debug = false
+
+let stats = false
+
+(**
+ * Extracting infos from Asm instructions
+ *)
+
+type location = Reg of Asm.preg | Mem | IREG0_XZR
+
+type ab_inst_rec = {
+ inst : instruction;
+ write_locs : location list;
+ read_locs : location list;
+ is_control : bool;
+}
+
+(** Asm constructor to real instructions *)
+
+exception OpaqueInstruction
+
+let is_XZR = function IREG0_XZR -> true | _ -> false
+
+let reg_of_pc = Reg Asm.PC
+
+let reg_of_dreg r = Reg (Asm.DR r)
+
+let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r)))
+
+let reg_of_iregsp r = Reg (Asm.DR (Asm.IR r))
+
+let reg_of_ireg0 r =
+ match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR
+
+let reg_of_freg r = Reg (Asm.DR (Asm.FR r))
+
+let reg_of_cr r = Reg (Asm.CR r)
+
+let regXSP = Reg (Asm.DR (Asm.IR Asm.XSP))
+
+let flags_wlocs =
+ [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CC; reg_of_cr Asm.CV ]
+
+let get_arith_p_wlocs = function
+ | Pfmovimms _ -> [ reg_of_ireg Asm.X16 ]
+ | Pfmovimmd _ -> [ reg_of_ireg Asm.X16 ]
+ | _ -> []
+
+let arith_p_rec i i' rd =
+ {
+ inst = i;
+ write_locs = [ rd ] @ get_arith_p_wlocs i';
+ read_locs = [];
+ is_control = false;
+ }
+
+let arith_pp_rec i rd rs =
+ { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false }
+
+let arith_ppp_rec i rd r1 r2 =
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
+
+let arith_rr0r_rec i rd r1 r2 =
+ let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_rr0_rec i rd r1 =
+ let rlocs = if is_XZR r1 then [] else [ r1 ] in
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_arrrr0_rec i rd r1 r2 r3 =
+ let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let arith_comparison_pp_rec i r1 r2 =
+ {
+ inst = i;
+ write_locs = flags_wlocs;
+ read_locs = [ r1; r2 ];
+ is_control = false;
+ }
+
+let arith_comparison_r0r_rec i r1 r2 =
+ let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
+ { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false }
+
+let arith_comparison_p_rec i r1 =
+ { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false }
+
+let get_eval_addressing_rlocs a =
+ match a with
+ | Asm.ADimm (base, _) -> [ reg_of_iregsp base ]
+ | Asm.ADreg (base, r) -> [ reg_of_iregsp base; reg_of_ireg r ]
+ | Asm.ADlsl (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+ | Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+ | Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
+ | Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ]
+ | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ]
+
+let load_rd_a_rec ld rd a =
+ {
+ inst = ld;
+ write_locs = [ rd ];
+ read_locs = [ Mem ] @ get_eval_addressing_rlocs a;
+ is_control = false;
+ }
+
+let load_rd1_rd2_a_rec ld rd1 rd2 a =
+ {
+ inst = ld;
+ write_locs = [ rd1; rd2 ];
+ read_locs = [ Mem ] @ get_eval_addressing_rlocs a;
+ is_control = false;
+ }
+
+let load_rec ldi =
+ match ldi with
+ | PLd_rd_a (ld, rd, a) ->
+ load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
+ | Pldp (ld, rd1, rd2, _, _, a) ->
+ load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1)
+ (reg_of_dreg rd2) a
+
+let store_rs_a_rec st rs a =
+ {
+ inst = st;
+ write_locs = [ Mem ];
+ read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a;
+ is_control = false;
+ }
+
+let store_rs1_rs2_a_rec st rs1 rs2 a =
+ {
+ inst = st;
+ write_locs = [ Mem ];
+ read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a;
+ is_control = false;
+ }
+
+let store_rec sti =
+ match sti with
+ | PSt_rs_a (st, rs, a) ->
+ store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
+ | Pstp (st, rs1, rs2, _, _, a) ->
+ store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1)
+ (reg_of_dreg rs2) a
+
+let loadsymbol_rec i rd id =
+ { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false }
+
+let cvtsw2x_rec i rd r1 =
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
+
+let cvtuw2x_rec i rd r1 =
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
+
+let cvtx2w_rec i rd =
+ { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false }
+
+let get_testcond_rlocs c =
+ match c with
+ | Asm.TCeq -> [ reg_of_cr Asm.CZ ]
+ | Asm.TCne -> [ reg_of_cr Asm.CZ ]
+ | Asm.TChs -> [ reg_of_cr Asm.CC ]
+ | Asm.TClo -> [ reg_of_cr Asm.CC ]
+ | Asm.TCmi -> [ reg_of_cr Asm.CN ]
+ | Asm.TCpl -> [ reg_of_cr Asm.CN ]
+ | Asm.TChi -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ]
+ | Asm.TCls -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ]
+ | Asm.TCge -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ]
+ | Asm.TClt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ]
+ | Asm.TCgt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ]
+ | Asm.TCle -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ]
+
+let cset_rec i rd c =
+ {
+ inst = i;
+ write_locs = [ rd ];
+ read_locs = get_testcond_rlocs c;
+ is_control = false;
+ }
+
+let csel_rec i rd r1 r2 c =
+ {
+ inst = i;
+ write_locs = [ rd ];
+ read_locs = [ r1; r2 ] @ get_testcond_rlocs c;
+ is_control = false;
+ }
+
+let fmovi_rec i fsz rd r1 =
+ let rlocs = if is_XZR r1 then [] else [ r1 ] in
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
+
+let fnmul_rec i fsz rd r1 r2 =
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
+
+let allocframe_rec i sz linkofs =
+ {
+ inst = i;
+ write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16; reg_of_ireg Asm.X29 ];
+ read_locs = [ regXSP; Mem ];
+ is_control = false;
+ }
+
+let freeframe_rec i sz linkofs =
+ {
+ inst = i;
+ write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16 ];
+ read_locs = [ regXSP; Mem ];
+ is_control = false;
+ }
+
+let nop_rec i =
+ { inst = i; write_locs = []; read_locs = []; is_control = false }
+
+let arith_rec i =
+ match i with
+ | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd)
+ | PArithPP (i', rd, rs) ->
+ arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs)
+ | PArithPPP (i', rd, r1, r2) ->
+ arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+ (reg_of_dreg r2)
+ | PArithRR0R (i', rd, r1, r2) ->
+ arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
+ (reg_of_ireg r2)
+ | PArithRR0 (i', rd, r1) ->
+ arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
+ | PArithARRRR0 (i', rd, r1, r2, r3) ->
+ arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1)
+ (reg_of_ireg r2) (reg_of_ireg0 r3)
+ | PArithComparisonPP (i', r1, r2) ->
+ arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1)
+ (reg_of_dreg r2)
+ | PArithComparisonR0R (i', r1, r2) ->
+ arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1)
+ (reg_of_ireg r2)
+ | PArithComparisonP (i', r1) ->
+ arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1)
+ | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c
+ | Pfmovi (fsz, rd, r1) ->
+ fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1)
+ | Pcsel (rd, r1, r2, c) ->
+ csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+ (reg_of_dreg r2) c
+ | Pfnmul (fsz, rd, r1, r2) ->
+ fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1)
+ (reg_of_freg r2)
+
+let basic_rec i =
+ match i with
+ | PArith i' -> arith_rec i'
+ | PLoad ld -> load_rec ld
+ | PStore st -> store_rec st
+ | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs
+ | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs
+ | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id
+ | Pcvtsw2x (rd, r1) ->
+ cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+ | Pcvtuw2x (rd, r1) ->
+ cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+ | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd)
+ | Pnop -> nop_rec (PBasic i)
+
+let builtin_rec i ef args res =
+ { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true }
+
+let ctl_flow_rec i =
+ match i with
+ | Pb lbl ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_pc ];
+ is_control = true;
+ }
+ | Pbc (c, lbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_pc ];
+ is_control = true;
+ }
+ | Pbl (id, sg) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
+ read_locs = [ reg_of_pc ];
+ is_control = true;
+ }
+ | Pbs (id, sg) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [];
+ is_control = true;
+ }
+ | Pblr (r, sg) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
+ read_locs = [ reg_of_ireg r ];
+ is_control = true;
+ }
+ | Pbr (r, sg) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r ];
+ is_control = true;
+ }
+ | Pret r ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r ];
+ is_control = true;
+ }
+ | Pcbnz (sz, r, lbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r; reg_of_pc ];
+ is_control = true;
+ }
+ | Pcbz (sz, r, lbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r; reg_of_pc ];
+ is_control = true;
+ }
+ | Ptbnz (sz, r, n, lbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r; reg_of_pc ];
+ is_control = true;
+ }
+ | Ptbz (sz, r, n, lbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_pc ];
+ read_locs = [ reg_of_ireg r; reg_of_pc ];
+ is_control = true;
+ }
+ | Pbtbl (r1, tbl) ->
+ {
+ inst = PControl (PCtlFlow i);
+ write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ];
+ read_locs = [ reg_of_ireg r1; reg_of_pc ];
+ is_control = true;
+ }
+
+let control_rec i =
+ match i with
+ | Pbuiltin (ef, args, res) -> builtin_rec (PControl i) ef args res
+ | PCtlFlow i' -> ctl_flow_rec i'
+
+let rec basic_recs body =
+ match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body
+
+let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ]
+
+let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit
+
+(**
+ * Providing informations relative to the real instructions
+ *)
+
+type inst_info = {
+ write_locs : location list;
+ read_locs : location list;
+ is_control : bool;
+ (* resources consumed by the instruction *)
+ usage : int array;
+ latency : int;
+}
+
+(** Abstraction providing all the necessary informations for solving the scheduling problem *)
+
+let rec_to_info (r : ab_inst_rec) : inst_info =
+ let opweights = OpWeightsAsm.get_opweights () in
+ let usage = opweights.resources_of_op r.inst 0
+ and latency = opweights.latency_of_op r.inst 0 in
+ {
+ write_locs = r.write_locs;
+ read_locs = r.read_locs;
+ usage;
+ latency;
+ is_control = r.is_control;
+ }
+
+let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
+
+let instruction_usages bb =
+ let usages = List.map (fun info -> info.usage) (instruction_infos bb) in
+ Array.of_list usages
+
+(**
+ * Latency constraints building
+ *)
+
+module LocHash = Hashtbl
+
+(* Hash table : location => list of instruction ids *)
+
+let rec intlist n =
+ if n < 0 then failwith "intlist: n < 0"
+ else if n = 0 then []
+ else (n - 1) :: intlist (n - 1)
+
+let find_in_hash hashloc loc =
+ match LocHash.find_opt hashloc loc with Some idl -> idl | None -> []
+
+(* Returns a list of instruction ids *)
+let rec get_accesses hashloc (ll : location list) =
+ match ll with
+ | [] -> []
+ | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs
+
+let compute_latency (ifrom : inst_info) = ifrom.latency
+
+let latency_constraints bb =
+ let written = LocHash.create 70
+ and read = LocHash.create 70
+ and count = ref 0
+ and constraints = ref []
+ and instr_infos = instruction_infos bb in
+ let step (i : inst_info) =
+ let raw = get_accesses written i.read_locs
+ and waw = get_accesses written i.write_locs
+ and war = get_accesses read i.write_locs in
+ List.iter
+ (fun i ->
+ constraints :=
+ {
+ instr_from = i;
+ instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i);
+ }
+ :: !constraints)
+ raw;
+ List.iter
+ (fun i ->
+ constraints :=
+ {
+ instr_from = i;
+ instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i);
+ }
+ :: !constraints)
+ waw;
+ List.iter
+ (fun i ->
+ constraints :=
+ { instr_from = i; instr_to = !count; latency = 0 } :: !constraints)
+ war;
+ if i.is_control then
+ List.iter
+ (fun n ->
+ constraints :=
+ { instr_from = n; instr_to = !count; latency = 0 } :: !constraints)
+ (intlist !count);
+ (* Updating "read" and "written" hashmaps *)
+ List.iter
+ (fun loc ->
+ LocHash.replace written loc [ !count ];
+ LocHash.replace read loc []
+ (* Clearing all the entries of "read" hashmap when a register is written *))
+ i.write_locs;
+ List.iter
+ (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc))
+ i.read_locs;
+ count := !count + 1
+ in
+ List.iter step instr_infos;
+ !constraints
+
+(**
+ * Using the InstructionScheduler
+ *)
+
+let opweights = OpWeightsAsm.get_opweights ()
+
+let build_problem bb =
+ {
+ max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* unused here *)
+ typing = (fun x -> AST.Tint); (* unused here *)
+ reference_counting = None;
+ instruction_usages = instruction_usages bb;
+ latency_constraints = latency_constraints bb;
+ }
+
+let get_from_indexes indexes l = List.map (List.nth l) indexes
+
+(*let is_basic = function PBasic _ -> true | _ -> false*)
+let is_control = function PControl _ -> true | _ -> false
+
+let to_control = function
+ | PControl i -> i
+ | _ -> failwith "to_control: basic instruction found"
+
+let rec body_to_instrs bdy =
+ match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l'
+
+let rec instrs_to_bdy instrs =
+ match instrs with
+ | [] -> []
+ | PBasic i :: l' -> i :: instrs_to_bdy l'
+ | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found"
+
+let repack li hd =
+ let last = List.nth li (List.length li - 1) in
+ if is_control last then
+ let cut_li =
+ Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1)
+ in
+ { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) }
+ else { header = hd; body = instrs_to_bdy li; exit = None }
+
+module TimeHash = Hashtbl
+
+(*Hash table : time => list of instruction ids *)
+
+(* Flattening the minpack result *)
+let hashtbl2flatarray h maxint =
+ let rec f i =
+ match TimeHash.find_opt h i with
+ | None -> if i > maxint then [] else f (i + 1)
+ | Some bund -> bund @ f (i + 1)
+ in
+ f 0
+
+let find_max l =
+ let rec f = function
+ | [] -> None
+ | e :: l -> (
+ match f l with
+ | None -> Some e
+ | Some m -> if e > m then Some e else Some m)
+ in
+ match f l with None -> raise Not_found | Some m -> m
+
+(* We still use the minpack algorithm even without bundles, but the result is then flattened *)
+(*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*)
+let minpack_list (l : int list) =
+ let timehash = TimeHash.create (List.length l) in
+ let rec f i = function
+ | [] -> ()
+ | t :: l ->
+ (match TimeHash.find_opt timehash t with
+ | None -> TimeHash.add timehash t [ i ]
+ | Some bund -> TimeHash.replace timehash t (bund @ [ i ]));
+ f (i + 1) l
+ in
+ f 0 l;
+ hashtbl2flatarray timehash (find_max l)
+
+let bb_to_instrs bb =
+ body_to_instrs bb.body
+ @ match bb.exit with None -> [] | Some e -> [ PControl e ]
+
+let build_solution bb sol =
+ (* Remove last element - the total *)
+ let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in
+ let pack = minpack_list tmp and instrs = bb_to_instrs bb in
+ repack (get_from_indexes pack instrs) bb.header
+
+let print_schedule sched =
+ print_string "[ ";
+ Array.iter (fun x -> Printf.printf "%d; " x) sched;
+ print_endline "]"
+
+let do_schedule bb =
+ let problem = build_problem bb in
+ if debug then print_problem stdout problem;
+ let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in
+ match solution with
+ | None -> failwith "Could not find a valid schedule"
+ | Some sol ->
+ if debug then print_schedule sol;
+ build_solution bb sol
+
+(**
+ * Dumb schedule if the above doesn't work
+ *)
+
+(* Pack result *)
+let pack_result (bb : bblock) = (bb.body, bb.exit)
+
+let smart_schedule bb =
+ let bb' =
+ try do_schedule bb with
+ | OpaqueInstruction ->
+ if debug then
+ Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n";
+ bb (* Identity in case of failure *)
+ | e ->
+ let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in
+ Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg
+ stack;
+ failwith "Invalid schedule"
+ in
+ pack_result bb'
+
+let bblock_schedule bb =
+ let identity_mode = not !Clflags.option_fpostpass in
+ if debug && not identity_mode then (
+ Printf.eprintf "###############################\n";
+ Printf.eprintf "SCHEDULING\n");
+ if stats then (
+ let oc =
+ open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv"
+ in
+ Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb));
+ close_out oc);
+ if identity_mode then pack_result bb else smart_schedule bb
+
+(** Called schedule function from Coq *)
+
+(*let schedule_notime bb = let toto = bblock_schedule in toto*)
+let schedule bb =
+ Timing.time_coq
+ [
+ 'P';
+ 'o';
+ 's';
+ 't';
+ 'p';
+ 'a';
+ 's';
+ 's';
+ 'S';
+ 'c';
+ 'h';
+ 'e';
+ 'd';
+ 'u';
+ 'l';
+ 'i';
+ 'n';
+ 'g';
+ ' ';
+ 'o';
+ 'r';
+ 'a';
+ 'c';
+ 'l';
+ 'e';
+ ]
+ bblock_schedule bb
diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v
new file mode 100644
index 00000000..a5084b5f
--- /dev/null
+++ b/aarch64/PostpassSchedulingproof.v
@@ -0,0 +1,482 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockprops.
+Require Import PostpassScheduling.
+Require Import Asmblockgenproof.
+Require Import Axioms.
+Require Import Lia.
+
+Local Open Scope error_monad_scope.
+
+Definition match_prog (p tp: program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Section PRESERVATION_ASMBLOCK.
+
+Variables prog tprog: program.
+Variable lk: aarch64_linker.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma symbols_preserved:
+ forall id,
+ Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_match TRANSL).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSL).
+
+Lemma function_ptr_translated:
+ forall v f,
+ Genv.find_funct_ptr ge v = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSL).
+
+Inductive match_states: state -> state -> Prop :=
+ | match_states_intro:
+ forall s1 s2, s1 = s2 -> match_states s1 s2.
+
+Lemma prog_main_preserved:
+ prog_main tprog = prog_main prog.
+Proof (match_program_main TRANSL).
+
+Lemma prog_main_address_preserved:
+ (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) =
+ (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero).
+Proof.
+ unfold Genv.symbol_address. rewrite symbols_preserved.
+ rewrite prog_main_preserved. auto.
+Qed.
+
+Lemma transf_initial_states:
+ forall st1, initial_state prog st1 ->
+ exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+ intros. inv H.
+ econstructor; split.
+ - eapply initial_state_intro.
+ eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+ - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto.
+Qed.
+
+Lemma transf_final_states:
+ forall st1 st2 r,
+ match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+ intros. inv H0. inv H. econstructor; eauto.
+Qed.
+
+Lemma transf_find_bblock:
+ forall ofs f bb tf,
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb ->
+ transf_function f = OK tf ->
+ exists tbb,
+ verified_schedule bb = OK tbb
+ /\ find_bblock (Ptrofs.unsigned ofs) (fn_blocks tf) = Some tbb.
+Proof.
+ intros.
+ monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0.
+ monadInv EQ. simpl in *.
+ generalize (Ptrofs.unsigned ofs) H x EQ0; clear ofs H x g EQ0.
+ induction (fn_blocks f).
+ - intros. simpl in *. discriminate.
+ - intros. simpl in *.
+ monadInv EQ0. simpl.
+ destruct (zlt z 0); try discriminate.
+ destruct (zeq z 0).
+ + inv H. eauto.
+ + monadInv EQ0.
+ exploit IHb; eauto.
+ intros (tbb & SCH & FIND).
+ eexists; split; eauto.
+ inv FIND.
+ unfold verify_size in EQ0.
+ destruct (size a =? size (stick_header (header a) x)) eqn:EQSIZE; try discriminate.
+ rewrite Z.eqb_eq in EQSIZE; rewrite EQSIZE.
+ reflexivity.
+Qed.
+
+Lemma stick_header_neutral: forall a,
+ a = (stick_header (header a) (no_header a)).
+Proof.
+ intros.
+ unfold stick_header. unfold Asmblock.stick_header_obligation_1. simpl. destruct a.
+ simpl. reflexivity.
+Qed.
+
+Lemma symbol_address_preserved:
+ forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs.
+Proof.
+ intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity.
+Qed.
+
+Lemma verified_schedule_label:
+ forall bb tbb l,
+ verified_schedule bb = OK (tbb) ->
+ is_label l bb = is_label l tbb.
+Proof.
+ intros.
+ unfold is_label.
+ monadInv H. simpl. auto.
+Qed.
+
+Remark label_pos_pvar_none_add:
+ forall tc l p p' k,
+ label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None.
+Proof.
+ induction tc.
+ - intros. simpl. auto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + discriminate.
+ + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_none:
+ forall tc l p p',
+ label_pos l p tc = None -> label_pos l p' tc = None.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_none_add; eauto.
+Qed.
+
+Remark label_pos_pvar_some_add_add:
+ forall tc l p p' k k',
+ label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k).
+Proof.
+ induction tc.
+ - intros. simpl in H. discriminate.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + inv H. assert (k = k') by lia. subst. reflexivity.
+ + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_some_add:
+ forall tc l p p' k,
+ label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k).
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_some_add_add; eauto.
+Qed.
+
+Remark label_pos_pvar_add:
+ forall c tc l p p' k,
+ label_pos l (p+k) c = label_pos l p tc ->
+ label_pos l (p'+k) c = label_pos l p' tc.
+Proof.
+ induction c.
+ - intros. simpl in *.
+ exploit label_pos_pvar_none; eauto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + exploit label_pos_pvar_some_add; eauto.
+ + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar:
+ forall c tc l p p',
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p' c = label_pos l p' tc.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_add; eauto.
+Qed.
+
+Lemma label_pos_head_cons:
+ forall c bb tbb l tc p,
+ verified_schedule bb = OK tbb ->
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p (bb :: c) = label_pos l p (tbb :: tc).
+Proof.
+ intros. simpl.
+ exploit verified_schedule_label; eauto. intros ISLBL.
+ rewrite ISLBL.
+ destruct (is_label l tbb) eqn:ISLBL'; simpl; auto.
+ eapply label_pos_pvar in H0. erewrite H0.
+ erewrite verified_schedule_size; eauto.
+Qed.
+
+Lemma label_pos_preserved:
+ forall c tc l,
+ transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc.
+Proof.
+ induction c.
+ - intros. simpl in *. inv H. reflexivity.
+ - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H.
+ eapply IHc in EQ. eapply label_pos_head_cons; eauto.
+Qed.
+
+Lemma label_pos_preserved_blocks:
+ forall l f tf,
+ transf_function f = OK tf ->
+ label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf).
+Proof.
+ intros. monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned _); try discriminate.
+ monadInv EQ0. simpl. eapply label_pos_preserved; eauto.
+Qed.
+
+Lemma transf_exec_basic:
+ forall i rs m, exec_basic lk ge i rs m = exec_basic lk tge i rs m.
+Proof.
+ intros. pose symbol_address_preserved.
+ unfold exec_basic.
+ destruct i; simpl; auto; try congruence.
+Qed.
+
+Lemma transf_exec_body:
+ forall bdy rs m, exec_body lk ge bdy rs m = exec_body lk tge bdy rs m.
+Proof.
+ induction bdy; intros.
+ - simpl. reflexivity.
+ - simpl. rewrite transf_exec_basic.
+ destruct (exec_basic _ _ _); auto.
+Qed.
+
+Lemma transf_exec_cfi: forall f tf cfi rs m,
+ transf_function f = OK tf ->
+ exec_cfi ge f cfi rs m = exec_cfi tge tf cfi rs m.
+Proof.
+ intros. destruct cfi; simpl; auto;
+ assert (ge = Genv.globalenv prog); auto;
+ assert (tge = Genv.globalenv tprog); auto;
+ pose symbol_address_preserved.
+ all: try unfold eval_branch; try unfold eval_neg_branch; try unfold goto_label;
+ try erewrite label_pos_preserved_blocks; try rewrite e; eauto.
+ destruct (rs # X16 <- Vundef r1); auto.
+ destruct (list_nth_z tbl (Int.unsigned i)); auto.
+ erewrite label_pos_preserved_blocks; eauto.
+Qed.
+
+Lemma transf_exec_exit:
+ forall f tf sz ex t rs m rs' m',
+ transf_function f = OK tf ->
+ exec_exit ge f sz rs m ex t rs' m' ->
+ exec_exit tge tf sz rs m ex t rs' m'.
+Proof.
+ intros. induction H0.
+ - econstructor.
+ - econstructor. erewrite <- transf_exec_cfi; eauto.
+ - econstructor; eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Lemma transf_exec_bblock:
+ forall f tf bb t rs m rs' m',
+ transf_function f = OK tf ->
+ exec_bblock lk ge f bb rs m t rs' m' ->
+ exec_bblock lk tge tf bb rs m t rs' m'.
+Proof.
+ intros.
+ destruct H0 as [rs1[m1[BDY EXIT]]].
+ unfold exec_bblock.
+ eexists; eexists; split.
+ rewrite <- transf_exec_body; eauto.
+ eapply transf_exec_exit; eauto.
+Qed.
+
+Theorem transf_step_correct:
+ forall s1 t s2, step lk ge s1 t s2 ->
+ forall s1' (MS: match_states s1 s1'),
+ (exists s2', step lk tge s1' t s2' /\ match_states s2 s2').
+Proof.
+ induction 1; intros; inv MS.
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ exploit transf_find_bblock; eauto. intros (tbb & VES & FIND).
+ exploit verified_schedule_correct; eauto. intros EBB.
+ eapply transf_exec_bblock in EBB; eauto.
+ exists (State rs' m').
+ split; try (econstructor; eauto).
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ remember (State _ m') as s'. exists s'. split; try constructor; auto.
+ subst s'. eapply exec_step_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Theorem transf_program_correct_Asmblock:
+ forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog).
+Proof.
+ eapply forward_simulation_step.
+ - apply senv_preserved.
+ - apply transf_initial_states.
+ - apply transf_final_states.
+ - apply transf_step_correct.
+Qed.
+
+End PRESERVATION_ASMBLOCK.
+
+(*
+Require Import Asm.
+
+Lemma verified_par_checks_alls_bundles lb x: forall bundle,
+ verify_par lb = OK x ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lb; simpl; try tauto.
+ intros bundle H; monadInv H.
+ destruct 1; subst; eauto.
+ destruct x0; auto.
+Qed.
+
+Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
+ verified_schedule_nob bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule_nob. intros H;
+ monadInv H. destruct x4.
+ intros; eapply verified_par_checks_alls_bundles; eauto.
+Qed.
+
+Lemma verify_par_bblock_PExpand bb i:
+ exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt.
+Proof.
+ destruct bb as [h bdy ext H]; simpl.
+ intros; subst. destruct i.
+ generalize H.
+ rewrite <- wf_bblock_refl in H.
+ destruct H as [H H0].
+ unfold builtin_alone in H0. erewrite H0; eauto.
+Qed.
+
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
+
+Lemma verified_schedule_checks_alls_bundles bb lb bundle:
+ verified_schedule bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule. remember (exit bb) as exb.
+ destruct exb as [c|]; eauto.
+ destruct c as [i|]; eauto.
+ destruct i; intros H. inversion_clear H; simpl.
+ intuition subst.
+ intros; eapply verify_par_bblock_PExpand; eauto.
+Qed.
+
+Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle,
+ transf_blocks lbb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lbb; simpl.
+ - intros lb bundle H; inversion_clear H. simpl; try tauto.
+ - intros lb bundle H0.
+ monadInv H0.
+ rewrite in_app. destruct 1; eauto.
+ eapply verified_schedule_checks_alls_bundles; eauto.
+Qed.
+
+Lemma find_bblock_Some_in lb:
+ forall ofs b, find_bblock ofs lb = Some b -> List.In b lb.
+Proof.
+ induction lb; simpl; try congruence.
+ intros ofs b.
+ destruct (zlt ofs 0); try congruence.
+ destruct (zeq ofs 0); eauto.
+ intros X; inversion X; eauto.
+Qed.*)
+
+(*
+Section PRESERVATION_ASMVLIW.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma all_bundles_are_checked b ofs f bundle:
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock ofs (fn_blocks f) = Some bundle ->
+ verify_par_bblock bundle = OK tt.
+Proof.
+ unfold match_prog, match_program in TRANSL.
+ unfold Genv.find_funct_ptr; simpl; intros X.
+ destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+ destruct y as [tf0|]; try congruence.
+ inversion X as [H1]. subst. clear X.
+ remember (@Gfun fundef unit (Internal f)) as f2.
+ destruct H as [ctx' f1 f2 H0|]; try congruence.
+ inversion Heqf2 as [H2]. subst; clear Heqf2.
+ unfold transf_fundef, transf_partial_fundef in H.
+ destruct f1 as [f1|f1]; try congruence.
+ unfold transf_function, transl_function in H.
+ monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence.
+ injection EQ1; intros; subst.
+ monadInv EQ0. simpl in * |-.
+ intros; exploit transf_blocks_checks_all_bundles; eauto.
+ intros; eapply find_bblock_Some_in; eauto.
+Qed.
+
+Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m':
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ verify_par_bblock bundle = OK tt ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0.
+ simpl in H.
+ eapply Asmblockdeps.bblock_para_check_correct; eauto.
+Qed.
+
+Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m':
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros; eapply checked_bundles_are_parexec_equiv; eauto.
+ eapply all_bundles_are_checked; eauto.
+Qed.
+
+Theorem transf_program_correct_Asmvliw:
+ forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog).
+Proof.
+ eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto.
+ - intros; subst; auto.
+ - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto.
+ + eapply exec_step_internal; eauto.
+ intros; eapply seqexec_parexec_equiv; eauto.
+ + eapply exec_step_builtin; eauto.
+ + eapply exec_step_external; eauto.
+Qed.
+
+End PRESERVATION_ASMVLIW.*)
+
+Section PRESERVATION.
+
+Variable lk: aarch64_linker.
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+ forward_simulation (Asmblock.semantics lk prog) (Asmblock.semantics lk tprog).
+Proof.
+ eapply transf_program_correct_Asmblock; eauto.
+Qed.
+
+End PRESERVATION.
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..d7e80cd9
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,297 @@
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* Nicolas Nardino ENS-Lyon, VERIMAG *)
+(* *)
+(* *)
+(* *************************************************************)
+
+open AST
+open BTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+open PrintBTL
+open DebugPrint
+
+let use_alias_analysis () = false
+
+let build_constraints_and_resources (opweights : opweights) seqa btl =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int * int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array =
+ Array.make opweights.nr_non_pipelined_units (-1)
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to then
+ if latency = 0 then ()
+ else
+ failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop"
+ else
+ latency_constraints :=
+ { instr_from; instr_to; latency } :: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads with Some l -> l | None -> []
+ in
+ let add_input_mem i =
+ if not (use_alias_analysis ()) then (
+ (* Read after write *)
+ (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
+ last_mem_reads := i :: !last_mem_reads)
+ and add_output_mem i =
+ if not (use_alias_analysis ()) then (
+ (* Write after write *)
+ (match !last_mem_write with None -> () | Some j -> add_constraint j i 1);
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := [])
+ and add_input_reg i reg =
+ (* Read after write *)
+ (match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency);
+ last_reg_reads := PTree.set reg (i :: get_last_reads reg) !last_reg_reads
+ and add_output_reg i latency reg =
+ (* Write after write *)
+ (match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1);
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg);
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) ->
+ add_builtin_res i hi;
+ add_builtin_res i lo
+ in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack (_, _) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal (_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong (hi, lo) ->
+ add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr (a1, a2) ->
+ add_builtin_arg i a1;
+ add_builtin_arg i a2
+ and irreversible_action i =
+ match !last_branch with None -> () | Some j -> add_constraint j i 1
+ in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i
+ and add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri
+ (fun rsc latency -> if latency >= 0 then last_non_pipelined_op.(rsc) <- i)
+ resources
+ in
+ Array.iteri
+ (fun i (inst, other_uses) ->
+ List.iter (fun use -> add_input_reg i use) (Regset.elements other_uses);
+ match inst with
+ | Bnop _ -> ()
+ | Bop (op, lr, rd, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length lr));
+ if Op.is_trapping_op op then irreversible_action i;
+ add_input_regs i lr;
+ add_output_reg i (opweights.latency_of_op op (List.length lr)) rd
+ | Bload (trap, chk, addr, lr, rd, _) ->
+ if trap = TRAP then irreversible_action i;
+ add_input_mem i;
+ add_input_regs i lr;
+ add_output_reg i
+ (opweights.latency_of_load trap chk addr (List.length lr))
+ rd
+ | Bstore (chk, addr, lr, src, _) ->
+ irreversible_action i;
+ add_input_regs i lr;
+ add_input_reg i src;
+ add_output_mem i
+ | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i lr
+ | Bcond (_, _, _, _, _) ->
+ failwith "build_constraints_and_resources: invalid Bcond"
+ | BF (Bcall (signature, ef, lr, rd, _), _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ());
+ add_input_mem i;
+ add_input_regs i lr;
+ add_output_reg i (opweights.latency_of_call signature ef) rd;
+ add_output_mem i;
+ failwith "build_constraints_and_resources: invalid Bcall"
+ | BF (Btailcall (signature, ef, lr), _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ());
+ add_input_mem i;
+ add_input_regs i lr;
+ failwith "build_constraints_and_resources: invalid Btailcall"
+ | BF (Bbuiltin (ef, lr, rd, _), _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) lr;
+ add_builtin_res i rd;
+ add_output_mem i;
+ failwith "build_constraints_and_resources: invalid Bbuiltin"
+ | BF (Bjumptable (lr, _), _) ->
+ set_branch i;
+ add_input_reg i lr;
+ failwith "build_constraints_and_resources: invalid Bjumptable"
+ | BF (Breturn (Some r), _) ->
+ set_branch i;
+ add_input_reg i r;
+ failwith "build_constraints_and_resources: invalid Breturn Some"
+ | BF (Breturn None, _) ->
+ set_branch i;
+ failwith "build_constraints_and_resources: invalid Breturn None"
+ | BF (Bgoto _, _) ->
+ failwith "build_constraints_and_resources: invalid Bgoto"
+ | Bseq (_, _) -> failwith "build_constraints_and_resources: Bseq")
+ seqa;
+ !latency_constraints
+
+let resources_of_instruction (opweights : opweights) = function
+ | Bnop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Bop (op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Bload (trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Bstore (chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | BF (Bcall (signature, ef, inputs, output, _), _) ->
+ opweights.resources_of_call signature ef
+ | BF (Bbuiltin (ef, builtin_inputs, builtin_output, _), _) ->
+ opweights.resources_of_builtin ef
+ | Bcond (cond, args, _, _, _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | BF (Btailcall _, _) | BF (Bjumptable _, _) | BF (Breturn _, _) ->
+ opweights.pipelined_resource_bounds
+ | BF (Bgoto _, _) | Bseq (_, _) ->
+ failwith "resources_of_instruction: invalid btl instruction"
+
+let print_sequence pp seqa =
+ Array.iteri
+ (fun i (inst, other_uses) ->
+ debug "i=%d\n inst = " i;
+ print_btl_inst pp inst;
+ debug "\n other_uses=";
+ print_regset other_uses;
+ debug "\n")
+ seqa
+
+let length_of_chunk = function
+ | Mint8signed | Mint8unsigned -> 1
+ | Mint16signed | Mint16unsigned -> 2
+ | Mint32 | Mfloat32 | Many32 -> 4
+ | Mint64 | Mfloat64 | Many64 -> 8
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+ (typing : RTLtyping.regenv) reference_counting seqa btl =
+ let simple_deps = build_constraints_and_resources opweights seqa btl in
+ {
+ max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = live_entry_regs;
+ typing;
+ reference_counting = Some reference_counting;
+ instruction_usages =
+ Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints = simple_deps;
+ }
+
+let zigzag_scheduler problem early_ones =
+ let nr_instructions = get_nr_instructions problem in
+ assert (nr_instructions = Array.length early_ones);
+ match list_scheduler problem with
+ | Some fwd_schedule ->
+ let fwd_makespan = fwd_schedule.(Array.length fwd_schedule - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri
+ (fun i is_early ->
+ if is_early then
+ constraints' :=
+ {
+ instr_from = i;
+ instr_to = nr_instructions;
+ latency = fwd_makespan - fwd_schedule.(i);
+ }
+ :: !constraints')
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None
+
+let prepass_scheduler_by_name name problem seqa =
+ match name with
+ | "zigzag" ->
+ let early_ones =
+ Array.map
+ (fun (inst, _) ->
+ match inst with Bcond (_, _, _, _, _) -> true | _ -> false)
+ seqa
+ in
+ zigzag_scheduler problem early_ones
+ | _ -> scheduler_by_name name problem
+
+let schedule_sequence seqa btl (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv) reference =
+ let opweights = OpWeights.get_opweights () in
+ try
+ if Array.length seqa <= 1 then None
+ else
+ let nr_instructions = Array.length seqa in
+ if !Clflags.option_debug_compcert > 6 then
+ Printf.printf "prepass scheduling length = %d\n" nr_instructions;
+ let problem =
+ define_problem opweights live_regs_entry typing reference seqa btl
+ in
+ if !Clflags.option_debug_compcert > 7 then (
+ print_sequence stdout seqa;
+ print_problem stdout problem);
+ match
+ prepass_scheduler_by_name !Clflags.option_fprepass_sched problem seqa
+ with
+ | None ->
+ Printf.printf "no solution in prepass scheduling\n";
+ None
+ | Some solution ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort
+ (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1 else if si > sj then 1 else i - j)
+ positions;
+ Some positions
+ with Failure s ->
+ Printf.printf "failure in prepass scheduling: %s\n" s;
+ None
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+
+type opweights =
+ {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : Op.operation -> int -> int;
+ resources_of_op : Op.operation -> int -> int array;
+ non_pipelined_resources_of_op : Op.operation -> int -> int array;
+ latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+ resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_cond : Op.condition -> int -> int array;
+ latency_of_call : AST.signature -> called_function -> int;
+ resources_of_call : AST.signature -> called_function -> int array;
+ resources_of_builtin : AST.external_function -> int array
+ };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index aee09b12..0984943c 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -16,6 +16,7 @@ Require Import Coqlib Zbits.
Require Import AST Integers Floats Values Memory Globalenvs.
Require Import Cminor Op CminorSel.
Require Import SelectOp SelectLong SelectOpproof.
+Require Import OpHelpers OpHelpersproof.
Local Open Scope cminorsel_scope.
Local Transparent Archi.ptr64.
@@ -23,8 +24,10 @@ Local Transparent Archi.ptr64.
(** * Correctness of the smart constructors *)
Section CMCONSTR.
-
-Variable ge: genv.
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
Variable sp: val.
Variable e: env.
Variable m: mem.
@@ -556,25 +559,29 @@ Qed.
Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
Proof.
red; intros; unfold divls_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
Proof.
red; intros; unfold modls_base, modl_aux.
exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
Proof.
red; intros; unfold divlu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
Proof.
red; intros; unfold modlu_base, modl_aux.
exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrxlimm:
@@ -589,7 +596,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -723,42 +730,42 @@ Qed.
Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
End CMCONSTR.
diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp
index b5a03989..7f73d592 100644
--- a/aarch64/SelectOp.vp
+++ b/aarch64/SelectOp.vp
@@ -56,9 +56,13 @@ Nondetfunction add (e1: expr) (e2: expr) :=
| t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s =>
Eop (Oaddshift s a) (t1 ::: t2 ::: Enil)
| Eop Omul (t1:::t2:::Enil), t3 =>
- Eop Omuladd (t3:::t1:::t2:::Enil)
+ if Compopts.optim_madd tt
+ then Eop Omuladd (t3:::t1:::t2:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
| t1, Eop Omul (t2:::t3:::Enil) =>
- Eop Omuladd (t1:::t2:::t3:::Enil)
+ if Compopts.optim_madd tt
+ then Eop Omuladd (t1:::t2:::t3:::Enil)
+ else Eop Oadd (e1:::e2:::Enil)
| _, _ => Eop Oadd (e1:::e2:::Enil)
end.
@@ -555,6 +559,13 @@ Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
| _ => (Aindexed Int64.zero, e:::Enil)
end.
+(* floats *)
+Definition divf_base (e1: expr) (e2: expr) :=
+ Eop Odivf (e1 ::: e2 ::: Enil).
+
+Definition divfs_base (e1: expr) (e2: expr) :=
+ Eop Odivfs (e1 ::: e2 ::: Enil).
+
(** ** Arguments of builtins *)
Nondetfunction builtin_arg (e: expr) :=
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index ccc4c0f1..dfa4c598 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -16,6 +16,7 @@ Require Import Coqlib Zbits.
Require Import AST Integers Floats Values Memory Builtins Globalenvs.
Require Import Cminor Op CminorSel.
Require Import SelectOp.
+Require Import OpHelpers OpHelpersproof.
Local Open Scope cminorsel_scope.
Local Transparent Archi.ptr64.
@@ -74,8 +75,10 @@ Ltac TrivialExists :=
(** * Correctness of the smart constructors *)
Section CMCONSTR.
-
-Variable ge: genv.
+Variable prog: program.
+Variable hf: helper_functions.
+Hypothesis HELPERS: helper_functions_declared prog hf.
+Let ge := Genv.globalenv prog.
Variable sp: val.
Variable e: env.
Variable m: mem.
@@ -158,8 +161,10 @@ Proof.
- rewrite <- Val.add_assoc. apply eval_addimm. EvalOp.
- rewrite Val.add_commut. TrivialExists.
- TrivialExists.
-- rewrite Val.add_commut. TrivialExists.
-- TrivialExists.
+- destruct (Compopts.optim_madd tt).
+ + rewrite Val.add_commut. TrivialExists.
+ + TrivialExists.
+- destruct (Compopts.optim_madd tt); TrivialExists.
- TrivialExists.
Qed.
@@ -661,7 +666,8 @@ Theorem eval_divs_base:
Val.divs x y = Some z ->
exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
Proof.
- intros; unfold divs_base; TrivialExists.
+ intros; unfold divs_base; TrivialExists; cbn.
+ rewrite H1. reflexivity.
Qed.
Theorem eval_mods_base:
@@ -673,7 +679,8 @@ Theorem eval_mods_base:
Proof.
intros; unfold mods_base, mod_aux.
exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ cbn. rewrite A. reflexivity.
Qed.
Theorem eval_divu_base:
@@ -684,6 +691,7 @@ Theorem eval_divu_base:
exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
Proof.
intros; unfold divu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modu_base:
@@ -695,7 +703,8 @@ Theorem eval_modu_base:
Proof.
intros; unfold modu_base, mod_aux.
exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrximm:
@@ -710,7 +719,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -923,7 +932,7 @@ Theorem eval_intoffloat:
Val.intoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofint:
@@ -934,7 +943,7 @@ Theorem eval_floatofint:
Proof.
intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuoffloat:
@@ -943,7 +952,7 @@ Theorem eval_intuoffloat:
Val.intuoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofintu:
@@ -954,7 +963,7 @@ Theorem eval_floatofintu:
Proof.
intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intofsingle:
@@ -963,7 +972,7 @@ Theorem eval_intofsingle:
Val.intofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofint:
@@ -974,7 +983,7 @@ Theorem eval_singleofint:
Proof.
intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuofsingle:
@@ -983,7 +992,7 @@ Theorem eval_intuofsingle:
Val.intuofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofintu:
@@ -994,7 +1003,7 @@ Theorem eval_singleofintu:
Proof.
intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** Selection *)
@@ -1061,6 +1070,26 @@ Proof.
- constructor; auto.
Qed.
+Theorem eval_divf_base:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v.
+Proof.
+ intros; unfold divf_base.
+ TrivialExists.
+Qed.
+
+Theorem eval_divfs_base:
+ forall le a b x y,
+ eval_expr ge sp e m le a x ->
+ eval_expr ge sp e m le b y ->
+ exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+ intros; unfold divfs_base.
+ TrivialExists.
+Qed.
+
(** Platform-specific known builtins *)
Theorem eval_platform_builtin:
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index a9d47bdd..1ca3be16 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -21,23 +21,6 @@ open AisAnnot
open PrintAsmaux
open Fileinfo
-(* Recognition of FP numbers that are supported by the fmov #imm instructions:
- "a normalized binary floating point encoding with 1 sign bit,
- 4 bits of fraction and a 3-bit exponent"
-*)
-
-let is_immediate_float64 bits =
- let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
- let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
- exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
-
-let is_immediate_float32 bits =
- let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in
- let mant = Int32.logand bits 0x7F_FFFFl in
- exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant
-
-(* Naming and printing registers *)
-
let intsz oc (sz, n) =
match sz with X -> coqint64 oc n | W -> coqint oc n
@@ -107,14 +90,14 @@ let freg oc (sz, r) =
output_string oc (match sz with D -> dreg_name r | S -> sreg_name r)
let preg_asm oc ty = function
- | IR r -> if ty = Tint then wreg oc r else xreg oc r
- | FR r -> if ty = Tsingle then sreg oc r else dreg oc r
- | _ -> assert false
+ | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r
+ | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r
+| _ -> assert false
let preg_annot = function
- | IR r -> xreg_name r
- | FR r -> dreg_name r
- | _ -> assert false
+ | DR (IR (RR1 r)) -> xreg_name r
+ | DR (FR r) -> dreg_name r
+ | _ -> assert false
(* Base-2 log of a Caml integer *)
let rec log2 n =
@@ -156,12 +139,16 @@ module ELF_System : SYSTEM =
fprintf oc "#:lo12:%a" elf_label lbl
let load_symbol_address oc rd id =
- fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id;
- fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id
+ fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id;
+ fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id
+
+ (* Names of sections *)
let name_of_section = function
| Section_text -> ".text"
- | Section_data i | Section_small_data i ->
+ | Section_data(i, true) ->
+ failwith "_Thread_local unsupported on this platform"
+ | Section_data(i, false) | Section_small_data i ->
variable_section ~sec:".data" ~bss:".bss" i
| Section_const i | Section_small_const i ->
variable_section ~sec:".section .rodata" i
@@ -223,7 +210,9 @@ module MacOS_System : SYSTEM =
let name_of_section = function
| Section_text -> ".text"
- | Section_data i | Section_small_data i ->
+ | Section_data(i, true) ->
+ failwith "_Thread_local unsupported on this platform"
+ | Section_data(i, false) | Section_small_data i ->
variable_section ~sec:".data" i
| Section_const i | Section_small_const i ->
variable_section ~sec:".const" ~reloc:".const_data" i
@@ -343,6 +332,28 @@ module Target(System: SYSTEM): TARGET =
| (r, EOuxtw n) -> fprintf oc "%a, uxtw #%a" wreg r coqint n
| (r, EOuxtx n) -> fprintf oc "%a, uxtx #%a" xreg r coqint n
+ let next_profiling_label =
+ let atomic_incr_counter = ref 0 in
+ fun () ->
+ let r = sprintf ".Lcompcert_atomic_incr%d" !atomic_incr_counter in
+ incr atomic_incr_counter; r;;
+
+ let print_profiling_logger oc id kind =
+ assert (kind >= 0);
+ assert (kind <= 1);
+ fprintf oc "%s begin profiling %a %d: atomic increment\n" comment
+ Profilingaux.pp_id id kind;
+ let ofs = profiling_offset id kind and lbl = next_profiling_label () in
+ fprintf oc " adrp x15, %s+%d\n" profiling_counter_table_name ofs;
+ fprintf oc " add x15, x15, :lo12:(%s+%d)\n" profiling_counter_table_name ofs;
+ fprintf oc "%s:\n" lbl;
+ fprintf oc " ldaxr x17, [x15]\n";
+ fprintf oc " add x17, x17, 1\n";
+ fprintf oc " stlxr w29, x17, [x15]\n";
+ fprintf oc " cbnz w29, %s\n" lbl;
+ fprintf oc "%s end profiling %a %d\n" comment
+ Profilingaux.pp_id id kind;;
+
(* Printing of instructions *)
let print_instruction oc = function
(* Branches *)
@@ -386,7 +397,9 @@ module Target(System: SYSTEM): TARGET =
(* the upper 32 bits of Xrd are set to 0, performing zero-extension *)
| Pldrsw(rd, a) ->
fprintf oc " ldrsw %a, %a\n" xreg rd addressing a
- | Pldp(rd1, rd2, a) ->
+ | Pldpw(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" wreg rd1 wreg rd2 addressing a
+ | Pldpx(rd1, rd2, _, _, a) ->
fprintf oc " ldp %a, %a, %a\n" xreg rd1 xreg rd2 addressing a
| Pstrw(rs, a) | Pstrw_a(rs, a) ->
fprintf oc " str %a, %a\n" wreg rs addressing a
@@ -396,7 +409,9 @@ module Target(System: SYSTEM): TARGET =
fprintf oc " strb %a, %a\n" wreg rs addressing a
| Pstrh(rs, a) ->
fprintf oc " strh %a, %a\n" wreg rs addressing a
- | Pstp(rs1, rs2, a) ->
+ | Pstpw(rs1, rs2, _, _, a) ->
+ fprintf oc " stp %a, %a, %a\n" wreg rs1 wreg rs2 addressing a
+ | Pstpx(rs1, rs2, _, _, a) ->
fprintf oc " stp %a, %a, %a\n" xreg rs1 xreg rs2 addressing a
(* Integer arithmetic, immediate *)
| Paddimm(sz, rd, r1, n) ->
@@ -520,12 +535,20 @@ module Target(System: SYSTEM): TARGET =
fprintf oc " str %a, %a\n" sreg rd addressing a
| Pstrd(rd, a) | Pstrd_a(rd, a) ->
fprintf oc " str %a, %a\n" dreg rd addressing a
+ | Pldps(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+ | Pldpd(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a
+ | Pstps(rd1, rd2, _, _, a) ->
+ fprintf oc " stp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+ | Pstpd(rd1, rd2, _, _, a) ->
+ fprintf oc " stp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a
(* Floating-point move *)
| Pfmov(rd, r1) ->
fprintf oc " fmov %a, %a\n" dreg rd dreg r1
| Pfmovimmd(rd, f) ->
let d = camlint64_of_coqint (Floats.Float.to_bits f) in
- if is_immediate_float64 d then
+ if is_immediate_float64 f then
fprintf oc " fmov %a, #%.7f\n" dreg rd (Int64.float_of_bits d)
else begin
let lbl = label_literal64 d in
@@ -534,7 +557,7 @@ module Target(System: SYSTEM): TARGET =
end
| Pfmovimms(rd, f) ->
let d = camlint_of_coqint (Floats.Float32.to_bits f) in
- if is_immediate_float32 d then
+ if is_immediate_float32 f then
fprintf oc " fmov %a, #%.7f\n" sreg rd (Int32.float_of_bits d)
else begin
let lbl = label_literal32 d in
@@ -596,8 +619,8 @@ module Target(System: SYSTEM): TARGET =
| Pfsel(rd, r1, r2, c) ->
fprintf oc " fcsel %a, %a, %a, %s\n" dreg rd dreg r1 dreg r2 (condition_name c)
(* No-op *)
- | Pnop ->
- fprintf oc " nop\n"
+ | Pnop -> ()
+ (*fprintf oc " nop\n"*)
(* Pseudo-instructions expanded in Asmexpand *)
| Pallocframe(sz, linkofs) -> assert false
| Pfreeframe(sz, linkofs) -> assert false
@@ -640,6 +663,8 @@ module Target(System: SYSTEM): TARGET =
fprintf oc "%s begin inline assembly\n\t" comment;
print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res;
fprintf oc "%s end inline assembly\n" comment
+ | EF_profiling (id, coq_kind) ->
+ print_profiling_logger oc id (Z.to_int coq_kind)
| _ ->
assert false
end
@@ -689,7 +714,24 @@ module Target(System: SYSTEM): TARGET =
section oc Section_text;
end
+ let aarch64_profiling_stub oc nr_items
+ profiling_id_table_name
+ profiling_counter_table_name =
+ fprintf oc " adrp x2, %s\n" profiling_counter_table_name;
+ fprintf oc " adrp x1, %s\n" profiling_id_table_name;
+ fprintf oc " add x2, x2, :lo12:%s\n" profiling_counter_table_name;
+ fprintf oc " add x1, x1, :lo12:%s\n" profiling_id_table_name;
+ fprintf oc " mov w0, %d\n" nr_items;
+ fprintf oc " b %s\n" profiling_write_table_helper ;;
+
+ let print_atexit oc to_be_called =
+ fprintf oc " adrp x0, %s\n" to_be_called;
+ fprintf oc " add x0, x0, :lo12:%s\n" to_be_called;
+ fprintf oc " b atexit\n";;
+
+
let print_epilogue oc =
+ print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) aarch64_profiling_stub oc;
if !Clflags.option_g then begin
Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f));
section oc Section_text;
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omul, v1::v2::nil => mul v1 v2
| Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
| Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
- | Odiv, v1::v2::nil => divs v1 v2
- | Odivu, v1::v2::nil => divu v1 v2
+ | Odiv, v1::v2::nil => divs_total v1 v2
+ | Odivu, v1::v2::nil => divu_total v1 v2
| Oand, v1::v2::nil => and v1 v2
| Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
| Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
| Omullhs, v1::v2::nil => mullhs v1 v2
| Omullhu, v1::v2::nil => mullhu v1 v2
- | Odivl, v1::v2::nil => divls v1 v2
- | Odivlu, v1::v2::nil => divlu v1 v2
+ | Odivl, v1::v2::nil => divls_total v1 v2
+ | Odivlu, v1::v2::nil => divlu_total v1 v2
| Oandl, v1::v2::nil => andl v1 v2
| Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
| Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Osingleoffloat, v1::nil => singleoffloat v1
| Ofloatofsingle, v1::nil => floatofsingle v1
- | Ointoffloat, v1::nil => intoffloat v1
- | Ointuoffloat, v1::nil => intuoffloat v1
+ | Ointoffloat, v1::nil => intoffloat_total v1
+ | Ointuoffloat, v1::nil => intuoffloat_total v1
| Ofloatofint, v1::nil => floatofint v1
| Ofloatofintu, v1::nil => floatofintu v1
- | Ointofsingle, v1::nil => intofsingle v1
- | Ointuofsingle, v1::nil => intuofsingle v1
+ | Ointofsingle, v1::nil => intofsingle_total v1
+ | Ointuofsingle, v1::nil => intuofsingle_total v1
| Osingleofint, v1::nil => singleofint v1
| Osingleofintu, v1::nil => singleofintu v1
- | Olongoffloat, v1::nil => longoffloat v1
- | Olonguoffloat, v1::nil => longuoffloat v1
+ | Olongoffloat, v1::nil => longoffloat_total v1
+ | Olonguoffloat, v1::nil => longuoffloat_total v1
| Ofloatoflong, v1::nil => floatoflong v1
| Ofloatoflongu, v1::nil => floatoflongu v1
- | Olongofsingle, v1::nil => longofsingle v1
- | Olonguofsingle, v1::nil => longuofsingle v1
+ | Olongofsingle, v1::nil => longofsingle_total v1
+ | Olonguofsingle, v1::nil => longuofsingle_total v1
| Osingleoflong, v1::nil => singleoflong v1
| Osingleoflongu, v1::nil => singleoflongu v1
diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v
index ae0006bc..0401d0fa 100644
--- a/aarch64/extractionMachdep.v
+++ b/aarch64/extractionMachdep.v
@@ -20,6 +20,9 @@ Require Archi Asm Asmgen SelectOp.
(* Archi *)
+
+Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *)
+
Extract Constant Archi.abi =>
"match Configuration.abi with
| ""apple"" -> Apple
@@ -36,7 +39,4 @@ Extract Constant SelectOp.symbol_is_relocatable =>
Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false".
Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false".
-
-(* Asmgen *)
-
-Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned".
+Extract Constant Asmblockgen.symbol_is_aligned => "C2C.atom_is_aligned".