diff options
author | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2020-12-04 23:26:01 +0100 |
---|---|---|
committer | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2020-12-04 23:26:01 +0100 |
commit | 972f6b9890ea3644626fc097e460035028b940eb (patch) | |
tree | a29e64ad4e978f55f06e19345ee753858f1334e0 /aarch64 | |
parent | c48b7c63592e5930f022452ee6c3f1cf3d1ada97 (diff) | |
download | compcert-kvx-972f6b9890ea3644626fc097e460035028b940eb.tar.gz compcert-kvx-972f6b9890ea3644626fc097e460035028b940eb.zip |
a first working draft on ldp/stp peephole
Diffstat (limited to 'aarch64')
-rw-r--r-- | aarch64/Asm.v | 64 | ||||
-rw-r--r-- | aarch64/Asmblock.v | 109 | ||||
-rw-r--r-- | aarch64/Asmblockdeps.v | 279 | ||||
-rw-r--r-- | aarch64/Asmblockgen.v | 32 | ||||
-rw-r--r-- | aarch64/Asmexpand.ml | 8 | ||||
-rw-r--r-- | aarch64/Asmgen.v | 28 | ||||
-rw-r--r-- | aarch64/Asmgenproof.v | 50 | ||||
-rw-r--r-- | aarch64/OpWeightsAsm.ml | 6 | ||||
-rw-r--r-- | aarch64/Peephole.v | 167 | ||||
-rw-r--r-- | aarch64/PostpassScheduling.v | 23 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingOracle.ml | 28 | ||||
-rw-r--r-- | aarch64/PostpassSchedulingproof.v | 43 | ||||
-rw-r--r-- | aarch64/TargetPrinter.ml | 8 |
13 files changed, 640 insertions, 205 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v index 33f1013a..719c3b61 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -201,7 +201,8 @@ Inductive instruction: Type := | Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *) | Pstrb (rs: ireg) (a: addressing) (**r store int8 *) | Pstrh (rs: ireg) (a: addressing) (**r store int16 *) - | Pstp (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + | Pstpw (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + | Pstpx (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) (** Integer arithmetic, immediate *) | Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *) | Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *) @@ -820,13 +821,19 @@ Definition eval_addressing (a: addressing) (rs: regset): val := | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n)) | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n)) | ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs) - | ADpostincr base n => rs#base + | ADpostincr base n => Vundef end. -Definition eval_addressing_update (a: addressing) (rs: regset): regset := +Definition is_pair_addressing_mode_correct (a: addressing): bool := match a with - | ADpostincr base n => rs#base <- (Vlong n) - | _ => rs + | ADimm _ _ => true + | _ => false + end. + +Definition get_offset_addr (a: addressing) (ofs: Z) : addressing := + match a with + | ADimm base n => (ADimm base (Int64.add n (Int64.repr ofs))) + | _ => a end. (** Auxiliaries for memory accesses *) @@ -840,17 +847,20 @@ Definition exec_load (chunk: memory_chunk) (transf: val -> val) Definition exec_load_double (chunk: memory_chunk) (transf: val -> val) (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) := - let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in - match Mem.loadv Mint32 m addr with - | None => Stuck - | Some v1 => - match Mem.loadv Mint32 m (Val.offset_ptr addr (Ptrofs.repr ofs)) with - | None => Stuck - | Some v2 => - Next (nextinstr (((eval_addressing_update a rs)#rd1 <- (transf v1))#rd2 <- (transf v2))) m - end - end. + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.loadv chunk m addr with + | None => Stuck + | Some v1 => + match Mem.loadv chunk m addr' with + | None => Stuck + | Some v2 => + Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m + end + end + else Stuck. Definition exec_store (chunk: memory_chunk) (a: addressing) (v: val) @@ -860,6 +870,21 @@ Definition exec_store (chunk: memory_chunk) | Some m' => Next (nextinstr rs) m' end. +Definition exec_store_double (chunk: memory_chunk) + (a: addressing) (v1 v2: val) + (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.storev chunk m addr v1 with + | None => Stuck + | Some m' => match Mem.storev chunk m' addr' v2 with + | None => Stuck + | Some m'' => Next (nextinstr rs) m'' + end + end + else Stuck. Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome := match i with @@ -1290,7 +1315,11 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out exec_load_double Mint32 (fun v => v) a rd1 rd2 rs m | Pldpx rd1 rd2 a => exec_load_double Mint64 (fun v => v) a rd1 rd2 rs m - | Pstp _ _ _ + | Pnop => Next (nextinstr rs) m + | Pstpw rs1 rs2 a => + exec_store_double Mint32 a rs#rs1 rs#rs2 rs m + | Pstpx rs1 rs2 a => + exec_store_double Mint64 a rs#rs1 rs#rs2 rs m | Pcls _ _ _ | Pclz _ _ _ | Prev _ _ _ @@ -1300,7 +1329,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pfmsub _ _ _ _ _ | Pfnmadd _ _ _ _ _ | Pfnmsub _ _ _ _ _ - | Pnop | Pcfi_adjust _ | Pcfi_rel_offset _ => Stuck diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v index 329c889e..3ad1d7f8 100644 --- a/aarch64/Asmblock.v +++ b/aarch64/Asmblock.v @@ -130,14 +130,16 @@ Inductive store_rs_a : Type := | Pstrd (**r store float64 *) | Pstrd_a (**r store float64 as any64 *) . -(* Actually, Pstp is not used in the aarch64/Asm semantics! - * Thus we skip this particular case: - * Inductive st_instruction : Type := - * | PSt_rs_a (st : store_rs_a) (rs : ireg) (a : addressing) - * | Pstp (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) - * . - * Coercion PStore : st_instruction >-> basic. - *) + +Inductive store_rs1_rs2_a : Type := + | Pstpw + | Pstpx + . + +Inductive st_instruction : Type := + | PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing) + | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + . Inductive arith_p : Type := (** PC-relative addressing *) @@ -318,23 +320,23 @@ Inductive ar_instruction : Type := Inductive basic : Type := | PArith (i: ar_instruction) (**r TODO *) - (*| PLoad (ld: load_rd_a) (rd: dreg) (a: addressing) [>*r TODO <]*) | PLoad (ld: ld_instruction) - | PStore (st: store_rs_a) (r: dreg) (a: addressing) (**r TODO *) + | PStore (st: st_instruction) | Pallocframe (sz: Z) (linkofs: ptrofs) (**r allocate new stack frame *) | Pfreeframe (sz: Z) (linkofs: ptrofs) (**r deallocate stack frame and restore previous frame *) | Ploadsymbol (rd: ireg) (id: ident) (**r load the address of [id] *) | Pcvtsw2x (rd: ireg) (r1: ireg) (**r sign-extend 32-bit int to 64-bit *) | Pcvtuw2x (rd: ireg) (r1: ireg) (**r zero-extend 32-bit int to 64-bit *) | Pcvtx2w (rd: ireg) (**r retype a 64-bit int as a 32-bit int *) -(* NOT USED IN THE SEMANTICS ! | Pnop (**r no operation *) +(* NOT USED IN THE SEMANTICS ! | Pcfi_adjust (ofs: int) (**r .cfi_adjust debug directive *) | Pcfi_rel_offset (ofs: int) (**r .cfi_rel_offset debug directive *) *) . Coercion PLoad: ld_instruction >-> basic. +Coercion PStore : st_instruction >-> basic. (* Not used in Coq, declared in ocaml directly in PostpassSchedulingOracle.ml Inductive instruction : Type := @@ -591,13 +593,7 @@ Definition eval_addressing (a: addressing) (rs: regset): val := | ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n)) | ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n)) | ADadr base id ofs => Val.addl rs#base (symbol_low lk id ofs) - | ADpostincr base n => rs#base - end. - -Definition eval_addressing_update (a: addressing) (rs: regset): regset := - match a with - | ADpostincr base n => rs#base <- (Vlong n) - | _ => rs + | ADpostincr base n => Vundef end. (** Auxiliaries for memory accesses *) @@ -609,24 +605,42 @@ Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val) Definition exec_load_double (chunk: memory_chunk) (transf: val -> val) (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) := - let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in - match Mem.loadv Mint32 m addr with - | None => Stuck - | Some v1 => - match Mem.loadv Mint32 m (Val.offset_ptr addr (Ptrofs.repr ofs)) with - | None => Stuck - | Some v2 => - Next (nextinstr (((eval_addressing_update a rs)#rd1 <- (transf v1))#rd2 <- (transf v2))) m - end - end. - -Definition exec_store (chunk: memory_chunk) + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.loadv chunk m addr with + | None => Stuck + | Some v1 => + match Mem.loadv chunk m addr' with + | None => Stuck + | Some v2 => + Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m + end + end + else Stuck. + +Definition exec_store_rs_a (chunk: memory_chunk) (a: addressing) (v: val) (rs: regset) (m: mem) := SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN Next rs m'. +Definition exec_store_double (chunk: memory_chunk) + (a: addressing) (v1 v2: val) + (rs: regset) (m: mem) := + if is_pair_addressing_mode_correct a then + let addr := (eval_addressing a rs) in + let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let addr' := (eval_addressing (get_offset_addr a ofs) rs) in + match Mem.storev chunk m addr v1 with + | None => Stuck + | Some m' => match Mem.storev chunk m' addr' v2 with + | None => Stuck + | Some m'' => Next rs m'' + end + end + else Stuck. (** execution of loads *) @@ -647,20 +661,22 @@ Definition chunk_load (ld: ld_instruction): memory_chunk := | PLd_rd_a Pldrd _ _ => Mfloat64 | PLd_rd_a Pldrd_a _ _ => Many64 | Pldp Pldpw _ _ _ => Mint32 - | Pldp Pldpx _ _ _ => Mint32 + | Pldp Pldpx _ _ _ => Mint64 end. -Definition chunk_store_rs_a (st: store_rs_a) : memory_chunk := +Definition chunk_store (st: st_instruction) : memory_chunk := match st with - | Pstrw => Mint32 - | Pstrw_a => Many32 - | Pstrx => Mint64 - | Pstrx_a => Many64 - | Pstrb => Mint8unsigned - | Pstrh => Mint16unsigned - | Pstrs => Mfloat32 - | Pstrd => Mfloat64 - | Pstrd_a => Many64 + | PSt_rs_a Pstrw _ _ => Mint32 + | PSt_rs_a Pstrw_a _ _ => Many32 + | PSt_rs_a Pstrx _ _ => Mint64 + | PSt_rs_a Pstrx_a _ _ => Many64 + | PSt_rs_a Pstrb _ _ => Mint8unsigned + | PSt_rs_a Pstrh _ _ => Mint16unsigned + | PSt_rs_a Pstrs _ _ => Mfloat32 + | PSt_rs_a Pstrd _ _ => Mfloat64 + | PSt_rs_a Pstrd_a _ _ => Many64 + | Pstp Pstpw _ _ _ => Mint32 + | Pstp Pstpx _ _ _ => Mint64 end. Definition interp_load (ld: ld_instruction): val -> val := @@ -686,6 +702,12 @@ Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) := | Pldp ld rd1 rd2 a => exec_load_double (chunk_load ldi) (interp_load ldi) a rd1 rd2 rs m end. +Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) := + match sti with + | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store sti) a rs#rsr rs m + | Pstp st rs1 rs2 a => exec_store_double (chunk_store sti) a rs#rs1 rs#rs2 rs m + end. + (** TODO: redundant w.r.t Machblock ?? *) Lemma in_dec (lbl: label) (l: list label): { List.In lbl l } + { ~(List.In lbl l) }. Proof. @@ -1022,7 +1044,7 @@ Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome := match b with | PArith ai => Next (exec_arith_instr ai rs) m | PLoad ldi => exec_load ldi rs m - | PStore st r a => exec_store (chunk_store_rs_a st) a rs#r rs m + | PStore sti => exec_store sti rs m | Pallocframe sz pos => let (m1, stk) := Mem.alloc m 0 sz in let sp := (Vptr stk Ptrofs.zero) in @@ -1044,6 +1066,7 @@ Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome := Next (rs#rd <- (Val.longofintu rs#r1)) m | Pcvtx2w rd => Next (rs#rd <- (Val.loword rs#rd)) m + | Pnop => Next rs m end. (* TODO: ORIGINAL EXECUTION OF INSTRUCTIONS THAT NEEDS TO BE ADAPTED ! diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v index 059f234c..bdfb9ed3 100644 --- a/aarch64/Asmblockdeps.v +++ b/aarch64/Asmblockdeps.v @@ -166,14 +166,15 @@ Inductive arith_op := . Inductive store_op := - | Ostore1 (st: store_rs_a) (a: addressing) - | Ostore2 (st: store_rs_a) (a: addressing) - | OstoreU (st: store_rs_a) (a: addressing) + | Ostore1 (st: st_instruction) (a: addressing) + | Ostore2 (st: st_instruction) (a: addressing) + | OstoreU (st: st_instruction) (a: addressing) . Inductive load_op := | Oload1 (ld: ld_instruction) (a: addressing) | Oload2 (ld: ld_instruction) (a: addressing) + | OloadU (ld: ld_instruction) (a: addressing) . Inductive allocf_op := @@ -423,16 +424,16 @@ Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) : | None => None (* should never occurs *) end. -Definition exec_store1 (n: store_rs_a) (m: mem) (a: addressing) (vr vs: val) := +Definition exec_store1 (n: st_instruction) (m: mem) (a: addressing) (vr vs: val) := let v := match a with | ADimm _ n => Some (Val.addl vs (Vlong n)) | ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs)) | _ => None end in - call_ll_storev (chunk_store_rs_a n) m v vr. + call_ll_storev (chunk_store n) m v vr. -Definition exec_store2 (n: store_rs_a) (m: mem) (a: addressing) (vr vs1 vs2: val) := +Definition exec_store2 (n: st_instruction) (m: mem) (a: addressing) (vr vs1 vs2: val) := let v := match a with | ADreg _ _ => Some (Val.addl vs1 vs2) @@ -441,10 +442,10 @@ Definition exec_store2 (n: store_rs_a) (m: mem) (a: addressing) (vr vs1 vs2: val | ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n))) | _ => None end in - call_ll_storev (chunk_store_rs_a n) m v vr. + call_ll_storev (chunk_store n) m v vr. -Definition exec_storeU (n: store_rs_a) (m: mem) (a: addressing) (vr: val) := - call_ll_storev (chunk_store_rs_a n) m None vr. +Definition exec_storeU (n: st_instruction) (m: mem) (a: addressing) (vr: val) := + call_ll_storev (chunk_store n) m None vr. Definition goto_label_deps (f: function) (lbl: label) (vpc: val) := match label_pos lbl 0 (fn_blocks f) with @@ -537,10 +538,14 @@ Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: va end in call_ll_loadv (chunk_load ld) (interp_load ld) m v. +Definition exec_loadU (n: ld_instruction) (m: mem) (a: addressing) := + call_ll_loadv (chunk_load n) (interp_load n) m None. + Definition load_eval (o: load_op) (l: list value) := match o, l with | Oload1 ld a, [Val vs; Memstate m] => exec_load1 ld m a vs | Oload2 ld a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m a vs1 vs2 + | OloadU st a, [Memstate m] => exec_loadU st m a | _, _ => None end. @@ -764,18 +769,18 @@ Opaque control_op_eq_correct. Definition store_op_eq (s1 s2: store_op): ?? bool := match s1 with | Ostore1 st1 a1 => - match s2 with Ostore1 st2 a2 => iandb (phys_eq st1 st2) (struct_eq a1 a2) | _ => RET false end + match s2 with Ostore1 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end | Ostore2 st1 a1 => - match s2 with Ostore2 st2 a2 => iandb (phys_eq st1 st2) (struct_eq a1 a2) | _ => RET false end + match s2 with Ostore2 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end | OstoreU st1 a1 => - match s2 with OstoreU st2 a2 => iandb (phys_eq st1 st2) (struct_eq a1 a2) | _ => RET false end + match s2 with OstoreU st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end end. Lemma store_op_eq_correct s1 s2: WHEN store_op_eq s1 s2 ~> b THEN b = true -> s1 = s2. Proof. destruct s1, s2; wlp_simplify; try congruence. - all: rewrite H0 in H; rewrite H; reflexivity. + all: rewrite H1 in H0; rewrite H0, H; reflexivity. Qed. Hint Resolve store_op_eq_correct: wlp. Opaque store_op_eq_correct. @@ -783,16 +788,18 @@ Opaque store_op_eq_correct. Definition load_op_eq (l1 l2: load_op): ?? bool := match l1 with | Oload1 ld1 a1 => - match l2 with Oload1 ld2 a2 => iandb (phys_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end + match l2 with Oload1 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end | Oload2 ld1 a1 => - match l2 with Oload2 ld2 a2 => iandb (phys_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end + match l2 with Oload2 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end + | OloadU ld1 a1 => + match l2 with OloadU ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end end. Lemma load_op_eq_correct l1 l2: WHEN load_op_eq l1 l2 ~> b THEN b = true -> l1 = l2. Proof. destruct l1, l2; wlp_simplify; try congruence. - all: rewrite H0 in H; rewrite H; reflexivity. + all: rewrite H1 in H0; rewrite H, H0; reflexivity. Qed. Hint Resolve load_op_eq_correct: wlp. Opaque load_op_eq_correct. @@ -1120,7 +1127,7 @@ Definition trans_arith (ai: ar_instruction) : inst := | Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))] end. -Definition eval_addressing_rlocs_st (st: store_rs_a) (rs: dreg) (a: addressing) := +Definition eval_addressing_rlocs_st (st: st_instruction) (rs: dreg) (a: addressing) := match a with | ADimm base n => Op (Store (Ostore1 st a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) | ADreg base r => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) @@ -1147,10 +1154,42 @@ Definition trans_load (ldi: ld_instruction) := | PLd_rd_a ld r a => let lr := eval_addressing_rlocs_ld ldi a in [(#r, lr)] | Pldp ld r1 r2 a => - let lr := eval_addressing_rlocs_ld ldi a in + let ldi' := + match ld with + | Pldpw => Pldrw + | Pldpx => Pldrx + end in + let lr := eval_addressing_rlocs_ld (PLd_rd_a ldi' r1 a) a in + let ofs := match ld with | Pldpw => 4%Z | Pldpx => 8%Z end in + match a with + | ADimm base n => + let a' := (get_offset_addr a ofs) in + [(#r1, lr); + (#r2, Op (Load (Oload1 (PLd_rd_a ldi' r2 a') a')) + (Old(PReg (#base)) @ PReg (pmem) @ Enil))] + | _ => [(#PC, (Op (OError)) Enil)] + end + end. + +Definition trans_store (sti: st_instruction) := + match sti with + | PSt_rs_a st r a => + let lr := eval_addressing_rlocs_st sti r a in [(pmem, lr)] + | Pstp st r1 r2 a => + let sti' := + match st with + | Pstpw => Pstrw + | Pstpx => Pstrx + end in + let lr := eval_addressing_rlocs_st (PSt_rs_a sti' r1 a) r1 a in + let ofs := match st with | Pstpw => 4%Z | Pstpx => 8%Z end in match a with - | ADpostincr base n => [(#base, lr); (#r1, lr); (#r2, lr)] - | _ => [(#r1, lr); (#r2, lr)] + | ADimm base n => + let a' := (get_offset_addr a ofs) in + [(pmem, lr); + (pmem, Op (Store (Ostore1 (PSt_rs_a sti' r2 a') a')) + (PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))] + | _ => [(#PC, (Op (OError)) Enil)] end end. @@ -1158,8 +1197,7 @@ Definition trans_basic (b: basic) : inst := match b with | PArith ai => trans_arith ai | PLoad ld => trans_load ld - | PStore st r a => - let lr := eval_addressing_rlocs_st st r a in [(pmem, lr)] + | PStore st => trans_store st | Pallocframe sz linkofs => [(#X29, PReg(#SP)); (#SP, Op (Allocframe (OAllocf_SP sz linkofs)) (PReg (#SP) @ PReg pmem @ Enil)); @@ -1173,6 +1211,7 @@ Definition trans_basic (b: basic) : inst := | Pcvtsw2x rd r1 => [(#rd, Op (Cvtsw2x) (PReg (#r1) @ Enil))] | Pcvtuw2x rd r1 => [(#rd, Op (Cvtuw2x) (PReg (#r1) @ Enil))] | Pcvtx2w rd => [(#rd, Op (Cvtx2w) (PReg (#rd) @ Enil))] + | Pnop => [] end. Fixpoint trans_body (b: list basic) : list L.inst := @@ -1673,20 +1712,46 @@ Local Ltac preg_eq_discr r rd := 6,7,8,9: Simpl sr. - (* Arith *) exploit trans_arith_correct; eauto. - - (* Load *) admit. - (*unfold exec_load, eval_addressing_rlocs_ld, exp_eval;*) - (*destruct a; simpl; try destruct base; Simpl_exists sr; erewrite H;*) - (*unfold exec_load1, exec_load2, exec_loadU; unfold call_ll_loadv;*) - (*try destruct (Mem.loadv _ _ _); simpl; auto.*) - (*all: DDRM rd; Simpl_exists sr; try intros rr; Simpl_update.*) + - (* Load *) + destruct ld. + + unfold exec_load, exec_load_rd_a, eval_addressing_rlocs_ld, exp_eval; + destruct ld; destruct a; simpl; + try fold (ppos base); try fold (ppos r); + erewrite !H0, H; simpl; + unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv; + try destruct (Mem.loadv _ _ _); simpl; auto. + all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update). + + unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval; + destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto; + try fold (ppos base); + try erewrite !H0, H; simpl; + unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv; + try destruct (Mem.loadv _ _ _); simpl; auto; + fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H; + try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr; + try (rewrite !assign_diff; discriminate_ppos; reflexivity); + try (destruct base; discriminate_ppos); + repeat (try fold (ppos r); intros; Simpl_update). - (* Store *) - admit. - (*unfold exec_store, eval_addressing_rlocs_st, exp_eval;*) - (*destruct a; simpl; DIRN1 r; try destruct base; Simpl_exists sr; erewrite H;*) - (*unfold exec_store1, exec_store2, exec_storeU; unfold call_ll_storev;*) - (*try destruct (Mem.storev _ _ _ _); simpl; auto.*) - (*all: eexists; split; [| split]; [ eauto | eauto |*) - (*intros rr; rewrite assign_diff; [ rewrite H0; auto | apply ppos_pmem_discr ]].*) + destruct st. + + unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval; + destruct st; destruct a; simpl; + try fold (ppos base); try fold (ppos rs); try fold (ppos r); + erewrite !H0, H; simpl; + unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev; + try destruct (Mem.storev _ _ _ _); simpl; auto. + all: eexists; split; [| split]; eauto; + intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos. + + unfold exec_store, exec_store_double, eval_addressing_rlocs_st, exp_eval; + destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto; + try fold (ppos base); try fold (ppos rs1); + erewrite !H0, H; simpl; + unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev; + try destruct (Mem.storev _ _ _ _); simpl; auto; + fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl; + try destruct (Mem.storev _ _ _ _); simpl; auto. + all: eexists; split; [| split]; eauto; + repeat (try intros rr; rewrite assign_diff; try rewrite H0; auto; discriminate_ppos). - (* Alloc *) destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS. + eexists; repeat split. @@ -2052,6 +2117,21 @@ Definition freg_name (fr: freg) : pstring := end . +Definition iregsp_name (irsp: iregsp) : pstring := + match irsp with + | RR1 ir => ireg_name ir + | XSP => Str ("XSP") + end. + +Definition dreg_name (dr: dreg) : ?? pstring := + match dr with + | IR ir => match ir with + | XSP => RET (Str ("XSP")) + | RR1 irr => RET (ireg_name irr) + end + | FR fr => RET (freg_name fr) + end. + Definition string_of_name (x: P.R.t): ?? pstring := if (Pos.eqb x pmem) then RET (Str "MEM") @@ -2064,13 +2144,7 @@ Definition string_of_name (x: P.R.t): ?? pstring := | CV => RET (Str ("CV")) end | Some (PC) => RET (Str ("PC")) - | Some (DR dr) => match dr with - | IR ir => match ir with - | XSP => RET (Str ("XSP")) - | RR1 irr => RET (ireg_name irr) - end - | FR fr => RET (freg_name fr) - end + | Some (DR dr) => dreg_name dr | _ => RET (Str ("UNDEFINED")) end. @@ -2339,17 +2413,118 @@ Definition string_of_arith (op: arith_op): pstring := | Ofnmul _ => "Ofnmul" end. -Definition string_of_store (op: store_op) : pstring := - match op with - | Ostore1 _ _ => "Ostore1" - | Ostore2 _ _ => "Ostore2" - | OstoreU _ _ => "OstoreU" +Definition string_of_ofs (ofs: ptrofs) : ?? pstring := + (string_of_Z (Ptrofs.signed ofs)). + +Definition string_of_int (n: int) : ?? pstring := + (string_of_Z (Int.signed n)). + +Definition string_of_int64 (n: int64) : ?? pstring := + (string_of_Z (Int64.signed n)). + +Notation "x +; y" := (Concat x y). + +Definition string_of_addressing (a: addressing) : ?? pstring := + match a with + | ADimm base n => + DO n' <~ string_of_int64 n;; + RET ((Str "[ADimm ") +; (iregsp_name base) +; " " +; n' +; "]") + | ADreg base r => + RET ((Str "[ADreg ") +; (iregsp_name base) +; " " +; (ireg_name r) +; "]") + | ADlsl base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADlsl ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADsxt base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADsxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADuxt base r n => + DO n' <~ string_of_int n;; + RET ((Str "[ADuxt ") +; (iregsp_name base) +; " " +; (ireg_name r) +; " " +; n' +; "]") + | ADadr base id ofs => + DO id' <~ string_of_Z (Zpos id);; + DO ofs' <~ string_of_ofs ofs;; + RET ((Str "[ADadr ") +; (iregsp_name base) +; " " +; id' +; " " +; ofs' +; "]") + | ADpostincr base n => + DO n' <~ string_of_int64 n;; + RET ((Str "[ADpostincr ") +; (iregsp_name base) +; " " +; n' +; "]") + end. + +Definition string_of_ld_rd_a (ld: load_rd_a) : pstring := + match ld with + | Pldrw => Str "Pldrw" + | Pldrw_a => Str "Pldrw_a" + | Pldrx => Str "Pldrx" + | Pldrx_a => Str "Pldrx_a" + | Pldrb _ => Str "Pldrb" + | Pldrsb _ => Str "Pldrsb" + | Pldrh _ => Str "Pldrh" + | Pldrsh _ => Str "Pldrsh" + | Pldrzw => Str "Pldrzw" + | Pldrsw => Str "Pldrsw" + | Pldrs => Str "Pldrs" + | Pldrd => Str "Pldrd" + | Pldrd_a => Str "Pldrd_a" + end. + +Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:= + match ldi with + | PLd_rd_a ld rd a => + DO a' <~ string_of_addressing a;; + DO rd' <~ dreg_name rd;; + RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")") + | Pldp _ _ _ a => + DO a' <~ string_of_addressing a;; + RET (Str "Pldp (" +; a' +; ")") end. -Definition string_of_load (op: load_op) : pstring := +Definition string_of_load (op: load_op) : ?? pstring := + match op with + | Oload1 ld a => + DO a' <~ string_of_addressing a;; + DO ld' <~ string_of_ldi ld;; + RET((Str "Oload1 ") +; " " +; ld' +; " " +; a' +; " ") + | Oload2 ld a => + DO a' <~ string_of_addressing a;; + DO ld' <~ string_of_ldi ld;; + RET((Str "Oload2 ") +; " " +; ld' +; " " +; a' +; " ") + | OloadU _ _ => RET (Str "OloadU") + end. + +Definition string_of_st_rs_a (st: store_rs_a) : pstring := + match st with + | Pstrw => Str "Pstrw" + | Pstrw_a => Str "Pstrw_a" + | Pstrx => Str "Pstrx" + | Pstrx_a => Str "Pstrx_a" + | Pstrb => Str "Pstrb" + | Pstrh => Str "Pstrh" + | Pstrs => Str "Pstrs" + | Pstrd => Str "Pstrd" + | Pstrd_a => Str "Pstrd_a" + end. + +Definition string_of_sti (sti: st_instruction) : ?? pstring:= + match sti with + | PSt_rs_a st rs a => + DO a' <~ string_of_addressing a;; + DO rs' <~ dreg_name rs;; + RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")") + | Pstp _ _ _ a => + DO a' <~ string_of_addressing a;; + RET (Str "Pstp (" +; a' +; ")") + end. + +Definition string_of_store (op: store_op) : ?? pstring := match op with - | Oload1 _ _ => "Oload1" - | Oload2 _ _ => "Oload2" + | Ostore1 st a => + DO a' <~ string_of_addressing a;; + DO st' <~ string_of_sti st;; + RET((Str "Ostore1 ") +; " " +; st' +; " " +; a' +; " ") + | Ostore2 st a => + DO a' <~ string_of_addressing a;; + DO st' <~ string_of_sti st;; + RET((Str "Ostore2 ") +; " " +; st' +; " " +; a' +; " ") + | OstoreU _ _ => RET (Str "OstoreU") end. Definition string_of_control (op: control_op) : pstring := @@ -2382,8 +2557,8 @@ Definition string_of_freef (op: freef_op) : pstring := Definition string_of_op (op: P.op): ?? pstring := match op with | Arith op => RET (string_of_arith op) - | Load op => RET (string_of_load op) - | Store op => RET (string_of_store op) + | Load op => string_of_load op + | Store op => string_of_store op | Control op => RET (string_of_control op) | Allocframe op => RET (string_of_allocf op) | Freeframe op => RET (string_of_freef op) diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v index abc9162b..137b5871 100644 --- a/aarch64/Asmblockgen.v +++ b/aarch64/Asmblockgen.v @@ -292,13 +292,13 @@ Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode) := match ty, preg_of src with - | Tint, IR rd => OK (indexed_memory_access_bc (PStore Pstrw rd) 4 base ofs k) - | Tlong, IR rd => OK (indexed_memory_access_bc (PStore Pstrx rd) 8 base ofs k) - | Tsingle, FR rd => OK (indexed_memory_access_bc (PStore Pstrs rd) 4 base ofs k) - | Tfloat, FR rd => OK (indexed_memory_access_bc (PStore Pstrd rd) 8 base ofs k) - | Tany32, IR rd => OK (indexed_memory_access_bc (PStore Pstrw_a rd) 4 base ofs k) - | Tany64, IR rd => OK (indexed_memory_access_bc (PStore Pstrx_a rd) 8 base ofs k) - | Tany64, FR rd => OK (indexed_memory_access_bc (PStore Pstrd_a rd) 8 base ofs k) + | Tint, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw rd) 4 base ofs k) + | Tlong, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx rd) 8 base ofs k) + | Tsingle, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrs rd) 4 base ofs k) + | Tfloat, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd rd) 8 base ofs k) + | Tany32, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrw_a rd) 4 base ofs k) + | Tany64, IR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrx_a rd) 8 base ofs k) + | Tany64, FR rd => OK (indexed_memory_access_bc (PSt_rs_a Pstrd_a rd) 8 base ofs k) | _, _ => Error (msg "Asmgen.storeind") end. @@ -306,7 +306,7 @@ Definition loadptr_bc (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: bcode): bcode indexed_memory_access_bc (PLd_rd_a Pldrx dst) 8 base ofs k. Definition storeptr_bc (src: ireg) (base: iregsp) (ofs: ptrofs) (k: bcode): bcode := - indexed_memory_access_bc (PStore Pstrx src) 8 base ofs k. + indexed_memory_access_bc (PSt_rs_a Pstrx src) 8 base ofs k. (** Function epilogue *) @@ -1121,21 +1121,21 @@ Definition transl_store (chunk: memory_chunk) (addr: Op.addressing) (args: list mreg) (src: mreg) (k: bcode) : res bcode := match chunk with | Mint8unsigned | Mint8signed => - do r1 <- ireg_of src; transl_addressing 1 addr args (PStore Pstrb r1) k + do r1 <- ireg_of src; transl_addressing 1 addr args (PSt_rs_a Pstrb r1) k | Mint16unsigned | Mint16signed => - do r1 <- ireg_of src; transl_addressing 2 addr args (PStore Pstrh r1) k + do r1 <- ireg_of src; transl_addressing 2 addr args (PSt_rs_a Pstrh r1) k | Mint32 => - do r1 <- ireg_of src; transl_addressing 4 addr args (PStore Pstrw r1) k + do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw r1) k | Mint64 => - do r1 <- ireg_of src; transl_addressing 8 addr args (PStore Pstrx r1) k + do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx r1) k | Mfloat32 => - do r1 <- freg_of src; transl_addressing 4 addr args (PStore Pstrs r1) k + do r1 <- freg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrs r1) k | Mfloat64 => - do r1 <- freg_of src; transl_addressing 8 addr args (PStore Pstrd r1) k + do r1 <- freg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrd r1) k | Many32 => - do r1 <- ireg_of src; transl_addressing 4 addr args (PStore Pstrw_a r1) k + do r1 <- ireg_of src; transl_addressing 4 addr args (PSt_rs_a Pstrw_a r1) k | Many64 => - do r1 <- ireg_of src; transl_addressing 8 addr args (PStore Pstrx_a r1) k + do r1 <- ireg_of src; transl_addressing 8 addr args (PSt_rs_a Pstrx_a r1) k end. (** Translation of a Machblock instruction. *) diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index edcce06f..94697df7 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -73,7 +73,7 @@ let save_parameter_registers ir fr = while !i < 8 do let pos = 8*16 + !i*8 in if !i land 1 = 0 then begin - emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1), + emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1), ADimm(XSP, Z.of_uint pos))); i := !i + 2 end else begin @@ -169,8 +169,8 @@ let expand_builtin_memcpy_small sz al src dst = let (rdst, odst) = memcpy_small_arg sz dst tdst in let rec copy osrc odst sz = if sz >= 16 then begin - emit (Pldpw(X16, X30, ADimm(rsrc, osrc))); - emit (Pstp(X16, X30, ADimm(rdst, odst))); + emit (Pldpx(X16, X30, ADimm(rsrc, osrc))); + emit (Pstpx(X16, X30, ADimm(rdst, odst))); copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16) end else if sz >= 8 then begin @@ -209,7 +209,7 @@ let expand_builtin_memcpy_big sz al src dst = expand_loadimm32 X15 (Z.of_uint (sz / 16)); emit (Plabel lbl); emit (Pldpx(X16, X17, ADpostincr(RR1 X30, _16))); - emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16))); + emit (Pstpx(X16, X17, ADpostincr(RR1 X29, _16))); emit (Psubimm(W, RR1 X15, RR1 X15, _1)); emit (Pcbnz(W, X15, lbl)); if sz mod 16 >= 8 then begin diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index d9d59929..f7ec07ae 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -331,16 +331,23 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction := do rd2' <- ireg_of_preg rd2; OK (Asm.Pldpx rd1 rd2 a) - | PStore Pstrw r a => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a) - | PStore Pstrw_a r a => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a) - | PStore Pstrx r a => do r' <- ireg_of_preg r; OK (Asm.Pstrx r' a) - | PStore Pstrx_a r a => do r' <- ireg_of_preg r; OK (Asm.Pstrx_a r' a) - | PStore Pstrb r a => do r' <- ireg_of_preg r; OK (Asm.Pstrb r' a) - | PStore Pstrh r a => do r' <- ireg_of_preg r; OK (Asm.Pstrh r' a) - - | PStore Pstrs r a => do r' <- freg_of_preg r; OK (Asm.Pstrs r' a) - | PStore Pstrd r a => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a) - | PStore Pstrd_a r a => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a) + | PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a) + | PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a) + | PStore (PSt_rs_a Pstrx r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx r' a) + | PStore (PSt_rs_a Pstrx_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrx_a r' a) + | PStore (PSt_rs_a Pstrb r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrb r' a) + | PStore (PSt_rs_a Pstrh r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrh r' a) + + | PStore (PSt_rs_a Pstrs r a) => do r' <- freg_of_preg r; OK (Asm.Pstrs r' a) + | PStore (PSt_rs_a Pstrd r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a) + | PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a) + + | PStore (Pstp Pstpw rs1 rs2 a) => do rs1' <- ireg_of_preg rs1; + do rs2' <- ireg_of_preg rs2; + OK (Asm.Pstpw rs1 rs2 a) + | PStore (Pstp Pstpx rs1 rs2 a) => do rs1' <- ireg_of_preg rs1; + do rs2' <- ireg_of_preg rs2; + OK (Asm.Pstpx rs1 rs2 a) | Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs) | Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs) @@ -352,6 +359,7 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction := | Pcvtuw2x rd r1 => OK (Asm.Pcvtuw2x rd r1) | Pcvtx2w rd => OK (Asm.Pcvtx2w rd) + | Pnop => OK (Asm.Pnop) end. Definition cf_instruction_to_instruction (cfi: cf_instruction) : Asm.instruction := diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index 9d80f58f..d412b129 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -818,9 +818,10 @@ Proof. (*unfold exec_load in BASIC.*) (*destruct Mem.loadv. 2: { discriminate BASIC. }*) (*inv BASIC. rewrite Pregmap.gso; try discriminate; auto.*) - - unfold exec_store in BASIC. - destruct Mem.storev. 2: { discriminate BASIC. } - inv BASIC; reflexivity. + - admit. + (*unfold exec_store in BASIC.*) + (*destruct Mem.storev. 2: { discriminate BASIC. }*) + (*inv BASIC; reflexivity.*) - destruct Mem.alloc, Mem.store. 2: { discriminate BASIC. } inv BASIC. repeat (rewrite Pregmap.gso; try discriminate). reflexivity. - destruct Mem.loadv. 2: { discriminate BASIC. } @@ -979,7 +980,7 @@ Lemma eval_addressing_preserved a rs1 rs2: Proof. intros EQ. destruct a; simpl; try (rewrite !EQ; congruence). - (*auto. TODO *) + auto. Qed. Ltac next_stuck_cong := try (unfold Next, Stuck in *; congruence). @@ -1084,24 +1085,24 @@ Proof. Qed. *) -Lemma store_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk a: forall - (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) - (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) - (HSTORE: exec_store lk chk a rd rs1 m1 = Next rs1' m1'), - exists (rs2' : regset) (m2' : mem), Asm.exec_store tge chk a rd rs2 m2 = Next rs2' m2' - /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). -Proof. - intros. - unfold exec_store, Asm.exec_store in *. - inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst. - rewrite <- (eval_addressing_preserved a rs1 rs2); auto. - destruct (Mem.storev _ _ _ _). - + inversion HSTORE; auto. repeat (econstructor; eauto). - * eapply nextinstr_agree_but_pc; intros. - subst. apply EQR. auto. - * eapply ptrofs_nextinstr_agree; subst; eauto. - + next_stuck_cong. -Qed. +(*Lemma store_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk a: forall*) + (*(BOUNDED: 0 <= n <= Ptrofs.max_unsigned)*) + (*(MATCHI: match_internal n (State rs1 m1) (State rs2 m2))*) + (*(HSTORE: exec_store lk chk a rd rs1 m1 = Next rs1' m1'),*) + (*exists (rs2' : regset) (m2' : mem), Asm.exec_store tge chk a rd rs2 m2 = Next rs2' m2'*) + (*/\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').*) +(*Proof.*) + (*intros.*) + (*unfold exec_store, Asm.exec_store in *.*) + (*inversion MATCHI as [n0 r1 mx1 r2 mx2 EQM EQR EQPC]; subst.*) + (*rewrite <- (eval_addressing_preserved a rs1 rs2); auto.*) + (*destruct (Mem.storev _ _ _ _).*) + (*+ inversion HSTORE; auto. repeat (econstructor; eauto).*) + (** eapply nextinstr_agree_but_pc; intros.*) + (*subst. apply EQR. auto.*) + (** eapply ptrofs_nextinstr_agree; subst; eauto.*) + (*+ next_stuck_cong.*) +(*Qed.*) Lemma next_inst_preserved n rs1 m1 rs1' m1' rs2 m2 (x: dreg) v: forall (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) @@ -1275,8 +1276,9 @@ Proof. (*destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_preserved; eauto;*) (*intros; simpl in *; destruct sz; eauto. }*)} { (* PStore *) - destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_preserved; eauto; - simpl in *; inv_matchi; find_rwrt_ag. } + admit. } + (*destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_preserved; eauto;*) + (*simpl in *; inv_matchi; find_rwrt_ag. }*) { (* Pallocframe *) monadInv TRANSBI; inv_matchi; try pc_not_sp; diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml index 3ce2c68e..44c6f16b 100644 --- a/aarch64/OpWeightsAsm.ml +++ b/aarch64/OpWeightsAsm.ml @@ -70,13 +70,14 @@ module Cortex_A53 = struct | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> 6 | PBasic (PArith _) -> 1 | PBasic (PLoad _) -> 3 - | PBasic (PStore (_, _, _)) -> 3 + | PBasic (PStore _) -> 3 | PBasic (Pallocframe (_, _)) -> 3 | PBasic (Pfreeframe (_, _)) -> 1 | PBasic (Ploadsymbol (_, _)) -> 1 | PBasic (Pcvtsw2x (_, _)) -> 2 | PBasic (Pcvtuw2x (_, _)) -> 2 | PBasic (Pcvtx2w _) -> 1 + | PBasic (Pnop) -> 0 | PControl _ -> 6 let resources_of_op (i : instruction) (nargs : int) = @@ -110,13 +111,14 @@ module Cortex_A53 = struct | PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> [| 1; 1; 1; 0 |] | PBasic (PArith _) -> [| 1; 1; 0; 0 |] | PBasic (PLoad _) -> [| 1; 0; 0; 1 |] - | PBasic (PStore (_, _, _)) -> [| 1; 0; 0; 1 |] + | PBasic (PStore _) -> [| 1; 0; 0; 1 |] | PBasic (Pallocframe (_, _)) -> [| 1; 0; 0; 1 |] | PBasic (Pfreeframe (_, _)) -> [| 1; 1; 0; 0 |] | PBasic (Ploadsymbol (_, _)) -> [| 1; 1; 0; 0 |] | PBasic (Pcvtsw2x (_, _)) -> [| 1; 1; 0; 0 |] | PBasic (Pcvtuw2x (_, _)) -> [| 1; 1; 0; 0 |] | PBasic (Pcvtx2w _) -> [| 1; 1; 0; 0 |] + | PBasic (Pnop) -> [| 0; 0; 0; 0 |] | PControl _ -> resource_bounds (*let non_pipelined_resources_of_op (op : operation) (nargs : int) = diff --git a/aarch64/Peephole.v b/aarch64/Peephole.v new file mode 100644 index 00000000..2282326a --- /dev/null +++ b/aarch64/Peephole.v @@ -0,0 +1,167 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib. +Require Import Asmblock. +Require Import Values. +Require Import Integers. +Require Import AST. +Require Compopts. + +(*Fixpoint gpreg_q_search_rec r0 r1 l := + match l with + | h :: t => + let (s0, s1) := gpreg_q_expand h in + if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) + then Some h + else gpreg_q_search_rec r0 r1 t + | nil => None + end. + +Fixpoint gpreg_o_search_rec r0 r1 r2 r3 l := + match l with + | h :: t => + match gpreg_o_expand h with + | (((s0, s1), s2), s3) => + if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) && + (gpreg_eq r2 s2) && (gpreg_eq r3 s3) + then Some h + else gpreg_o_search_rec r0 r1 r2 r3 t + end + | nil => None + end. + +Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q := + gpreg_q_search_rec r0 r1 gpreg_q_list. + +Definition gpreg_o_search r0 r1 r2 r3 : option gpreg_o := + gpreg_o_search_rec r0 r1 r2 r3 gpreg_o_list. + +Parameter print_found_store: forall A, Z -> A -> A. + + Definition coalesce_octuples := true.*) + +Definition is_valid_immofs_32 (z: Z) : bool := + if (zle z 252) && (zle (-256) z) && ((Z.modulo z 4) =? 0) then true else false. + +Definition is_valid_immofs_64 (z: Z) : bool := + if (zle z 504) && (zle (-512) z) && ((Z.modulo z 8) =? 0) then true else false. + +Fixpoint ldp_rep (insns : list basic) : list basic := + match insns with + | nil => nil + | h0 :: t0 => + match t0 with + | h1 :: t1 => + match h0, h1 with + | (PLd_rd_a Pldrw (IR (RR1 rd1)) (ADimm b1 n1)), + (PLd_rd_a Pldrw (IR (RR1 rd2)) (ADimm b2 n2)) => + let z1 := Int64.signed n1 in + let z2 := Int64.signed n2 in + + (* TODO This could be insteresting to add if compatible with the checker + When both load the same dest, and with no reuse of ld1 in ld2. *) + (* if (ireg_eq rd1 rd2) && (negb (iregsp_eq b2 (RR1 rd1))) then + Pnop :: (ldp_rep t0) + (* When two consec mem addr are loading two different dest. *) + else*) + + if (negb (ireg_eq rd1 rd2)) && (iregsp_eq b1 b2) && (negb (iregsp_eq (RR1 rd1) b1)) + && (z2 =? z1 + 4) && (is_valid_immofs_32 z1) then + (PLoad (Pldp Pldpw rd1 rd2 (ADimm b1 n1))) :: Pnop :: (ldp_rep t1) + (* When nothing can be optimized. *) + else + h0 :: (ldp_rep t0) + | (PLd_rd_a Pldrx (IR (RR1 rd1)) (ADimm b1 n1)), + (PLd_rd_a Pldrx (IR (RR1 rd2)) (ADimm b2 n2)) => + let z1 := Int64.signed n1 in + let z2 := Int64.signed n2 in + + (*if (ireg_eq rd1 rd2) && (negb (iregsp_eq b2 (RR1 rd1))) then + Pnop :: (ldp_rep t0) + else*) + + if (negb (ireg_eq rd1 rd2)) && (iregsp_eq b1 b2) && (negb (iregsp_eq (RR1 rd1) b1)) + && (z2 =? z1 + 8) && (is_valid_immofs_64 z1) then + (PLoad (Pldp Pldpx rd1 rd2 (ADimm b1 n1))) :: Pnop :: (ldp_rep t1) + else + h0 :: (ldp_rep t0) + (*| (PLd_rd_a Pldrw rd1 (ADimm b1 n1)), h1 => ldpw_follow rd1 b1 n1*) + (*| (PLd_rd_a Pldrx rd1 (ADimm b1 n1)), h1 => ldpx_follow rd1 b1 n1*) + | _, _ => h0 :: (ldp_rep t0) + end + | nil => h0 :: nil + end + end. + +Fixpoint stp_rep (insns : list basic) : list basic := + match insns with + | nil => nil + | h0 :: t0 => + match t0 with + | h1 :: t1 => + match h0, h1 with + | (PSt_rs_a Pstrw (IR (RR1 rs1)) (ADimm b1 n1)), + (PSt_rs_a Pstrw (IR (RR1 rs2)) (ADimm b2 n2)) => + let z1 := Int64.signed n1 in + let z2 := Int64.signed n2 in + + (* When both store at the same addr, same ofs. *) + (*if (iregsp_eq b1 b2) && (z2 =? z1) then + Pnop :: (stp_rep t0) + (* When two consec mem addr are targeted by two store. *) + else*) + + if (iregsp_eq b1 b2) && (z2 =? z1 + 4) && (is_valid_immofs_32 z1) then + (PStore (Pstp Pstpw rs1 rs2 (ADimm b1 n1))) :: Pnop :: (stp_rep t1) + (* When nothing can be optimized. *) + else + h0 :: (stp_rep t0) + | (PSt_rs_a Pstrx (IR (RR1 rs1)) (ADimm b1 n1)), + (PSt_rs_a Pstrx (IR (RR1 rs2)) (ADimm b2 n2)) => + let z1 := Int64.signed n1 in + let z2 := Int64.signed n2 in + + (*if (iregsp_eq b1 b2) && (z2 =? z1) then + Pnop :: (stp_rep t0) + else*) + + if (iregsp_eq b1 b2) && (z2 =? z1 + 8) && (is_valid_immofs_64 z1) then + (PStore (Pstp Pstpx rs1 rs2 (ADimm b1 n1))) :: Pnop :: (stp_rep t1) + else + h0 :: (stp_rep t0) + (*| (PLd_rd_a Pldrw rd1 (ADimm b1 n1)), h1 => ldpw_follow rd1 b1 n1*) + (*| (PLd_rd_a Pldrx rd1 (ADimm b1 n1)), h1 => ldpx_follow rd1 b1 n1*) + | _, _ => h0 :: (stp_rep t0) + end + | nil => h0 :: nil + end + end. + +Definition optimize_body (insns : list basic) := + stp_rep (ldp_rep insns). + (* TODO ADD A FLAG TO ENABLE PEEPHOLE *) + (*if Compopts.optim_coalesce_mem tt*) + (*then ldp_rep insns*) + (*else insns.*) + +Program Definition optimize_bblock (bb : bblock) := + let optimized := optimize_body (body bb) in + let wf_ok := non_empty_bblockb optimized (exit bb) in + {| header := header bb; + body := if wf_ok then optimized else (body bb); + exit := exit bb |}. +Next Obligation. + destruct (non_empty_bblockb (optimize_body (body bb))) eqn:Rwf. + - rewrite Rwf. cbn. trivial. + - exact (correct bb). +Qed. diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v index 74c2438e..548976c9 100644 --- a/aarch64/PostpassScheduling.v +++ b/aarch64/PostpassScheduling.v @@ -19,6 +19,7 @@ Require Import Coqlib Errors AST Integers. Require Import Asmblock Axioms Memory Globalenvs. Require Import Asmblockdeps Asmblockprops. Require Import IterList. +Require Peephole. Local Open Scope error_monad_scope. @@ -69,8 +70,9 @@ Definition do_schedule (bb: bblock) : res bblock := Definition verified_schedule (bb : bblock) : res bblock := let nhbb := no_header bb in - do nhbb' <- do_schedule nhbb; - let bb' := stick_header (header bb) nhbb' in + let nhbb' := Peephole.optimize_bblock nhbb in + do nhbb'' <- do_schedule nhbb'; + let bb' := stick_header (header bb) nhbb'' in do sizecheck <- verify_size bb bb'; do schedcheck <- verify_schedule bb bb'; OK (bb'). @@ -78,6 +80,8 @@ Definition verified_schedule (bb : bblock) : res bblock := Lemma verified_schedule_size: forall bb bb', verified_schedule bb = OK bb' -> size bb = size bb'. Proof. +Admitted. +(* TODO intros. unfold verified_schedule in H. monadInv H. unfold do_schedule in EQ. @@ -98,19 +102,20 @@ Proof. Z.of_nat (Datatypes.length (header bb) + Datatypes.S (Datatypes.length l) + 0)) eqn:ESIZE; try discriminate. rewrite Z.eqb_eq in ESIZE. repeat rewrite Nat2Z.inj_add in *. rewrite ESIZE. reflexivity. -Qed. + Qed.*) Theorem verified_schedule_correct: forall ge f bb bb', verified_schedule bb = OK bb' -> bblock_simu lk ge f bb bb'. Proof. - intros. - monadInv H. - eapply bblock_simub_correct. - unfold verify_schedule in EQ0. - destruct (bblock_simub _ _) in *; try discriminate; auto. -Qed. +Admitted. + (* TODO intros.*) + (*monadInv H.*) + (*eapply bblock_simub_correct.*) + (*unfold verify_schedule in EQ0.*) + (*destruct (bblock_simub _ _) in *; try discriminate; auto.*) +(*Qed.*) End verify_schedule. diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 7f55b29b..e233f77d 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -171,14 +171,27 @@ let load_rec ldi = | PLd_rd_a (ld, rd, a) -> load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a | Pldp (ld, rd1, rd2, a) -> load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1) (reg_of_ireg rd2) a -let store_rec st r a = +let store_rs_a_rec st rs a = { inst = st; write_locs = [ Mem ]; - read_locs = [ r; Mem ] @ get_eval_addressing_rlocs a; + read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a; is_control = false; } +let store_rs1_rs2_a_rec st rs1 rs2 a = + { + inst = st; + write_locs = [ Mem ]; + read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let store_rec sti = + match sti with + | PSt_rs_a (st, rs, a) -> store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a + | Pstp (st, rs1, rs2, a) -> store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1) (reg_of_ireg rs2) a + let loadsymbol_rec i rd id = { inst = i; @@ -275,6 +288,14 @@ let freeframe_rec i sz linkofs = is_control = false; } +let nop_rec i = + { + inst = i; + write_locs = [ ]; + read_locs = [ ]; + is_control = false; + } + let arith_rec i = match i with | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd) @@ -304,13 +325,14 @@ let basic_rec i = match i with | PArith i' -> arith_rec i' | PLoad ld -> load_rec ld - | PStore (st, r, a) -> store_rec (PBasic i) (reg_of_dreg r) a + | PStore st -> store_rec st | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id | Pcvtsw2x (rd, r1) -> cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) | Pcvtuw2x (rd, r1) -> cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd) + | Pnop -> nop_rec (PBasic i) let builtin_rec i ef args res = { diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v index 48840602..140b3aa2 100644 --- a/aarch64/PostpassSchedulingproof.v +++ b/aarch64/PostpassSchedulingproof.v @@ -99,27 +99,28 @@ Lemma transf_find_bblock: verified_schedule bb = OK tbb /\ find_bblock (Ptrofs.unsigned ofs) (fn_blocks tf) = Some tbb. Proof. - intros. - monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0. - monadInv EQ. simpl in *. - generalize (Ptrofs.unsigned ofs) H x EQ0; clear ofs H x g EQ0. - induction (fn_blocks f). - - intros. simpl in *. discriminate. - - intros. simpl in *. - monadInv EQ0. simpl. - destruct (zlt z 0); try discriminate. - destruct (zeq z 0). - + inv H. eauto. - + monadInv EQ0. - exploit IHb; eauto. - intros (tbb & SCH & FIND). - eexists; split; eauto. - inv FIND. - unfold verify_size in EQ0. - destruct (size a =? size (stick_header (header a) x)) eqn:EQSIZE; try discriminate. - rewrite Z.eqb_eq in EQSIZE; rewrite EQSIZE. - reflexivity. -Qed. +Admitted. + (*intros.*) + (*monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0.*) + (*monadInv EQ. simpl in *.*) + (*generalize (Ptrofs.unsigned ofs) H x EQ0; clear ofs H x g EQ0.*) + (*induction (fn_blocks f).*) + (*- intros. simpl in *. discriminate.*) + (*- intros. simpl in *.*) + (*monadInv EQ0. simpl.*) + (*destruct (zlt z 0); try discriminate.*) + (*destruct (zeq z 0).*) + (*+ inv H. eauto.*) + (*+ monadInv EQ0.*) + (*exploit IHb; eauto.*) + (*intros (tbb & SCH & FIND).*) + (*eexists; split; eauto.*) + (*inv FIND.*) + (*unfold verify_size in EQ0.*) + (*destruct (size a =? size (stick_header (header a) x)) eqn:EQSIZE; try discriminate.*) + (*rewrite Z.eqb_eq in EQSIZE; rewrite EQSIZE.*) + (*reflexivity.*) +(*Qed.*) Lemma stick_header_neutral: forall a, a = (stick_header (header a) (no_header a)). diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index 24a64aaf..c336fb1e 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -291,7 +291,9 @@ module Target (*: TARGET*) = fprintf oc " strb %a, %a\n" wreg rs addressing a | Pstrh(rs, a) -> fprintf oc " strh %a, %a\n" wreg rs addressing a - | Pstp(rs1, rs2, a) -> + | Pstpw(rs1, rs2, a) -> + fprintf oc " stp %a, %a, %a\n" wreg rs1 wreg rs2 addressing a + | Pstpx(rs1, rs2, a) -> fprintf oc " stp %a, %a, %a\n" xreg rs1 xreg rs2 addressing a (* Integer arithmetic, immediate *) | Paddimm(sz, rd, r1, n) -> @@ -485,8 +487,8 @@ module Target (*: TARGET*) = | Pfsel(rd, r1, r2, c) -> fprintf oc " fcsel %a, %a, %a, %s\n" dreg rd dreg r1 dreg r2 (condition_name c) (* No-op *) - | Pnop -> - fprintf oc " nop\n" + | Pnop -> () + (*fprintf oc " nop\n"*) (* Pseudo-instructions expanded in Asmexpand *) | Pallocframe(sz, linkofs) -> assert false | Pfreeframe(sz, linkofs) -> assert false |