From 919d1dfbf22967ae9686d5b982b51cfa707b5edd Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 16:32:35 +0200 Subject: begin add Plq --- mppa_k1c/Asm.v | 5 ++- mppa_k1c/Asmblock.v | 2 ++ mppa_k1c/Asmblockdeps.v | 60 ++++++++++++++++++++++++++++++++++-- mppa_k1c/Asmvliw.v | 31 +++++++++++++------ mppa_k1c/PostpassSchedulingOracle.ml | 3 ++ mppa_k1c/PostpassSchedulingproof.v | 19 ++++++++++++ mppa_k1c/TargetPrinter.ml | 2 ++ mppa_k1c/lib/Asmblockgenproof0.v | 7 +++++ 8 files changed, 117 insertions(+), 12 deletions(-) diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v index 8b1c9a81..70d39168 100644 --- a/mppa_k1c/Asm.v +++ b/mppa_k1c/Asm.v @@ -112,7 +112,8 @@ Inductive instruction : Type := | Pld (rd: ireg) (ra: ireg) (ofs: addressing) (**r load int64 *) | Pld_a (rd: ireg) (ra: ireg) (ofs: addressing) (**r load any64 *) | Pfls (rd: freg) (ra: ireg) (ofs: addressing) (**r load float *) - | Pfld (rd: freg) (ra: ireg) (ofs: addressing) (**r load 64-bit float *) + | Pfld (rd: freg) (ra: ireg) (ofs: addressing) (**r load 64-bit float *) + | Plq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r load 64-bit float *) (** Stores **) | Psb (rs: ireg) (ra: ireg) (ofs: addressing) (**r store byte *) @@ -436,6 +437,8 @@ Definition basic_to_instruction (b: basic) := | PLoadRRO Asmvliw.Pfls rd ra ofs => Pfls rd ra (AOff ofs) | PLoadRRO Asmvliw.Pfld rd ra ofs => Pfld rd ra (AOff ofs) + | PLoadQRRO qrs ra ofs => Plq qrs ra (AOff ofs) + | PLoadRRR Asmvliw.Plb rd ra ro => Plb rd ra (AReg ro) | PLoadRRR Asmvliw.Plbu rd ra ro => Plbu rd ra (AReg ro) | PLoadRRR Asmvliw.Plh rd ra ro => Plh rd ra (AReg ro) diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v index f2c4a382..d9fbbdaa 100644 --- a/mppa_k1c/Asmblock.v +++ b/mppa_k1c/Asmblock.v @@ -270,6 +270,8 @@ Definition exec_load_reg (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ir Definition exec_load_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: ireg) := parexec_load_regxs chunk rs rs m m d a ro. +Definition exec_load_q_offset (rs: regset) (m: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := parexec_load_q_offset rs rs m m d a ofs. + Definition exec_store_offset (chunk: memory_chunk) (rs: regset) (m: mem) (s a: ireg) (ofs: offset) := parexec_store_offset chunk rs rs m m s a ofs. Definition exec_store_q_offset (rs: regset) (m: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := parexec_store_q_offset rs rs m m s a ofs. diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index 52af8cdf..ee4a9b51 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -650,6 +650,14 @@ Definition trans_basic (b: basic) : inst := | PLoadRRR n d a ro => [(#d, Op (Load (OLoadRRR n)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] | PLoadRRRXS n d a ro => [(#d, Op (Load (OLoadRRRXS n)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))] + | PLoadQRRO qd a ofs => + let (d0, d1) := gpreg_q_expand qd in + if ireg_eq d0 a + then [(#d0, Op Fail Enil); + (#d1, Op Fail Enil)] + else + [(#d0, Op (Load (OLoadRRO Pld_a ofs)) (PReg (#a) @ PReg pmem @ Enil)); + (#d1, Op (Load (OLoadRRO Pld_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#a) @ PReg pmem @ Enil))] | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] | PStoreQRRO qs a ofs => @@ -842,7 +850,7 @@ Proof. * intros rr; destruct rr; Simpl. destruct (ireg_eq g rd); subst; Simpl. Qed. - + Theorem forward_simu_par_wio_basic ge fn rsr rsw mr mw sr sw bi: Ge = Genv ge fn -> match_states (State rsr mr) sr -> @@ -877,7 +885,55 @@ Proof. destruct (Mem.loadv _ _ _) eqn:MEML; simpl; auto; eexists; split; try split; Simpl; intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl. - + + unfold parexec_load_q_offset. + destruct (gpreg_q_expand rd) as [rd0 rd1]. + destruct (ireg_eq rd0 ra) as [ OVERLAP0 | NOOVERLAP0 ]. + simpl. trivial. + unfold exec_load_deps_offset. + destruct Ge. + unfold eval_offset. + repeat rewrite H0. + destruct (Mem.loadv Many64 mr (Val.offset_ptr (rsr ra) ofs)) as [load0 | ] eqn:MEML0; simpl. + ++ destruct (Mem.loadv Many64 mr + (Val.offset_ptr (rsr ra) (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ] eqn:MEML1. + +++ rewrite H0. + rewrite H. + rewrite MEML0. + rewrite (assign_diff _ _ (# ra)) by (apply ppos_discr; congruence). + rewrite H0. + rewrite (assign_diff _ _ pmem) by (apply not_eq_sym; apply ppos_pmem_discr). + rewrite H. + rewrite MEML1. + eexists; split. + * f_equal. + * constructor. + ** repeat rewrite (assign_diff _ _ pmem) by (apply not_eq_sym; apply ppos_pmem_discr). + assumption. + ** intro. + destruct (preg_eq r rd1). + *** subst r. + rewrite (assign_eq _ (# rd1)). + rewrite Pregmap.gss. + reflexivity. + *** rewrite (assign_diff _ (#rd1) (#r) _) by (apply ppos_discr; apply not_eq_sym; assumption). + rewrite Pregmap.gso by assumption. + destruct (preg_eq r rd0). + **** subst r. + rewrite (assign_eq _ (# rd0)). + rewrite Pregmap.gss. + reflexivity. + **** rewrite (assign_diff _ (#rd0) (#r) _) by (apply ppos_discr; apply not_eq_sym; assumption). + rewrite Pregmap.gso by assumption. + trivial. + +++ rewrite H0. rewrite H. rewrite MEML0. + rewrite (assign_diff _ _ (# ra)) by (apply ppos_discr; congruence). + rewrite H0. + rewrite (assign_diff _ _ pmem) by (apply not_eq_sym; apply ppos_pmem_discr). + rewrite H. + rewrite MEML1. + constructor. + ++ rewrite H0. rewrite H. rewrite MEML0. trivial. + (* Store *) - destruct i. (* Store Offset *) diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index 6ebc8340..30263b4d 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -318,6 +318,7 @@ Inductive ld_instruction : Type := | PLoadRRO (i: load_name) (rd: ireg) (ra: ireg) (ofs: offset) | PLoadRRR (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg) | PLoadRRRXS (i: load_name) (rd: ireg) (ra: ireg) (rofs: ireg) + | PLoadQRRO (rd: gpreg_q) (ra: ireg) (ofs: offset) . (** Stores **) @@ -1152,6 +1153,20 @@ Definition parexec_load_offset (chunk: memory_chunk) (rsr rsw: regset) (mr mw: m | _ => Stuck end. +Definition parexec_load_q_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_q) (a: ireg) (ofs: offset) := + let (rd0, rd1) := gpreg_q_expand d in + if ireg_eq rd0 a + then Stuck + else + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) ofs) with + | None => Stuck + | Some v0 => + match Mem.loadv Many64 mr (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) with + | None => Stuck + | Some v1 => Next (rsw#rd0 <- v0 #rd1 <- v1) mw + end + end. + Definition parexec_load_reg (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with | None => Stuck @@ -1187,17 +1202,13 @@ Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: m Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := let (s0, s1) := gpreg_q_expand s in - match eval_offset ofs with - | OK eofs => - match Mem.storev Many64 mr (Val.offset_ptr (rsr a) eofs) (rsr s0) with + match Mem.storev Many64 mr (Val.offset_ptr (rsr a) ofs) (rsr s0) with + | None => Stuck + | Some m1 => + match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add ofs (Ptrofs.repr 8))) (rsr s1) with | None => Stuck - | Some m1 => - match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add eofs (Ptrofs.repr 8))) (rsr s1) with - | None => Stuck - | Some m2 => Next rsw m2 - end + | Some m2 => Next rsw m2 end - | _ => Stuck end. @@ -1236,6 +1247,8 @@ Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) := | PLoadRRO n d a ofs => parexec_load_offset (load_chunk n) rsr rsw mr mw d a ofs | PLoadRRR n d a ro => parexec_load_reg (load_chunk n) rsr rsw mr mw d a ro | PLoadRRRXS n d a ro => parexec_load_regxs (load_chunk n) rsr rsw mr mw d a ro + | PLoadQRRO d a ofs => + parexec_load_q_offset rsr rsw mr mw d a ofs | PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs | PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index a4dc3614..e3d43237 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -218,6 +218,9 @@ let arith_rec i = let load_rec i = match i with | PLoadRRO (i, rs1, rs2, imm) -> { inst = load_str i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false} + | PLoadQRRO(rs, ra, imm) -> + let (rs0, rs1) = gpreg_q_expand rs in + { inst = "Plq"; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false} | PLoadRRR (i, rs1, rs2, rs3) | PLoadRRRXS (i, rs1, rs2, rs3) -> { inst = load_str i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false} diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v index da64c41d..0ceff6e5 100644 --- a/mppa_k1c/PostpassSchedulingproof.v +++ b/mppa_k1c/PostpassSchedulingproof.v @@ -128,6 +128,24 @@ Proof. - discriminate. Qed. +Lemma exec_load_offset_q_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_load_q_offset rs m rd ra ofs = Next rs' m' -> + exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *. + destruct (gpreg_q_expand rd) as [rd0 rd1]. + destruct (ireg_eq rd0 ra); try discriminate. + rewrite Pregmap.gso; try discriminate. + destruct (Mem.loadv _ _ _); try discriminate. + inv H. + destruct (Mem.loadv _ _ _); try discriminate. + inv H1. f_equal. + rewrite (regset_double_set PC rd0) by discriminate. + rewrite (regset_double_set PC rd1) by discriminate. + reflexivity. +Qed. + Lemma exec_store_offset_pc_var: forall t rs m rd ra ofs rs' m' v, exec_store_offset t rs m rd ra ofs = Next rs' m' -> @@ -191,6 +209,7 @@ Proof. + exploreInst; apply exec_load_offset_pc_var; auto. + exploreInst; apply exec_load_reg_pc_var; auto. + exploreInst; apply exec_load_regxs_pc_var; auto. + + apply exec_load_offset_q_pc_var; auto. - destruct i. + exploreInst; apply exec_store_offset_pc_var; auto. + exploreInst; apply exec_store_reg_pc_var; auto. diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 9d42169a..326b802d 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -392,6 +392,8 @@ module Target (*: TARGET*) = fprintf oc " lws%a %a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra | Pld(rd, ra, adr) | Pfld(rd, ra, adr) | Pld_a(rd, ra, adr) -> assert Archi.ptr64; fprintf oc " ld%a %a = %a[%a]\n" xscale adr ireg rd addressing adr ireg ra + | Plq(rd, ra, adr) -> + fprintf oc " lq%a %a = %a[%a]\n" xscale adr gpreg_q rd addressing adr ireg ra | Psb(rd, ra, adr) -> fprintf oc " sb%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v index 3bca6629..9303b780 100644 --- a/mppa_k1c/lib/Asmblockgenproof0.v +++ b/mppa_k1c/lib/Asmblockgenproof0.v @@ -950,6 +950,13 @@ Proof. 1-10: try (unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]). 1-10: try (unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto. 1-10: try (unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto. + - (* PLoadQRRO *) + unfold parexec_load_q_offset in H1. + destruct (gpreg_q_expand _) as [r0 r1] in H1. + destruct (ireg_eq _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + destruct (Mem.loadv _ _ _) in H1; try discriminate. + inv H1. Simpl. - (* PStoreQRRO *) unfold parexec_store_q_offset in H1. destruct (gpreg_q_expand _) as [r0 r1] in H1. -- cgit From 3916ecbc8c77c99bab0bd69de184ae432fd9d3f4 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 16:48:07 +0200 Subject: Lq finished ? --- mppa_k1c/Asmblockdeps.v | 31 ++++++++++++++++--------------- mppa_k1c/Peephole.v | 11 +++++++++++ mppa_k1c/PostpassSchedulingOracle.ml | 7 ++++--- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index ee4a9b51..225c3e98 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -366,10 +366,23 @@ Qed. Hint Resolve arith_op_eq_correct: wlp. Opaque arith_op_eq_correct. +Definition offset_eq (ofs1 ofs2 : offset): ?? bool := + RET (Ptrofs.eq ofs1 ofs2). + +Lemma offset_eq_correct ofs1 ofs2: + WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2. +Proof. + wlp_simplify. + pose (Ptrofs.eq_spec ofs1 ofs2). + rewrite H in *. + trivial. +Qed. +Hint Resolve offset_eq_correct: wlp. + Definition load_op_eq (o1 o2: load_op): ?? bool := match o1 with | OLoadRRO n1 ofs1 => - match o2 with OLoadRRO n2 ofs2 => iandb (phys_eq n1 n2) (phys_eq ofs1 ofs2) | _ => RET false end + match o2 with OLoadRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end | OLoadRRR n1 => match o2 with OLoadRRR n2 => phys_eq n1 n2 | _ => RET false end | OLoadRRRXS n1 => @@ -380,26 +393,14 @@ Lemma load_op_eq_correct o1 o2: WHEN load_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. Proof. destruct o1, o2; wlp_simplify; try discriminate. - - congruence. + - f_equal. pose (Ptrofs.eq_spec ofs ofs0). + rewrite H in *. trivial. - congruence. - congruence. Qed. Hint Resolve load_op_eq_correct: wlp. Opaque load_op_eq_correct. -Definition offset_eq (ofs1 ofs2 : offset): ?? bool := - RET (Ptrofs.eq ofs1 ofs2). - -Lemma offset_eq_correct ofs1 ofs2: - WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2. -Proof. - wlp_simplify. - pose (Ptrofs.eq_spec ofs1 ofs2). - rewrite H in *. - trivial. -Qed. -Hint Resolve offset_eq_correct: wlp. - Definition store_op_eq (o1 o2: store_op): ?? bool := match o1 with | OStoreRRO n1 ofs1 => diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 6e06e7ea..3603c1d8 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -46,6 +46,17 @@ Fixpoint coalesce_mem (insns : list basic) : list basic := else h0 :: (coalesce_mem t0) | None => h0 :: (coalesce_mem t0) end + | (PLoadRRO Pld_a rd0 ra0 ofs0), + (PLoadRRO Pld_a rd1 ra1 ofs1) => + match gpreg_q_search rd0 rd1 with + | Some rd0rd1 => + let zofs0 := Ptrofs.signed ofs0 in + let zofs1 := Ptrofs.signed ofs1 in + if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) && negb (ireg_eq ra0 rd0) + then (PLoad (PLoadQRRO rd0rd1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + else h0 :: (coalesce_mem t0) + | None => h0 :: (coalesce_mem t0) + end | _, _ => h0 :: (coalesce_mem t0) end | nil => h0 :: nil diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index e3d43237..d3f16366 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -448,7 +448,7 @@ type real_instruction = | Maddw | Maddd | Cmoved | Make | Nop | Extfz | Extfs | Insf (* LSU *) - | Lbs | Lbz | Lhs | Lhz | Lws | Ld + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Sb | Sh | Sw | Sd | Sq (* BCU *) | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set @@ -521,6 +521,7 @@ let ab_inst_to_real = function | "Plhu" -> Lhz | "Plw" | "Plw_a" | "Pfls" -> Lws | "Pld" | "Pfld" | "Pld_a" -> Ld + | "Plq" -> Lq | "Psb" -> Sb | "Psh" -> Sh @@ -598,7 +599,7 @@ let rec_to_usage r = | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding) | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding) | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau - | Lbs | Lbz | Lhs | Lhz | Lws | Ld -> + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq -> (match encoding with None | Some U6 | Some S10 -> lsu_data | Some U27L5 | Some U27L10 -> lsu_data_x | Some E27U27L10 -> lsu_data_y) @@ -623,7 +624,7 @@ let real_inst_to_latency = function -> 1 | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4 | Mulw | Muld | Maddw | Maddd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *) - | Lbs | Lbz | Lhs | Lhz | Lws | Ld -> 3 + | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq -> 3 | Sb | Sh | Sw | Sd | Sq -> 1 (* See k1c-Optimization.pdf page 19 *) | Get -> 1 | Set -> 4 (* According to the manual should be 3, but I measured 4 *) -- cgit