From a289d73e791be5a760c8a9b2f3de2064f001a770 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 11:17:57 +0200 Subject: use sq to save pairs of registers --- mppa_k1c/Asmblockdeps.v | 19 ++++++++++++++++--- mppa_k1c/Asmvliw.v | 5 ++--- mppa_k1c/Peephole.v | 2 +- mppa_k1c/PostpassSchedulingOracle.ml | 10 +++++++--- mppa_k1c/TargetPrinter.ml | 2 +- 5 files changed, 27 insertions(+), 11 deletions(-) (limited to 'mppa_k1c') diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index b2fa79d1..52af8cdf 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -387,11 +387,23 @@ Qed. Hint Resolve load_op_eq_correct: wlp. Opaque load_op_eq_correct. +Definition offset_eq (ofs1 ofs2 : offset): ?? bool := + RET (Ptrofs.eq ofs1 ofs2). + +Lemma offset_eq_correct ofs1 ofs2: + WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2. +Proof. + wlp_simplify. + pose (Ptrofs.eq_spec ofs1 ofs2). + rewrite H in *. + trivial. +Qed. +Hint Resolve offset_eq_correct: wlp. Definition store_op_eq (o1 o2: store_op): ?? bool := match o1 with | OStoreRRO n1 ofs1 => - match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (phys_eq ofs1 ofs2) | _ => RET false end + match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end | OStoreRRR n1 => match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end | OStoreRRRXS n1 => @@ -402,7 +414,8 @@ Lemma store_op_eq_correct o1 o2: WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. Proof. destruct o1, o2; wlp_simplify; try discriminate. - - congruence. + - f_equal. pose (Ptrofs.eq_spec ofs ofs0). + rewrite H in *. trivial. - congruence. - congruence. Qed. @@ -642,7 +655,7 @@ Definition trans_basic (b: basic) : inst := | PStoreQRRO qs a ofs => let (s0, s1) := gpreg_q_expand qs in [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil)); - (pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil); (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))] | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil)); diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index 248b8660..6ebc8340 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -1189,11 +1189,10 @@ Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) ( let (s0, s1) := gpreg_q_expand s in match eval_offset ofs with | OK eofs => - let base := Val.offset_ptr (rsr a) eofs in - match Mem.storev Many64 mr base (rsr s0) with + match Mem.storev Many64 mr (Val.offset_ptr (rsr a) eofs) (rsr s0) with | None => Stuck | Some m1 => - match Mem.storev Many64 m1 base (rsr s1) with + match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add eofs (Ptrofs.repr 8))) (rsr s1) with | None => Stuck | Some m2 => Next rsw m2 end diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 91936ac6..3e0d9ae9 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -42,7 +42,7 @@ Fixpoint optimize_body (insns : list basic) : list basic := let zofs1 := Ptrofs.signed ofs1 in if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) then let h0' := print_found_store basic zofs0 h0 in - h0' :: (optimize_body t0) + (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (optimize_body t1) else h0 :: (optimize_body t0) | None => h0 :: (optimize_body t0) end diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index c944774a..f88acb44 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -446,7 +446,7 @@ type real_instruction = | Make | Nop | Extfz | Extfs | Insf (* LSU *) | Lbs | Lbz | Lhs | Lhz | Lws | Ld - | Sb | Sh | Sw | Sd + | Sb | Sh | Sw | Sd | Sq (* BCU *) | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set (* FPU *) @@ -523,6 +523,7 @@ let ab_inst_to_real = function | "Psh" -> Sh | "Psw" | "Psw_a" | "Pfss" -> Sw | "Psd" | "Psd_a" | "Pfsd" -> Sd + | "Psq" -> Sq | "Pcb" | "Pcbu" -> Cb | "Pcall" | "Pdiv" | "Pdivu" -> Call @@ -543,6 +544,9 @@ let ab_inst_to_real = function | "Pfsbfw" -> Fsbfw | "Pfmuld" -> Fmuld | "Pfmulw" -> Fmulw + + | "nop" -> Nop + | s -> failwith @@ sprintf "ab_inst_to_real: unrecognized instruction: %s" s exception InvalidEncoding @@ -595,7 +599,7 @@ let rec_to_usage r = (match encoding with None | Some U6 | Some S10 -> lsu_data | Some U27L5 | Some U27L10 -> lsu_data_x | Some E27U27L10 -> lsu_data_y) - | Sb | Sh | Sw | Sd -> + | Sb | Sh | Sw | Sd | Sq -> (match encoding with None | Some U6 | Some S10 -> lsu_acc | Some U27L5 | Some U27L10 -> lsu_acc_x | Some E27U27L10 -> lsu_acc_y) @@ -617,7 +621,7 @@ let real_inst_to_latency = function | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4 | Mulw | Muld | Maddw | Maddd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *) | Lbs | Lbz | Lhs | Lhz | Lws | Ld -> 3 - | Sb | Sh | Sw | Sd -> 1 (* See k1c-Optimization.pdf page 19 *) + | Sb | Sh | Sw | Sd | Sq -> 1 (* See k1c-Optimization.pdf page 19 *) | Get -> 1 | Set -> 4 (* According to the manual should be 3, but I measured 4 *) | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *) diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 7eea7d15..9d42169a 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -287,7 +287,7 @@ module Target (*: TARGET*) = | _ -> assert false end - | Pnop -> fprintf oc " nop\n" + | Pnop -> (* FIXME fprintf oc " nop\n" *) () | Psemi -> fprintf oc ";;\n" | Pclzll (rd, rs) -> fprintf oc " clzd %a = %a\n" ireg rd ireg rs -- cgit