From df9f42773b48270e77d1760719b1d8399062c2ea Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 2 May 2019 20:23:56 +0200 Subject: seems to work --- mppa_k1c/Peephole.v | 15 +++++++++++++++ mppa_k1c/PostpassScheduling.v | 6 ++++-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 mppa_k1c/Peephole.v diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v new file mode 100644 index 00000000..56e547e5 --- /dev/null +++ b/mppa_k1c/Peephole.v @@ -0,0 +1,15 @@ +Require Import Asmvliw. + +Definition optimize_body (insns : list basic) := insns. + +Program Definition optimize_bblock (bb : bblock) := + let optimized := optimize_body (body bb) in + let wf_ok := wf_bblockb optimized (exit bb) in + {| header := header bb; + body := if wf_ok then optimized else (body bb); + exit := exit bb |}. +Next Obligation. + destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf. + - rewrite Rwf. simpl. trivial. + - exact (correct bb). +Qed. diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v index ab4bc9c9..ecd40f5c 100644 --- a/mppa_k1c/PostpassScheduling.v +++ b/mppa_k1c/PostpassScheduling.v @@ -13,6 +13,7 @@ Require Import Coqlib Errors AST Integers. Require Import Asmblock Axioms Memory Globalenvs. Require Import Asmblockdeps Asmblockgenproof0. +Require Peephole. Local Open Scope error_monad_scope. @@ -347,8 +348,9 @@ Fixpoint verify_par (lbb: list bblock) := end. Definition verified_schedule_nob (bb : bblock) : res (list bblock) := - let bb' := no_header bb in - let lbb := do_schedule bb' in + let bb' := no_header bb in + let bb'' := Peephole.optimize_bblock bb' in + let lbb := do_schedule bb'' in do tbb <- concat_all lbb; do sizecheck <- verify_size bb lbb; do schedcheck <- verify_schedule bb' tbb; -- cgit From ec84544fb9f842caf98ca61669a8fb3024504ae2 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 2 May 2019 21:14:48 +0200 Subject: found peephole --- mppa_k1c/Peephole.v | 22 +++++++++++++++++++++- mppa_k1c/extractionMachdep.v | 3 +++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 56e547e5..b7931aca 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -1,6 +1,26 @@ +Require Import Coqlib. Require Import Asmvliw. +Require Import Values. +Require Import Integers. -Definition optimize_body (insns : list basic) := insns. +Parameter print_found_store : forall A : Type, Z -> A -> A. + +Fixpoint optimize_body (insns : list basic) : list basic := + match insns with + | nil => nil + | h0 :: t0 => + match t0 with + | h1 :: t1 => + match h0, h1 with + | (PStoreRRO Psd_a rs0 ra0 (Ofsimm ofs0)), + (PStoreRRO Psd_a rs1 ra1 (Ofsimm ofs1)) => + let h0' := print_found_store basic (Ptrofs.signed ofs0) h0 in + h0' :: (optimize_body t0) + | _, _ => h0 :: (optimize_body t0) + end + | nil => h0 :: nil + end + end. Program Definition optimize_bblock (bb : bblock) := let optimized := optimize_body (body bb) in diff --git a/mppa_k1c/extractionMachdep.v b/mppa_k1c/extractionMachdep.v index e70f51de..fdecd2a3 100644 --- a/mppa_k1c/extractionMachdep.v +++ b/mppa_k1c/extractionMachdep.v @@ -22,6 +22,9 @@ Require Archi Asm. Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ". Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *) +Extract Constant Peephole.print_found_store => +"fun offset x -> Printf.printf ""found offset = %ld\n"" (Camlcoq.camlint_of_coqint offset); x". + (* Asm *) (* Extract Constant Asm.low_half => "fun _ _ _ -> assert false". -- cgit From e967c0669d17f9a04a301d2f600c4b55a0618fc7 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 2 May 2019 21:15:10 +0200 Subject: example of spill peephole --- test/monniaux/regalloc/bigspill.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 test/monniaux/regalloc/bigspill.c diff --git a/test/monniaux/regalloc/bigspill.c b/test/monniaux/regalloc/bigspill.c new file mode 100644 index 00000000..6191e018 --- /dev/null +++ b/test/monniaux/regalloc/bigspill.c @@ -0,0 +1,21 @@ +extern void callee(void); + +void bigspill(int *t) { + int t0 = t[0]; + int t1 = t[1]; + int t2 = t[2]; + int t3 = t[3]; + int t4 = t[4]; + int t5 = t[5]; + int t6 = t[6]; + int t7 = t[7]; + callee(); + t[0] = t0; + t[1] = t1; + t[2] = t2; + t[3] = t3; + t[4] = t4; + t[5] = t5; + t[6] = t6; + t[7] = t7; +} -- cgit From 56ab9fea6dc937dac56e883a64f06cae3c931551 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 2 May 2019 21:20:14 +0200 Subject: detect consecutive ones --- mppa_k1c/Peephole.v | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index b7931aca..47e04aec 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -14,8 +14,12 @@ Fixpoint optimize_body (insns : list basic) : list basic := match h0, h1 with | (PStoreRRO Psd_a rs0 ra0 (Ofsimm ofs0)), (PStoreRRO Psd_a rs1 ra1 (Ofsimm ofs1)) => - let h0' := print_found_store basic (Ptrofs.signed ofs0) h0 in - h0' :: (optimize_body t0) + let zofs0 := Ptrofs.signed ofs0 in + let zofs1 := Ptrofs.signed ofs1 in + if zofs1 =? zofs0 + 8 + then let h0' := print_found_store basic zofs0 h0 in + h0' :: (optimize_body t0) + else h0 :: (optimize_body t0) | _, _ => h0 :: (optimize_body t0) end | nil => h0 :: nil -- cgit From 5d09dd8f3194ecc90137178d6b5d18b9ad31aabf Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 2 May 2019 21:58:47 +0200 Subject: find consecutive writes --- mppa_k1c/Asmvliw.v | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ mppa_k1c/Peephole.v | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index e460727c..0cbe27e1 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -66,12 +66,63 @@ Inductive gpreg: Type := Definition ireg := gpreg. Definition freg := gpreg. +Lemma gpreg_eq: forall (x y: gpreg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}. Proof. decide equality. Defined. Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}. Proof. decide equality. Defined. +Inductive gpreg_q : Type := +| R0R1 | R2R3 | R4R5 | R6R7 | R8R9 +| R10R11 | R12R13 | R14R15 | R16R17 | R18R19 +| R20R21 | R22R23 | R24R25 | R26R27 | R28R29 +| R30R31 | R32R33 | R34R35 | R36R37 | R38R39 +| R40R41 | R42R43 | R44R45 | R46R47 | R48R49 +| R50R51 | R52R53 | R54R55 | R56R57 | R58R59 +| R60R61 | R62R63. + +Lemma gpreg_q_eq : forall (x y : gpreg_q), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Definition gpreg_q_expand (x : gpreg_q) : gpreg * gpreg := + match x with + | R0R1 => (GPR0, GPR1) + | R2R3 => (GPR2, GPR3) + | R4R5 => (GPR4, GPR5) + | R6R7 => (GPR6, GPR7) + | R8R9 => (GPR8, GPR9) + | R10R11 => (GPR10, GPR11) + | R12R13 => (GPR12, GPR13) + | R14R15 => (GPR14, GPR15) + | R16R17 => (GPR16, GPR17) + | R18R19 => (GPR18, GPR19) + | R20R21 => (GPR20, GPR21) + | R22R23 => (GPR22, GPR23) + | R24R25 => (GPR24, GPR25) + | R26R27 => (GPR26, GPR27) + | R28R29 => (GPR28, GPR29) + | R30R31 => (GPR30, GPR31) + | R32R33 => (GPR32, GPR33) + | R34R35 => (GPR34, GPR35) + | R36R37 => (GPR36, GPR37) + | R38R39 => (GPR38, GPR39) + | R40R41 => (GPR40, GPR41) + | R42R43 => (GPR42, GPR43) + | R44R45 => (GPR44, GPR45) + | R46R47 => (GPR46, GPR47) + | R48R49 => (GPR48, GPR49) + | R50R51 => (GPR50, GPR51) + | R52R53 => (GPR52, GPR53) + | R54R55 => (GPR54, GPR55) + | R56R57 => (GPR56, GPR57) + | R58R59 => (GPR58, GPR59) + | R60R61 => (GPR60, GPR61) + | R62R63 => (GPR62, GPR63) + end. + (** We model the following registers of the RISC-V architecture. *) (** basic register *) diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 47e04aec..2c73bb63 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -3,6 +3,28 @@ Require Import Asmvliw. Require Import Values. Require Import Integers. +Definition gpreg_q_list : list gpreg_q := +R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9 +:: R10R11 :: R12R13 :: R14R15 :: R16R17 :: R18R19 +:: R20R21 :: R22R23 :: R24R25 :: R26R27 :: R28R29 +:: R30R31 :: R32R33 :: R34R35 :: R36R37 :: R38R39 +:: R40R41 :: R42R43 :: R44R45 :: R46R47 :: R48R49 +:: R50R51 :: R52R53 :: R54R55 :: R56R57 :: R58R59 +:: R60R61 :: R62R63 :: nil. + +Fixpoint gpreg_q_search_rec r0 r1 l := + match l with + | h :: t => + let (s0, s1) := gpreg_q_expand h in + if (gpreg_eq r0 s0) && (gpreg_eq r1 s1) + then Some h + else gpreg_q_search_rec r0 r1 t + | nil => None + end. + +Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q := + gpreg_q_search_rec r0 r1 gpreg_q_list. + Parameter print_found_store : forall A : Type, Z -> A -> A. Fixpoint optimize_body (insns : list basic) : list basic := @@ -14,12 +36,16 @@ Fixpoint optimize_body (insns : list basic) : list basic := match h0, h1 with | (PStoreRRO Psd_a rs0 ra0 (Ofsimm ofs0)), (PStoreRRO Psd_a rs1 ra1 (Ofsimm ofs1)) => - let zofs0 := Ptrofs.signed ofs0 in - let zofs1 := Ptrofs.signed ofs1 in - if zofs1 =? zofs0 + 8 - then let h0' := print_found_store basic zofs0 h0 in - h0' :: (optimize_body t0) - else h0 :: (optimize_body t0) + match gpreg_q_search rs0 rs1 with + | Some rs0rs1 => + let zofs0 := Ptrofs.signed ofs0 in + let zofs1 := Ptrofs.signed ofs1 in + if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) + then let h0' := print_found_store basic zofs0 h0 in + h0' :: (optimize_body t0) + else h0 :: (optimize_body t0) + | None => h0 :: (optimize_body t0) + end | _, _ => h0 :: (optimize_body t0) end | nil => h0 :: nil -- cgit From 8163278174362fb8269804a7958f6e9e7878a511 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 03:33:58 +0200 Subject: getting stuck need to move offsets --- mppa_k1c/Asm.v | 5 ++++- mppa_k1c/Asmblockdeps.v | 11 ++++++++--- mppa_k1c/Asmvliw.v | 27 +++++++++++++++++++++------ mppa_k1c/lib/Asmblockgenproof0.v | 7 +++++++ 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v index d73d00c7..8b1c9a81 100644 --- a/mppa_k1c/Asm.v +++ b/mppa_k1c/Asm.v @@ -122,7 +122,9 @@ Inductive instruction : Type := | Psd (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int64 *) | Psd_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any64 *) | Pfss (rs: freg) (ra: ireg) (ofs: addressing) (**r store float *) - | Pfsd (rd: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *) + | Pfsd (rs: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *) + + | Psq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r store 64-bit float *) (** Arith RR *) | Pmv (rd rs: ireg) (**r register move *) @@ -484,6 +486,7 @@ Definition basic_to_instruction (b: basic) := | PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro) | PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro) + | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs) end. Section RELSEM. diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index 1ee5002c..04f02a80 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -639,6 +639,10 @@ Definition trans_basic (b: basic) : inst := | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))] | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))] + | PStoreQRRO qs a ofs => + let (s0, s1) := gpreg_q_expand qs in + [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil)); + (pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil); (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))] | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil)); @@ -884,7 +888,10 @@ Proof. eexists; split; try split; Simpl; intros rr; destruct rr; Simpl. -(* Allocframe *) + + unfold parexec_store_q_offset. + destruct (gpreg_q_expand rs) as [s0 s1]. + simpl. + (* Allocframe *) - destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS. * eexists; repeat split. { Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl. @@ -1529,5 +1536,3 @@ Definition bblock_equivb: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bbloc Definition bblock_equiv_eq := pure_bblock_eq_test_correct true. End SECT_BBLOCK_EQUIV. - - diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index 0cbe27e1..fb1575f9 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -236,9 +236,6 @@ Definition label := positive. *) Inductive ex_instruction : Type := (* Pseudo-instructions *) -(*| Ploadsymbol_high (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the high part of the address of a symbol *) - | Pbtbl (r: ireg) (tbl: list label) (**r N-way branch through a jump table *) *) - | Pbuiltin: external_function -> list (builtin_arg preg) -> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *) . @@ -341,6 +338,7 @@ Inductive st_instruction : Type := | PStoreRRO (i: store_name) (rs: ireg) (ra: ireg) (ofs: offset) | PStoreRRR (i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg) | PStoreRRRXS(i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg) + | PStoreQRRO (rs: gpreg_q) (ra: ireg) (ofs: offset) . (** Arithmetic instructions **) @@ -355,7 +353,6 @@ Inductive arith_name_rr : Type := | Pcvtl2w (**r Convert Long to Word *) | Psxwd (**r Sign Extend Word to Double Word *) | Pzxwd (**r Zero Extend Word to Double Word *) -(* | Pextfs (stop : int) (start : int) (**r extract bit field, signed *) *) | Pextfz (stop : Z) (start : Z) (**r extract bit field, unsigned *) | Pextfs (stop : Z) (start : Z) (**r extract bit field, signed *) | Pextfzl (stop : Z) (start : Z) (**r extract bit field, unsigned *) @@ -693,7 +690,7 @@ Variable ge: genv. from the current state (a register set + a memory state) to either [Next rs' m'] where [rs'] and [m'] are the updated register set and memory state after execution of the instruction at [rs#PC], or [Stuck] if the processor is stuck. - + The parallel semantics of each instructions handles two states in input: - the actual input state of the bundle which is only read - and the other on which every "write" is performed: @@ -1198,6 +1195,23 @@ Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: m | Some m' => Next rsw m' end. +Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := + let (s0, s1) := gpreg_q_expand s in + match eval_offset ofs with + | OK eofs => + let base := Val.offset_ptr (rsr a) eofs in + match Mem.storev Many64 mr base (rsr s0) with + | None => Stuck + | Some m1 => + match Mem.storev Many64 m1 base (rsr s1) with + | None => Stuck + | Some m2 => Next rsw m2 + end + end + | _ => Stuck + end. + + Definition load_chunk n := match n with | Plb => Mint8signed @@ -1237,7 +1251,8 @@ Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) := | PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs | PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro | PStoreRRRXS n s a ro => parexec_store_regxs (store_chunk n) rsr rsw mr mw s a ro - + | PStoreQRRO s a ofs => + parexec_store_q_offset rsr rsw mr mw s a ofs | Pallocframe sz pos => let (mw, stk) := Mem.alloc mr 0 sz in let sp := (Vptr stk Ptrofs.zero) in diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v index 0a83a903..a93cb28a 100644 --- a/mppa_k1c/lib/Asmblockgenproof0.v +++ b/mppa_k1c/lib/Asmblockgenproof0.v @@ -950,6 +950,13 @@ Proof. 1-10: try (unfold parexec_store_offset in H1; destruct (eval_offset ge ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]). 1-10: try (unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto. 1-10: try (unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto. + - (* PStoreQRRO *) + unfold parexec_store_q_offset in H1. + destruct (gpreg_q_expand _) as [r0 r1] in H1. + destruct (eval_offset _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + destruct (Mem.storev _ _ _) in H1; try discriminate. + inv H1. Simpl. reflexivity. - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate. - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate. destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate. -- cgit From da32340a8c063c9dc0847d01e7ec5c77ce75f3b1 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 09:29:46 +0200 Subject: rm Ofslow --- mppa_k1c/lib/Asmblockgenproof0.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v index 0465618c..3bca6629 100644 --- a/mppa_k1c/lib/Asmblockgenproof0.v +++ b/mppa_k1c/lib/Asmblockgenproof0.v @@ -953,7 +953,7 @@ Proof. - (* PStoreQRRO *) unfold parexec_store_q_offset in H1. destruct (gpreg_q_expand _) as [r0 r1] in H1. - destruct (eval_offset _ _) in H1; try discriminate. + unfold eval_offset in H1; try discriminate. destruct (Mem.storev _ _ _) in H1; try discriminate. destruct (Mem.storev _ _ _) in H1; try discriminate. inv H1. Simpl. reflexivity. -- cgit From 676d1ae6324d3c2f13e20efdcff3fbda9aab1686 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 10:14:13 +0200 Subject: ça avance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mppa_k1c/Asmblockdeps.v | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index 2b6a8450..b2fa79d1 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -890,8 +890,35 @@ Proof. intros rr; destruct rr; Simpl. + unfold parexec_store_q_offset. + simpl. destruct (gpreg_q_expand rs) as [s0 s1]. simpl. + unfold exec_store_deps_offset. + repeat rewrite H0. + repeat rewrite H. + destruct Ge. + destruct (Mem.storev _ _ _ (rsr s0)) as [mem0 | ] eqn:MEML0; simpl. + ++ rewrite MEML0. + destruct (Mem.storev _ _ _ (rsr s1)) as [mem1 | ] eqn:MEML1; simpl. + * rewrite (assign_diff sr _ (# s1)) by apply ppos_pmem_discr. + rewrite (assign_diff sr _ (# ra)) by apply ppos_pmem_discr. + repeat rewrite H0. + rewrite MEML1. + eexists; split. + reflexivity. + rewrite (assign_eq _ pmem). + split; trivial. + intro r. + rewrite (assign_diff _ _ (# r)) by apply ppos_pmem_discr. + rewrite (assign_diff _ _ (# r)) by apply ppos_pmem_discr. + congruence. + * rewrite (assign_diff sr pmem (# s1)) by apply ppos_pmem_discr. + rewrite (assign_diff sr pmem (# ra)) by apply ppos_pmem_discr. + repeat rewrite H0. + rewrite MEML1. + reflexivity. + ++ rewrite MEML0. + reflexivity. (* Allocframe *) - destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS. * eexists; repeat split. -- cgit From e1c864b670812eda55e0ee129855c69d32c8b84a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 10:34:58 +0200 Subject: it compiles --- mppa_k1c/Asmblock.v | 2 ++ mppa_k1c/Peephole.v | 4 ++-- mppa_k1c/PostpassSchedulingOracle.ml | 7 +++++- mppa_k1c/PostpassSchedulingproof.v | 14 ++++++++++++ mppa_k1c/TargetPrinter.ml | 43 ++++++++++++++++++++++++++++++++++-- 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v index 9abc1ca1..f2c4a382 100644 --- a/mppa_k1c/Asmblock.v +++ b/mppa_k1c/Asmblock.v @@ -272,6 +272,8 @@ Definition exec_load_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro: Definition exec_store_offset (chunk: memory_chunk) (rs: regset) (m: mem) (s a: ireg) (ofs: offset) := parexec_store_offset chunk rs rs m m s a ofs. +Definition exec_store_q_offset (rs: regset) (m: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := parexec_store_q_offset rs rs m m s a ofs. + Definition exec_store_reg (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_reg chunk rs rs m m s a ro. Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_regxs chunk rs rs m m s a ro. diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 2c73bb63..91936ac6 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -34,8 +34,8 @@ Fixpoint optimize_body (insns : list basic) : list basic := match t0 with | h1 :: t1 => match h0, h1 with - | (PStoreRRO Psd_a rs0 ra0 (Ofsimm ofs0)), - (PStoreRRO Psd_a rs1 ra1 (Ofsimm ofs1)) => + | (PStoreRRO Psd_a rs0 ra0 ofs0), + (PStoreRRO Psd_a rs1 ra1 ofs1) => match gpreg_q_search rs0 rs1 with | Some rs0rs1 => let zofs0 := Ptrofs.signed ofs0 in diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index 033cf943..c944774a 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -222,7 +222,12 @@ let load_rec i = match i with { inst = load_str i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false} let store_rec i = match i with - | PStoreRRO (i, rs1, rs2, imm) -> { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm)) + | PStoreRRO (i, rs1, rs2, imm) -> + { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm)) + ; is_control = false} + | PStoreQRRO (rs, ra, imm) -> + let (rs0, rs1) = gpreg_q_expand rs in + { inst = "Psq"; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false} | PStoreRRR (i, rs1, rs2, rs3) | PStoreRRRXS (i, rs1, rs2, rs3) -> { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false} diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v index 47248e3e..da64c41d 100644 --- a/mppa_k1c/PostpassSchedulingproof.v +++ b/mppa_k1c/PostpassSchedulingproof.v @@ -140,6 +140,19 @@ Proof. - discriminate. Qed. +Lemma exec_store_q_offset_pc_var: + forall rs m rd ra ofs rs' m' v, + exec_store_q_offset rs m rd ra ofs = Next rs' m' -> + exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'. +Proof. + intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate. + simpl in *. + destruct (gpreg_q_expand _) as [s0 s1]. + destruct (Mem.storev _ _ _); try discriminate. + destruct (Mem.storev _ _ _); try discriminate. + inv H. apply next_eq; auto. +Qed. + Lemma exec_store_reg_pc_var: forall t rs m rd ra ro rs' m' v, exec_store_reg t rs m rd ra ro = Next rs' m' -> @@ -182,6 +195,7 @@ Proof. + exploreInst; apply exec_store_offset_pc_var; auto. + exploreInst; apply exec_store_reg_pc_var; auto. + exploreInst; apply exec_store_regxs_pc_var; auto. + + apply exec_store_q_offset_pc_var; auto. - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate). destruct (Mem.storev _ _ _ _); try discriminate. inv H. apply next_eq; auto. apply functional_extensionality. intros. diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 156b16d0..7eea7d15 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -41,6 +41,7 @@ module Target (*: TARGET*) = let print_label oc lbl = label oc (transl_label lbl) let int_reg_name = let open Asmvliw in function + | GPR0 -> "$r0" | GPR1 -> "$r1" | GPR2 -> "$r2" | GPR3 -> "$r3" | GPR4 -> "$r4" | GPR5 -> "$r5" | GPR6 -> "$r6" | GPR7 -> "$r7" | GPR8 -> "$r8" | GPR9 -> "$r9" | GPR10 -> "$r10" | GPR11 -> "$r11" @@ -60,7 +61,43 @@ module Target (*: TARGET*) = let ireg oc r = output_string oc (int_reg_name r) - let ireg = ireg + let int_gpreg_q_name = + let open Asmvliw in + function + | R0R1 -> "$r0r1" + | R2R3 -> "$r2r3" + | R4R5 -> "$r4r5" + | R6R7 -> "$r6r7" + | R8R9 -> "$r8r9" + | R10R11 -> "$r10r11" + | R12R13 -> "$r12r13" + | R14R15 -> "$r14r15" + | R16R17 -> "$r16r17" + | R18R19 -> "$r18r19" + | R20R21 -> "$r20r21" + | R22R23 -> "$r22r23" + | R24R25 -> "$r24r25" + | R26R27 -> "$r26r27" + | R28R29 -> "$r28r29" + | R30R31 -> "$r30r31" + | R32R33 -> "$r32r33" + | R34R35 -> "$r34r35" + | R36R37 -> "$r36r37" + | R38R39 -> "$r38r39" + | R40R41 -> "$r40r41" + | R42R43 -> "$r42r43" + | R44R45 -> "$r44r45" + | R46R47 -> "$r46r47" + | R48R49 -> "$r48r49" + | R50R51 -> "$r50r51" + | R52R53 -> "$r52r53" + | R54R55 -> "$r54r55" + | R56R57 -> "$r56r57" + | R58R59 -> "$r58r59" + | R60R61 -> "$r60r61" + | R62R63 -> "$r62r63" + + let gpreg_q oc r = output_string oc (int_gpreg_q_name r) let preg oc = let open Asmvliw in function | IR r -> ireg oc r @@ -364,7 +401,9 @@ module Target (*: TARGET*) = fprintf oc " sw%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd | Psd(rd, ra, adr) | Psd_a(rd, ra, adr) | Pfsd(rd, ra, adr) -> assert Archi.ptr64; fprintf oc " sd%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd - + | Psq(rd, ra, adr) -> + fprintf oc " sq%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_q rd + (* Arith R instructions *) (* Arith RR instructions *) -- cgit From a289d73e791be5a760c8a9b2f3de2064f001a770 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 11:17:57 +0200 Subject: use sq to save pairs of registers --- mppa_k1c/Asmblockdeps.v | 19 ++++++++++++++++--- mppa_k1c/Asmvliw.v | 5 ++--- mppa_k1c/Peephole.v | 2 +- mppa_k1c/PostpassSchedulingOracle.ml | 10 +++++++--- mppa_k1c/TargetPrinter.ml | 2 +- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index b2fa79d1..52af8cdf 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -387,11 +387,23 @@ Qed. Hint Resolve load_op_eq_correct: wlp. Opaque load_op_eq_correct. +Definition offset_eq (ofs1 ofs2 : offset): ?? bool := + RET (Ptrofs.eq ofs1 ofs2). + +Lemma offset_eq_correct ofs1 ofs2: + WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2. +Proof. + wlp_simplify. + pose (Ptrofs.eq_spec ofs1 ofs2). + rewrite H in *. + trivial. +Qed. +Hint Resolve offset_eq_correct: wlp. Definition store_op_eq (o1 o2: store_op): ?? bool := match o1 with | OStoreRRO n1 ofs1 => - match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (phys_eq ofs1 ofs2) | _ => RET false end + match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end | OStoreRRR n1 => match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end | OStoreRRRXS n1 => @@ -402,7 +414,8 @@ Lemma store_op_eq_correct o1 o2: WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2. Proof. destruct o1, o2; wlp_simplify; try discriminate. - - congruence. + - f_equal. pose (Ptrofs.eq_spec ofs ofs0). + rewrite H in *. trivial. - congruence. - congruence. Qed. @@ -642,7 +655,7 @@ Definition trans_basic (b: basic) : inst := | PStoreQRRO qs a ofs => let (s0, s1) := gpreg_q_expand qs in [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil)); - (pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] + (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))] | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil); (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))] | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil)); diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index 248b8660..6ebc8340 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -1189,11 +1189,10 @@ Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) ( let (s0, s1) := gpreg_q_expand s in match eval_offset ofs with | OK eofs => - let base := Val.offset_ptr (rsr a) eofs in - match Mem.storev Many64 mr base (rsr s0) with + match Mem.storev Many64 mr (Val.offset_ptr (rsr a) eofs) (rsr s0) with | None => Stuck | Some m1 => - match Mem.storev Many64 m1 base (rsr s1) with + match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add eofs (Ptrofs.repr 8))) (rsr s1) with | None => Stuck | Some m2 => Next rsw m2 end diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 91936ac6..3e0d9ae9 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -42,7 +42,7 @@ Fixpoint optimize_body (insns : list basic) : list basic := let zofs1 := Ptrofs.signed ofs1 in if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) then let h0' := print_found_store basic zofs0 h0 in - h0' :: (optimize_body t0) + (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (optimize_body t1) else h0 :: (optimize_body t0) | None => h0 :: (optimize_body t0) end diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index c944774a..f88acb44 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -446,7 +446,7 @@ type real_instruction = | Make | Nop | Extfz | Extfs | Insf (* LSU *) | Lbs | Lbz | Lhs | Lhz | Lws | Ld - | Sb | Sh | Sw | Sd + | Sb | Sh | Sw | Sd | Sq (* BCU *) | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set (* FPU *) @@ -523,6 +523,7 @@ let ab_inst_to_real = function | "Psh" -> Sh | "Psw" | "Psw_a" | "Pfss" -> Sw | "Psd" | "Psd_a" | "Pfsd" -> Sd + | "Psq" -> Sq | "Pcb" | "Pcbu" -> Cb | "Pcall" | "Pdiv" | "Pdivu" -> Call @@ -543,6 +544,9 @@ let ab_inst_to_real = function | "Pfsbfw" -> Fsbfw | "Pfmuld" -> Fmuld | "Pfmulw" -> Fmulw + + | "nop" -> Nop + | s -> failwith @@ sprintf "ab_inst_to_real: unrecognized instruction: %s" s exception InvalidEncoding @@ -595,7 +599,7 @@ let rec_to_usage r = (match encoding with None | Some U6 | Some S10 -> lsu_data | Some U27L5 | Some U27L10 -> lsu_data_x | Some E27U27L10 -> lsu_data_y) - | Sb | Sh | Sw | Sd -> + | Sb | Sh | Sw | Sd | Sq -> (match encoding with None | Some U6 | Some S10 -> lsu_acc | Some U27L5 | Some U27L10 -> lsu_acc_x | Some E27U27L10 -> lsu_acc_y) @@ -617,7 +621,7 @@ let real_inst_to_latency = function | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4 | Mulw | Muld | Maddw | Maddd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *) | Lbs | Lbz | Lhs | Lhz | Lws | Ld -> 3 - | Sb | Sh | Sw | Sd -> 1 (* See k1c-Optimization.pdf page 19 *) + | Sb | Sh | Sw | Sd | Sq -> 1 (* See k1c-Optimization.pdf page 19 *) | Get -> 1 | Set -> 4 (* According to the manual should be 3, but I measured 4 *) | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *) diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 7eea7d15..9d42169a 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -287,7 +287,7 @@ module Target (*: TARGET*) = | _ -> assert false end - | Pnop -> fprintf oc " nop\n" + | Pnop -> (* FIXME fprintf oc " nop\n" *) () | Psemi -> fprintf oc ";;\n" | Pclzll (rd, rs) -> fprintf oc " clzd %a = %a\n" ireg rd ireg rs -- cgit From cba53c98b999eea7984e4ffd24a9449abea3e0e2 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 3 May 2019 11:29:40 +0200 Subject: -fcoalesce-mem --- driver/Clflags.ml | 1 + driver/Compopts.v | 3 +++ driver/Driver.ml | 1 + extraction/extraction.v | 2 ++ mppa_k1c/Peephole.v | 19 ++++++++++++------- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 46f19dcf..b1afab6f 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -70,3 +70,4 @@ let use_standard_headers = ref Configuration.has_standard_headers let option_fglobaladdrtmp = ref false let option_fglobaladdroffset = ref false let option_fxsaddr = ref true +let option_coalesce_mem = ref true diff --git a/driver/Compopts.v b/driver/Compopts.v index 3bb7a474..f7de596c 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -51,6 +51,9 @@ Parameter optim_fglobaladdroffset: unit -> bool. (** FIXME TEMPORARY Flag -fxsaddr. Use .xs addressing mode (default true) *) Parameter optim_fxsaddr: unit -> bool. +(** FIXME TEMPORARY Flag -fcoaelesce-mem. Fuse (default true) *) +Parameter optim_coalesce_mem: unit -> bool. + (** Flag -fthumb. For the ARM back-end. *) Parameter thumb: unit -> bool. diff --git a/driver/Driver.ml b/driver/Driver.ml index 35b5db86..804fc3c9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -375,6 +375,7 @@ let cmdline_actions = @ f_opt "globaladdrtmp" option_fglobaladdrtmp @ f_opt "globaladdroffset" option_fglobaladdroffset @ f_opt "xsaddr" option_fxsaddr + @ f_opt "coalesce-mem" option_coalesce_mem (* Code generation options *) @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) diff --git a/extraction/extraction.v b/extraction/extraction.v index 0f336916..979e1d49 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -123,6 +123,8 @@ Extract Constant Compopts.optim_fglobaladdroffset => "fun _ -> !Clflags.option_fglobaladdroffset". Extract Constant Compopts.optim_fxsaddr => "fun _ -> !Clflags.option_fxsaddr". +Extract Constant Compopts.optim_coalesce_mem => + "fun _ -> !Clflags.option_coalesce_mem". (* Compiler *) Extract Constant Compiler.print_Clight => "PrintClight.print_if". diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v index 3e0d9ae9..6e06e7ea 100644 --- a/mppa_k1c/Peephole.v +++ b/mppa_k1c/Peephole.v @@ -2,6 +2,7 @@ Require Import Coqlib. Require Import Asmvliw. Require Import Values. Require Import Integers. +Require Compopts. Definition gpreg_q_list : list gpreg_q := R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9 @@ -25,9 +26,9 @@ Fixpoint gpreg_q_search_rec r0 r1 l := Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q := gpreg_q_search_rec r0 r1 gpreg_q_list. -Parameter print_found_store : forall A : Type, Z -> A -> A. +Parameter print_found_store: forall A, Z -> A -> A. -Fixpoint optimize_body (insns : list basic) : list basic := +Fixpoint coalesce_mem (insns : list basic) : list basic := match insns with | nil => nil | h0 :: t0 => @@ -41,17 +42,21 @@ Fixpoint optimize_body (insns : list basic) : list basic := let zofs0 := Ptrofs.signed ofs0 in let zofs1 := Ptrofs.signed ofs1 in if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1) - then let h0' := print_found_store basic zofs0 h0 in - (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (optimize_body t1) - else h0 :: (optimize_body t0) - | None => h0 :: (optimize_body t0) + then (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1) + else h0 :: (coalesce_mem t0) + | None => h0 :: (coalesce_mem t0) end - | _, _ => h0 :: (optimize_body t0) + | _, _ => h0 :: (coalesce_mem t0) end | nil => h0 :: nil end end. +Definition optimize_body (insns : list basic) := + if Compopts.optim_coalesce_mem tt + then coalesce_mem insns + else insns. + Program Definition optimize_bblock (bb : bblock) := let optimized := optimize_body (body bb) in let wf_ok := wf_bblockb optimized (exit bb) in -- cgit