aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mppa_k1c/Asm.v5
-rw-r--r--mppa_k1c/Asmblockdeps.v11
-rw-r--r--mppa_k1c/Asmvliw.v78
-rw-r--r--mppa_k1c/Peephole.v65
-rw-r--r--mppa_k1c/PostpassScheduling.v6
-rw-r--r--mppa_k1c/extractionMachdep.v3
-rw-r--r--mppa_k1c/lib/Asmblockgenproof0.v7
-rw-r--r--test/monniaux/regalloc/bigspill.c21
8 files changed, 184 insertions, 12 deletions
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index d73d00c7..8b1c9a81 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -122,7 +122,9 @@ Inductive instruction : Type :=
| Psd (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int64 *)
| Psd_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any64 *)
| Pfss (rs: freg) (ra: ireg) (ofs: addressing) (**r store float *)
- | Pfsd (rd: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
+ | Pfsd (rs: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
+
+ | Psq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r store a pair of 64-bit registers *)
(** Arith RR *)
| Pmv (rd rs: ireg) (**r register move *)
@@ -484,6 +486,7 @@ Definition basic_to_instruction (b: basic) :=
| PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro)
| PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro)
+ | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs)
end.
Section RELSEM.
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 7cfcbff1..2b6a8450 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -639,6 +639,10 @@ Definition trans_basic (b: basic) : inst :=
| PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))]
| PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
| PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreQRRO qs a ofs =>
+ let (s0, s1) := gpreg_q_expand qs in
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))]
| Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil);
(pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))]
| Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil));
@@ -885,7 +889,10 @@ Proof.
eexists; split; try split; Simpl;
intros rr; destruct rr; Simpl.
-(* Allocframe *)
+ + unfold parexec_store_q_offset.
+ destruct (gpreg_q_expand rs) as [s0 s1].
+ simpl.
+ (* Allocframe *)
- destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
* eexists; repeat split.
{ Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl.
@@ -1530,5 +1537,3 @@ Definition bblock_equivb: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bbloc
Definition bblock_equiv_eq := pure_bblock_eq_test_correct true.
End SECT_BBLOCK_EQUIV.
-
-
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index c25d4235..248b8660 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -66,12 +66,63 @@ Inductive gpreg: Type :=
Definition ireg := gpreg.
Definition freg := gpreg.
+(** Decidable equality on general-purpose registers [gpreg].
+    Ended with [Defined] so that the decision procedure is transparent. *)
+Lemma gpreg_eq: forall (x y: gpreg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
Proof. decide equality. Defined.
Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
Proof. decide equality. Defined.
+(** Pairs of general-purpose registers. Each constructor [RnRm] names the
+    two registers it stands for; [gpreg_q_expand] maps a constructor to the
+    corresponding pair of [gpreg]. *)
+Inductive gpreg_q : Type :=
+| R0R1 | R2R3 | R4R5 | R6R7 | R8R9
+| R10R11 | R12R13 | R14R15 | R16R17 | R18R19
+| R20R21 | R22R23 | R24R25 | R26R27 | R28R29
+| R30R31 | R32R33 | R34R35 | R36R37 | R38R39
+| R40R41 | R42R43 | R44R45 | R46R47 | R48R49
+| R50R51 | R52R53 | R54R55 | R56R57 | R58R59
+| R60R61 | R62R63.
+
+(** Decidable equality on register pairs [gpreg_q].
+    Ended with [Defined] so that the decision procedure is transparent. *)
+Lemma gpreg_q_eq : forall (x y : gpreg_q), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+(** [gpreg_q_expand x] returns the two general-purpose registers making up
+    the register pair [x], as [(first, second)]: [RnRm] expands to
+    [(GPRn, GPRm)]. *)
+Definition gpreg_q_expand (x : gpreg_q) : gpreg * gpreg :=
+ match x with
+ | R0R1 => (GPR0, GPR1)
+ | R2R3 => (GPR2, GPR3)
+ | R4R5 => (GPR4, GPR5)
+ | R6R7 => (GPR6, GPR7)
+ | R8R9 => (GPR8, GPR9)
+ | R10R11 => (GPR10, GPR11)
+ | R12R13 => (GPR12, GPR13)
+ | R14R15 => (GPR14, GPR15)
+ | R16R17 => (GPR16, GPR17)
+ | R18R19 => (GPR18, GPR19)
+ | R20R21 => (GPR20, GPR21)
+ | R22R23 => (GPR22, GPR23)
+ | R24R25 => (GPR24, GPR25)
+ | R26R27 => (GPR26, GPR27)
+ | R28R29 => (GPR28, GPR29)
+ | R30R31 => (GPR30, GPR31)
+ | R32R33 => (GPR32, GPR33)
+ | R34R35 => (GPR34, GPR35)
+ | R36R37 => (GPR36, GPR37)
+ | R38R39 => (GPR38, GPR39)
+ | R40R41 => (GPR40, GPR41)
+ | R42R43 => (GPR42, GPR43)
+ | R44R45 => (GPR44, GPR45)
+ | R46R47 => (GPR46, GPR47)
+ | R48R49 => (GPR48, GPR49)
+ | R50R51 => (GPR50, GPR51)
+ | R52R53 => (GPR52, GPR53)
+ | R54R55 => (GPR54, GPR55)
+ | R56R57 => (GPR56, GPR57)
+ | R58R59 => (GPR58, GPR59)
+ | R60R61 => (GPR60, GPR61)
+ | R62R63 => (GPR62, GPR63)
+ end.
+
(** We model the following registers of the RISC-V architecture. *)
(** basic register *)
@@ -183,9 +234,6 @@ Definition label := positive.
*)
Inductive ex_instruction : Type :=
(* Pseudo-instructions *)
-(*| Ploadsymbol_high (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the high part of the address of a symbol *)
- | Pbtbl (r: ireg) (tbl: list label) (**r N-way branch through a jump table *) *)
-
| Pbuiltin: external_function -> list (builtin_arg preg)
-> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *)
.
@@ -288,6 +336,7 @@ Inductive st_instruction : Type :=
| PStoreRRO (i: store_name) (rs: ireg) (ra: ireg) (ofs: offset)
| PStoreRRR (i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
| PStoreRRRXS(i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
+ | PStoreQRRO (rs: gpreg_q) (ra: ireg) (ofs: offset)
.
(** Arithmetic instructions **)
@@ -302,7 +351,6 @@ Inductive arith_name_rr : Type :=
| Pcvtl2w (**r Convert Long to Word *)
| Psxwd (**r Sign Extend Word to Double Word *)
| Pzxwd (**r Zero Extend Word to Double Word *)
-(* | Pextfs (stop : int) (start : int) (**r extract bit field, signed *) *)
| Pextfz (stop : Z) (start : Z) (**r extract bit field, unsigned *)
| Pextfs (stop : Z) (start : Z) (**r extract bit field, signed *)
| Pextfzl (stop : Z) (start : Z) (**r extract bit field, unsigned *)
@@ -640,7 +688,7 @@ Variable ge: genv.
from the current state (a register set + a memory state) to either [Next rs' m']
where [rs'] and [m'] are the updated register set and memory state after execution
of the instruction at [rs#PC], or [Stuck] if the processor is stuck.
-
+
The parallel semantics of each instructions handles two states in input:
- the actual input state of the bundle which is only read
- and the other on which every "write" is performed:
@@ -1137,6 +1185,23 @@ Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: m
| Some m' => Next rsw m'
end.
+(** Parallel semantics of the quad store [PStoreQRRO s a ofs].
+
+    The register pair [s] is expanded into its two registers [(s0, s1)],
+    which are stored as two consecutive [Many64] values:
+    - [s0] at address [rsr a + ofs];
+    - [s1] at address [rsr a + ofs + 8].
+
+    Registers and the initial memory are read from [rsr] and [mr]; the
+    resulting state is [Next rsw m2], where [m2] is the memory after both
+    stores. [mw] is not used by this definition.
+
+    Returns [Stuck] if [eval_offset ofs] fails or if either store fails.
+
+    The second store must target [base + 8]: storing both halves at [base]
+    would let the second value overwrite the first one.
+
+    NOTE(review): [Asmblockdeps.trans_basic] models both halves of
+    [PStoreQRRO] with the same offset; it has to be kept consistent with
+    the addresses used here. *)
+Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) :=
+  let (s0, s1) := gpreg_q_expand s in
+  match eval_offset ofs with
+  | OK eofs =>
+    let base := Val.offset_ptr (rsr a) eofs in
+    match Mem.storev Many64 mr base (rsr s0) with
+    | None => Stuck
+    | Some m1 =>
+      (* second half of the pair goes 8 bytes after the first one *)
+      match Mem.storev Many64 m1 (Val.offset_ptr base (Ptrofs.repr 8)) (rsr s1) with
+      | None => Stuck
+      | Some m2 => Next rsw m2
+      end
+    end
+  | _ => Stuck
+  end.
+
+
Definition load_chunk n :=
match n with
| Plb => Mint8signed
@@ -1176,7 +1241,8 @@ Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
| PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs
| PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro
| PStoreRRRXS n s a ro => parexec_store_regxs (store_chunk n) rsr rsw mr mw s a ro
-
+ | PStoreQRRO s a ofs =>
+ parexec_store_q_offset rsr rsw mr mw s a ofs
| Pallocframe sz pos =>
let (mw, stk) := Mem.alloc mr 0 sz in
let sp := (Vptr stk Ptrofs.zero) in
diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v
new file mode 100644
index 00000000..2c73bb63
--- /dev/null
+++ b/mppa_k1c/Peephole.v
@@ -0,0 +1,65 @@
+Require Import Coqlib.
+Require Import Asmvliw.
+Require Import Values.
+Require Import Integers.
+
+(** List of all the [gpreg_q] register pairs, from [R0R1] to [R62R63].
+    It is the search space used by [gpreg_q_search]. *)
+Definition gpreg_q_list : list gpreg_q :=
+R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9
+:: R10R11 :: R12R13 :: R14R15 :: R16R17 :: R18R19
+:: R20R21 :: R22R23 :: R24R25 :: R26R27 :: R28R29
+:: R30R31 :: R32R33 :: R34R35 :: R36R37 :: R38R39
+:: R40R41 :: R42R43 :: R44R45 :: R46R47 :: R48R49
+:: R50R51 :: R52R53 :: R54R55 :: R56R57 :: R58R59
+:: R60R61 :: R62R63 :: nil.
+
+(** [gpreg_q_search_rec r0 r1 l] scans the list [l] of register pairs from
+    left to right and returns [Some q] for the first pair [q] such that
+    [gpreg_q_expand q = (r0, r1)], or [None] when no pair of [l] matches. *)
+Fixpoint gpreg_q_search_rec r0 r1 l :=
+  match l with
+  | nil => None
+  | q :: others =>
+    let (fst_reg, snd_reg) := gpreg_q_expand q in
+    if (gpreg_eq r0 fst_reg) && (gpreg_eq r1 snd_reg)
+    then Some q
+    else gpreg_q_search_rec r0 r1 others
+  end.
+
+(** [gpreg_q_search r0 r1] returns the register pair of [gpreg_q_list] that
+    expands to [(r0, r1)], if any; [None] otherwise (e.g. when [r0] and [r1]
+    do not form one of the listed pairs, or are given in the wrong order). *)
+Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q :=
+ gpreg_q_search_rec r0 r1 gpreg_q_list.
+
+(** Debugging hook, left abstract in Coq. Its OCaml realization is given in
+    [extractionMachdep.v]: it prints the [Z] argument (an offset) and returns
+    its last argument. *)
+Parameter print_found_store : forall A : Type, Z -> A -> A.
+
+(** [optimize_body insns] walks the basic instructions of a block body and
+    looks at every pair of adjacent instructions. When both are
+    [PStoreRRO Psd_a] stores with immediate offsets such that
+    - the two source registers form a register pair (see [gpreg_q_search]),
+    - the second offset is the first one plus 8, and
+    - both use the same base register,
+    the first instruction is passed through [print_found_store] (with its
+    offset) before being emitted. In every case the instructions themselves
+    are emitted in their original order: no instruction is replaced here. *)
+Fixpoint optimize_body (insns : list basic) : list basic :=
+  match insns with
+  | nil => nil
+  | i0 :: tl =>
+    match tl with
+    | nil => i0 :: nil
+    | i1 :: _ =>
+      match i0, i1 with
+      | (PStoreRRO Psd_a rs0 ra0 (Ofsimm ofs0)),
+        (PStoreRRO Psd_a rs1 ra1 (Ofsimm ofs1)) =>
+        match gpreg_q_search rs0 rs1 with
+        | None => i0 :: (optimize_body tl)
+        | Some _ =>
+          let zofs0 := Ptrofs.signed ofs0 in
+          let zofs1 := Ptrofs.signed ofs1 in
+          if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1)
+          then let i0' := print_found_store basic zofs0 i0 in
+               i0' :: (optimize_body tl)
+          else i0 :: (optimize_body tl)
+        end
+      | _, _ => i0 :: (optimize_body tl)
+      end
+    end
+  end.
+
+(** [optimize_bblock bb] applies [optimize_body] to the body of [bb].
+    The header and the exit of [bb] are kept. The optimized body is used
+    only if [wf_bblockb] accepts it together with the exit of [bb];
+    otherwise the original body is kept. *)
+Program Definition optimize_bblock (bb : bblock) :=
+ let optimized := optimize_body (body bb) in
+ let wf_ok := wf_bblockb optimized (exit bb) in
+ {| header := header bb;
+ body := if wf_ok then optimized else (body bb);
+ exit := exit bb |}.
+(* Remaining obligation: the well-formedness proof of the resulting block. *)
+Next Obligation.
+ destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf.
+ (* check succeeded: the optimized body was tested to be well-formed *)
+ - rewrite Rwf. simpl. trivial.
+ (* check failed: the body is the one of [bb], reuse its own proof *)
+ - exact (correct bb).
+Qed.
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index ab4bc9c9..ecd40f5c 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -13,6 +13,7 @@
Require Import Coqlib Errors AST Integers.
Require Import Asmblock Axioms Memory Globalenvs.
Require Import Asmblockdeps Asmblockgenproof0.
+Require Peephole.
Local Open Scope error_monad_scope.
@@ -347,8 +348,9 @@ Fixpoint verify_par (lbb: list bblock) :=
end.
Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
- let bb' := no_header bb in
- let lbb := do_schedule bb' in
+ let bb' := no_header bb in
+ let bb'' := Peephole.optimize_bblock bb' in
+ let lbb := do_schedule bb'' in
do tbb <- concat_all lbb;
do sizecheck <- verify_size bb lbb;
do schedcheck <- verify_schedule bb' tbb;
diff --git a/mppa_k1c/extractionMachdep.v b/mppa_k1c/extractionMachdep.v
index e70f51de..fdecd2a3 100644
--- a/mppa_k1c/extractionMachdep.v
+++ b/mppa_k1c/extractionMachdep.v
@@ -22,6 +22,9 @@ Require Archi Asm.
Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ".
Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *)
+(* OCaml realization of the debugging hook [Peephole.print_found_store]:
+   prints the offset on standard output and returns [x] unchanged. *)
+Extract Constant Peephole.print_found_store =>
+"fun offset x -> Printf.printf ""found offset = %ld\n"" (Camlcoq.camlint_of_coqint offset); x".
+
(* Asm *)
(*
Extract Constant Asm.low_half => "fun _ _ _ -> assert false".
diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v
index 130f0b12..0465618c 100644
--- a/mppa_k1c/lib/Asmblockgenproof0.v
+++ b/mppa_k1c/lib/Asmblockgenproof0.v
@@ -950,6 +950,13 @@ Proof.
1-10: try (unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]).
1-10: try (unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
1-10: try (unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
+ - (* PStoreQRRO *)
+ unfold parexec_store_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ destruct (eval_offset _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity.
- destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
- destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
diff --git a/test/monniaux/regalloc/bigspill.c b/test/monniaux/regalloc/bigspill.c
new file mode 100644
index 00000000..6191e018
--- /dev/null
+++ b/test/monniaux/regalloc/bigspill.c
@@ -0,0 +1,21 @@
+/* Defined in another translation unit. */
+extern void callee(void);
+
+/* Reads t[0..7] into eight locals, calls callee(), then writes the eight
+ * values back to t[0..7]. All eight locals are live across the call.
+ * NOTE(review): presumably meant to make the register allocator spill
+ * several values around the call -- confirm against the test harness. */
+void bigspill(int *t) {
+ int t0 = t[0];
+ int t1 = t[1];
+ int t2 = t[2];
+ int t3 = t[3];
+ int t4 = t[4];
+ int t5 = t[5];
+ int t6 = t[6];
+ int t7 = t[7];
+ callee();
+ t[0] = t0;
+ t[1] = t1;
+ t[2] = t2;
+ t[3] = t3;
+ t[4] = t4;
+ t[5] = t5;
+ t[6] = t6;
+ t[7] = t7;
+}