aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-05-03 11:34:22 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-05-03 11:34:22 +0200
commitad8f16390a68b0cf8e4da39d2ae5d1ad30026803 (patch)
tree5f0e12eccd40bcc688408cb5d07dc8d850832384
parentfb77ce264f957a1ee3f87e537b55afbb10785ecf (diff)
parentcba53c98b999eea7984e4ffd24a9449abea3e0e2 (diff)
downloadcompcert-kvx-ad8f16390a68b0cf8e4da39d2ae5d1ad30026803.tar.gz
compcert-kvx-ad8f16390a68b0cf8e4da39d2ae5d1ad30026803.zip
Merge remote-tracking branch 'origin/mppa-peephole' into mppa-work
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Compopts.v3
-rw-r--r--driver/Driver.ml1
-rw-r--r--extraction/extraction.v2
-rw-r--r--mppa_k1c/Asm.v5
-rw-r--r--mppa_k1c/Asmblock.v2
-rw-r--r--mppa_k1c/Asmblockdeps.v55
-rw-r--r--mppa_k1c/Asmvliw.v77
-rw-r--r--mppa_k1c/Peephole.v70
-rw-r--r--mppa_k1c/PostpassScheduling.v6
-rw-r--r--mppa_k1c/PostpassSchedulingOracle.ml17
-rw-r--r--mppa_k1c/PostpassSchedulingproof.v14
-rw-r--r--mppa_k1c/TargetPrinter.ml45
-rw-r--r--mppa_k1c/extractionMachdep.v3
-rw-r--r--mppa_k1c/lib/Asmblockgenproof0.v7
-rw-r--r--test/monniaux/regalloc/bigspill.c21
16 files changed, 308 insertions, 21 deletions
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 46f19dcf..b1afab6f 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -70,3 +70,4 @@ let use_standard_headers = ref Configuration.has_standard_headers
let option_fglobaladdrtmp = ref false
let option_fglobaladdroffset = ref false
let option_fxsaddr = ref true
+let option_coalesce_mem = ref true
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 3bb7a474..f7de596c 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -51,6 +51,9 @@ Parameter optim_fglobaladdroffset: unit -> bool.
(** FIXME TEMPORARY Flag -fxsaddr. Use .xs addressing mode (default true) *)
Parameter optim_fxsaddr: unit -> bool.
+(** FIXME TEMPORARY Flag -fcoaelesce-mem. Fuse (default true) *)
+Parameter optim_coalesce_mem: unit -> bool.
+
(** Flag -fthumb. For the ARM back-end. *)
Parameter thumb: unit -> bool.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 3b0830ad..cfafcaa3 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -375,6 +375,7 @@ let cmdline_actions =
@ f_opt "globaladdrtmp" option_fglobaladdrtmp
@ f_opt "globaladdroffset" option_fglobaladdroffset
@ f_opt "xsaddr" option_fxsaddr
+ @ f_opt "coalesce-mem" option_coalesce_mem
(* Code generation options *)
@ f_opt "fpu" option_ffpu
@ f_opt "sse" option_ffpu (* backward compatibility *)
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 0f336916..979e1d49 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -123,6 +123,8 @@ Extract Constant Compopts.optim_fglobaladdroffset =>
"fun _ -> !Clflags.option_fglobaladdroffset".
Extract Constant Compopts.optim_fxsaddr =>
"fun _ -> !Clflags.option_fxsaddr".
+Extract Constant Compopts.optim_coalesce_mem =>
+ "fun _ -> !Clflags.option_coalesce_mem".
(* Compiler *)
Extract Constant Compiler.print_Clight => "PrintClight.print_if".
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index d73d00c7..8b1c9a81 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -122,7 +122,9 @@ Inductive instruction : Type :=
| Psd (rs: ireg) (ra: ireg) (ofs: addressing) (**r store int64 *)
| Psd_a (rs: ireg) (ra: ireg) (ofs: addressing) (**r store any64 *)
| Pfss (rs: freg) (ra: ireg) (ofs: addressing) (**r store float *)
- | Pfsd (rd: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
+ | Pfsd (rs: freg) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
+
+ | Psq (rs: gpreg_q) (ra: ireg) (ofs: addressing) (**r store 64-bit float *)
(** Arith RR *)
| Pmv (rd rs: ireg) (**r register move *)
@@ -484,6 +486,7 @@ Definition basic_to_instruction (b: basic) :=
| PStoreRRRXS Asmvliw.Pfss rd ra ro => Pfss rd ra (ARegXS ro)
| PStoreRRRXS Asmvliw.Pfsd rd ra ro => Pfsd rd ra (ARegXS ro)
+ | PStoreQRRO qrs ra ofs => Psq qrs ra (AOff ofs)
end.
Section RELSEM.
diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v
index 9abc1ca1..f2c4a382 100644
--- a/mppa_k1c/Asmblock.v
+++ b/mppa_k1c/Asmblock.v
@@ -272,6 +272,8 @@ Definition exec_load_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (d a ro:
Definition exec_store_offset (chunk: memory_chunk) (rs: regset) (m: mem) (s a: ireg) (ofs: offset) := parexec_store_offset chunk rs rs m m s a ofs.
+Definition exec_store_q_offset (rs: regset) (m: mem) (s : gpreg_q) (a: ireg) (ofs: offset) := parexec_store_q_offset rs rs m m s a ofs.
+
Definition exec_store_reg (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_reg chunk rs rs m m s a ro.
Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro: ireg) := parexec_store_regxs chunk rs rs m m s a ro.
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 7cfcbff1..52af8cdf 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -387,11 +387,23 @@ Qed.
Hint Resolve load_op_eq_correct: wlp.
Opaque load_op_eq_correct.
+Definition offset_eq (ofs1 ofs2 : offset): ?? bool :=
+ RET (Ptrofs.eq ofs1 ofs2).
+
+Lemma offset_eq_correct ofs1 ofs2:
+ WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2.
+Proof.
+ wlp_simplify.
+ pose (Ptrofs.eq_spec ofs1 ofs2).
+ rewrite H in *.
+ trivial.
+Qed.
+Hint Resolve offset_eq_correct: wlp.
Definition store_op_eq (o1 o2: store_op): ?? bool :=
match o1 with
| OStoreRRO n1 ofs1 =>
- match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (phys_eq ofs1 ofs2) | _ => RET false end
+ match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end
| OStoreRRR n1 =>
match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end
| OStoreRRRXS n1 =>
@@ -402,7 +414,8 @@ Lemma store_op_eq_correct o1 o2:
WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
Proof.
destruct o1, o2; wlp_simplify; try discriminate.
- - congruence.
+ - f_equal. pose (Ptrofs.eq_spec ofs ofs0).
+ rewrite H in *. trivial.
- congruence.
- congruence.
Qed.
@@ -639,6 +652,10 @@ Definition trans_basic (b: basic) : inst :=
| PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))]
| PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
| PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreQRRO qs a ofs =>
+ let (s0, s1) := gpreg_q_expand qs in
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))]
| Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil);
(pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))]
| Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil));
@@ -885,7 +902,37 @@ Proof.
eexists; split; try split; Simpl;
intros rr; destruct rr; Simpl.
-(* Allocframe *)
+ + unfold parexec_store_q_offset.
+ simpl.
+ destruct (gpreg_q_expand rs) as [s0 s1].
+ simpl.
+ unfold exec_store_deps_offset.
+ repeat rewrite H0.
+ repeat rewrite H.
+ destruct Ge.
+ destruct (Mem.storev _ _ _ (rsr s0)) as [mem0 | ] eqn:MEML0; simpl.
+ ++ rewrite MEML0.
+ destruct (Mem.storev _ _ _ (rsr s1)) as [mem1 | ] eqn:MEML1; simpl.
+ * rewrite (assign_diff sr _ (# s1)) by apply ppos_pmem_discr.
+ rewrite (assign_diff sr _ (# ra)) by apply ppos_pmem_discr.
+ repeat rewrite H0.
+ rewrite MEML1.
+ eexists; split.
+ reflexivity.
+ rewrite (assign_eq _ pmem).
+ split; trivial.
+ intro r.
+ rewrite (assign_diff _ _ (# r)) by apply ppos_pmem_discr.
+ rewrite (assign_diff _ _ (# r)) by apply ppos_pmem_discr.
+ congruence.
+ * rewrite (assign_diff sr pmem (# s1)) by apply ppos_pmem_discr.
+ rewrite (assign_diff sr pmem (# ra)) by apply ppos_pmem_discr.
+ repeat rewrite H0.
+ rewrite MEML1.
+ reflexivity.
+ ++ rewrite MEML0.
+ reflexivity.
+ (* Allocframe *)
- destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
* eexists; repeat split.
{ Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl.
@@ -1530,5 +1577,3 @@ Definition bblock_equivb: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bbloc
Definition bblock_equiv_eq := pure_bblock_eq_test_correct true.
End SECT_BBLOCK_EQUIV.
-
-
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index c25d4235..6ebc8340 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -66,12 +66,63 @@ Inductive gpreg: Type :=
Definition ireg := gpreg.
Definition freg := gpreg.
+Lemma gpreg_eq: forall (x y: gpreg), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
Proof. decide equality. Defined.
Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}.
Proof. decide equality. Defined.
+Inductive gpreg_q : Type :=
+| R0R1 | R2R3 | R4R5 | R6R7 | R8R9
+| R10R11 | R12R13 | R14R15 | R16R17 | R18R19
+| R20R21 | R22R23 | R24R25 | R26R27 | R28R29
+| R30R31 | R32R33 | R34R35 | R36R37 | R38R39
+| R40R41 | R42R43 | R44R45 | R46R47 | R48R49
+| R50R51 | R52R53 | R54R55 | R56R57 | R58R59
+| R60R61 | R62R63.
+
+Lemma gpreg_q_eq : forall (x y : gpreg_q), {x=y} + {x<>y}.
+Proof. decide equality. Defined.
+
+Definition gpreg_q_expand (x : gpreg_q) : gpreg * gpreg :=
+ match x with
+ | R0R1 => (GPR0, GPR1)
+ | R2R3 => (GPR2, GPR3)
+ | R4R5 => (GPR4, GPR5)
+ | R6R7 => (GPR6, GPR7)
+ | R8R9 => (GPR8, GPR9)
+ | R10R11 => (GPR10, GPR11)
+ | R12R13 => (GPR12, GPR13)
+ | R14R15 => (GPR14, GPR15)
+ | R16R17 => (GPR16, GPR17)
+ | R18R19 => (GPR18, GPR19)
+ | R20R21 => (GPR20, GPR21)
+ | R22R23 => (GPR22, GPR23)
+ | R24R25 => (GPR24, GPR25)
+ | R26R27 => (GPR26, GPR27)
+ | R28R29 => (GPR28, GPR29)
+ | R30R31 => (GPR30, GPR31)
+ | R32R33 => (GPR32, GPR33)
+ | R34R35 => (GPR34, GPR35)
+ | R36R37 => (GPR36, GPR37)
+ | R38R39 => (GPR38, GPR39)
+ | R40R41 => (GPR40, GPR41)
+ | R42R43 => (GPR42, GPR43)
+ | R44R45 => (GPR44, GPR45)
+ | R46R47 => (GPR46, GPR47)
+ | R48R49 => (GPR48, GPR49)
+ | R50R51 => (GPR50, GPR51)
+ | R52R53 => (GPR52, GPR53)
+ | R54R55 => (GPR54, GPR55)
+ | R56R57 => (GPR56, GPR57)
+ | R58R59 => (GPR58, GPR59)
+ | R60R61 => (GPR60, GPR61)
+ | R62R63 => (GPR62, GPR63)
+ end.
+
(** We model the following registers of the RISC-V architecture. *)
(** basic register *)
@@ -183,9 +234,6 @@ Definition label := positive.
*)
Inductive ex_instruction : Type :=
(* Pseudo-instructions *)
-(*| Ploadsymbol_high (rd: ireg) (id: ident) (ofs: ptrofs) (**r load the high part of the address of a symbol *)
- | Pbtbl (r: ireg) (tbl: list label) (**r N-way branch through a jump table *) *)
-
| Pbuiltin: external_function -> list (builtin_arg preg)
-> builtin_res preg -> ex_instruction (**r built-in function (pseudo) *)
.
@@ -288,6 +336,7 @@ Inductive st_instruction : Type :=
| PStoreRRO (i: store_name) (rs: ireg) (ra: ireg) (ofs: offset)
| PStoreRRR (i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
| PStoreRRRXS(i: store_name) (rs: ireg) (ra: ireg) (rofs: ireg)
+ | PStoreQRRO (rs: gpreg_q) (ra: ireg) (ofs: offset)
.
(** Arithmetic instructions **)
@@ -302,7 +351,6 @@ Inductive arith_name_rr : Type :=
| Pcvtl2w (**r Convert Long to Word *)
| Psxwd (**r Sign Extend Word to Double Word *)
| Pzxwd (**r Zero Extend Word to Double Word *)
-(* | Pextfs (stop : int) (start : int) (**r extract bit field, signed *) *)
| Pextfz (stop : Z) (start : Z) (**r extract bit field, unsigned *)
| Pextfs (stop : Z) (start : Z) (**r extract bit field, signed *)
| Pextfzl (stop : Z) (start : Z) (**r extract bit field, unsigned *)
@@ -640,7 +688,7 @@ Variable ge: genv.
from the current state (a register set + a memory state) to either [Next rs' m']
where [rs'] and [m'] are the updated register set and memory state after execution
of the instruction at [rs#PC], or [Stuck] if the processor is stuck.
-
+
The parallel semantics of each instructions handles two states in input:
- the actual input state of the bundle which is only read
- and the other on which every "write" is performed:
@@ -1137,6 +1185,22 @@ Definition parexec_store_regxs (chunk: memory_chunk) (rsr rsw: regset) (mr mw: m
| Some m' => Next rsw m'
end.
+Definition parexec_store_q_offset (rsr rsw: regset) (mr mw: mem) (s : gpreg_q) (a: ireg) (ofs: offset) :=
+ let (s0, s1) := gpreg_q_expand s in
+ match eval_offset ofs with
+ | OK eofs =>
+ match Mem.storev Many64 mr (Val.offset_ptr (rsr a) eofs) (rsr s0) with
+ | None => Stuck
+ | Some m1 =>
+ match Mem.storev Many64 m1 (Val.offset_ptr (rsr a) (Ptrofs.add eofs (Ptrofs.repr 8))) (rsr s1) with
+ | None => Stuck
+ | Some m2 => Next rsw m2
+ end
+ end
+ | _ => Stuck
+ end.
+
+
Definition load_chunk n :=
match n with
| Plb => Mint8signed
@@ -1176,7 +1240,8 @@ Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
| PStoreRRO n s a ofs => parexec_store_offset (store_chunk n) rsr rsw mr mw s a ofs
| PStoreRRR n s a ro => parexec_store_reg (store_chunk n) rsr rsw mr mw s a ro
| PStoreRRRXS n s a ro => parexec_store_regxs (store_chunk n) rsr rsw mr mw s a ro
-
+ | PStoreQRRO s a ofs =>
+ parexec_store_q_offset rsr rsw mr mw s a ofs
| Pallocframe sz pos =>
let (mw, stk) := Mem.alloc mr 0 sz in
let sp := (Vptr stk Ptrofs.zero) in
diff --git a/mppa_k1c/Peephole.v b/mppa_k1c/Peephole.v
new file mode 100644
index 00000000..6e06e7ea
--- /dev/null
+++ b/mppa_k1c/Peephole.v
@@ -0,0 +1,70 @@
+Require Import Coqlib.
+Require Import Asmvliw.
+Require Import Values.
+Require Import Integers.
+Require Compopts.
+
+Definition gpreg_q_list : list gpreg_q :=
+R0R1 :: R2R3 :: R4R5 :: R6R7 :: R8R9
+:: R10R11 :: R12R13 :: R14R15 :: R16R17 :: R18R19
+:: R20R21 :: R22R23 :: R24R25 :: R26R27 :: R28R29
+:: R30R31 :: R32R33 :: R34R35 :: R36R37 :: R38R39
+:: R40R41 :: R42R43 :: R44R45 :: R46R47 :: R48R49
+:: R50R51 :: R52R53 :: R54R55 :: R56R57 :: R58R59
+:: R60R61 :: R62R63 :: nil.
+
+Fixpoint gpreg_q_search_rec r0 r1 l :=
+ match l with
+ | h :: t =>
+ let (s0, s1) := gpreg_q_expand h in
+ if (gpreg_eq r0 s0) && (gpreg_eq r1 s1)
+ then Some h
+ else gpreg_q_search_rec r0 r1 t
+ | nil => None
+ end.
+
+Definition gpreg_q_search (r0 : gpreg) (r1 : gpreg) : option gpreg_q :=
+ gpreg_q_search_rec r0 r1 gpreg_q_list.
+
+Parameter print_found_store: forall A, Z -> A -> A.
+
+Fixpoint coalesce_mem (insns : list basic) : list basic :=
+ match insns with
+ | nil => nil
+ | h0 :: t0 =>
+ match t0 with
+ | h1 :: t1 =>
+ match h0, h1 with
+ | (PStoreRRO Psd_a rs0 ra0 ofs0),
+ (PStoreRRO Psd_a rs1 ra1 ofs1) =>
+ match gpreg_q_search rs0 rs1 with
+ | Some rs0rs1 =>
+ let zofs0 := Ptrofs.signed ofs0 in
+ let zofs1 := Ptrofs.signed ofs1 in
+ if (zofs1 =? zofs0 + 8) && (ireg_eq ra0 ra1)
+ then (PStore (PStoreQRRO rs0rs1 ra0 ofs0)) :: Pnop :: (coalesce_mem t1)
+ else h0 :: (coalesce_mem t0)
+ | None => h0 :: (coalesce_mem t0)
+ end
+ | _, _ => h0 :: (coalesce_mem t0)
+ end
+ | nil => h0 :: nil
+ end
+ end.
+
+Definition optimize_body (insns : list basic) :=
+ if Compopts.optim_coalesce_mem tt
+ then coalesce_mem insns
+ else insns.
+
+Program Definition optimize_bblock (bb : bblock) :=
+ let optimized := optimize_body (body bb) in
+ let wf_ok := wf_bblockb optimized (exit bb) in
+ {| header := header bb;
+ body := if wf_ok then optimized else (body bb);
+ exit := exit bb |}.
+Next Obligation.
+ destruct (wf_bblockb (optimize_body (body bb))) eqn:Rwf.
+ - rewrite Rwf. simpl. trivial.
+ - exact (correct bb).
+Qed.
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index ab4bc9c9..ecd40f5c 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -13,6 +13,7 @@
Require Import Coqlib Errors AST Integers.
Require Import Asmblock Axioms Memory Globalenvs.
Require Import Asmblockdeps Asmblockgenproof0.
+Require Peephole.
Local Open Scope error_monad_scope.
@@ -347,8 +348,9 @@ Fixpoint verify_par (lbb: list bblock) :=
end.
Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
- let bb' := no_header bb in
- let lbb := do_schedule bb' in
+ let bb' := no_header bb in
+ let bb'' := Peephole.optimize_bblock bb' in
+ let lbb := do_schedule bb'' in
do tbb <- concat_all lbb;
do sizecheck <- verify_size bb lbb;
do schedcheck <- verify_schedule bb' tbb;
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index cf1d8e55..a4dc3614 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -222,7 +222,12 @@ let load_rec i = match i with
{ inst = load_str i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false}
let store_rec i = match i with
- | PStoreRRO (i, rs1, rs2, imm) -> { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm))
+ | PStoreRRO (i, rs1, rs2, imm) ->
+ { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm))
+ ; is_control = false}
+ | PStoreQRRO (rs, ra, imm) ->
+ let (rs0, rs1) = gpreg_q_expand rs in
+ { inst = "Psq"; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm))
; is_control = false}
| PStoreRRR (i, rs1, rs2, rs3) | PStoreRRRXS (i, rs1, rs2, rs3) -> { inst = store_str i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; imm = None
; is_control = false}
@@ -441,7 +446,7 @@ type real_instruction =
| Make | Nop | Extfz | Extfs | Insf
(* LSU *)
| Lbs | Lbz | Lhs | Lhz | Lws | Ld
- | Sb | Sh | Sw | Sd
+ | Sb | Sh | Sw | Sd | Sq
(* BCU *)
| Icall | Call | Cb | Igoto | Goto | Ret | Get | Set
(* FPU *)
@@ -518,6 +523,7 @@ let ab_inst_to_real = function
| "Psh" -> Sh
| "Psw" | "Psw_a" | "Pfss" -> Sw
| "Psd" | "Psd_a" | "Pfsd" -> Sd
+ | "Psq" -> Sq
| "Pcb" | "Pcbu" -> Cb
| "Pcall" | "Pdiv" | "Pdivu" -> Call
@@ -538,6 +544,9 @@ let ab_inst_to_real = function
| "Pfsbfw" -> Fsbfw
| "Pfmuld" -> Fmuld
| "Pfmulw" -> Fmulw
+
+ | "nop" -> Nop
+
| s -> failwith @@ sprintf "ab_inst_to_real: unrecognized instruction: %s" s
exception InvalidEncoding
@@ -590,7 +599,7 @@ let rec_to_usage r =
(match encoding with None | Some U6 | Some S10 -> lsu_data
| Some U27L5 | Some U27L10 -> lsu_data_x
| Some E27U27L10 -> lsu_data_y)
- | Sb | Sh | Sw | Sd ->
+ | Sb | Sh | Sw | Sd | Sq ->
(match encoding with None | Some U6 | Some S10 -> lsu_acc
| Some U27L5 | Some U27L10 -> lsu_acc_x
| Some E27U27L10 -> lsu_acc_y)
@@ -612,7 +621,7 @@ let real_inst_to_latency = function
| Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4
| Mulw | Muld | Maddw | Maddd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)
| Lbs | Lbz | Lhs | Lhz | Lws | Ld -> 3
- | Sb | Sh | Sw | Sd -> 1 (* See k1c-Optimization.pdf page 19 *)
+ | Sb | Sh | Sw | Sd | Sq -> 1 (* See k1c-Optimization.pdf page 19 *)
| Get -> 1
| Set -> 4 (* According to the manual should be 3, but I measured 4 *)
| Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index 47248e3e..da64c41d 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -140,6 +140,19 @@ Proof.
- discriminate.
Qed.
+Lemma exec_store_q_offset_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_store_q_offset rs m rd ra ofs = Next rs' m' ->
+ exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
+ simpl in *.
+ destruct (gpreg_q_expand _) as [s0 s1].
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ inv H. apply next_eq; auto.
+Qed.
+
Lemma exec_store_reg_pc_var:
forall t rs m rd ra ro rs' m' v,
exec_store_reg t rs m rd ra ro = Next rs' m' ->
@@ -182,6 +195,7 @@ Proof.
+ exploreInst; apply exec_store_offset_pc_var; auto.
+ exploreInst; apply exec_store_reg_pc_var; auto.
+ exploreInst; apply exec_store_regxs_pc_var; auto.
+ + apply exec_store_q_offset_pc_var; auto.
- destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate).
destruct (Mem.storev _ _ _ _); try discriminate.
inv H. apply next_eq; auto. apply functional_extensionality. intros.
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 156b16d0..9d42169a 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -41,6 +41,7 @@ module Target (*: TARGET*) =
let print_label oc lbl = label oc (transl_label lbl)
let int_reg_name = let open Asmvliw in function
+
| GPR0 -> "$r0" | GPR1 -> "$r1" | GPR2 -> "$r2" | GPR3 -> "$r3"
| GPR4 -> "$r4" | GPR5 -> "$r5" | GPR6 -> "$r6" | GPR7 -> "$r7"
| GPR8 -> "$r8" | GPR9 -> "$r9" | GPR10 -> "$r10" | GPR11 -> "$r11"
@@ -60,7 +61,43 @@ module Target (*: TARGET*) =
let ireg oc r = output_string oc (int_reg_name r)
- let ireg = ireg
+ let int_gpreg_q_name =
+ let open Asmvliw in
+ function
+ | R0R1 -> "$r0r1"
+ | R2R3 -> "$r2r3"
+ | R4R5 -> "$r4r5"
+ | R6R7 -> "$r6r7"
+ | R8R9 -> "$r8r9"
+ | R10R11 -> "$r10r11"
+ | R12R13 -> "$r12r13"
+ | R14R15 -> "$r14r15"
+ | R16R17 -> "$r16r17"
+ | R18R19 -> "$r18r19"
+ | R20R21 -> "$r20r21"
+ | R22R23 -> "$r22r23"
+ | R24R25 -> "$r24r25"
+ | R26R27 -> "$r26r27"
+ | R28R29 -> "$r28r29"
+ | R30R31 -> "$r30r31"
+ | R32R33 -> "$r32r33"
+ | R34R35 -> "$r34r35"
+ | R36R37 -> "$r36r37"
+ | R38R39 -> "$r38r39"
+ | R40R41 -> "$r40r41"
+ | R42R43 -> "$r42r43"
+ | R44R45 -> "$r44r45"
+ | R46R47 -> "$r46r47"
+ | R48R49 -> "$r48r49"
+ | R50R51 -> "$r50r51"
+ | R52R53 -> "$r52r53"
+ | R54R55 -> "$r54r55"
+ | R56R57 -> "$r56r57"
+ | R58R59 -> "$r58r59"
+ | R60R61 -> "$r60r61"
+ | R62R63 -> "$r62r63"
+
+ let gpreg_q oc r = output_string oc (int_gpreg_q_name r)
let preg oc = let open Asmvliw in function
| IR r -> ireg oc r
@@ -250,7 +287,7 @@ module Target (*: TARGET*) =
| _ ->
assert false
end
- | Pnop -> fprintf oc " nop\n"
+ | Pnop -> (* FIXME fprintf oc " nop\n" *) ()
| Psemi -> fprintf oc ";;\n"
| Pclzll (rd, rs) -> fprintf oc " clzd %a = %a\n" ireg rd ireg rs
@@ -364,7 +401,9 @@ module Target (*: TARGET*) =
fprintf oc " sw%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
| Psd(rd, ra, adr) | Psd_a(rd, ra, adr) | Pfsd(rd, ra, adr) -> assert Archi.ptr64;
fprintf oc " sd%a %a[%a] = %a\n" xscale adr addressing adr ireg ra ireg rd
-
+ | Psq(rd, ra, adr) ->
+ fprintf oc " sq%a %a[%a] = %a\n" xscale adr addressing adr ireg ra gpreg_q rd
+
(* Arith R instructions *)
(* Arith RR instructions *)
diff --git a/mppa_k1c/extractionMachdep.v b/mppa_k1c/extractionMachdep.v
index e70f51de..fdecd2a3 100644
--- a/mppa_k1c/extractionMachdep.v
+++ b/mppa_k1c/extractionMachdep.v
@@ -22,6 +22,9 @@ Require Archi Asm.
Extract Constant Archi.ptr64 => " Configuration.model = ""64"" ".
Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *)
+Extract Constant Peephole.print_found_store =>
+"fun offset x -> Printf.printf ""found offset = %ld\n"" (Camlcoq.camlint_of_coqint offset); x".
+
(* Asm *)
(*
Extract Constant Asm.low_half => "fun _ _ _ -> assert false".
diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v
index 130f0b12..3bca6629 100644
--- a/mppa_k1c/lib/Asmblockgenproof0.v
+++ b/mppa_k1c/lib/Asmblockgenproof0.v
@@ -950,6 +950,13 @@ Proof.
1-10: try (unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]).
1-10: try (unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
1-10: try (unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]); auto.
+ - (* PStoreQRRO *)
+ unfold parexec_store_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ unfold eval_offset in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity.
- destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
- destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
diff --git a/test/monniaux/regalloc/bigspill.c b/test/monniaux/regalloc/bigspill.c
new file mode 100644
index 00000000..6191e018
--- /dev/null
+++ b/test/monniaux/regalloc/bigspill.c
@@ -0,0 +1,21 @@
+extern void callee(void);
+
+void bigspill(int *t) {
+ int t0 = t[0];
+ int t1 = t[1];
+ int t2 = t[2];
+ int t3 = t[3];
+ int t4 = t[4];
+ int t5 = t[5];
+ int t6 = t[6];
+ int t7 = t[7];
+ callee();
+ t[0] = t0;
+ t[1] = t1;
+ t[2] = t2;
+ t[3] = t3;
+ t[4] = t4;
+ t[5] = t5;
+ t[6] = t6;
+ t[7] = t7;
+}