aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2021-01-22 14:17:39 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2021-01-22 14:17:39 +0100
commitcac0556d551438b5dc3bbb6b701ae0101b214e0f (patch)
treec97b791ac6c8029f512e9b110dd5e430817d806d /aarch64
parente265be77756b14b1d830a0d0faf1b105494bbb43 (diff)
parent0bc5b0f9fe8a2463ddb147671359a5b374cfd50c (diff)
downloadcompcert-kvx-cac0556d551438b5dc3bbb6b701ae0101b214e0f.tar.gz
compcert-kvx-cac0556d551438b5dc3bbb6b701ae0101b214e0f.zip
Merge remote-tracking branch 'origin/aarch64-peephole' into kvx-work
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Asm.v28
-rw-r--r--aarch64/Asmblock.v12
-rw-r--r--aarch64/Asmblockdeps.v61
-rw-r--r--aarch64/Asmgen.v20
-rw-r--r--aarch64/Asmgenproof.v13
-rw-r--r--aarch64/PeepholeOracle.ml315
-rw-r--r--aarch64/PostpassSchedulingOracle.ml8
-rw-r--r--aarch64/TargetPrinter.ml8
8 files changed, 297 insertions, 168 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index dc1f025f..5f109224 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -201,8 +201,8 @@ Inductive instruction: Type :=
| Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *)
| Pstrb (rs: ireg) (a: addressing) (**r store int8 *)
| Pstrh (rs: ireg) (a: addressing) (**r store int16 *)
- | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
- | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
(** Integer arithmetic, immediate *)
| Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *)
| Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *)
@@ -273,9 +273,13 @@ Inductive instruction: Type :=
| Pldrs (rd: freg) (a: addressing) (**r load float32 (single precision) *)
| Pldrd (rd: freg) (a: addressing) (**r load float64 (double precision) *)
| Pldrd_a (rd: freg) (a: addressing) (**r load float64 as any64 *)
+ | Pldps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float32 *)
+ | Pldpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float64 *)
| Pstrs (rs: freg) (a: addressing) (**r store float32 *)
| Pstrd (rs: freg) (a: addressing) (**r store float64 *)
| Pstrd_a (rs: freg) (a: addressing) (**r store float64 as any64 *)
+ | Pstps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float32 *)
+ | Pstpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two float64 *)
(** Floating-point move *)
| Pfmov (rd r1: freg)
| Pfmovimms (rd: freg) (f: float32) (**r load float32 constant *)
@@ -798,7 +802,7 @@ Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.loadv chk1 m addr with
| None => Stuck
@@ -824,7 +828,7 @@ Definition exec_store_double (chk1 chk2: memory_chunk)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.storev chk1 m addr v1 with
| None => Stuck
@@ -1258,17 +1262,27 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| _ => Stuck
end
| Pbuiltin ef args res => Stuck (**r treated specially below *)
- (** The following instructions and directives are not generated directly
- by Asmgen, so we do not model them. *)
+ (** loads and stores pairs int/int64 *)
| Pldpw rd1 rd2 chk1 chk2 a =>
exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
| Pldpx rd1 rd2 chk1 chk2 a =>
exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
- | Pnop => Next (nextinstr rs) m
| Pstpw rs1 rs2 chk1 chk2 a =>
exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
| Pstpx rs1 rs2 chk1 chk2 a =>
exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ (** loads and stores pairs floating-point *)
+ | Pldps rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pldpd rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pstps rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pstpd rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pnop => Next (nextinstr rs) m
+ (** The following instructions and directives are not generated directly
+ by Asmgen, so we do not model them. *)
| Pcls _ _ _
| Pclz _ _ _
| Prev _ _ _
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index ed84b7d8..c606002a 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -110,11 +110,13 @@ Inductive load_rd_a: Type :=
Inductive load_rd1_rd2_a: Type :=
| Pldpw
| Pldpx
+ | Pldps
+ | Pldpd
.
Inductive ld_instruction: Type :=
| PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
- | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
+ | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
.
Inductive store_rs_a : Type :=
@@ -134,11 +136,13 @@ Inductive store_rs_a : Type :=
Inductive store_rs1_rs2_a : Type :=
| Pstpw
| Pstpx
+ | Pstps
+ | Pstpd
.
Inductive st_instruction : Type :=
| PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing)
- | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstp (st: store_rs1_rs2_a) (rs1 rs2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
.
Inductive arith_p : Type :=
@@ -481,7 +485,7 @@ Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.loadv chk1 m addr with
| None => Stuck
@@ -505,7 +509,7 @@ Definition exec_store_double (chk1 chk2: memory_chunk)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.storev chk1 m addr v1 with
| None => Stuck
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
index e1d591df..5cd049c5 100644
--- a/aarch64/Asmblockdeps.v
+++ b/aarch64/Asmblockdeps.v
@@ -1150,20 +1150,32 @@ Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: ad
| ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
end.
-Definition trans_ldp_chunk (chunk: memory_chunk): load_rd_a :=
+Definition trans_ldp_chunk (chunk: memory_chunk) (r: dreg): load_rd_a :=
match chunk with
- | Many32 => Pldrw_a
+ | Mint32 => Pldrw
| Mint64 => Pldrx
- | Many64 => Pldrx_a
- | _ => Pldrw
+ | Mfloat32 => Pldrs
+ | Mfloat64 => Pldrd
+ | Many32 => Pldrw_a
+ | _ => (* This case should always correspond to Many64 *)
+ match r with
+ | IR _ => Pldrx_a
+ | FR _ => Pldrd_a
+ end
end.
-Definition trans_stp_chunk (chunk: memory_chunk): store_rs_a :=
+Definition trans_stp_chunk (chunk: memory_chunk) (r: dreg): store_rs_a :=
match chunk with
- | Many32 => Pstrw_a
+ | Mint32 => Pstrw
| Mint64 => Pstrx
- | Many64 => Pstrx_a
- | _ => Pstrw
+ | Mfloat32 => Pstrs
+ | Mfloat64 => Pstrd
+ | Many32 => Pstrw_a
+ | _ => (* This case should always correspond to Many64 *)
+ match r with
+ | IR _ => Pstrx_a
+ | FR _ => Pstrd_a
+ end
end.
Definition trans_load (ldi: ld_instruction) :=
@@ -1171,10 +1183,10 @@ Definition trans_load (ldi: ld_instruction) :=
| PLd_rd_a ld r a =>
let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)]
| Pldp ld r1 r2 chk1 chk2 a =>
- let ldi1 := trans_ldp_chunk chk1 in
- let ldi2 := trans_ldp_chunk chk2 in
+ let ldi1 := trans_ldp_chunk chk1 r1 in
+ let ldi2 := trans_ldp_chunk chk2 r1 in
let lr := eval_addressing_rlocs_ld ldi1 chk1 a in
- let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
@@ -1190,10 +1202,10 @@ Definition trans_store (sti: st_instruction) :=
| PSt_rs_a st r a =>
let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)]
| Pstp st r1 r2 chk1 chk2 a =>
- let sti1 := trans_stp_chunk chk1 in
- let sti2 := trans_stp_chunk chk2 in
+ let sti1 := trans_stp_chunk chk1 r1 in
+ let sti2 := trans_stp_chunk chk2 r1 in
let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in
- let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
+ let ofs := match chk1 with | Mint32 | Mfloat32| Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
@@ -1356,6 +1368,14 @@ Proof.
intros; destruct r; discriminate.
Qed.
+Lemma dreg_not_pmem: forall (r: dreg),
+ (# r) <> pmem.
+Proof.
+ intros; destruct r as [i|f].
+ - destruct i. apply ireg_not_pmem. discriminate.
+ - apply freg_not_pmem.
+Qed.
+
Ltac DPRM pr :=
destruct pr as [drDPRF|crDPRF|];
[destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF |]
@@ -1463,6 +1483,7 @@ Ltac discriminate_ppos :=
try apply ireg_not_pmem;
try apply ireg_not_pc;
try apply freg_not_pmem;
+ try apply dreg_not_pmem;
try apply ireg_not_CN;
try apply ireg_not_CZ;
try apply ireg_not_CC;
@@ -1706,10 +1727,10 @@ Proof.
econstructor.
Qed.
-Lemma load_chunk_neutral: forall chk v,
- interp_load (trans_ldp_chunk chk) v = v.
+Lemma load_chunk_neutral: forall chk v r,
+ interp_load (trans_ldp_chunk chk r) v = v.
Proof.
- intros; destruct chk; simpl; reflexivity.
+ intros; destruct chk; destruct r; simpl; reflexivity.
Qed.
Theorem bisimu_basic rsr mr sr bi:
@@ -1749,8 +1770,10 @@ Local Ltac preg_eq_discr r rd :=
try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr;
rewrite !load_chunk_neutral;
try (rewrite !assign_diff; discriminate_ppos; reflexivity);
- try (destruct base; discriminate_ppos);
- repeat (try fold (ppos r); intros; Simpl_update).
+ try (destruct rd1 as [ir1|fr1]; try destruct ir1; destruct rd2 as [ir2|fr2]; try destruct ir2;
+ destruct base; discriminate_ppos);
+ repeat (try fold (ppos r); try fold (ppos r0);
+ try fold (ppos fr1); try fold (ppos fr2); intros; Simpl_update).
- (* Store *)
destruct st.
+ unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval;
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index a720c11c..45205158 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -326,10 +326,16 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpw rd1 rd2 chk1 chk2 a)
+ OK (Asm.Pldpw rd1' rd2' chk1 chk2 a)
| PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpx rd1 rd2 chk1 chk2 a)
+ OK (Asm.Pldpx rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldps rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldps rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldpd rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldpd rd1' rd2' chk1 chk2 a)
| PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a)
| PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
@@ -344,10 +350,16 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpw rs1 rs2 chk1 chk2 a)
+ OK (Asm.Pstpw rs1' rs2' chk1 chk2 a)
| PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpx rs1 rs2 chk1 chk2 a)
+ OK (Asm.Pstpx rs1' rs2' chk1 chk2 a)
+ | PStore (Pstp Pstps rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+ do rs2' <- freg_of_preg rs2;
+ OK (Asm.Pstps rs1' rs2' chk1 chk2 a)
+ | PStore (Pstp Pstpd rs1 rs2 chk1 chk2 a) => do rs1' <- freg_of_preg rs1;
+ do rs2' <- freg_of_preg rs2;
+ OK (Asm.Pstpd rs1' rs2' chk1 chk2 a)
| Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs)
| Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs)
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index a33f90b3..d27b3f8c 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1100,7 +1100,7 @@ Proof.
destruct (Mem.loadv chk2 m2
(eval_addressing lk
(get_offset_addr a match chk1 with
- | Mint32 | Many32 => 4
+ | Mint32 | Mfloat32| Many32 => 4
| _ => 8
end) rs1));
inversion HLOAD; auto.
@@ -1150,7 +1150,7 @@ Proof.
(eval_addressing lk
(get_offset_addr a
match chk1 with
- | Mint32 | Many32 => 4
+ | Mint32 | Mfloat32 | Many32 => 4
| _ => 8
end) rs1) v2);
inversion HSTORE; auto.
@@ -1331,12 +1331,14 @@ Proof.
destruct ld.
- destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_rd_a_preserved; eauto;
intros; simpl in *; destruct sz; eauto.
- - destruct ld; monadInv TRANSBI; exploit load_double_preserved; eauto. }
+ - destruct ld; monadInv TRANSBI; destruct rd1 as [[rd1'|]|]; destruct rd2 as [[rd2'|]|];
+ inv EQ; inv EQ1; exploit load_double_preserved; eauto. }
{ (* PStore *)
destruct st.
- destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_rs_a_preserved; eauto;
simpl in *; inv_matchi; find_rwrt_ag.
- - destruct st; monadInv TRANSBI; exploit store_double_preserved; eauto;
+ - destruct st; monadInv TRANSBI; destruct rs0 as [[rs0'|]|]; destruct rs3 as [[rs3'|]|];
+ inv EQ; inv EQ1; exploit store_double_preserved; eauto;
simpl in *; inv_matchi; find_rwrt_ag. }
{ (* Pallocframe *)
monadInv TRANSBI;
@@ -2224,8 +2226,7 @@ Proof.
- apply senv_preserved.
- eexact transf_initial_states.
- eexact transf_final_states.
- - (* TODO step_simulation *)
- unfold match_states.
+ - unfold match_states.
simpl; intros; subst; eexists; split; eauto.
eapply step_simulation; eauto.
Qed.
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
index a6945b9f..83e454e7 100644
--- a/aarch64/PeepholeOracle.ml
+++ b/aarch64/PeepholeOracle.ml
@@ -27,57 +27,56 @@ let is_valid_immofs_64 z =
if z <= 504 && z >= -512 && z mod 8 = 0 then true else false
(* Functions to check if a ldp/stp replacement is valid according to args *)
-let is_valid_ldrw rd1 rd2 b1 b2 n1 n2 =
+let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if
- (not (ireg_eq rd1 rd2))
+ (not (dreg_eq rd1 rd2))
&& iregsp_eq b1 b2
- && (not (iregsp_eq (RR1 rd1) b2))
+ && (not (dreg_eq rd1 (IR b2)))
&& (z2 = z1 + 4 || z2 = z1 - 4)
&& is_valid_immofs_32 z1
then true
else false
-let is_valid_ldrx rd1 rd2 b1 b2 n1 n2 =
+let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if
- (not (ireg_eq rd1 rd2))
+ (not (dreg_eq rd1 rd2))
&& iregsp_eq b1 b2
- && (not (iregsp_eq (RR1 rd1) b2))
+ && (not (dreg_eq rd1 (IR b2)))
&& (z2 = z1 + 8 || z2 = z1 - 8)
&& is_valid_immofs_64 z1
then true
else false
-let is_valid_strw b1 b2 n1 n2 =
+let is_valid_str32 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true
else false
-let is_valid_strx b1 b2 n1 n2 =
+let is_valid_str64 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true
else false
let dreg_of_ireg r = IR (RR1 r)
+let dreg_of_freg r = FR r
(* Return true if an intermediate
* affectation eliminates the potential
* candidate *)
let verify_load_affect reg rd b rev =
let b = IR b in
- let rd = dreg_of_ireg rd in
if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
(* Return true if an intermediate
* read eliminates the potential
* candidate *)
let verify_load_read reg rd b rev =
- let rd = dreg_of_ireg rd in
dreg_eq reg rd
(* Return true if an intermediate
@@ -85,9 +84,30 @@ let verify_load_read reg rd b rev =
* candidate *)
let verify_store_affect reg rs b rev =
let b = IR b in
- let rs = dreg_of_ireg rs in
dreg_eq reg b || dreg_eq reg rs
+type ph_type =
+ | P32
+ | P32f
+ | P64
+ | P64f
+
+type inst_type =
+ | Ldr of ph_type
+ | Str of ph_type
+
+let ph_ty_to_string = function
+ | Ldr P32 -> "ldr32"
+ | Ldr P32f -> "ldr32f"
+ | Ldr P64 -> "ldr64"
+ | Ldr P64f -> "ldr64f"
+ | Str P32 -> "str32"
+ | Str P32f -> "str32f"
+ | Str P64 -> "str64"
+ | Str P64f -> "str64f"
+
+let print_ph_ty chan v = output_string chan (ph_ty_to_string v)
+
(* Affect a symbolic memory list of potential replacements
* for a given write in reg *)
let rec affect_symb_mem reg insta pot_rep stype rev =
@@ -95,11 +115,11 @@ let rec affect_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
if verify_load_affect reg rd b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
if verify_store_affect reg rs b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
@@ -113,11 +133,11 @@ let rec read_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
if verify_load_read reg rd b rev then
read_symb_mem reg insta t0 stype rev
else h0 :: read_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
h0 :: read_symb_mem reg insta t0 stype rev
| _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
@@ -197,7 +217,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := read_symb_mem rs1 insta !pot_rep stype rev
| Pcset (rd, _) ->
pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
- | Pfmovi (_, _, rs) -> (
+ | Pfmovi (_, rd, rs) -> (
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
match rs with
| RR0 rs ->
pot_rep :=
@@ -207,28 +228,31 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
- | Pfnmul (_, rd, rs1, rs2) -> ())
+ | Pfnmul (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev)
| PLoad i -> (
(* Here, we consider a different behavior for load and store potential candidates:
* a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *)
match stype with
- | "ldr" -> (
+ | Ldr _ -> (
match i with
| PLd_rd_a (_, rd, a) ->
pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
update_pot_rep_addressing a insta pot_rep stype rev
| Pldp (_, rd1, rd2, _, _, a) ->
pot_rep :=
- affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev;
+ affect_symb_mem rd1 insta !pot_rep stype rev;
pot_rep :=
- affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev;
+ affect_symb_mem rd2 insta !pot_rep stype rev;
update_pot_rep_addressing a insta pot_rep stype rev)
| _ -> pot_rep := [])
| PStore _ -> (
(* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones :
* if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole.
* To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *)
- match stype with "ldr" -> pot_rep := [] | _ -> ())
+ match stype with Ldr _ -> pot_rep := [] | _ -> ())
| Pallocframe (_, _) -> pot_rep := []
| Pfreeframe (_, _) -> pot_rep := []
| Ploadsymbol (rd, _) ->
@@ -244,59 +268,6 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
| Pnop -> ()
-(* Try to find the index of the first previous compatible
- * replacement in a given symbolic memory *)
-let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
- match pot_rep with
- | [] -> None
- | h0 :: t0 -> (
- match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b1 b2 n1 n2 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b1 b2 n1 n2 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | _, _ ->
- failwith "search_compat_rep: Found an inconsistent inst in pot_rep")
-
-(* Try to find the index of the first previous compatible
- * replacement in a given symbolic memory (when iterating in the reversed list) *)
-let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
- match pot_rep with
- | [] -> None
- | h0 :: t0 -> (
- match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b2 b1 n2 n1 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b2 b1 n2 n1 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | _, _ ->
- failwith
- "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
-
(* This is useful to manage the case were the immofs
* of the first ldr/str is greater than the second one *)
let min_is_rev n1 n2 =
@@ -310,53 +281,102 @@ let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
match ldi with
| Pldrw | Pldrw_a -> Pldpw
| Pldrx | Pldrx_a -> Pldpx
+ | Pldrs -> Pldps
+ | Pldrd | Pldrd_a -> Pldpd
| _ -> failwith "trans_ldi: Found a non compatible load to translate"
let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
match sti with
| Pstrw | Pstrw_a -> Pstpw
| Pstrx | Pstrx_a -> Pstpx
+ | Pstrs -> Pstps
+ | Pstrd | Pstrd_a -> Pstpd
| _ -> failwith "trans_sti: Found a non compatible store to translate"
let is_compat_load (ldi : load_rd_a) =
- match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a -> true | _ -> false
+ match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a-> true | _ -> false
let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
match ldi1 with
| Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
| Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+ | Pldrs -> (match ldi2 with Pldrs -> true | _ -> false)
+ | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false)
| _ -> false
let is_compat_store (sti : store_rs_a) =
- match sti with Pstrw | Pstrw_a | Pstrx | Pstrx_a -> true | _ -> false
+ match sti with Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true | _ -> false
let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
match sti1 with
| Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false)
| Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false)
+ | Pstrs -> (match sti2 with Pstrs -> true | _ -> false)
+ | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false)
| _ -> false
-let get_load_string (ldi : load_rd_a) =
+let get_load_pht (ldi : load_rd_a) =
match ldi with
- | Pldrw | Pldrw_a -> "ldrw"
- | Pldrx | Pldrx_a -> "ldrx"
+ | Pldrw | Pldrw_a -> Ldr P32
+ | Pldrs -> Ldr P32f
+ | Pldrx | Pldrx_a -> Ldr P64
+ | Pldrd | Pldrd_a -> Ldr P64f
| _ -> failwith "get_load_string: Found a non compatible load to translate"
-let get_store_string (sti : store_rs_a) =
+let get_store_pht (sti : store_rs_a) =
match sti with
- | Pstrw | Pstrw_a -> "strw"
- | Pstrx | Pstrx_a -> "strx"
+ | Pstrw | Pstrw_a -> Str P32
+ | Pstrs -> Str P32f
+ | Pstrx | Pstrx_a -> Str P64
+ | Pstrd | Pstrd_a -> Str P64f
| _ -> failwith "get_store_string: Found a non compatible store to translate"
let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
match stype with
- | "ldrw" -> is_valid_ldrw rd1 rd2 b1 b2 n1 n2
- | _ -> is_valid_ldrx rd1 rd2 b1 b2 n1 n2
+ | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2
+ | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2
let is_valid_str b1 b2 n1 n2 stype =
match stype with
- | "strw" -> is_valid_strw b1 b2 n1 n2
- | _ -> is_valid_strx b1 b2 n1 n2
+ | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2
+ | _ -> is_valid_str64 b1 b2 n1 n2
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory *)
+let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b1 b2 n1 n2 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith "search_compat_rep: Found an inconsistent inst in pot_rep")
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory (when iterating in the reversed list) *)
+let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b2 b1 n2 n1 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith
+ "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
(* Main peephole function in backward style *)
let pair_rep_inv insta =
@@ -365,26 +385,39 @@ let pair_rep_inv insta =
* are the indices of insts in the main array "insta". *)
let pot_ldrw_rep = ref [] in
let pot_ldrx_rep = ref [] in
+ let pot_ldrs_rep = ref [] in
+ let pot_ldrd_rep = ref [] in
let pot_strw_rep = ref [] in
let pot_strx_rep = ref [] in
+ let pot_strs_rep = ref [] in
+ let pot_strd_rep = ref [] in
for i = Array.length insta - 1 downto 1 do
let h0 = insta.(i) in
let h1 = insta.(i - 1) in
(* Here we need to update every symbolic memory according to the matched inst *)
- update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" true;
- update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" true;
- update_pot_rep_basic h0 insta pot_strw_rep "str" true;
- update_pot_rep_basic h0 insta pot_strx_rep "str" true;
+ update_pot_rep_basic h0 insta pot_ldrw_rep (Ldr P32) true;
+ update_pot_rep_basic h0 insta pot_ldrx_rep (Ldr P64) true;
+ update_pot_rep_basic h0 insta pot_ldrs_rep (Ldr P32f) true;
+ update_pot_rep_basic h0 insta pot_ldrd_rep (Ldr P64f) true;
+ update_pot_rep_basic h0 insta pot_strw_rep (Str P32) true;
+ update_pot_rep_basic h0 insta pot_strx_rep (Str P64) true;
+ update_pot_rep_basic h0 insta pot_strs_rep (Str P32f) true;
+ update_pot_rep_basic h0 insta pot_strd_rep (Str P64f) true;
match (h0, h1) with
(* Non-consecutive ldr *)
- | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> (
if is_compat_load ldi then
let pot_rep =
- match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ match ldi with
+ | Pldrw | Pldrw_a -> pot_ldrw_rep
+ | Pldrx | Pldrx_a -> pot_ldrx_rep
+ | Pldrs -> pot_ldrs_rep
+ | _ -> pot_ldrd_rep
in
(* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
match
- search_compat_rep_inv rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t
with
(* If we can't find a candidate, add the current load as a potential future one *)
| None -> pot_rep := i :: !pot_rep
@@ -395,33 +428,39 @@ let pair_rep_inv insta =
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
if min_is_rev n n1 then (
- if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
else (
- if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
)
(* Non-consecutive str *)
- | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> (
if is_compat_store sti then
let pot_rep =
- match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
+ match sti with
+ | Pstrw | Pstrw_a -> pot_strw_rep
+ | Pstrx | Pstrx_a -> pot_strx_rep
+ | Pstrs -> pot_strs_rep
+ | _ -> pot_strd_rep
in
(* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
match
- search_compat_rep_inv rd1 b1 n1 insta !pot_rep
- (get_store_string sti)
+ search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t
with
(* If we can't find a candidate, clean and add the current store as a potential future one *)
| None ->
pot_strw_rep := [];
pot_strx_rep := [];
+ pot_strs_rep := [];
+ pot_strd_rep := [];
pot_rep := i :: !pot_rep
(* Else, perform the peephole *)
| Some (rep, c, r, b, n) ->
@@ -429,7 +468,7 @@ let pair_rep_inv insta =
let filt x = x != rep in
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
- if debug then eprintf "STP_BACK_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
insta.(i) <-
PStore
(Pstp
@@ -440,7 +479,9 @@ let pair_rep_inv insta =
match i with
| PStore _ ->
pot_strw_rep := [];
- pot_strx_rep := []
+ pot_strx_rep := [];
+ pot_strs_rep := [];
+ pot_strd_rep := []
| _ -> ())
done
@@ -451,24 +492,33 @@ let pair_rep insta =
* are the indices of insts in the main array "insta". *)
let pot_ldrw_rep = ref [] in
let pot_ldrx_rep = ref [] in
+ let pot_ldrs_rep = ref [] in
+ let pot_ldrd_rep = ref [] in
let pot_strw_rep = ref [] in
let pot_strx_rep = ref [] in
+ let pot_strs_rep = ref [] in
+ let pot_strd_rep = ref [] in
for i = 0 to Array.length insta - 2 do
let h0 = insta.(i) in
let h1 = insta.(i + 1) in
(* Here we need to update every symbolic memory according to the matched inst *)
- update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" false;
- update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" false;
- update_pot_rep_basic h0 insta pot_strw_rep "str" false;
- update_pot_rep_basic h0 insta pot_strx_rep "str" false;
+ update_pot_rep_basic h0 insta pot_ldrw_rep (Ldr P32) true;
+ update_pot_rep_basic h0 insta pot_ldrx_rep (Ldr P64) true;
+ update_pot_rep_basic h0 insta pot_ldrs_rep (Ldr P32f) true;
+ update_pot_rep_basic h0 insta pot_ldrd_rep (Ldr P64f) true;
+ update_pot_rep_basic h0 insta pot_strw_rep (Str P32) true;
+ update_pot_rep_basic h0 insta pot_strx_rep (Str P64) true;
+ update_pot_rep_basic h0 insta pot_strs_rep (Str P32f) true;
+ update_pot_rep_basic h0 insta pot_strd_rep (Str P64f) true;
match (h0, h1) with
(* Consecutive ldr *)
- | ( PLoad (PLd_rd_a (ldi1, IR (RR1 rd1), ADimm (b1, n1))),
- PLoad (PLd_rd_a (ldi2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))),
+ PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) ->
if are_compat_load ldi1 ldi2 then
- if is_valid_ldr rd1 rd2 b1 b2 n1 n2 (get_load_string ldi1) then (
+ let ld_t = get_load_pht ldi1 in
+ if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then (
if min_is_rev n1 n2 then (
- if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
@@ -479,7 +529,7 @@ let pair_rep insta =
chunk_load ldi2,
ADimm (b1, n1) )))
else (
- if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
@@ -491,14 +541,19 @@ let pair_rep insta =
ADimm (b1, n2) )));
insta.(i + 1) <- Pnop)
(* Non-consecutive ldr *)
- | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> (
if is_compat_load ldi then
let pot_rep =
- match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ match ldi with
+ | Pldrw | Pldrw_a -> pot_ldrw_rep
+ | Pldrx | Pldrx_a -> pot_ldrx_rep
+ | Pldrs -> pot_ldrs_rep
+ | _ -> pot_ldrd_rep
in
(* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
match
- search_compat_rep rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ search_compat_rep rd1 b1 n1 insta !pot_rep ld_t
with
(* If we can't find a candidate, add the current load as a potential future one *)
| None -> pot_rep := i :: !pot_rep
@@ -509,29 +564,32 @@ let pair_rep insta =
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
if min_is_rev n n1 then (
- if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
else (
- if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
)
(* Consecutive str *)
- | ( PStore (PSt_rs_a (sti1, IR (RR1 rd1), ADimm (b1, n1))),
- PStore (PSt_rs_a (sti2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))),
+ PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) ->
(* Regardless of whether we can perform the peephole or not,
* we have to clean the potential candidates for stp now as we are
* looking at two new store instructions. *)
pot_strw_rep := [];
pot_strx_rep := [];
+ pot_strs_rep := [];
+ pot_strd_rep := [];
if are_compat_store sti1 sti2 then
- if is_valid_str b1 b2 n1 n2 (get_store_string sti1) then (
- if debug then eprintf "STP_CONSEC_PEEP_IMM_INC\n";
+ let st_t = get_store_pht sti1 in
+ if is_valid_str b1 b2 n1 n2 st_t then (
+ if debug then eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
insta.(i) <-
PStore
(Pstp
@@ -543,19 +601,26 @@ let pair_rep insta =
ADimm (b1, n1) ));
insta.(i + 1) <- Pnop)
(* Non-consecutive str *)
- | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> (
if is_compat_store sti then
let pot_rep =
- match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
+ match sti with
+ | Pstrw | Pstrw_a -> pot_strw_rep
+ | Pstrx | Pstrx_a -> pot_strx_rep
+ | Pstrs -> pot_strs_rep
+ | _ -> pot_strd_rep
in
(* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
match
- search_compat_rep rd1 b1 n1 insta !pot_rep (get_store_string sti)
+ search_compat_rep rd1 b1 n1 insta !pot_rep st_t
with
(* If we can't find a candidate, clean and add the current store as a potential future one *)
| None ->
pot_strw_rep := [];
pot_strx_rep := [];
+ pot_strs_rep := [];
+ pot_strd_rep := [];
pot_rep := i :: !pot_rep
(* Else, perform the peephole *)
| Some (rep, c, r, b, n) ->
@@ -563,7 +628,7 @@ let pair_rep insta =
let filt x = x != rep in
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
- if debug then eprintf "STP_FORW_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
insta.(i) <-
PStore
(Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n)))
@@ -574,7 +639,9 @@ let pair_rep insta =
match i with
| PStore _ ->
pot_strw_rep := [];
- pot_strx_rep := []
+ pot_strx_rep := [];
+ pot_strs_rep := [];
+ pot_strd_rep := []
| _ -> ())
done
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index d7fab1de..cde3e7a7 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -137,8 +137,8 @@ let load_rec ldi =
| PLd_rd_a (ld, rd, a) ->
load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
| Pldp (ld, rd1, rd2, _, _, a) ->
- load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1)
- (reg_of_ireg rd2) a
+ load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1)
+ (reg_of_dreg rd2) a
let store_rs_a_rec st rs a =
{
@@ -161,8 +161,8 @@ let store_rec sti =
| PSt_rs_a (st, rs, a) ->
store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
| Pstp (st, rs1, rs2, _, _, a) ->
- store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1)
- (reg_of_ireg rs2) a
+ store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1)
+ (reg_of_dreg rs2) a
let loadsymbol_rec i rd id =
{ inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false }
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index 40e4a182..9ec1d563 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -417,6 +417,14 @@ module Target (*: TARGET*) =
fprintf oc " str %a, %a\n" sreg rd addressing a
| Pstrd(rd, a) | Pstrd_a(rd, a) ->
fprintf oc " str %a, %a\n" dreg rd addressing a
+ | Pldps(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+ | Pldpd(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a
+ | Pstps(rd1, rd2, _, _, a) ->
+ fprintf oc " stp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+ | Pstpd(rd1, rd2, _, _, a) ->
+ fprintf oc " stp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a
(* Floating-point move *)
| Pfmov(rd, r1) ->
fprintf oc " fmov %a, %a\n" dreg rd dreg r1