aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-01-20 16:46:52 +0100
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2021-01-20 16:46:52 +0100
commit8781c27a16463fd8f83a9c6eaba7b76846bd296c (patch)
tree17beca9da05a776da1193eb9c2002ea79a3261b5
parent87e268bc9e6079b5aea31cd6c20e30d00adf2bb8 (diff)
downloadcompcert-kvx-8781c27a16463fd8f83a9c6eaba7b76846bd296c.tar.gz
compcert-kvx-8781c27a16463fd8f83a9c6eaba7b76846bd296c.zip
Adding fp loads pair
-rw-r--r--aarch64/Asm.v27
-rw-r--r--aarch64/Asmblock.v11
-rw-r--r--aarch64/Asmblockdeps.v43
-rw-r--r--aarch64/Asmgen.v14
-rw-r--r--aarch64/Asmgenproof.v10
-rw-r--r--aarch64/PeepholeOracle.ml203
-rw-r--r--aarch64/PostpassSchedulingOracle.ml8
-rw-r--r--aarch64/TargetPrinter.ml4
8 files changed, 186 insertions, 134 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index dc1f025f..85e5ed3a 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -201,8 +201,8 @@ Inductive instruction: Type :=
| Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *)
| Pstrb (rs: ireg) (a: addressing) (**r store int8 *)
| Pstrh (rs: ireg) (a: addressing) (**r store int16 *)
- | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
- | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
(** Integer arithmetic, immediate *)
| Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *)
| Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *)
@@ -273,9 +273,12 @@ Inductive instruction: Type :=
| Pldrs (rd: freg) (a: addressing) (**r load float32 (single precision) *)
| Pldrd (rd: freg) (a: addressing) (**r load float64 (double precision) *)
| Pldrd_a (rd: freg) (a: addressing) (**r load float64 as any64 *)
+ | Pldps (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float32 *)
+ | Pldpd (rd1 rd2: freg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two float64 *)
| Pstrs (rs: freg) (a: addressing) (**r store float32 *)
| Pstrd (rs: freg) (a: addressing) (**r store float64 *)
| Pstrd_a (rs: freg) (a: addressing) (**r store float64 as any64 *)
+ (* TODO *)
(** Floating-point move *)
| Pfmov (rd r1: freg)
| Pfmovimms (rd: freg) (f: float32) (**r load float32 constant *)
@@ -798,7 +801,7 @@ Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.loadv chk1 m addr with
| None => Stuck
@@ -824,7 +827,7 @@ Definition exec_store_double (chk1 chk2: memory_chunk)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.storev chk1 m addr v1 with
| None => Stuck
@@ -1258,17 +1261,27 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| _ => Stuck
end
| Pbuiltin ef args res => Stuck (**r treated specially below *)
- (** The following instructions and directives are not generated directly
- by Asmgen, so we do not model them. *)
+ (** loads and stores pairs int/int64 *)
| Pldpw rd1 rd2 chk1 chk2 a =>
exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
| Pldpx rd1 rd2 chk1 chk2 a =>
exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
- | Pnop => Next (nextinstr rs) m
| Pstpw rs1 rs2 chk1 chk2 a =>
exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
| Pstpx rs1 rs2 chk1 chk2 a =>
exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ (** loads and stores pairs floating-point *)
+ | Pldps rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pldpd rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ (* TODO | Pstps rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pstpd rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m *)
+ | Pnop => Next (nextinstr rs) m
+ (** The following instructions and directives are not generated directly
+ by Asmgen, so we do not model them. *)
| Pcls _ _ _
| Pclz _ _ _
| Prev _ _ _
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index ed84b7d8..f5d2abbe 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -110,11 +110,13 @@ Inductive load_rd_a: Type :=
Inductive load_rd1_rd2_a: Type :=
| Pldpw
| Pldpx
+ | Pldps
+ | Pldpd
.
Inductive ld_instruction: Type :=
| PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
- | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
+ | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
.
Inductive store_rs_a : Type :=
@@ -134,11 +136,12 @@ Inductive store_rs_a : Type :=
Inductive store_rs1_rs2_a : Type :=
| Pstpw
| Pstpx
+ (* TODO *)
.
Inductive st_instruction : Type :=
| PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing)
- | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstp (st: store_rs1_rs2_a) (rs1 rs2: dreg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
.
Inductive arith_p : Type :=
@@ -481,7 +484,7 @@ Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.loadv chk1 m addr with
| None => Stuck
@@ -505,7 +508,7 @@ Definition exec_store_double (chk1 chk2: memory_chunk)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
match Mem.storev chk1 m addr v1 with
| None => Stuck
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
index e1d591df..8a2061de 100644
--- a/aarch64/Asmblockdeps.v
+++ b/aarch64/Asmblockdeps.v
@@ -1150,12 +1150,18 @@ Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: ad
| ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
end.
-Definition trans_ldp_chunk (chunk: memory_chunk): load_rd_a :=
+Definition trans_ldp_chunk (chunk: memory_chunk) (r: dreg): load_rd_a :=
match chunk with
- | Many32 => Pldrw_a
+ | Mint32 => Pldrw
| Mint64 => Pldrx
- | Many64 => Pldrx_a
- | _ => Pldrw
+ | Mfloat32 => Pldrs
+ | Mfloat64 => Pldrd
+ | Many32 => Pldrw_a
+ | _ => (* This case should always correspond to Many64 *)
+ match r with
+ | IR _ => Pldrx_a
+ | FR _ => Pldrd_a
+ end
end.
Definition trans_stp_chunk (chunk: memory_chunk): store_rs_a :=
@@ -1171,10 +1177,10 @@ Definition trans_load (ldi: ld_instruction) :=
| PLd_rd_a ld r a =>
let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)]
| Pldp ld r1 r2 chk1 chk2 a =>
- let ldi1 := trans_ldp_chunk chk1 in
- let ldi2 := trans_ldp_chunk chk2 in
+ let ldi1 := trans_ldp_chunk chk1 r1 in
+ let ldi2 := trans_ldp_chunk chk2 r1 in
let lr := eval_addressing_rlocs_ld ldi1 chk1 a in
- let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
+ let ofs := match chk1 with | Mint32 | Mfloat32 | Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
@@ -1193,7 +1199,7 @@ Definition trans_store (sti: st_instruction) :=
let sti1 := trans_stp_chunk chk1 in
let sti2 := trans_stp_chunk chk2 in
let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in
- let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
+ let ofs := match chk1 with | Mint32 | Mfloat32| Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
@@ -1356,6 +1362,14 @@ Proof.
intros; destruct r; discriminate.
Qed.
+Lemma dreg_not_pmem: forall (r: dreg),
+ (# r) <> pmem.
+Proof.
+ intros; destruct r as [i|f].
+ - destruct i. apply ireg_not_pmem. discriminate.
+ - apply freg_not_pmem.
+Qed.
+
Ltac DPRM pr :=
destruct pr as [drDPRF|crDPRF|];
[destruct drDPRF as [irDPRF|frDPRF]; [destruct irDPRF |]
@@ -1463,6 +1477,7 @@ Ltac discriminate_ppos :=
try apply ireg_not_pmem;
try apply ireg_not_pc;
try apply freg_not_pmem;
+ try apply dreg_not_pmem;
try apply ireg_not_CN;
try apply ireg_not_CZ;
try apply ireg_not_CC;
@@ -1706,10 +1721,10 @@ Proof.
econstructor.
Qed.
-Lemma load_chunk_neutral: forall chk v,
- interp_load (trans_ldp_chunk chk) v = v.
+Lemma load_chunk_neutral: forall chk v r,
+ interp_load (trans_ldp_chunk chk r) v = v.
Proof.
- intros; destruct chk; simpl; reflexivity.
+ intros; destruct chk; destruct r; simpl; reflexivity.
Qed.
Theorem bisimu_basic rsr mr sr bi:
@@ -1749,8 +1764,10 @@ Local Ltac preg_eq_discr r rd :=
try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr;
rewrite !load_chunk_neutral;
try (rewrite !assign_diff; discriminate_ppos; reflexivity);
- try (destruct base; discriminate_ppos);
- repeat (try fold (ppos r); intros; Simpl_update).
+ try (destruct rd1 as [ir1|fr1]; try destruct ir1; destruct rd2 as [ir2|fr2]; try destruct ir2;
+ destruct base; discriminate_ppos);
+ repeat (try fold (ppos r); try fold (ppos r0);
+ try fold (ppos fr1); try fold (ppos fr2); intros; Simpl_update).
- (* Store *)
destruct st.
+ unfold exec_store, exec_store_rs_a, eval_addressing_rlocs_st, exp_eval;
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index a720c11c..b6aa4c70 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -326,10 +326,16 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpw rd1 rd2 chk1 chk2 a)
+ OK (Asm.Pldpw rd1' rd2' chk1 chk2 a)
| PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpx rd1 rd2 chk1 chk2 a)
+ OK (Asm.Pldpx rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldps rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldps rd1' rd2' chk1 chk2 a)
+ | PLoad (Pldp Pldpd rd1 rd2 chk1 chk2 a) => do rd1' <- freg_of_preg rd1;
+ do rd2' <- freg_of_preg rd2;
+ OK (Asm.Pldpd rd1' rd2' chk1 chk2 a)
| PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a)
| PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
@@ -344,10 +350,10 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpw rs1 rs2 chk1 chk2 a)
+ OK (Asm.Pstpw rs1' rs2' chk1 chk2 a)
| PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpx rs1 rs2 chk1 chk2 a)
+ OK (Asm.Pstpx rs1' rs2' chk1 chk2 a)
| Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs)
| Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs)
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index a33f90b3..23db2e92 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1100,7 +1100,7 @@ Proof.
destruct (Mem.loadv chk2 m2
(eval_addressing lk
(get_offset_addr a match chk1 with
- | Mint32 | Many32 => 4
+ | Mint32 | Mfloat32| Many32 => 4
| _ => 8
end) rs1));
inversion HLOAD; auto.
@@ -1150,7 +1150,7 @@ Proof.
(eval_addressing lk
(get_offset_addr a
match chk1 with
- | Mint32 | Many32 => 4
+ | Mint32 | Mfloat32 | Many32 => 4
| _ => 8
end) rs1) v2);
inversion HSTORE; auto.
@@ -1331,12 +1331,14 @@ Proof.
destruct ld.
- destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_rd_a_preserved; eauto;
intros; simpl in *; destruct sz; eauto.
- - destruct ld; monadInv TRANSBI; exploit load_double_preserved; eauto. }
+ - destruct ld; monadInv TRANSBI; destruct rd1 as [[rd1'|]|]; destruct rd2 as [[rd2'|]|];
+ inv EQ; inv EQ1; exploit load_double_preserved; eauto. }
{ (* PStore *)
destruct st.
- destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_rs_a_preserved; eauto;
simpl in *; inv_matchi; find_rwrt_ag.
- - destruct st; monadInv TRANSBI; exploit store_double_preserved; eauto;
+ - destruct st; monadInv TRANSBI; destruct rs0 as [[rs0'|]|]; destruct rs3 as [[rs3'|]|];
+ inv EQ; inv EQ1; exploit store_double_preserved; eauto;
simpl in *; inv_matchi; find_rwrt_ag. }
{ (* Pallocframe *)
monadInv TRANSBI;
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
index a6945b9f..ecb529d0 100644
--- a/aarch64/PeepholeOracle.ml
+++ b/aarch64/PeepholeOracle.ml
@@ -27,37 +27,37 @@ let is_valid_immofs_64 z =
if z <= 504 && z >= -512 && z mod 8 = 0 then true else false
(* Functions to check if a ldp/stp replacement is valid according to args *)
-let is_valid_ldrw rd1 rd2 b1 b2 n1 n2 =
+let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if
- (not (ireg_eq rd1 rd2))
+ (not (dreg_eq rd1 rd2))
&& iregsp_eq b1 b2
- && (not (iregsp_eq (RR1 rd1) b2))
+ && (not (dreg_eq rd1 (IR b2)))
&& (z2 = z1 + 4 || z2 = z1 - 4)
&& is_valid_immofs_32 z1
then true
else false
-let is_valid_ldrx rd1 rd2 b1 b2 n1 n2 =
+let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if
- (not (ireg_eq rd1 rd2))
+ (not (dreg_eq rd1 rd2))
&& iregsp_eq b1 b2
- && (not (iregsp_eq (RR1 rd1) b2))
+ && (not (dreg_eq rd1 (IR b2)))
&& (z2 = z1 + 8 || z2 = z1 - 8)
&& is_valid_immofs_64 z1
then true
else false
-let is_valid_strw b1 b2 n1 n2 =
+let is_valid_str32 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true
else false
-let is_valid_strx b1 b2 n1 n2 =
+let is_valid_str64 b1 b2 n1 n2 =
let z1 = to_int (camlint64_of_coqint n1) in
let z2 = to_int (camlint64_of_coqint n2) in
if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true
@@ -70,14 +70,12 @@ let dreg_of_ireg r = IR (RR1 r)
* candidate *)
let verify_load_affect reg rd b rev =
let b = IR b in
- let rd = dreg_of_ireg rd in
if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
(* Return true if an intermediate
* read eliminates the potential
* candidate *)
let verify_load_read reg rd b rev =
- let rd = dreg_of_ireg rd in
dreg_eq reg rd
(* Return true if an intermediate
@@ -85,7 +83,6 @@ let verify_load_read reg rd b rev =
* candidate *)
let verify_store_affect reg rs b rev =
let b = IR b in
- let rs = dreg_of_ireg rs in
dreg_eq reg b || dreg_eq reg rs
(* Affect a symbolic memory list of potential replacements
@@ -95,11 +92,11 @@ let rec affect_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), "ldr" ->
if verify_load_affect reg rd b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), "str" ->
if verify_store_affect reg rs b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
@@ -113,11 +110,11 @@ let rec read_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), "ldr" ->
if verify_load_read reg rd b rev then
read_symb_mem reg insta t0 stype rev
else h0 :: read_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), "str" ->
h0 :: read_symb_mem reg insta t0 stype rev
| _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
@@ -219,9 +216,9 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
update_pot_rep_addressing a insta pot_rep stype rev
| Pldp (_, rd1, rd2, _, _, a) ->
pot_rep :=
- affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev;
+ affect_symb_mem rd1 insta !pot_rep stype rev;
pot_rep :=
- affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev;
+ affect_symb_mem rd2 insta !pot_rep stype rev;
update_pot_rep_addressing a insta pot_rep stype rev)
| _ -> pot_rep := [])
| PStore _ -> (
@@ -244,59 +241,6 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
| Pnop -> ()
-(* Try to find the index of the first previous compatible
- * replacement in a given symbolic memory *)
-let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
- match pot_rep with
- | [] -> None
- | h0 :: t0 -> (
- match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b1 b2 n1 n2 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b1 b2 n1 n2 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep r2 b2 n2 insta t0 stype
- | _, _ ->
- failwith "search_compat_rep: Found an inconsistent inst in pot_rep")
-
-(* Try to find the index of the first previous compatible
- * replacement in a given symbolic memory (when iterating in the reversed list) *)
-let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
- match pot_rep with
- | [] -> None
- | h0 :: t0 -> (
- match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then
- Some (h0, chunk_load ld1, rd1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b2 b1 n2 n1 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b2 b1 n2 n1 then
- Some (h0, chunk_store st1, rs1, b1, n1)
- else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | _, _ ->
- failwith
- "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
-
(* This is useful to manage the case were the immofs
* of the first ldr/str is greater than the second one *)
let min_is_rev n1 n2 =
@@ -310,6 +254,8 @@ let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
match ldi with
| Pldrw | Pldrw_a -> Pldpw
| Pldrx | Pldrx_a -> Pldpx
+ | Pldrs -> Pldps
+ | Pldrd | Pldrd_a -> Pldpd
| _ -> failwith "trans_ldi: Found a non compatible load to translate"
let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
@@ -319,12 +265,14 @@ let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
| _ -> failwith "trans_sti: Found a non compatible store to translate"
let is_compat_load (ldi : load_rd_a) =
- match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a -> true | _ -> false
+ match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a-> true | _ -> false
let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
match ldi1 with
| Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
| Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+ | Pldrs -> (match ldi2 with Pldrs -> true | _ -> false)
+ | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false)
| _ -> false
let is_compat_store (sti : store_rs_a) =
@@ -338,25 +286,64 @@ let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
let get_load_string (ldi : load_rd_a) =
match ldi with
- | Pldrw | Pldrw_a -> "ldrw"
- | Pldrx | Pldrx_a -> "ldrx"
+ | Pldrw | Pldrw_a -> "ldr32"
+ | Pldrs -> "ldr32f"
+ | Pldrx | Pldrx_a -> "ldr64"
+ | Pldrd | Pldrd_a -> "ldr64f"
| _ -> failwith "get_load_string: Found a non compatible load to translate"
let get_store_string (sti : store_rs_a) =
match sti with
- | Pstrw | Pstrw_a -> "strw"
- | Pstrx | Pstrx_a -> "strx"
+ | Pstrw | Pstrw_a -> "str32"
+ | Pstrx | Pstrx_a -> "str64"
| _ -> failwith "get_store_string: Found a non compatible store to translate"
let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
match stype with
- | "ldrw" -> is_valid_ldrw rd1 rd2 b1 b2 n1 n2
- | _ -> is_valid_ldrx rd1 rd2 b1 b2 n1 n2
+ | "ldr32" | "ldr32f" -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2
+ | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2
let is_valid_str b1 b2 n1 n2 stype =
match stype with
- | "strw" -> is_valid_strw b1 b2 n1 n2
- | _ -> is_valid_strx b1 b2 n1 n2
+ | "str32" -> is_valid_str32 b1 b2 n1 n2
+ | _ -> is_valid_str64 b1 b2 n1 n2
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory *)
+let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b1 b2 n1 n2 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith "search_compat_rep: Found an inconsistent inst in pot_rep")
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory (when iterating in the reversed list) *)
+let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b2 b1 n2 n1 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith
+ "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
(* Main peephole function in backward style *)
let pair_rep_inv insta =
@@ -365,6 +352,8 @@ let pair_rep_inv insta =
* are the indices of insts in the main array "insta". *)
let pot_ldrw_rep = ref [] in
let pot_ldrx_rep = ref [] in
+ let pot_ldrs_rep = ref [] in
+ let pot_ldrd_rep = ref [] in
let pot_strw_rep = ref [] in
let pot_strx_rep = ref [] in
for i = Array.length insta - 1 downto 1 do
@@ -373,18 +362,25 @@ let pair_rep_inv insta =
(* Here we need to update every symbolic memory according to the matched inst *)
update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" true;
update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" true;
+ update_pot_rep_basic h0 insta pot_ldrs_rep "ldr" true;
+ update_pot_rep_basic h0 insta pot_ldrd_rep "ldr" true;
update_pot_rep_basic h0 insta pot_strw_rep "str" true;
update_pot_rep_basic h0 insta pot_strx_rep "str" true;
match (h0, h1) with
(* Non-consecutive ldr *)
- | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> (
if is_compat_load ldi then
let pot_rep =
- match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ match ldi with
+ | Pldrw | Pldrw_a -> pot_ldrw_rep
+ | Pldrx | Pldrx_a -> pot_ldrx_rep
+ | Pldrs -> pot_ldrs_rep
+ | _ -> pot_ldrd_rep
in
(* Search a previous compatible load *)
+ let ld_string = get_load_string ldi in
match
- search_compat_rep_inv rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_string
with
(* If we can't find a candidate, add the current load as a potential future one *)
| None -> pot_rep := i :: !pot_rep
@@ -395,20 +391,20 @@ let pair_rep_inv insta =
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
if min_is_rev n n1 then (
- if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
else (
- if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
)
(* Non-consecutive str *)
- | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> (
if is_compat_store sti then
let pot_rep =
match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
@@ -451,24 +447,30 @@ let pair_rep insta =
* are the indices of insts in the main array "insta". *)
let pot_ldrw_rep = ref [] in
let pot_ldrx_rep = ref [] in
+ let pot_ldrs_rep = ref [] in
+ let pot_ldrd_rep = ref [] in
let pot_strw_rep = ref [] in
let pot_strx_rep = ref [] in
for i = 0 to Array.length insta - 2 do
+ eprintf "DEBUG %d" (Array.length insta);
let h0 = insta.(i) in
let h1 = insta.(i + 1) in
(* Here we need to update every symbolic memory according to the matched inst *)
update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" false;
update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" false;
+ update_pot_rep_basic h0 insta pot_ldrs_rep "ldr" true;
+ update_pot_rep_basic h0 insta pot_ldrd_rep "ldr" true;
update_pot_rep_basic h0 insta pot_strw_rep "str" false;
update_pot_rep_basic h0 insta pot_strx_rep "str" false;
match (h0, h1) with
(* Consecutive ldr *)
- | ( PLoad (PLd_rd_a (ldi1, IR (RR1 rd1), ADimm (b1, n1))),
- PLoad (PLd_rd_a (ldi2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))),
+ PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) ->
if are_compat_load ldi1 ldi2 then
- if is_valid_ldr rd1 rd2 b1 b2 n1 n2 (get_load_string ldi1) then (
+ let ld_string = get_load_string ldi1 in
+ if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_string then (
if min_is_rev n1 n2 then (
- if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
@@ -479,7 +481,7 @@ let pair_rep insta =
chunk_load ldi2,
ADimm (b1, n1) )))
else (
- if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
@@ -491,14 +493,19 @@ let pair_rep insta =
ADimm (b1, n2) )));
insta.(i + 1) <- Pnop)
(* Non-consecutive ldr *)
- | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> (
if is_compat_load ldi then
let pot_rep =
- match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ match ldi with
+ | Pldrw | Pldrw_a -> pot_ldrw_rep
+ | Pldrx | Pldrx_a -> pot_ldrx_rep
+ | Pldrs -> pot_ldrs_rep
+ | _ -> pot_ldrd_rep
in
(* Search a previous compatible load *)
+ let ld_string = get_load_string ldi in
match
- search_compat_rep rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ search_compat_rep rd1 b1 n1 insta !pot_rep ld_string
with
(* If we can't find a candidate, add the current load as a potential future one *)
| None -> pot_rep := i :: !pot_rep
@@ -509,21 +516,21 @@ let pair_rep insta =
pot_rep := List.filter filt !pot_rep;
insta.(rep) <- Pnop;
if min_is_rev n n1 then (
- if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC\n";
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
else (
- if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC\n";
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%s\n" ld_string;
insta.(i) <-
PLoad
(Pldp
(trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
)
(* Consecutive str *)
- | ( PStore (PSt_rs_a (sti1, IR (RR1 rd1), ADimm (b1, n1))),
- PStore (PSt_rs_a (sti2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))),
+ PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) ->
(* Regardless of whether we can perform the peephole or not,
* we have to clean the potential candidates for stp now as we are
* looking at two new store instructions. *)
@@ -543,7 +550,7 @@ let pair_rep insta =
ADimm (b1, n1) ));
insta.(i + 1) <- Pnop)
(* Non-consecutive str *)
- | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> (
if is_compat_store sti then
let pot_rep =
match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index d7fab1de..cde3e7a7 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -137,8 +137,8 @@ let load_rec ldi =
| PLd_rd_a (ld, rd, a) ->
load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
| Pldp (ld, rd1, rd2, _, _, a) ->
- load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1)
- (reg_of_ireg rd2) a
+ load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1)
+ (reg_of_dreg rd2) a
let store_rs_a_rec st rs a =
{
@@ -161,8 +161,8 @@ let store_rec sti =
| PSt_rs_a (st, rs, a) ->
store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
| Pstp (st, rs1, rs2, _, _, a) ->
- store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1)
- (reg_of_ireg rs2) a
+ store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1)
+ (reg_of_dreg rs2) a
let loadsymbol_rec i rd id =
{ inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false }
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index 40e4a182..93cfa99c 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -417,6 +417,10 @@ module Target (*: TARGET*) =
fprintf oc " str %a, %a\n" sreg rd addressing a
| Pstrd(rd, a) | Pstrd_a(rd, a) ->
fprintf oc " str %a, %a\n" dreg rd addressing a
+ | Pldps(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" sreg rd1 sreg rd2 addressing a
+ | Pldpd(rd1, rd2, _, _, a) ->
+ fprintf oc " ldp %a, %a, %a\n" dreg rd1 dreg rd2 addressing a
(* Floating-point move *)
| Pfmov(rd, r1) ->
fprintf oc " fmov %a, %a\n" dreg rd dreg r1