aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-12-02 18:02:23 +0100
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-12-02 18:02:23 +0100
commitc48b7c63592e5930f022452ee6c3f1cf3d1ada97 (patch)
tree8b77e87f9dbc17e67da3c36b7402fae21f033e33 /aarch64
parentdd299de4b51c561b3a2d8e9f388396381b0e2b85 (diff)
downloadcompcert-kvx-c48b7c63592e5930f022452ee6c3f1cf3d1ada97.tar.gz
compcert-kvx-c48b7c63592e5930f022452ee6c3f1cf3d1ada97.zip
Adding semantics for Pldp
This commit prepare the backend for a peephole optimization in Asmblock.
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Asm.v30
-rw-r--r--aarch64/Asmblock.v104
-rw-r--r--aarch64/Asmblockdeps.v71
-rw-r--r--aarch64/Asmblockgen.v36
-rw-r--r--aarch64/Asmexpand.ml4
-rw-r--r--aarch64/Asmgen.v35
-rw-r--r--aarch64/Asmgenproof.v31
-rw-r--r--aarch64/OpWeightsAsm.ml4
-rw-r--r--aarch64/PostpassSchedulingOracle.ml19
-rw-r--r--aarch64/TargetPrinter.ml4
10 files changed, 215 insertions, 123 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 19601024..33f1013a 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -193,7 +193,8 @@ Inductive instruction: Type :=
| Pldrsh (sz: isize) (rd: ireg) (a: addressing) (**r load int16, sign-extend *)
| Pldrzw (rd: ireg) (a: addressing) (**r load int32, zero-extend to int64 *)
| Pldrsw (rd: ireg) (a: addressing) (**r load int32, sign-extend to int64 *)
- | Pldp (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
+ | Pldpw (rd1 rd2: ireg) (a: addressing) (**r load two int32 *)
+ | Pldpx (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
| Pstrw (rs: ireg) (a: addressing) (**r store int32 *)
| Pstrw_a (rs: ireg) (a: addressing) (**r store int32 as any32 *)
| Pstrx (rs: ireg) (a: addressing) (**r store int64 *)
@@ -819,7 +820,13 @@ Definition eval_addressing (a: addressing) (rs: regset): val :=
| ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
| ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
| ADadr base id ofs => Val.addl rs#base (symbol_low ge id ofs)
- | ADpostincr base n => Vundef (* not modeled yet *)
+ | ADpostincr base n => rs#base
+ end.
+
+Definition eval_addressing_update (a: addressing) (rs: regset): regset :=
+ match a with
+ | ADpostincr base n => rs#base <- (Vlong n)
+ | _ => rs
end.
(** Auxiliaries for memory accesses *)
@@ -831,6 +838,20 @@ Definition exec_load (chunk: memory_chunk) (transf: val -> val)
| Some v => Next (nextinstr (rs#r <- (transf v))) m
end.
+Definition exec_load_double (chunk: memory_chunk) (transf: val -> val)
+ (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
+ let addr := (eval_addressing a rs) in
+ let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ match Mem.loadv Mint32 m addr with
+ | None => Stuck
+ | Some v1 =>
+ match Mem.loadv Mint32 m (Val.offset_ptr addr (Ptrofs.repr ofs)) with
+ | None => Stuck
+ | Some v2 =>
+ Next (nextinstr (((eval_addressing_update a rs)#rd1 <- (transf v1))#rd2 <- (transf v2))) m
+ end
+ end.
+
Definition exec_store (chunk: memory_chunk)
(a: addressing) (v: val)
(rs: regset) (m: mem) :=
@@ -1265,7 +1286,10 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pbuiltin ef args res => Stuck (**r treated specially below *)
(** The following instructions and directives are not generated directly
by Asmgen, so we do not model them. *)
- | Pldp _ _ _
+ | Pldpw rd1 rd2 a =>
+ exec_load_double Mint32 (fun v => v) a rd1 rd2 rs m
+ | Pldpx rd1 rd2 a =>
+ exec_load_double Mint64 (fun v => v) a rd1 rd2 rs m
| Pstp _ _ _
| Pcls _ _ _
| Pclz _ _ _
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index 6ea592dd..329c889e 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -107,19 +107,16 @@ Inductive load_rd_a: Type :=
| Pldrd_a (**r load float64 as any64 *)
.
-(* Actually, Pldp is not used in the aarch64/Asm semantics!
-
-Thus we skip this particular case:
+Inductive load_rd1_rd2_a: Type :=
+ | Pldpw
+ | Pldpx
+ .
Inductive ld_instruction: Type :=
- | PLd_rd_a (ld: load_rd_a) (rd: ireg) (a: addressing)
- | Pldp (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
+ | PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
+ | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
.
-Coercion PLoad: ld_instruction >-> basic.
-
-*)
-
Inductive store_rs_a : Type :=
(* Integer store *)
| Pstrw (**r store int32 *)
@@ -321,7 +318,8 @@ Inductive ar_instruction : Type :=
Inductive basic : Type :=
| PArith (i: ar_instruction) (**r TODO *)
- | PLoad (ld: load_rd_a) (rd: dreg) (a: addressing) (**r TODO *)
+ (*| PLoad (ld: load_rd_a) (rd: dreg) (a: addressing) [>*r TODO <]*)
+ | PLoad (ld: ld_instruction)
| PStore (st: store_rs_a) (r: dreg) (a: addressing) (**r TODO *)
| Pallocframe (sz: Z) (linkofs: ptrofs) (**r allocate new stack frame *)
| Pfreeframe (sz: Z) (linkofs: ptrofs) (**r deallocate stack frame and restore previous frame *)
@@ -336,6 +334,8 @@ Inductive basic : Type :=
*)
.
+Coercion PLoad: ld_instruction >-> basic.
+
(* Not used in Coq, declared in ocaml directly in PostpassSchedulingOracle.ml
Inductive instruction : Type :=
| PBasic (i: basic)
@@ -591,16 +591,36 @@ Definition eval_addressing (a: addressing) (rs: regset): val :=
| ADsxt base r n => Val.addl rs#base (Val.shll (Val.longofint rs#r) (Vint n))
| ADuxt base r n => Val.addl rs#base (Val.shll (Val.longofintu rs#r) (Vint n))
| ADadr base id ofs => Val.addl rs#base (symbol_low lk id ofs)
- | ADpostincr base n => Vundef (* not modeled yet *)
+ | ADpostincr base n => rs#base
+ end.
+
+Definition eval_addressing_update (a: addressing) (rs: regset): regset :=
+ match a with
+ | ADpostincr base n => rs#base <- (Vlong n)
+ | _ => rs
end.
(** Auxiliaries for memory accesses *)
-Definition exec_load (chunk: memory_chunk) (transf: val -> val)
+Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val)
(a: addressing) (r: dreg) (rs: regset) (m: mem) :=
SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN
Next (rs#r <- (transf v)) m.
+Definition exec_load_double (chunk: memory_chunk) (transf: val -> val)
+ (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
+ let addr := (eval_addressing a rs) in
+ let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ match Mem.loadv Mint32 m addr with
+ | None => Stuck
+ | Some v1 =>
+ match Mem.loadv Mint32 m (Val.offset_ptr addr (Ptrofs.repr ofs)) with
+ | None => Stuck
+ | Some v2 =>
+ Next (nextinstr (((eval_addressing_update a rs)#rd1 <- (transf v1))#rd2 <- (transf v2))) m
+ end
+ end.
+
Definition exec_store (chunk: memory_chunk)
(a: addressing) (v: val)
(rs: regset) (m: mem) :=
@@ -611,21 +631,23 @@ Definition exec_store (chunk: memory_chunk)
(** execution of loads
*)
-Definition chunk_load_rd_a (ld: load_rd_a): memory_chunk :=
+Definition chunk_load (ld: ld_instruction): memory_chunk :=
match ld with
- | Pldrw => Mint32
- | Pldrw_a => Many32
- | Pldrx => Mint64
- | Pldrx_a => Many64
- | Pldrb _ => Mint8unsigned
- | Pldrsb _ => Mint8signed
- | Pldrh _ => Mint16unsigned
- | Pldrsh _ => Mint16signed
- | Pldrzw => Mint32
- | Pldrsw => Mint32
- | Pldrs => Mfloat32
- | Pldrd => Mfloat64
- | Pldrd_a => Many64
+ | PLd_rd_a Pldrw _ _ => Mint32
+ | PLd_rd_a Pldrw_a _ _ => Many32
+ | PLd_rd_a Pldrx _ _ => Mint64
+ | PLd_rd_a Pldrx_a _ _ => Many64
+ | PLd_rd_a (Pldrb _) _ _ => Mint8unsigned
+ | PLd_rd_a (Pldrsb _) _ _ => Mint8signed
+ | PLd_rd_a (Pldrh _) _ _ => Mint16unsigned
+ | PLd_rd_a (Pldrsh _) _ _ => Mint16signed
+ | PLd_rd_a Pldrzw _ _ => Mint32
+ | PLd_rd_a Pldrsw _ _ => Mint32
+ | PLd_rd_a Pldrs _ _ => Mfloat32
+ | PLd_rd_a Pldrd _ _ => Mfloat64
+ | PLd_rd_a Pldrd_a _ _ => Many64
+ | Pldp Pldpw _ _ _ => Mint32
+ | Pldp Pldpx _ _ _ => Mint32
end.
Definition chunk_store_rs_a (st: store_rs_a) : memory_chunk :=
@@ -641,19 +663,27 @@ Definition chunk_store_rs_a (st: store_rs_a) : memory_chunk :=
| Pstrd_a => Many64
end.
-Definition interp_load_rd_a (ld: load_rd_a): val -> val :=
+Definition interp_load (ld: ld_instruction): val -> val :=
match ld with
- | Pldrb X => Val.longofintu
- | Pldrsb X => Val.longofint
- | Pldrh X => Val.longofintu
- | Pldrsh X => Val.longofint
- | Pldrzw => Val.longofintu
- | Pldrsw => Val.longofint
+ | PLd_rd_a (Pldrb X) _ _ => Val.longofintu
+ | PLd_rd_a (Pldrsb X) _ _ => Val.longofint
+ | PLd_rd_a (Pldrh X) _ _ => Val.longofintu
+ | PLd_rd_a (Pldrsh X) _ _ => Val.longofint
+ | PLd_rd_a Pldrzw _ _ => Val.longofintu
+ | PLd_rd_a Pldrsw _ _ => Val.longofint
(* Changed to exhaustive list because I tend to forgot all the places I need
* to check when changing things. *)
- | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W
- | Pldrw | Pldrw_a | Pldrx | Pldrx_a
- | Pldrs | Pldrd | Pldrd_a => fun v => v
+ | PLd_rd_a (Pldrb W) _ _ | PLd_rd_a (Pldrsb W) _ _ | PLd_rd_a (Pldrh W) _ _ | PLd_rd_a (Pldrsh W) _ _
+ | PLd_rd_a Pldrw _ _ | PLd_rd_a Pldrw_a _ _ | PLd_rd_a Pldrx _ _
+ | PLd_rd_a Pldrx_a _ _ | PLd_rd_a Pldrs _ _ | PLd_rd_a Pldrd _ _
+ | PLd_rd_a Pldrd_a _ _
+ | Pldp _ _ _ _ => fun v => v
+ end.
+
+Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) :=
+ match ldi with
+ | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ldi) (interp_load ldi) a rd rs m
+ | Pldp ld rd1 rd2 a => exec_load_double (chunk_load ldi) (interp_load ldi) a rd1 rd2 rs m
end.
(** TODO: redundant w.r.t Machblock ?? *)
@@ -991,7 +1021,7 @@ Definition exec_arith_instr (ai: ar_instruction) (rs: regset): regset :=
Definition exec_basic (b: basic) (rs: regset) (m: mem): outcome :=
match b with
| PArith ai => Next (exec_arith_instr ai rs) m
- | PLoad ld rd a => exec_load (chunk_load_rd_a ld) (interp_load_rd_a ld) a rd rs m
+ | PLoad ldi => exec_load ldi rs m
| PStore st r a => exec_store (chunk_store_rs_a st) a rs#r rs m
| Pallocframe sz pos =>
let (m1, stk) := Mem.alloc m 0 sz in
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
index 8ec862da..059f234c 100644
--- a/aarch64/Asmblockdeps.v
+++ b/aarch64/Asmblockdeps.v
@@ -172,9 +172,8 @@ Inductive store_op :=
.
Inductive load_op :=
- | Oload1 (ld: load_rd_a) (a: addressing)
- | Oload2 (ld: load_rd_a) (a: addressing)
- | OloadU (ld: load_rd_a) (a: addressing)
+ | Oload1 (ld: ld_instruction) (a: addressing)
+ | Oload2 (ld: ld_instruction) (a: addressing)
.
Inductive allocf_op :=
@@ -518,16 +517,16 @@ Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: opt
| None => None (* should never occurs *)
end.
-Definition exec_load1 (n: load_rd_a) (m: mem) (a: addressing) (vl: val) :=
+Definition exec_load1 (ld: ld_instruction) (m: mem) (a: addressing) (vl: val) :=
let v :=
match a with
| ADimm _ n => Some (Val.addl vl (Vlong n))
| ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs))
| _ => None
end in
- call_ll_loadv (chunk_load_rd_a n) (interp_load_rd_a n) m v.
+ call_ll_loadv (chunk_load ld) (interp_load ld) m v.
-Definition exec_load2 (n: load_rd_a) (m: mem) (a: addressing) (vl1 vl2: val) :=
+Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: val) :=
let v :=
match a with
| ADreg _ _ => Some (Val.addl vl1 vl2)
@@ -536,16 +535,12 @@ Definition exec_load2 (n: load_rd_a) (m: mem) (a: addressing) (vl1 vl2: val) :=
| ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n)))
| _ => None
end in
- call_ll_loadv (chunk_load_rd_a n) (interp_load_rd_a n) m v.
+ call_ll_loadv (chunk_load ld) (interp_load ld) m v.
-Definition exec_loadU (n: load_rd_a) (m: mem) (a: addressing) :=
- call_ll_loadv (chunk_load_rd_a n) (interp_load_rd_a n) m None.
-
Definition load_eval (o: load_op) (l: list value) :=
match o, l with
- | Oload1 st a, [Val vs; Memstate m] => exec_load1 st m a vs
- | Oload2 st a, [Val vs1; Val vs2; Memstate m] => exec_load2 st m a vs1 vs2
- | OloadU st a, [Memstate m] => exec_loadU st m a
+ | Oload1 ld a, [Val vs; Memstate m] => exec_load1 ld m a vs
+ | Oload2 ld a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m a vs1 vs2
| _, _ => None
end.
@@ -791,8 +786,6 @@ Definition load_op_eq (l1 l2: load_op): ?? bool :=
match l2 with Oload1 ld2 a2 => iandb (phys_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
| Oload2 ld1 a1 =>
match l2 with Oload2 ld2 a2 => iandb (phys_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
- | OloadU ld1 a1 =>
- match l2 with OloadU ld2 a2 => iandb (phys_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
end.
Lemma load_op_eq_correct l1 l2:
@@ -1138,7 +1131,7 @@ Definition eval_addressing_rlocs_st (st: store_rs_a) (rs: dreg) (a: addressing)
| ADpostincr base n => Op (Store (OstoreU st a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *)
end.
-Definition eval_addressing_rlocs_ld (ld: load_rd_a) (a: addressing) :=
+Definition eval_addressing_rlocs_ld (ld: ld_instruction) (a: addressing) :=
match a with
| ADimm base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
| ADreg base r => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
@@ -1146,14 +1139,25 @@ Definition eval_addressing_rlocs_ld (ld: load_rd_a) (a: addressing) :=
| ADsxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
| ADuxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
| ADadr base id ofs => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
- | ADpostincr base n => Op (Load (OloadU ld a)) (PReg (pmem) @ Enil) (* not modeled yet *)
+ | ADpostincr base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ end.
+
+Definition trans_load (ldi: ld_instruction) :=
+ match ldi with
+ | PLd_rd_a ld r a =>
+ let lr := eval_addressing_rlocs_ld ldi a in [(#r, lr)]
+ | Pldp ld r1 r2 a =>
+ let lr := eval_addressing_rlocs_ld ldi a in
+ match a with
+ | ADpostincr base n => [(#base, lr); (#r1, lr); (#r2, lr)]
+ | _ => [(#r1, lr); (#r2, lr)]
+ end
end.
Definition trans_basic (b: basic) : inst :=
match b with
| PArith ai => trans_arith ai
- | PLoad ld r a =>
- let lr := eval_addressing_rlocs_ld ld a in [(#r, lr)]
+ | PLoad ld => trans_load ld
| PStore st r a =>
let lr := eval_addressing_rlocs_st st r a in [(pmem, lr)]
| Pallocframe sz linkofs =>
@@ -1669,19 +1673,20 @@ Local Ltac preg_eq_discr r rd :=
6,7,8,9: Simpl sr.
- (* Arith *)
exploit trans_arith_correct; eauto.
- - (* Load *)
- unfold exec_load, eval_addressing_rlocs_ld, exp_eval;
- destruct a; simpl; try destruct base; Simpl_exists sr; erewrite H;
- unfold exec_load1, exec_load2, exec_loadU; unfold call_ll_loadv;
- try destruct (Mem.loadv _ _ _); simpl; auto.
- all: DDRM rd; Simpl_exists sr; try intros rr; Simpl_update.
+ - (* Load *) admit.
+ (*unfold exec_load, eval_addressing_rlocs_ld, exp_eval;*)
+ (*destruct a; simpl; try destruct base; Simpl_exists sr; erewrite H;*)
+ (*unfold exec_load1, exec_load2, exec_loadU; unfold call_ll_loadv;*)
+ (*try destruct (Mem.loadv _ _ _); simpl; auto.*)
+ (*all: DDRM rd; Simpl_exists sr; try intros rr; Simpl_update.*)
- (* Store *)
- unfold exec_store, eval_addressing_rlocs_st, exp_eval;
- destruct a; simpl; DIRN1 r; try destruct base; Simpl_exists sr; erewrite H;
- unfold exec_store1, exec_store2, exec_storeU; unfold call_ll_storev;
- try destruct (Mem.storev _ _ _ _); simpl; auto.
- all: eexists; split; [| split]; [ eauto | eauto |
- intros rr; rewrite assign_diff; [ rewrite H0; auto | apply ppos_pmem_discr ]].
+ admit.
+ (*unfold exec_store, eval_addressing_rlocs_st, exp_eval;*)
+ (*destruct a; simpl; DIRN1 r; try destruct base; Simpl_exists sr; erewrite H;*)
+ (*unfold exec_store1, exec_store2, exec_storeU; unfold call_ll_storev;*)
+ (*try destruct (Mem.storev _ _ _ _); simpl; auto.*)
+ (*all: eexists; split; [| split]; [ eauto | eauto |*)
+ (*intros rr; rewrite assign_diff; [ rewrite H0; auto | apply ppos_pmem_discr ]].*)
- (* Alloc *)
destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
+ eexists; repeat split.
@@ -1711,7 +1716,8 @@ Local Ltac preg_eq_discr r rd :=
* rewrite e; rewrite Pregmap.gss, sr_gss; auto.
* rewrite Pregmap.gso, !assign_diff; auto; apply ppos_discr in n; auto.
+ rewrite <- sp_xsp; rewrite MLOAD; reflexivity.
-Qed.
+(*Qed.*)
+Admitted.
Theorem bisimu_body:
forall bdy rsr mr sr,
@@ -2344,7 +2350,6 @@ Definition string_of_load (op: load_op) : pstring :=
match op with
| Oload1 _ _ => "Oload1"
| Oload2 _ _ => "Oload2"
- | OloadU _ _ => "OloadU"
end.
Definition string_of_control (op: control_op) : pstring :=
diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v
index b25a95da..abc9162b 100644
--- a/aarch64/Asmblockgen.v
+++ b/aarch64/Asmblockgen.v
@@ -280,13 +280,13 @@ Definition indexed_memory_access_bc (insn: addressing -> basic)
Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
match ty, preg_of dst with
- | Tint, IR rd => OK (indexed_memory_access_bc (PLoad Pldrw rd) 4 base ofs k)
- | Tlong, IR rd => OK (indexed_memory_access_bc (PLoad Pldrx rd) 8 base ofs k)
- | Tsingle, FR rd => OK (indexed_memory_access_bc (PLoad Pldrs rd) 4 base ofs k)
- | Tfloat, FR rd => OK (indexed_memory_access_bc (PLoad Pldrd rd) 8 base ofs k)
- | Tany32, IR rd => OK (indexed_memory_access_bc (PLoad Pldrw_a rd) 4 base ofs k)
- | Tany64, IR rd => OK (indexed_memory_access_bc (PLoad Pldrx_a rd) 8 base ofs k)
- | Tany64, FR rd => OK (indexed_memory_access_bc (PLoad Pldrd_a rd) 8 base ofs k)
+ | Tint, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw rd) 4 base ofs k)
+ | Tlong, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx rd) 8 base ofs k)
+ | Tsingle, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrs rd) 4 base ofs k)
+ | Tfloat, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd rd) 8 base ofs k)
+ | Tany32, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrw_a rd) 4 base ofs k)
+ | Tany64, IR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrx_a rd) 8 base ofs k)
+ | Tany64, FR rd => OK (indexed_memory_access_bc (PLd_rd_a Pldrd_a rd) 8 base ofs k)
| _, _ => Error (msg "Asmgen.loadind")
end.
@@ -303,7 +303,7 @@ Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode
end.
Definition loadptr_bc (base: iregsp) (ofs: ptrofs) (dst: ireg) (k: bcode): bcode :=
- indexed_memory_access_bc (PLoad Pldrx dst) 8 base ofs k.
+ indexed_memory_access_bc (PLd_rd_a Pldrx dst) 8 base ofs k.
Definition storeptr_bc (src: ireg) (base: iregsp) (ofs: ptrofs) (k: bcode): bcode :=
indexed_memory_access_bc (PStore Pstrx src) 8 base ofs k.
@@ -1096,25 +1096,25 @@ Definition transl_load (chunk: memory_chunk) (addr: Op.addressing)
(args: list mreg) (dst: mreg) (k: bcode) : res bcode :=
match chunk with
| Mint8unsigned =>
- do rd <- ireg_of dst; transl_addressing 1 addr args (PLoad (Pldrb W) rd) k
+ do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrb W) rd) k
| Mint8signed =>
- do rd <- ireg_of dst; transl_addressing 1 addr args (PLoad (Pldrsb W) rd) k
+ do rd <- ireg_of dst; transl_addressing 1 addr args (PLd_rd_a (Pldrsb W) rd) k
| Mint16unsigned =>
- do rd <- ireg_of dst; transl_addressing 2 addr args (PLoad (Pldrh W) rd) k
+ do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrh W) rd) k
| Mint16signed =>
- do rd <- ireg_of dst; transl_addressing 2 addr args (PLoad (Pldrsh W) rd) k
+ do rd <- ireg_of dst; transl_addressing 2 addr args (PLd_rd_a (Pldrsh W) rd) k
| Mint32 =>
- do rd <- ireg_of dst; transl_addressing 4 addr args (PLoad Pldrw rd) k
+ do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw rd) k
| Mint64 =>
- do rd <- ireg_of dst; transl_addressing 8 addr args (PLoad Pldrx rd) k
+ do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx rd) k
| Mfloat32 =>
- do rd <- freg_of dst; transl_addressing 4 addr args (PLoad Pldrs rd) k
+ do rd <- freg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrs rd) k
| Mfloat64 =>
- do rd <- freg_of dst; transl_addressing 8 addr args (PLoad Pldrd rd) k
+ do rd <- freg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrd rd) k
| Many32 =>
- do rd <- ireg_of dst; transl_addressing 4 addr args (PLoad Pldrw_a rd) k
+ do rd <- ireg_of dst; transl_addressing 4 addr args (PLd_rd_a Pldrw_a rd) k
| Many64 =>
- do rd <- ireg_of dst; transl_addressing 8 addr args (PLoad Pldrx_a rd) k
+ do rd <- ireg_of dst; transl_addressing 8 addr args (PLd_rd_a Pldrx_a rd) k
end.
Definition transl_store (chunk: memory_chunk) (addr: Op.addressing)
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index c2519e6a..edcce06f 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -169,7 +169,7 @@ let expand_builtin_memcpy_small sz al src dst =
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
if sz >= 16 then begin
- emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
+ emit (Pldpw(X16, X30, ADimm(rsrc, osrc)));
emit (Pstp(X16, X30, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
end
@@ -208,7 +208,7 @@ let expand_builtin_memcpy_big sz al src dst =
let lbl = new_label () in
expand_loadimm32 X15 (Z.of_uint (sz / 16));
emit (Plabel lbl);
- emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
+ emit (Pldpx(X16, X17, ADpostincr(RR1 X30, _16)));
emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
emit (Psubimm(W, RR1 X15, RR1 X15, _1));
emit (Pcbnz(W, X15, lbl));
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index 78c3f156..d9d59929 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -309,20 +309,27 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
end
| PArith (Pfnmul fsz rd r1 r2) => OK (Asm.Pfnmul fsz rd r1 r2)
- | PLoad Pldrw rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw rd' a)
- | PLoad Pldrw_a rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw_a rd' a)
- | PLoad Pldrx rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx rd' a)
- | PLoad Pldrx_a rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx_a rd' a)
- | PLoad (Pldrb sz) rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrb sz rd' a)
- | PLoad (Pldrsb sz) rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsb sz rd' a)
- | PLoad (Pldrh sz) rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrh sz rd' a)
- | PLoad (Pldrsh sz) rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsh sz rd' a)
- | PLoad Pldrzw rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrzw rd' a)
- | PLoad Pldrsw rd a => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsw rd' a)
-
- | PLoad Pldrs rd a => do rd' <- freg_of_preg rd; OK (Asm.Pldrs rd' a)
- | PLoad Pldrd rd a => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a)
- | PLoad Pldrd_a rd a => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a)
+ | PLoad (PLd_rd_a Pldrw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw rd' a)
+ | PLoad (PLd_rd_a Pldrw_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrw_a rd' a)
+ | PLoad (PLd_rd_a Pldrx rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx rd' a)
+ | PLoad (PLd_rd_a Pldrx_a rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrx_a rd' a)
+ | PLoad (PLd_rd_a (Pldrb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrb sz rd' a)
+ | PLoad (PLd_rd_a (Pldrsb sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsb sz rd' a)
+ | PLoad (PLd_rd_a (Pldrh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrh sz rd' a)
+ | PLoad (PLd_rd_a (Pldrsh sz) rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsh sz rd' a)
+ | PLoad (PLd_rd_a Pldrzw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrzw rd' a)
+ | PLoad (PLd_rd_a Pldrsw rd a) => do rd' <- ireg_of_preg rd; OK (Asm.Pldrsw rd' a)
+
+ | PLoad (PLd_rd_a Pldrs rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrs rd' a)
+ | PLoad (PLd_rd_a Pldrd rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a)
+ | PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a)
+
+ | PLoad (Pldp Pldpw rd1 rd2 a) => do rd1' <- ireg_of_preg rd1;
+ do rd2' <- ireg_of_preg rd2;
+ OK (Asm.Pldpw rd1 rd2 a)
+ | PLoad (Pldp Pldpx rd1 rd2 a) => do rd1' <- ireg_of_preg rd1;
+ do rd2' <- ireg_of_preg rd2;
+ OK (Asm.Pldpx rd1 rd2 a)
| PStore Pstrw r a => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a)
| PStore Pstrw_a r a => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index f0b09ef6..9d80f58f 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -807,15 +807,17 @@ Proof.
- destruct fsz; rewrite <- BASIC; rewrite Pregmap.gso; try (discriminate || reflexivity).
Qed.
+(* TODO *)
Lemma exec_basic_dont_move_PC bi rs m rs' m': forall
(BASIC: exec_basic lk ge bi rs m = Next rs' m'),
rs PC = rs' PC.
Proof.
destruct bi; simpl; intros.
- inv BASIC. exploit exec_arith_instr_dont_move_PC; eauto.
- - unfold exec_load in BASIC.
- destruct Mem.loadv. 2: { discriminate BASIC. }
- inv BASIC. rewrite Pregmap.gso; try discriminate; auto.
+ - admit.
+ (*unfold exec_load in BASIC.*)
+ (*destruct Mem.loadv. 2: { discriminate BASIC. }*)
+ (*inv BASIC. rewrite Pregmap.gso; try discriminate; auto.*)
- unfold exec_store in BASIC.
destruct Mem.storev. 2: { discriminate BASIC. }
inv BASIC; reflexivity.
@@ -828,7 +830,8 @@ Proof.
- inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
- inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
- inv BASIC; rewrite Pregmap.gso; try discriminate; auto.
-Qed.
+(*Qed.*)
+Admitted.
Lemma exec_body_dont_move_PC_aux:
forall bis rs m rs' m'
@@ -975,7 +978,8 @@ Lemma eval_addressing_preserved a rs1 rs2:
eval_addressing lk a rs1 = Asm.eval_addressing tge a rs2.
Proof.
intros EQ.
- destruct a; simpl; try (rewrite !EQ; congruence). auto.
+ destruct a; simpl; try (rewrite !EQ; congruence).
+ (*auto. TODO *)
Qed.
Ltac next_stuck_cong := try (unfold Next, Stuck in *; congruence).
@@ -1059,6 +1063,7 @@ Proof.
rewrite <- Ptrofs.add_unsigned; rewrite <- Val.offset_ptr_assoc; rewrite AGPC; eauto.
Qed.
+(* TODO
Lemma load_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk f a: forall
(BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
(MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
@@ -1077,6 +1082,7 @@ Proof.
* eapply ptrofs_nextinstr_agree; eauto.
+ next_stuck_cong.
Qed.
+ *)
Lemma store_preserved n rs1 m1 rs1' m1' rs2 m2 rd chk a: forall
(BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
@@ -1264,9 +1270,10 @@ Proof.
try (destruct_reg_size);
try (exploit next_inst_preserved; eauto);
try (find_rwrt_ag). } }
- { (* PLoad *)
- destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_preserved; eauto;
- intros; simpl in *; destruct sz; eauto. }
+ { (* PLoad TODO *)
+ admit.
+ (*destruct ld; monadInv TRANSBI; try destruct_ireg_inv; exploit load_preserved; eauto;*)
+ (*intros; simpl in *; destruct sz; eauto. }*)}
{ (* PStore *)
destruct st; monadInv TRANSBI; try destruct_ireg_inv; exploit store_preserved; eauto;
simpl in *; inv_matchi; find_rwrt_ag. }
@@ -1300,7 +1307,8 @@ Proof.
try (exploit next_inst_preserved; eauto);
rewrite symbol_addresses_preserved; eauto;
try (find_rwrt_ag).
-Qed.
+(*Qed.*)
+Admitted.
Lemma find_basic_instructions b ofs f bb tc: forall
(FINDF: Genv.find_funct_ptr ge b = Some (Internal f))
@@ -1576,6 +1584,7 @@ Proof.
+ destruct H.
Qed.
+(* TODO *)
Lemma no_label_in_basic_inst: forall a lbl x,
basic_to_instruction a = OK x -> Asm.is_label lbl x = false.
Proof.
@@ -1583,7 +1592,9 @@ Proof.
destruct a; simpl in *;
repeat destruct i; simpl in *;
try (try destruct_reg_inv; monadInv H; simpl in *; reflexivity).
-Qed.
+ admit.
+(*Qed.*)
+Admitted.
Lemma label_pos_body bdy: forall c1 c2 z ex lbl
(HUNF : unfold_body bdy = OK c2),
diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml
index ca120228..3ce2c68e 100644
--- a/aarch64/OpWeightsAsm.ml
+++ b/aarch64/OpWeightsAsm.ml
@@ -69,7 +69,7 @@ module Cortex_A53 = struct
match i' with Pfcmp0 _ -> 6 | _ -> 1)
| PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> 6
| PBasic (PArith _) -> 1
- | PBasic (PLoad (_, _, _)) -> 3
+ | PBasic (PLoad _) -> 3
| PBasic (PStore (_, _, _)) -> 3
| PBasic (Pallocframe (_, _)) -> 3
| PBasic (Pfreeframe (_, _)) -> 1
@@ -109,7 +109,7 @@ module Cortex_A53 = struct
match i' with Pfcmp0 _ -> [| 1; 1; 1; 0 |] | _ -> [| 1; 1; 0; 0 |])
| PBasic (PArith (Pcset (_, _))) | PBasic (PArith (Pcsel (_, _, _, _))) -> [| 1; 1; 1; 0 |]
| PBasic (PArith _) -> [| 1; 1; 0; 0 |]
- | PBasic (PLoad (_, _, _)) -> [| 1; 0; 0; 1 |]
+ | PBasic (PLoad _) -> [| 1; 0; 0; 1 |]
| PBasic (PStore (_, _, _)) -> [| 1; 0; 0; 1 |]
| PBasic (Pallocframe (_, _)) -> [| 1; 0; 0; 1 |]
| PBasic (Pfreeframe (_, _)) -> [| 1; 1; 0; 0 |]
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index 8d930255..7f55b29b 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -148,9 +148,9 @@ let get_eval_addressing_rlocs a =
| Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
| Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ]
| Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ]
- | Asm.ADpostincr (base, _) -> []
+ | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ]
-let load_rec ld rd a =
+let load_rd_a_rec ld rd a =
{
inst = ld;
write_locs = [ rd ];
@@ -158,6 +158,19 @@ let load_rec ld rd a =
is_control = false;
}
+let load_rd1_rd2_a_rec ld rd1 rd2 a =
+ {
+ inst = ld;
+ write_locs = [ rd1; rd2 ];
+ read_locs = [ Mem ] @ get_eval_addressing_rlocs a;
+ is_control = false;
+ }
+
+let load_rec ldi =
+ match ldi with
+ | PLd_rd_a (ld, rd, a) -> load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
+ | Pldp (ld, rd1, rd2, a) -> load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1) (reg_of_ireg rd2) a
+
let store_rec st r a =
{
inst = st;
@@ -290,7 +303,7 @@ let arith_rec i =
let basic_rec i =
match i with
| PArith i' -> arith_rec i'
- | PLoad (ld, rd, a) -> load_rec (PBasic i) (reg_of_dreg rd) a
+ | PLoad ld -> load_rec ld
| PStore (st, r, a) -> store_rec (PBasic i) (reg_of_dreg r) a
| Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs
| Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index b5e3a814..24a64aaf 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -279,7 +279,9 @@ module Target (*: TARGET*) =
(* the upper 32 bits of Xrd are set to 0, performing zero-extension *)
| Pldrsw(rd, a) ->
fprintf oc " ldrsw %a, %a\n" xreg rd addressing a
- | Pldp(rd1, rd2, a) ->
+ | Pldpw(rd1, rd2, a) ->
+ fprintf oc " ldp %a, %a, %a\n" wreg rd1 wreg rd2 addressing a
+ | Pldpx(rd1, rd2, a) ->
fprintf oc " ldp %a, %a, %a\n" xreg rd1 xreg rd2 addressing a
| Pstrw(rs, a) | Pstrw_a(rs, a) ->
fprintf oc " str %a, %a\n" wreg rs addressing a