aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-12-04 17:41:14 +0100
committerCyril SIX <cyril.six@kalray.eu>2020-12-04 17:41:14 +0100
commit60ff1e39bac5ab35c46698cbc1ed7a76fc936cab (patch)
tree87ff5f3b209e5659e967f862dab1517bb2b32baa /aarch64
parentf2fb8540c94ceb9892510f83bd7d6734fe9d422f (diff)
parentd2197102d6b81e225865cfac5f1d319d168e1e23 (diff)
downloadcompcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.tar.gz
compcert-kvx-60ff1e39bac5ab35c46698cbc1ed7a76fc936cab.zip
Merge branch 'kvx-work' into kvx-work-merge3.8
Conflicts: Makefile configure
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Asmgenproof1.v87
-rw-r--r--aarch64/CSE2deps.v5
-rw-r--r--aarch64/CSE2depsproof.v75
-rw-r--r--aarch64/ConstpropOpproof.v121
-rw-r--r--aarch64/Op.v193
-rw-r--r--aarch64/OpWeights.ml353
-rw-r--r--aarch64/PrepassSchedulingOracle.ml477
-rw-r--r--aarch64/PrepassSchedulingOracleDeps.ml17
-rw-r--r--aarch64/SelectLongproof.v26
-rw-r--r--aarch64/SelectOpproof.v28
-rw-r--r--aarch64/ValueAOp.v24
11 files changed, 1231 insertions, 175 deletions
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index 0e36bd05..35f1f2d7 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -881,7 +881,40 @@ Proof.
split. subst v; Simpl.
split; intros; Simpl.
Qed.
-
+
+
+Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+ Val.shrx rs#r1 (Vint n) = None ->
+ r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+ exists rs',
+ exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
+Proof.
+ unfold shrx32; intros.
+ destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto].
+ split.
+ + intros. Simpl.
+ + Simpl.
+- generalize (Int.eq_spec n Int.one).
+ destruct (Int.eq n Int.one); intro ONE.
+ * subst n.
+ econstructor; split. eapply exec_straight_two.
+ all: cbn; auto.
+ split.
+ ** intros.
+ destruct (Val.add _ _); cbn; Simpl.
+ ** Simpl.
+ * econstructor; split. eapply exec_straight_three.
+ all: cbn; auto.
+ split.
+ ** intros.
+ destruct (Val.shr _ _); cbn; Simpl.
+ ** Simpl.
+Qed.
+
Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
Val.shrxl rs#r1 (Vint n) = Some v ->
r1 <> X16 ->
@@ -918,6 +951,38 @@ Proof.
split; intros; Simpl.
Qed.
+Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m,
+ Val.shrxl rs#r1 (Vint n) = None ->
+ r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
+ exists rs',
+ exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
+Proof.
+ unfold shrx64; intros.
+ destruct (Int.eq n Int.zero) eqn:E.
+- econstructor; split. apply exec_straight_one; [simpl;eauto|auto].
+ split.
+ + intros. Simpl.
+ + Simpl.
+- generalize (Int.eq_spec n Int.one).
+ destruct (Int.eq n Int.one); intro ONE.
+ * subst n.
+ econstructor; split. eapply exec_straight_two.
+ all: cbn; auto.
+ split.
+ ** intros.
+ destruct (Val.addl _ _); cbn; Simpl.
+ ** Simpl.
+ * econstructor; split. eapply exec_straight_three.
+ all: cbn; auto.
+ split.
+ ** intros.
+ destruct (Val.shrl _ _); cbn; Simpl.
+ ** Simpl.
+Qed.
+
(** Condition bits *)
Lemma compare_int_spec: forall rs v1 v2 m,
@@ -1660,10 +1725,19 @@ Local Transparent Val.add.
TranslOpBase.
destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
- (* shrx *)
- exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL.
+ {
+ exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
intros (rs' & A & B & C & D).
econstructor; split. eexact A. split. rewrite B; auto.
split; auto.
+ }
+ exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C).
+ econstructor; split. { eexact A. }
+ split. { cbn. constructor. }
+ split; auto.
+
- (* zero-ext *)
TranslOpBase.
destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
@@ -1736,9 +1810,18 @@ Local Transparent Val.add.
TranslOpBase.
destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
- (* shrx *)
+ destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL.
+ {
exploit (exec_shrx64 x x0 n); eauto with asmgen.
apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
econstructor; split. eexact A. split. rewrite B; auto. auto.
+ }
+ exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C).
+ econstructor; split. { eexact A. }
+ split. { cbn. constructor. }
+ split; auto.
+
- (* zero-ext-l *)
TranslOpBase.
destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto.
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
index a23e41a8..d5c7ee0f 100644
--- a/aarch64/CSE2deps.v
+++ b/aarch64/CSE2deps.v
@@ -28,5 +28,8 @@ Definition may_overlap chunk addr args chunk' addr' args' :=
(base :: nil), (base' :: nil) =>
if peq base base'
then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
- else true | _, _, _, _ => true
+ else true
+ | (Ainstack ofs), (Ainstack ofs'), _, _ =>
+ negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ | _, _, _, _ => true
end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
index dbd46142..653c88f4 100644
--- a/aarch64/CSE2depsproof.v
+++ b/aarch64/CSE2depsproof.v
@@ -104,9 +104,71 @@ Section MEMORY_WRITE.
Qed.
End INDEXED_AWAY.
End MEMORY_WRITE.
-End SOUNDNESS.
+Section STACK_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Ainstack ofsw) nil = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Ainstack ofsr) nil = Some addrr.
+
+ Lemma stack_load_store_away1 :
+ forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+ \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+
+ destruct sp; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+ rewrite OFSW).
+
+ all: try rewrite ptrofs_modulus in *.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem stack_load_store_away :
+ can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply stack_load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End STACK_WRITE.
+End SOUNDNESS.
+
Section SOUNDNESS.
Variable F V : Type.
Variable genv: Genv.t F V.
@@ -124,7 +186,7 @@ Proof.
intros until rs.
intros ADDR ADDR' OVERLAP STORE.
destruct addr; destruct addr'; try discriminate.
- { (* Aindexed / Aindexed *)
+ - (* Aindexed / Aindexed *)
destruct args as [ | base [ | ]]. 1,3: discriminate.
destruct args' as [ | base' [ | ]]. 1,3: discriminate.
simpl in OVERLAP.
@@ -134,7 +196,14 @@ Proof.
2: discriminate.
simpl in *.
eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
- }
+- (* Ainstack / Ainstack *)
+ destruct args. 2: discriminate.
+ destruct args'. 2: discriminate.
+ cbn in OVERLAP.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP.
+ 2: discriminate.
+ cbn in *.
+ eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
Qed.
End SOUNDNESS.
diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v
index deab7cd4..c777062c 100644
--- a/aarch64/ConstpropOpproof.v
+++ b/aarch64/ConstpropOpproof.v
@@ -335,40 +335,63 @@ Qed.
Lemma make_divimm_correct:
forall n r1 r2 v,
- Val.divs e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divs e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- destruct (Int.ltu i (Int.repr 31)) eqn:?.
- exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence.
- exists v; auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divs_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 31)) eqn:iLT31.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto.
+ destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto.
+ rewrite Val.divs_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
+ }
+ exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
+
Lemma make_divuimm_correct:
forall n r1 r2 v,
- Val.divu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divu e#r1 e#r2) = v ->
e#r2 = Vint n ->
let (op, args) := make_divuimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divuimm.
- predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H.
- destruct (e#r1) eqn:?;
- try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto);
- inv H; auto.
- destruct (Int.is_power2 n) eqn:?.
- econstructor; split. simpl; eauto.
- rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
- rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto.
- exists v; auto.
+ predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0.
+ { destruct (e # r1) eqn:Er1.
+ all: try (cbn; exists (e # r1); split; auto; fail).
+ rewrite Val.divu_one.
+ cbn.
+ rewrite Er1.
+ exists (Vint i); split; auto.
+ }
+ destruct (Int.is_power2 n) eqn:Power2.
+ {
+ cbn.
+ rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto).
+ exists (Val.shru e # r1 (Vint i)); split; auto.
+ destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto.
+ rewrite Val.divu_pow2 with (y:=v) (n:=n).
+ all: auto.
+ }
+ exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence.
Qed.
Lemma make_andimm_correct:
@@ -503,34 +526,60 @@ Qed.
Lemma make_divlimm_correct:
forall n r1 r2 v,
- Val.divls e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divls e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divlimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divlimm.
- destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?.
- rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto.
- exists v; auto.
- exists v; auto.
+ destruct (Int64.is_power2' n) eqn:Power2.
+ {
+ destruct (Int.ltu i (Int.repr 63)) eqn:iLT63.
+ {
+ cbn.
+ exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto.
+ rewrite H0 in H.
+ destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto.
+ {
+ subst v0.
+ rewrite Val.divls_pow2 with (y:=v) (n:=n).
+ cbn.
+ all: auto.
+ }
+ subst. auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
+ }
+ cbn. subst. rewrite H0.
+ exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto.
Qed.
+
Lemma make_divluimm_correct:
forall n r1 r2 v,
- Val.divlu e#r1 e#r2 = Some v ->
+ Val.maketotal (Val.divlu e#r1 e#r2) = v ->
e#r2 = Vlong n ->
let (op, args) := make_divluimm n r1 r2 in
exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w.
Proof.
intros; unfold make_divluimm.
destruct (Int64.is_power2' n) eqn:?.
+ {
econstructor; split. simpl; eauto.
- rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
- rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2.
- simpl.
- erewrite Int64.is_power2'_range by eauto.
- erewrite Int64.divu_pow2' by eauto. auto.
- exists v; auto.
+ rewrite H0 in H. destruct (e#r1); inv H.
+ all: cbn; auto.
+ {
+ rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto).
+ destruct (Int64.eq n Int64.zero); cbn; auto.
+ erewrite Int64.is_power2'_range by eauto.
+ erewrite Int64.divu_pow2' by eauto. auto.
+ }
+ }
+ exists v; split; auto.
+ cbn.
+ rewrite H.
+ reflexivity.
Qed.
Lemma make_andlimm_correct:
@@ -679,10 +728,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto.
- (* divs *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divimm_correct; auto.
+ apply make_divimm_correct; auto. congruence.
- (* divu *)
assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divuimm_correct; auto.
+ apply make_divuimm_correct; auto. congruence.
- (* and 1 *)
rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto.
- (* and 2 *)
@@ -745,10 +794,10 @@ Proof.
InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto.
- (* divl *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divlimm_correct; auto.
+ apply make_divlimm_correct; auto. congruence.
- (* divlu *)
assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto.
- apply make_divluimm_correct; auto.
+ apply make_divluimm_correct; auto. congruence.
- (* andl 1 *)
rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto.
- (* andl 2 *)
diff --git a/aarch64/Op.v b/aarch64/Op.v
index afc25aa6..f2a8e6fb 100644
--- a/aarch64/Op.v
+++ b/aarch64/Op.v
@@ -386,8 +386,8 @@ Definition eval_operation
| Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
| Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3))
| Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3))
- | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
- | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
+ | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2))
+ | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2))
| Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
| Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a))
| Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n))
@@ -408,7 +408,7 @@ Definition eval_operation
| Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2)
| Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2)
| Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2)
- | Oshrximm n, v1::nil => Val.shrx v1 (Vint n)
+ | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n)))
| Ozext s, v1 :: nil => Some (Val.zero_ext s v1)
| Osext s, v1 :: nil => Some (Val.sign_ext s v1)
| Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a))
@@ -435,8 +435,8 @@ Definition eval_operation
| Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3))
| Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
| Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
- | Odivl, v1 :: v2 :: nil => Val.divls v1 v2
- | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2
+ | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2))
+ | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2))
| Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2)
| Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a))
| Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n))
@@ -457,7 +457,7 @@ Definition eval_operation
| Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2)
| Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2)
| Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2)
- | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n)
+ | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n)))
| Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1)
| Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1)
| Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a))
@@ -481,22 +481,22 @@ Definition eval_operation
| Osingleoffloat, v1::nil => Some (Val.singleoffloat v1)
| Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1)
- | Ointoffloat, v1::nil => Val.intoffloat v1
- | Ointuoffloat, v1::nil => Val.intuoffloat v1
- | Ofloatofint, v1::nil => Val.floatofint v1
- | Ofloatofintu, v1::nil => Val.floatofintu v1
- | Ointofsingle, v1::nil => Val.intofsingle v1
- | Ointuofsingle, v1::nil => Val.intuofsingle v1
- | Osingleofint, v1::nil => Val.singleofint v1
- | Osingleofintu, v1::nil => Val.singleofintu v1
- | Olongoffloat, v1::nil => Val.longoffloat v1
- | Olonguoffloat, v1::nil => Val.longuoffloat v1
- | Ofloatoflong, v1::nil => Val.floatoflong v1
- | Ofloatoflongu, v1::nil => Val.floatoflongu v1
- | Olongofsingle, v1::nil => Val.longofsingle v1
- | Olonguofsingle, v1::nil => Val.longuofsingle v1
- | Osingleoflong, v1::nil => Val.singleoflong v1
- | Osingleoflongu, v1::nil => Val.singleoflongu v1
+ | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1))
+ | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1))
+ | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1))
+ | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1))
+ | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1))
+ | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1))
+ | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1))
+ | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1))
+ | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1))
+ | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1))
+ | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1))
+ | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1))
+ | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1))
+ | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1))
+ | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1))
+ | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1))
| Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m))
| Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty)
@@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0... destruct v1...
- apply type_add.
- apply type_sub.
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int.eq i0 Int.zero); inv H2...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (_ || _); trivial...
+ - destruct v0; destruct v1; cbn in *; trivial.
+ destruct (Int.eq i0 Int.zero); constructor.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shift s v1 a)...
- destruct v0...
@@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext s v0)... simpl; rewrite a32_range...
@@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- apply type_subl.
- destruct v0... destruct v1...
- destruct v0... destruct v1...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
- - destruct v0; destruct v1; simpl in *; inv H0.
- destruct (Int64.eq i0 Int64.zero); inv H2...
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - destruct v0; destruct v1; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
- destruct v0... destruct v1...
- destruct v0... destruct (eval_shiftl s v1 a)...
- destruct v0...
@@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')...
- - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0...
+ - destruct v0; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); cbn; trivial.
- destruct v0...
- destruct v0...
- destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range...
@@ -893,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type).
- destruct v0...
- destruct v0...
(* intoffloat, intuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial.
(* floatofint, floatofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* intofsingle, intuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial.
(* singleofint, singleofintu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longoffloat, longuoffloat *)
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial.
(* floatoflong, floatoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* longofsingle, longuofsingle *)
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2...
- - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2...
+ - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial.
+ - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial.
(* singleoflong, singleoflongu *)
- - destruct v0; simpl in H0; inv H0...
- - destruct v0; simpl in H0; inv H0...
+ - destruct v0; cbn; trivial.
+ - destruct v0; cbn; trivial.
(* cmp *)
- destruct (eval_condition cond vl m) as [[]|]...
- unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I.
@@ -924,16 +926,7 @@ Qed.
Definition is_trapping_op (op : operation) :=
match op with
- | Odiv | Odivu | Odivl | Odivlu
- | Oshrximm _ | Oshrlximm _
- | Ointoffloat | Ointuoffloat
- | Ointofsingle | Ointuofsingle
- | Ofloatofint | Ofloatofintu
- | Osingleofint | Osingleofintu
- | Olongoffloat | Olonguoffloat
- | Olongofsingle | Olonguofsingle
- | Ofloatoflong | Ofloatoflongu
- | Osingleoflong | Osingleoflongu => true
+ | Omove => false
| _ => false
end.
@@ -1209,6 +1202,20 @@ Proof.
rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto.
Qed.
+Lemma op_valid_pointer_eq:
+ forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2,
+ (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) ->
+ eval_operation ge sp op args m1 = eval_operation ge sp op args m2.
+Proof.
+ intros until m2. destruct op eqn:OP; simpl; try congruence.
+ - intros MEM; destruct cond; simpl; try congruence;
+ repeat (destruct args; simpl; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+ - intro MEM; destruct cond; simpl; try congruence;
+ repeat (destruct args; simpl; try congruence);
+ erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto.
+Qed.
+
(** Global variables mentioned in an operation or addressing mode *)
Definition globals_addressing (addr: addressing) : list ident :=
@@ -1409,12 +1416,12 @@ Proof.
- apply Val.add_inject; auto. inv H2; inv H3; simpl; auto.
- apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto.
(* div, divu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero
- || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int.eq i0 Int.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (_ || _); cbn;
+ constructor.
+ - inv H4; inv H2; trivial. cbn.
+ destruct (Int.eq i0 Int.zero); cbn;
+ constructor.
(* and*)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1446,8 +1453,8 @@ Proof.
(* shru *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
(* shrx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1482,12 +1489,10 @@ Proof.
- inv H4; inv H2; simpl; auto.
- inv H4; inv H2; simpl; auto.
(* divl, divlu *)
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero
- || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2.
- TrivialExists.
- - inv H4; inv H3; simpl in H1; inv H1. simpl.
- destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (_ || _); cbn; trivial.
+ - inv H4; inv H2; cbn; trivial.
+ destruct (Int64.eq i0 Int64.zero); cbn; trivial.
(* andl *)
- inv H4; inv H2; simpl; auto.
- generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto.
@@ -1519,8 +1524,8 @@ Proof.
(* shrlu *)
- inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto.
(* shrlx *)
- - inv H4; simpl in H1; try discriminate. simpl.
- destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists.
+ - inv H4; cbn; trivial.
+ destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial.
(* shift-ext *)
- inv H4; simpl; auto.
- inv H4; simpl; auto.
@@ -1551,37 +1556,29 @@ Proof.
- inv H4; simpl; auto.
- inv H4; simpl; auto.
(* intoffloat, intuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial.
(* floatofint, floatofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* intofsingle, intuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
- exists (Vint i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
- exists (Vint i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial.
(* singleofint, singleofintu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longoffloat, longuoffloat *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial.
(* floatoflong, floatoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* longofsingle, longuofsingle *)
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
- - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2.
- exists (Vlong i); auto.
+ - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial.
+ - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial.
(* singleoflong, singleoflongu *)
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
- - inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+ - inv H4; cbn; trivial.
+ - inv H4; cbn; trivial.
(* cmp, sel *)
- subst v1. destruct (eval_condition cond vl1 m1) eqn:?.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml
new file mode 100644
index 00000000..5cdd002c
--- /dev/null
+++ b/aarch64/OpWeights.ml
@@ -0,0 +1,353 @@
+open Op;;
+open PrepassSchedulingOracleDeps;;
+
+module Cortex_A53=
+ struct
+ let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *)
+ let nr_non_pipelined_units = 1;;
+
+ let latency_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _ -> 1
+ | Oshift _ -> 2
+ | Oadd -> 1
+ | Oaddshift _ -> 2
+ | Oaddimm _
+ | Oneg -> 1
+ | Onegshift _ -> 2
+ | Osub -> 1
+ | Osubshift _ -> 2
+ | Omul
+ | Omuladd
+ | Omulsub -> 4
+ | Odiv
+ | Odivu -> 29
+ | Oand -> 1
+ | Oandshift _ -> 2
+ | Oandimm _ -> 1
+ | Oor -> 1
+ | Oorshift _ -> 2
+ | Oorimm _ -> 1
+ | Oxor -> 1
+ | Oxorshift _ -> 2
+ | Oxorimm _ -> 1
+ | Onot -> 1
+ | Onotshift _ -> 2
+ | Obic -> 1
+ | Obicshift _ -> 2
+ | Oorn -> 1
+ | Oornshift _ -> 2
+ | Oeqv -> 1
+ | Oeqvshift _ -> 2
+ | Oshl
+ | Oshr
+ | Oshru -> 2
+ | Oshrximm _ -> 6
+ | Ozext _
+ | Osext _ -> 1
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _ -> 2
+
+ (* 64-bit integer arithmetic *)
+ | Oshiftl _ -> 2
+ | Oextend _ -> 1
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl -> 1
+ | Oaddlshift _
+ | Oaddlext _ -> 2
+ | Oaddlimm _
+ | Onegl -> 1
+ | Oneglshift _ -> 2
+ | Osubl -> 1
+ | Osublshift _
+ | Osublext _ -> 2
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> 4
+ | Odivl -> 50
+ | Odivlu -> 50
+ | Oandl -> 1
+ | Oandlshift _ -> 2
+ | Oandlimm _
+ | Oorl -> 1
+ | Oorlshift _ -> 2
+ | Oorlimm _
+ | Oxorl -> 1
+ | Oxorlshift _ -> 2
+ | Oxorlimm _
+ | Onotl -> 1
+ | Onotlshift _ -> 2
+ | Obicl -> 1
+ | Obiclshift _ -> 2
+ | Oornl -> 1
+ | Oornlshift _ -> 2
+ | Oeqvl -> 1
+ | Oeqvlshift _ -> 2
+ | Oshll
+ | Oshrl
+ | Oshrlu -> 2
+ | Oshrlximm _ -> 6
+ | Ozextl _
+ | Osextl _ -> 1
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> 2
+
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+(* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> 6
+ | Odivf -> 50 (* r [rd = r1 / r2] *)
+ | Odivfs -> 20
+ (* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6
+ | _ -> 1);;
+
+ let resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Omove
+ | Ointconst _
+ | Olongconst _
+ | Ofloatconst _
+ | Osingleconst _
+ | Oaddrsymbol _
+ | Oaddrstack _
+ (* 32-bit integer arithmetic *)
+ | Oshift _
+ | Oadd
+ | Oaddshift _
+ | Oaddimm _
+ | Oneg
+ | Onegshift _
+ | Osub
+ | Osubshift _ -> [| 1 ; 1; 0; 0 |]
+ | Omul
+ | Omuladd
+ | Omulsub -> [| 1; 1; 1; 0 |]
+ | Odiv
+ | Odivu -> [| 1; 0; 0; 0 |]
+ | Oand
+ | Oandshift _
+ | Oandimm _
+ | Oor
+ | Oorshift _
+ | Oorimm _
+ | Oxor
+ | Oxorshift _
+ | Oxorimm _
+ | Onot
+ | Onotshift _
+ | Obic
+ | Obicshift _
+ | Oorn
+ | Oornshift _
+ | Oeqv
+ | Oeqvshift _
+ | Oshl
+ | Oshr
+ | Oshru
+ | Oshrximm _
+ | Ozext _
+ | Osext _
+ | Oshlzext _
+ | Oshlsext _
+ | Ozextshr _
+ | Osextshr _
+
+(* 64-bit integer arithmetic *)
+ | Oshiftl _
+ | Oextend _
+ | Omakelong
+ | Olowlong
+ | Ohighlong
+ | Oaddl
+ | Oaddlshift _
+ | Oaddlext _
+ | Oaddlimm _
+ | Onegl
+ | Oneglshift _
+ | Osubl
+ | Osublshift _
+ | Osublext _ -> [| 1 ; 1 ; 0; 0 |]
+ | Omull
+ | Omulladd
+ | Omullsub
+ | Omullhs
+ | Omullhu -> [| 1 ; 1 ; 1; 0 |]
+ | Odivl
+ | Odivlu -> [| 1; 0; 0; 0 |]
+ | Oandl
+ | Oandlshift _
+ | Oandlimm _
+ | Oorl
+ | Oorlshift _
+ | Oorlimm _
+ | Oxorl
+ | Oxorlshift _
+ | Oxorlimm _
+ | Onotl
+ | Onotlshift _
+ | Obicl
+ | Obiclshift _
+ | Oornl
+ | Oornlshift _
+ | Oeqvl
+ | Oeqvlshift _
+ | Oshll
+ | Oshrl
+ | Oshrlu
+ | Oshrlximm _
+ | Ozextl _
+ | Osextl _
+ | Oshllzext _
+ | Oshllsext _
+ | Ozextshrl _
+ | Osextshrl _ -> [| 1; 1; 0; 0 |]
+ (* 64-bit floating-point arithmetic *)
+ | Onegf (* r [rd = - r1] *)
+ | Oabsf (* r [rd = abs(r1)] *)
+ | Oaddf (* r [rd = r1 + r2] *)
+ | Osubf (* r [rd = r1 - r2] *)
+ | Omulf (* r [rd = r1 * r2] *)
+ | Odivf
+ (* 32-bit floating-point arithmetic *)
+ | Onegfs (* r [rd = - r1] *)
+ | Oabsfs (* r [rd = abs(r1)] *)
+ | Oaddfs (* r [rd = r1 + r2] *)
+ | Osubfs (* r [rd = r1 - r2] *)
+ | Omulfs (* r [rd = r1 * r2] *)
+ | Odivfs (* r [rd = r1 / r2] *)
+ | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *)
+ | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *)
+(* Conversions between int and float *)
+ | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *)
+ | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *)
+ | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *)
+ | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *)
+ | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *)
+ | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *)
+ | Osingleofint (* r [rd = float32_of_signed_int(r1)] *)
+ | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *)
+ | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *)
+ | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *)
+ | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *)
+ | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *)
+ | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *)
+ | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *)
+ | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *)
+ | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *)
+ -> [| 1 ; 1; 1; 0 |]
+
+(* Boolean tests *)
+ | Ocmp cmp | Osel (cmp, _) ->
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let non_pipelined_resources_of_op (op : operation) (nargs : int) =
+ match op with
+ | Odiv | Odivu -> [| 29 |]
+ | Odivfs -> [| 20 |]
+ | Odivl | Odivlu | Odivf -> [| 50 |]
+ | _ -> [| -1 |];;
+
+ let resources_of_cond (cmp : condition) (nargs : int) =
+ (match cmp with
+ | Ccompf _ (* r FP comparison *)
+ | Cnotcompf _ (* r negation of an FP comparison *)
+ | Ccompfzero _ (* r comparison with 0.0 *)
+ | Cnotcompfzero _ (* r negation of comparison with 0.0 *)
+ | Ccompfs _ (* r FP comparison *)
+ | Cnotcompfs _ (* r negation of an FP comparison *)
+ | Ccompfszero _ (* r equal to 0.0 *)
+ | Cnotcompfszero _ (* r not equal to 0.0 *) ->
+ [| 1; 1; 1; 0 |]
+ | _ -> [| 1; 1; 0; 0 |] );;
+
+ let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;;
+ let latency_of_call _ _ = 6;;
+
+ let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];;
+
+ let resources_of_call _ _ = resource_bounds;;
+ let resources_of_builtin _ = resource_bounds;;
+ end;;
+
+let get_opweights () : opweights =
+ match !Clflags.option_mtune with
+ | "cortex-a53" | "cortex-a35" | "" ->
+ {
+ pipelined_resource_bounds = Cortex_A53.resource_bounds;
+ nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units;
+ latency_of_op = Cortex_A53.latency_of_op;
+ resources_of_op = Cortex_A53.resources_of_op;
+ non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op;
+ latency_of_load = Cortex_A53.latency_of_load;
+ resources_of_load = Cortex_A53.resources_of_load;
+ resources_of_store = Cortex_A53.resources_of_store;
+ resources_of_cond = Cortex_A53.resources_of_cond;
+ latency_of_call = Cortex_A53.latency_of_call;
+ resources_of_call = Cortex_A53.resources_of_call;
+ resources_of_builtin = Cortex_A53.resources_of_builtin
+ }
+ | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);;
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
new file mode 100644
index 00000000..2c3eb14f
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -0,0 +1,477 @@
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+
+let use_alias_analysis () = false
+
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+ | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Iop(op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | Icall(signature, ef, inputs, output, _) ->
+ opweights.resources_of_call signature ef
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ opweights.resources_of_builtin ef
+ | Icond(cond, args, _, _ , _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+
+let print_sequence pp (seqa : instruction array) =
+ Array.iteri (
+ fun i (insn : instruction) ->
+ PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+
+type 'a symbolic_term_node =
+ | STop of Op.operation * 'a list
+ | STinitial_reg of int
+ | STother of int;;
+
+type symbolic_term = {
+ hash_id : unique_id;
+ hash_ct : symbolic_term symbolic_term_node
+ };;
+
+let rec print_term channel term =
+ match term.hash_ct with
+ | STop(op, args) ->
+ PrintOp.print_operation print_term channel (op, args)
+ | STinitial_reg n -> Printf.fprintf channel "x%d" n
+ | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+ st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+ mutable st_next_id : unique_id };;
+
+let hash_init () = {
+ st_table = Hashtbl.create 20;
+ st_next_id = 0
+ };;
+
+let ground_to_id = function
+ | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+ | STinitial_reg r -> STinitial_reg r
+ | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+ let grounded = ground_to_id term in
+ match Hashtbl.find_opt table.st_table grounded with
+ | Some x -> x
+ | None ->
+ let term' = { hash_id = table.st_next_id;
+ hash_ct = term } in
+ (if table.st_next_id = max_int then failwith "hash: max_int");
+ table.st_next_id <- table.st_next_id + 1;
+ Hashtbl.add table.st_table grounded term';
+ term';;
+
+type access = {
+ base : symbolic_term;
+ offset : int64;
+ length : int
+ };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+ match addressing, args with
+ | (Op.Aindexed ofs), [reg] -> Some
+ { base = get_reg reg;
+ offset = Camlcoq.camlint64_of_ptrofs ofs;
+ length = length_of_chunk chunk
+ }
+ | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+ let regs = ref PTree.empty
+ and table = hash_init() in
+ let assign reg term = regs := PTree.set reg term !regs
+ and hash term = hash_node table term in
+ let get_reg reg =
+ match PTree.get reg !regs with
+ | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+ | Some x -> x in
+ let targets = Array.make (Array.length seqa) None in
+ Array.iteri
+ begin
+ fun i insn ->
+ match insn with
+ | Iop(Op.Omove, [input], output, _) ->
+ assign output (get_reg input)
+ | Iop(op, inputs, output, _) ->
+ assign output (hash (STop(op, List.map get_reg inputs)))
+
+ | Iload(trap, chunk, addressing, args, output, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access;
+ assign output (hash (STother(i)))
+
+ | Icall(_, _, _, output, _)
+ | Ibuiltin(_, _, BR output, _) ->
+ assign output (hash (STother(i)))
+
+ | Istore(chunk, addressing, args, va, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access
+
+ | Inop _ -> ()
+ | Ibuiltin(_, _, BR_none, _) -> ()
+ | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+ | Itailcall (_, _, _)
+ |Icond (_, _, _, _, _)
+ |Ijumptable (_, _)
+ |Ireturn _ -> ()
+ end seqa;
+ targets;;
+
+let print_access channel = function
+ | None -> Printf.fprintf channel "any"
+ | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+ let targets = symbolic_execution seqa in
+ Array.iteri
+ (fun i insn ->
+ match insn with
+ | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+ i print_access targets.(i)
+ | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+ i print_access targets.(i)
+ | _ -> ()
+ ) seqa;;
+
+let may_overlap a0 b0 =
+ match a0, b0 with
+ | (None, _) | (_ , None) -> true
+ | (Some a), (Some b) ->
+ if term_equal a.base b.base
+ then (max a.offset b.offset) <
+ (min (Int64.add (Int64.of_int a.length) a.offset)
+ (Int64.add (Int64.of_int b.length) b.offset))
+ else match a.base.hash_ct, b.base.hash_ct with
+ | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+ STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+ (ida=idb) &&
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | STop(Op.Oaddrstack _, []),
+ STop(Op.Oaddrsymbol _, [])
+ | STop(Op.Oaddrsymbol _, []),
+ STop(Op.Oaddrstack _, []) -> false
+ | STop(Op.Oaddrstack(ofsa),[]),
+ STop(Op.Oaddrstack(ofsb),[]) ->
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+ let targets = symbolic_execution seqa
+ and deps = ref [] in
+ let add_constraint instr_from instr_to latency =
+ deps := { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !deps in
+ for i=0 to (Array.length seqa)-1
+ do
+ for j=0 to i-1
+ do
+ match seqa.(j), seqa.(i) with
+ | (Istore _), ((Iload _) | (Istore _)) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 1
+ | (Iload _), (Istore _) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 0
+ | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+ | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+ add_constraint j i 1
+ | (Inop _ | Iop _), _
+ | _, (Inop _ | Iop _)
+ | (Iload _), (Iload _) -> ()
+ done
+ done;
+ !deps;;
+ *)
+
+let define_problem (opweights : opweights) seqa =
+ let simple_deps = get_simple_dependencies opweights seqa in
+ { max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints =
+ (* if (use_alias_analysis ())
+ then (get_alias_dependencies seqa) @ simple_deps
+ else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+ let nr_instructions = get_nr_instructions problem in
+ assert(nr_instructions = (Array.length early_ones));
+ match list_scheduler problem with
+ | Some fwd_schedule ->
+ let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri (fun i is_early ->
+ if is_early then
+ constraints' := {
+ instr_from = i;
+ instr_to = nr_instructions ;
+ latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None;;
+
+let prepass_scheduler_by_name name problem early_ones =
+ match name with
+ | "zigzag" -> zigzag_scheduler problem early_ones
+ | _ -> scheduler_by_name name problem
+
+let schedule_sequence (seqa : (instruction*Regset.t) array) =
+ let opweights = OpWeights.get_opweights () in
+ try
+ if (Array.length seqa) <= 1
+ then None
+ else
+ begin
+ let nr_instructions = Array.length seqa in
+ (if !Clflags.option_debug_compcert > 6
+ then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+ let problem = define_problem opweights seqa in
+ (if !Clflags.option_debug_compcert > 7
+ then (print_sequence stdout (Array.map fst seqa);
+ print_problem stdout problem));
+ match prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false) seqa) with
+ | None -> Printf.printf "no solution in prepass scheduling\n";
+ None
+ | Some solution ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+ Some positions
+ end
+ with (Failure s) ->
+ Printf.printf "failure in prepass scheduling: %s\n" s;
+ None;;
+
diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml
new file mode 100644
index 00000000..8d10d406
--- /dev/null
+++ b/aarch64/PrepassSchedulingOracleDeps.ml
@@ -0,0 +1,17 @@
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+
+type opweights =
+ {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : Op.operation -> int -> int;
+ resources_of_op : Op.operation -> int -> int array;
+ non_pipelined_resources_of_op : Op.operation -> int -> int array;
+ latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+ resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_cond : Op.condition -> int -> int array;
+ latency_of_call : AST.signature -> called_function -> int;
+ resources_of_call : AST.signature -> called_function -> int array;
+ resources_of_builtin : AST.external_function -> int array
+ };;
diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v
index 60dc1a12..513ee9bd 100644
--- a/aarch64/SelectLongproof.v
+++ b/aarch64/SelectLongproof.v
@@ -559,25 +559,29 @@ Qed.
Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls.
Proof.
red; intros; unfold divls_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls.
Proof.
red; intros; unfold modls_base, modl_aux.
exploit Val.modls_divls; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu.
Proof.
red; intros; unfold divlu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu.
Proof.
red; intros; unfold modlu_base, modl_aux.
exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrxlimm:
@@ -592,7 +596,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0.
rewrite Int64.shrx'_zero. auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -726,42 +730,42 @@ Qed.
Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu.
Proof.
- red; intros; TrivialExists.
+ red; intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
End CMCONSTR.
diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v
index 3379cbd8..9ce7a8bf 100644
--- a/aarch64/SelectOpproof.v
+++ b/aarch64/SelectOpproof.v
@@ -666,7 +666,8 @@ Theorem eval_divs_base:
Val.divs x y = Some z ->
exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
Proof.
- intros; unfold divs_base; TrivialExists.
+ intros; unfold divs_base; TrivialExists; cbn.
+ rewrite H1. reflexivity.
Qed.
Theorem eval_mods_base:
@@ -678,7 +679,8 @@ Theorem eval_mods_base:
Proof.
intros; unfold mods_base, mod_aux.
exploit Val.mods_divs; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ cbn. rewrite A. reflexivity.
Qed.
Theorem eval_divu_base:
@@ -689,6 +691,7 @@ Theorem eval_divu_base:
exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
Proof.
intros; unfold divu_base; TrivialExists.
+ cbn. rewrite H1. reflexivity.
Qed.
Theorem eval_modu_base:
@@ -700,7 +703,8 @@ Theorem eval_modu_base:
Proof.
intros; unfold modu_base, mod_aux.
exploit Val.modu_divu; eauto. intros (q & A & B). subst z.
- TrivialExists. repeat (econstructor; eauto with evalexpr). exact A.
+ TrivialExists. repeat (econstructor; eauto with evalexpr).
+ rewrite A. reflexivity.
Qed.
Theorem eval_shrximm:
@@ -715,7 +719,7 @@ Proof.
destruct x; simpl in H0; try discriminate.
change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0.
rewrite Int.shrx_zero by (compute; auto). auto.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** General shifts *)
@@ -928,7 +932,7 @@ Theorem eval_intoffloat:
Val.intoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofint:
@@ -939,7 +943,7 @@ Theorem eval_floatofint:
Proof.
intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuoffloat:
@@ -948,7 +952,7 @@ Theorem eval_intuoffloat:
Val.intuoffloat x = Some y ->
exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_floatofintu:
@@ -959,7 +963,7 @@ Theorem eval_floatofintu:
Proof.
intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intofsingle:
@@ -968,7 +972,7 @@ Theorem eval_intofsingle:
Val.intofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofint:
@@ -979,7 +983,7 @@ Theorem eval_singleofint:
Proof.
intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_intuofsingle:
@@ -988,7 +992,7 @@ Theorem eval_intuofsingle:
Val.intuofsingle x = Some y ->
exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
Proof.
- intros; TrivialExists.
+ intros; TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
Theorem eval_singleofintu:
@@ -999,7 +1003,7 @@ Theorem eval_singleofintu:
Proof.
intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval.
- TrivialExists.
-- TrivialExists.
+- TrivialExists. cbn. rewrite H0. reflexivity.
Qed.
(** Selection *)
diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v
index e0d98c85..e6a60d4e 100644
--- a/aarch64/ValueAOp.v
+++ b/aarch64/ValueAOp.v
@@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omul, v1::v2::nil => mul v1 v2
| Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3)
| Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3)
- | Odiv, v1::v2::nil => divs v1 v2
- | Odivu, v1::v2::nil => divu v1 v2
+ | Odiv, v1::v2::nil => divs_total v1 v2
+ | Odivu, v1::v2::nil => divu_total v1 v2
| Oand, v1::v2::nil => and v1 v2
| Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a)
| Oandimm n, v1::nil => and v1 (I n)
@@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3)
| Omullhs, v1::v2::nil => mullhs v1 v2
| Omullhu, v1::v2::nil => mullhu v1 v2
- | Odivl, v1::v2::nil => divls v1 v2
- | Odivlu, v1::v2::nil => divlu v1 v2
+ | Odivl, v1::v2::nil => divls_total v1 v2
+ | Odivlu, v1::v2::nil => divlu_total v1 v2
| Oandl, v1::v2::nil => andl v1 v2
| Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a)
| Oandlimm n, v1::nil => andl v1 (L n)
@@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
| Osingleoffloat, v1::nil => singleoffloat v1
| Ofloatofsingle, v1::nil => floatofsingle v1
- | Ointoffloat, v1::nil => intoffloat v1
- | Ointuoffloat, v1::nil => intuoffloat v1
+ | Ointoffloat, v1::nil => intoffloat_total v1
+ | Ointuoffloat, v1::nil => intuoffloat_total v1
| Ofloatofint, v1::nil => floatofint v1
| Ofloatofintu, v1::nil => floatofintu v1
- | Ointofsingle, v1::nil => intofsingle v1
- | Ointuofsingle, v1::nil => intuofsingle v1
+ | Ointofsingle, v1::nil => intofsingle_total v1
+ | Ointuofsingle, v1::nil => intuofsingle_total v1
| Osingleofint, v1::nil => singleofint v1
| Osingleofintu, v1::nil => singleofintu v1
- | Olongoffloat, v1::nil => longoffloat v1
- | Olonguoffloat, v1::nil => longuoffloat v1
+ | Olongoffloat, v1::nil => longoffloat_total v1
+ | Olonguoffloat, v1::nil => longuoffloat_total v1
| Ofloatoflong, v1::nil => floatoflong v1
| Ofloatoflongu, v1::nil => floatoflongu v1
- | Olongofsingle, v1::nil => longofsingle v1
- | Olonguofsingle, v1::nil => longuofsingle v1
+ | Olongofsingle, v1::nil => longofsingle_total v1
+ | Olonguofsingle, v1::nil => longuofsingle_total v1
| Osingleoflong, v1::nil => singleoflong v1
| Osingleoflongu, v1::nil => singleoflongu v1