From 932f7f853fdad81c2bdc9db42ed87d106db5762f Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 16 Sep 2020 10:17:27 +0200 Subject: first opweights, bogus weights --- aarch64/OpWeights.ml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 aarch64/OpWeights.ml (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml new file mode 100644 index 00000000..3662ef1c --- /dev/null +++ b/aarch64/OpWeights.ml @@ -0,0 +1,19 @@ +open Op;; +let resource_bounds = [| 1 |];; + + +let latency_of_op (op : operation) (nargs : int) = 1;; + +let resources_of_op (op : operation) (nargs : int) = [| 1 |];; + +let resources_of_cond (cond : condition) (nargs : int) = [| 1 |];; + +let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; +let latency_of_call _ _ = 6;; + +let resources_of_load trap chunk addressing nargs = [| 1 |];; + +let resources_of_store chunk addressing nargs = [| 1 |];; + +let resources_of_call _ _ = resource_bounds;; +let resources_of_builtin _ = resource_bounds;; -- cgit From d01887944e21d33c869b6b2ffc2e1dc6c5a701cd Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 29 Sep 2020 18:53:58 +0200 Subject: attempt at latencies for Cortex A53 --- aarch64/OpWeights.ml | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml index 3662ef1c..471161b7 100644 --- a/aarch64/OpWeights.ml +++ b/aarch64/OpWeights.ml @@ -1,8 +1,153 @@ open Op;; let resource_bounds = [| 1 |];; +let nr_non_pipelined_units = 1;; - -let latency_of_op (op : operation) (nargs : int) = 1;; +let latency_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ -> 1 + | Oshift _ -> 2 + | Oadd -> 1 + | Oaddshift _ -> 2 + | Oaddimm _ + | Oneg -> 1 + | Onegshift _ -> 2 + | Osub -> 1 + | Osubshift _ -> 2 + | Omul + | Omuladd + | Omulsub -> 4 + | Odiv + | Odivu -> 29 + | Oand -> 1 + | Oandshift _ -> 2 + | Oandimm _ -> 1 + | Oor -> 1 + | Oorshift _ -> 2 + | Oorimm _ -> 1 + | Oxor -> 1 + | Oxorshift _ -> 2 + | Oxorimm _ -> 1 + | Onot -> 1 + | Onotshift _ -> 2 + | Obic -> 1 + | Obicshift _ -> 2 + | Oorn -> 1 + | Oornshift _ -> 2 + | Oeqv -> 1 + | Oeqvshift _ -> 2 + | Oshl + | Oshr + | Oshru -> 2 + | Oshrximm _ -> 6 + | Ozext _ + | Osext _ -> 1 + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ -> 2 + +(* 64-bit integer arithmetic *) + | Oshiftl _ -> 2 + | Oextend _ -> 1 + | Omakelong + | Olowlong + | Ohighlong + | Oaddl -> 1 + | Oaddlshift _ + | Oaddlext _ -> 2 + | Oaddlimm _ + | Onegl -> 1 + | Oneglshift _ -> 2 + | Osubl -> 1 + | Osublshift _ + | Osublext _ -> 2 + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> 4 + | Odivl -> 50 + | Odivlu -> 50 + | Oandl -> 1 + | Oandlshift _ -> 2 + | Oandlimm _ + | Oorl -> 1 + | Oorlshift _ -> 2 + | Oorlimm _ + | Oxorl -> 1 + | Oxorlshift _ -> 2 + | Oxorlimm _ + | Onotl -> 1 + | Onotlshift _ -> 2 + | Obicl -> 1 + | Obiclshift _ -> 2 + | Oornl -> 1 + | Oornlshift _ -> 2 + | Oeqvl -> 1 + | Oeqvlshift _ -> 2 + | Oshll + | Oshrl + | Oshrlu -> 2 + | Oshrlximm _ -> 6 + | Ozextl _ + | Osextl _ -> 1 + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> 2 + +(* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf (* r [rd = r1 / r2] *) +(* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> 6 + (* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 + | _ -> 1);; let resources_of_op (op : operation) (nargs : int) = [| 1 |];; -- cgit From 1f0c8cf821d310e405f3ad6327870fe3321ad6e6 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 29 Sep 2020 19:19:02 +0200 Subject: try to model resources --- aarch64/OpWeights.ml | 169 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 5 deletions(-) (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml index 471161b7..d19f34af 100644 --- a/aarch64/OpWeights.ml +++ b/aarch64/OpWeights.ml @@ -1,5 +1,5 @@ open Op;; -let resource_bounds = [| 1 |];; +let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) let nr_non_pipelined_units = 1;; let latency_of_op (op : operation) (nargs : int) = @@ -149,16 +149,175 @@ let latency_of_op (op : operation) (nargs : int) = | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 | _ -> 1);; -let resources_of_op (op : operation) (nargs : int) = [| 1 |];; +let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ +(* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] +(* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf (* r [rd = r1 / r2] *) +(* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + -let resources_of_cond (cond : condition) (nargs : int) = [| 1 |];; +let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; let latency_of_call _ _ = 6;; -let resources_of_load trap chunk addressing nargs = [| 1 |];; +let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; -let resources_of_store chunk addressing nargs = [| 1 |];; +let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; let resources_of_call _ _ = resource_bounds;; let resources_of_builtin _ = resource_bounds;; -- cgit From f6b0bfa541f69b4a563ac99a864284939067e994 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 29 Sep 2020 19:35:36 +0200 Subject: attempt at separating the divisions --- aarch64/OpWeights.ml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml index d19f34af..01a1a553 100644 --- a/aarch64/OpWeights.ml +++ b/aarch64/OpWeights.ml @@ -298,6 +298,11 @@ let resources_of_op (op : operation) (nargs : int) = [| 1; 1; 1; 0 |] | _ -> [| 1; 1; 0; 0 |] );; +let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivl | Odivlu -> [| 50 |] + | _ -> [| -1 |];; let resources_of_cond (cmp : condition) (nargs : int) = (match cmp with -- cgit From 827bdabf1242720979848cf473263a54fcf212f5 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 29 Sep 2020 20:29:05 +0200 Subject: floating-point division uses the divisor --- aarch64/OpWeights.ml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml index 01a1a553..1b48bc0f 100644 --- a/aarch64/OpWeights.ml +++ b/aarch64/OpWeights.ml @@ -108,14 +108,12 @@ let latency_of_op (op : operation) (nargs : int) = | Oaddf (* r [rd = r1 + r2] *) | Osubf (* r [rd = r1 - r2] *) | Omulf (* r [rd = r1 * r2] *) - | Odivf (* r [rd = r1 / r2] *) (* 32-bit floating-point arithmetic *) | Onegfs (* r [rd = - r1] *) | Oabsfs (* r [rd = abs(r1)] *) | Oaddfs (* r [rd = r1 + r2] *) | Osubfs (* r [rd = r1 - r2] *) | Omulfs (* r [rd = r1 * r2] *) - | Odivfs (* r [rd = r1 / r2] *) | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) (* Conversions between int and float *) @@ -136,6 +134,8 @@ let latency_of_op (op : operation) (nargs : int) = | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) -> 6 + | Odivf -> 50 (* r [rd = r1 / r2] *) + | Odivfs -> 20 (* Boolean tests *) | Ocmp cmp | Osel (cmp, _) -> (match cmp with @@ -255,7 +255,7 @@ let resources_of_op (op : operation) (nargs : int) = | Oaddf (* r [rd = r1 + r2] *) | Osubf (* r [rd = r1 - r2] *) | Omulf (* r [rd = r1 * r2] *) - | Odivf (* r [rd = r1 / r2] *) + | Odivf (* 32-bit floating-point arithmetic *) | Onegfs (* r [rd = - r1] *) | Oabsfs (* r [rd = abs(r1)] *) @@ -301,7 +301,8 @@ let resources_of_op (op : operation) (nargs : int) = let non_pipelined_resources_of_op (op : operation) (nargs : int) = match op with | Odiv | Odivu -> [| 29 |] - | Odivl | Odivlu -> [| 50 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] | _ -> [| -1 |];; let resources_of_cond (cmp : condition) (nargs : int) = -- cgit From c455f69d66b186414c8bb1c5cd28ce8f29e965aa Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 30 Sep 2020 12:00:33 +0200 Subject: AArch64 division no longer "traps" --- aarch64/Asmgenproof1.v | 87 +++++++++++++++++++++++++++++++- aarch64/ConstpropOpproof.v | 121 +++++++++++++++++++++++++++++++-------------- aarch64/Op.v | 64 ++++++++++++------------ aarch64/SelectLongproof.v | 10 ++-- aarch64/SelectOpproof.v | 12 +++-- aarch64/ValueAOp.v | 8 +-- 6 files changed, 221 insertions(+), 81 deletions(-) (limited to 'aarch64') diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v index 0e36bd05..35f1f2d7 100644 --- a/aarch64/Asmgenproof1.v +++ b/aarch64/Asmgenproof1.v @@ -881,7 +881,40 @@ Proof. split. subst v; Simpl. split; intros; Simpl. Qed. - + + +Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m, + Val.shrx rs#r1 (Vint n) = None -> + r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> + exists rs', + exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. +Proof. + unfold shrx32; intros. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. + + intros. Simpl. + + Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: cbn; auto. + split. + ** intros. + destruct (Val.add _ _); cbn; Simpl. + ** Simpl. + * econstructor; split. eapply exec_straight_three. + all: cbn; auto. + split. + ** intros. + destruct (Val.shr _ _); cbn; Simpl. + ** Simpl. +Qed. + Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m, Val.shrxl rs#r1 (Vint n) = Some v -> r1 <> X16 -> @@ -918,6 +951,38 @@ Proof. split; intros; Simpl. Qed. +Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m, + Val.shrxl rs#r1 (Vint n) = None -> + r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> + exists rs', + exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. +Proof. + unfold shrx64; intros. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. + + intros. Simpl. + + Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: cbn; auto. + split. + ** intros. + destruct (Val.addl _ _); cbn; Simpl. + ** Simpl. + * econstructor; split. eapply exec_straight_three. + all: cbn; auto. + split. + ** intros. + destruct (Val.shrl _ _); cbn; Simpl. + ** Simpl. +Qed. + (** Condition bits *) Lemma compare_int_spec: forall rs v1 v2 m, @@ -1660,10 +1725,19 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. - (* shrx *) - exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL. + { + exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D). econstructor; split. eexact A. split. rewrite B; auto. split; auto. + } + exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C). + econstructor; split. { eexact A. } + split. { cbn. constructor. } + split; auto. + - (* zero-ext *) TranslOpBase. destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. @@ -1736,9 +1810,18 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto. - (* shrx *) + destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL. + { exploit (exec_shrx64 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ). econstructor; split. eexact A. split. rewrite B; auto. auto. + } + exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C). + econstructor; split. { eexact A. } + split. { cbn. constructor. } + split; auto. + - (* zero-ext-l *) TranslOpBase. destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v index deab7cd4..c777062c 100644 --- a/aarch64/ConstpropOpproof.v +++ b/aarch64/ConstpropOpproof.v @@ -335,40 +335,63 @@ Qed. Lemma make_divimm_correct: forall n r1 r2 v, - Val.divs e#r1 e#r2 = Some v -> + Val.maketotal (Val.divs e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - destruct (Int.ltu i (Int.repr 31)) eqn:?. - exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence. - exists v; auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divs_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 31)) eqn:iLT31. + { + cbn. + exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto. + destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto. + rewrite Val.divs_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. Qed. + Lemma make_divuimm_correct: forall n r1 r2 v, - Val.divu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divu e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divuimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divuimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - econstructor; split. simpl; eauto. - rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). - rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divu_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + cbn. + rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). + exists (Val.shru e # r1 (Vint i)); split; auto. + destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto. + rewrite Val.divu_pow2 with (y:=v) (n:=n). + all: auto. + } + exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence. Qed. Lemma make_andimm_correct: @@ -503,34 +526,60 @@ Qed. Lemma make_divlimm_correct: forall n r1 r2 v, - Val.divls e#r1 e#r2 = Some v -> + Val.maketotal (Val.divls e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divlimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divlimm. - destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. - rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto. - exists v; auto. - exists v; auto. + destruct (Int64.is_power2' n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 63)) eqn:iLT63. + { + cbn. + exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto. + rewrite H0 in H. + destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto. + { + subst v0. + rewrite Val.divls_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + subst. auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. Qed. + Lemma make_divluimm_correct: forall n r1 r2 v, - Val.divlu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divlu e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divluimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divluimm. destruct (Int64.is_power2' n) eqn:?. + { econstructor; split. simpl; eauto. - rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). - rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. - simpl. - erewrite Int64.is_power2'_range by eauto. - erewrite Int64.divu_pow2' by eauto. auto. - exists v; auto. + rewrite H0 in H. destruct (e#r1); inv H. + all: cbn; auto. + { + rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). + destruct (Int64.eq n Int64.zero); cbn; auto. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.divu_pow2' by eauto. auto. + } + } + exists v; split; auto. + cbn. + rewrite H. + reflexivity. Qed. Lemma make_andlimm_correct: @@ -679,10 +728,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. - (* divs *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divimm_correct; auto. + apply make_divimm_correct; auto. congruence. - (* divu *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divuimm_correct; auto. + apply make_divuimm_correct; auto. congruence. - (* and 1 *) rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. - (* and 2 *) @@ -745,10 +794,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. - (* divl *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divlimm_correct; auto. + apply make_divlimm_correct; auto. congruence. - (* divlu *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divluimm_correct; auto. + apply make_divluimm_correct; auto. congruence. - (* andl 1 *) rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. - (* andl 2 *) diff --git a/aarch64/Op.v b/aarch64/Op.v index afc25aa6..30f806d3 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -386,8 +386,8 @@ Definition eval_operation | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3)) | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3)) - | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 - | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 + | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2)) + | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2)) | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a)) | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n)) @@ -408,7 +408,7 @@ Definition eval_operation | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2) | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2) | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2) - | Oshrximm n, v1::nil => Val.shrx v1 (Vint n) + | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n))) | Ozext s, v1 :: nil => Some (Val.zero_ext s v1) | Osext s, v1 :: nil => Some (Val.sign_ext s v1) | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a)) @@ -435,8 +435,8 @@ Definition eval_operation | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3)) | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) - | Odivl, v1 :: v2 :: nil => Val.divls v1 v2 - | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2 + | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2)) + | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2)) | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2) | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a)) | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n)) @@ -457,7 +457,7 @@ Definition eval_operation | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2) | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2) | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2) - | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n) + | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n))) | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1) | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1) | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a)) @@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... destruct v1... - apply type_add. - apply type_sub. - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero); inv H2... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (_ || _); trivial... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (Int.eq i0 Int.zero); constructor. - destruct v0... destruct v1... - destruct v0... destruct (eval_shift s v1 a)... - destruct v0... @@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... @@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - apply type_subl. - destruct v0... destruct v1... - destruct v0... destruct v1... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero); inv H2... + - destruct v0; destruct v1; cbn; trivial. + destruct (_ || _); cbn; trivial. + - destruct v0; destruct v1; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. - destruct v0... destruct v1... - destruct v0... destruct (eval_shiftl s v1 a)... - destruct v0... @@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... @@ -1409,12 +1411,12 @@ Proof. - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto. - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto. (* div, divu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + - inv H4; inv H2; trivial. cbn. + destruct (_ || _); cbn; + constructor. + - inv H4; inv H2; trivial. cbn. + destruct (Int.eq i0 Int.zero); cbn; + constructor. (* and*) - inv H4; inv H2; simpl; auto. - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1446,8 +1448,8 @@ Proof. (* shru *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto. (* shrx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1482,12 +1484,10 @@ Proof. - inv H4; inv H2; simpl; auto. - inv H4; inv H2; simpl; auto. (* divl, divlu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero - || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + - inv H4; inv H2; cbn; trivial. + destruct (_ || _); cbn; trivial. + - inv H4; inv H2; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. (* andl *) - inv H4; inv H2; simpl; auto. - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1519,8 +1519,8 @@ Proof. (* shrlu *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. (* shrlx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index 60dc1a12..c1847638 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -559,25 +559,29 @@ Qed. Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. Proof. red; intros; unfold divls_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. Proof. red; intros; unfold modls_base, modl_aux. exploit Val.modls_divls; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. Proof. red; intros; unfold divlu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. Proof. red; intros; unfold modlu_base, modl_aux. exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrxlimm: @@ -592,7 +596,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0. rewrite Int64.shrx'_zero. auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index 3379cbd8..c7898193 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -666,7 +666,8 @@ Theorem eval_divs_base: Val.divs x y = Some z -> exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold divs_base; TrivialExists. + intros; unfold divs_base; TrivialExists; cbn. + rewrite H1. reflexivity. Qed. Theorem eval_mods_base: @@ -678,7 +679,8 @@ Theorem eval_mods_base: Proof. intros; unfold mods_base, mod_aux. exploit Val.mods_divs; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + cbn. rewrite A. reflexivity. Qed. Theorem eval_divu_base: @@ -689,6 +691,7 @@ Theorem eval_divu_base: exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. intros; unfold divu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modu_base: @@ -700,7 +703,8 @@ Theorem eval_modu_base: Proof. intros; unfold modu_base, mod_aux. exploit Val.modu_divu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrximm: @@ -715,7 +719,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0. rewrite Int.shrx_zero by (compute; auto). auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v index e0d98c85..d379bbe8 100644 --- a/aarch64/ValueAOp.v +++ b/aarch64/ValueAOp.v @@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omul, v1::v2::nil => mul v1 v2 | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3) | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3) - | Odiv, v1::v2::nil => divs v1 v2 - | Odivu, v1::v2::nil => divu v1 v2 + | Odiv, v1::v2::nil => divs_total v1 v2 + | Odivu, v1::v2::nil => divu_total v1 v2 | Oand, v1::v2::nil => and v1 v2 | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a) | Oandimm n, v1::nil => and v1 (I n) @@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3) | Omullhs, v1::v2::nil => mullhs v1 v2 | Omullhu, v1::v2::nil => mullhu v1 v2 - | Odivl, v1::v2::nil => divls v1 v2 - | Odivlu, v1::v2::nil => divlu v1 v2 + | Odivl, v1::v2::nil => divls_total v1 v2 + | Odivlu, v1::v2::nil => divlu_total v1 v2 | Oandl, v1::v2::nil => andl v1 v2 | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a) | Oandlimm n, v1::nil => andl v1 (L n) -- cgit From 154529f64b96ca0e4cef8c4aeff6a1cfb8210e91 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 30 Sep 2020 14:07:40 +0200 Subject: non trapping --- aarch64/Op.v | 2 -- 1 file changed, 2 deletions(-) (limited to 'aarch64') diff --git a/aarch64/Op.v b/aarch64/Op.v index 30f806d3..ef28dd6d 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -926,8 +926,6 @@ Qed. Definition is_trapping_op (op : operation) := match op with - | Odiv | Odivu | Odivl | Odivlu - | Oshrximm _ | Oshrlximm _ | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle | Ofloatofint | Ofloatofintu -- cgit From 244159573aa20a0a74897edb5897ded1bd16cd66 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 30 Sep 2020 14:42:28 +0200 Subject: non trapping op --- aarch64/Op.v | 113 ++++++++++++++++++++-------------------------- aarch64/SelectLongproof.v | 16 +++---- aarch64/SelectOpproof.v | 16 +++---- aarch64/ValueAOp.v | 16 +++---- 4 files changed, 73 insertions(+), 88 deletions(-) (limited to 'aarch64') diff --git a/aarch64/Op.v b/aarch64/Op.v index ef28dd6d..0a29ff3e 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -481,22 +481,22 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1) | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1) - | Ointoffloat, v1::nil => Val.intoffloat v1 - | Ointuoffloat, v1::nil => Val.intuoffloat v1 - | Ofloatofint, v1::nil => Val.floatofint v1 - | Ofloatofintu, v1::nil => Val.floatofintu v1 - | Ointofsingle, v1::nil => Val.intofsingle v1 - | Ointuofsingle, v1::nil => Val.intuofsingle v1 - | Osingleofint, v1::nil => Val.singleofint v1 - | Osingleofintu, v1::nil => Val.singleofintu v1 - | Olongoffloat, v1::nil => Val.longoffloat v1 - | Olonguoffloat, v1::nil => Val.longuoffloat v1 - | Ofloatoflong, v1::nil => Val.floatoflong v1 - | Ofloatoflongu, v1::nil => Val.floatoflongu v1 - | Olongofsingle, v1::nil => Val.longofsingle v1 - | Olonguofsingle, v1::nil => Val.longuofsingle v1 - | Osingleoflong, v1::nil => Val.singleoflong v1 - | Osingleoflongu, v1::nil => Val.singleoflongu v1 + | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1)) + | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1)) + | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1)) + | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1)) + | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1)) + | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1)) + | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1)) + | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1)) + | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1)) + | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1)) + | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1)) + | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1)) + | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1)) + | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) + | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) + | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty) @@ -895,29 +895,29 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... - destruct v0... (* intoffloat, intuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial. (* floatofint, floatofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* intofsingle, intuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial. (* singleofint, singleofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longoffloat, longuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial. (* floatoflong, floatoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longofsingle, longuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial. (* singleoflong, singleoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m) as [[]|]... - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I. @@ -926,14 +926,7 @@ Qed. Definition is_trapping_op (op : operation) := match op with - | Ointoffloat | Ointuoffloat - | Ointofsingle | Ointuofsingle - | Ofloatofint | Ofloatofintu - | Osingleofint | Osingleofintu - | Olongoffloat | Olonguoffloat - | Olongofsingle | Olonguofsingle - | Ofloatoflong | Ofloatoflongu - | Osingleoflong | Osingleoflongu => true + | Omove => false | _ => false end. @@ -1549,37 +1542,29 @@ Proof. - inv H4; simpl; auto. - inv H4; simpl; auto. (* intoffloat, intuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial. (* floatofint, floatofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* intofsingle, intuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial. (* singleofint, singleofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longoffloat, longuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial. (* floatoflong, floatoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longofsingle, longuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial. (* singleoflong, singleoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* cmp, sel *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index c1847638..513ee9bd 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -730,42 +730,42 @@ Qed. Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. End CMCONSTR. diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index c7898193..9ce7a8bf 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -932,7 +932,7 @@ Theorem eval_intoffloat: Val.intoffloat x = Some y -> exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofint: @@ -943,7 +943,7 @@ Theorem eval_floatofint: Proof. intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuoffloat: @@ -952,7 +952,7 @@ Theorem eval_intuoffloat: Val.intuoffloat x = Some y -> exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofintu: @@ -963,7 +963,7 @@ Theorem eval_floatofintu: Proof. intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intofsingle: @@ -972,7 +972,7 @@ Theorem eval_intofsingle: Val.intofsingle x = Some y -> exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofint: @@ -983,7 +983,7 @@ Theorem eval_singleofint: Proof. intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuofsingle: @@ -992,7 +992,7 @@ Theorem eval_intuofsingle: Val.intuofsingle x = Some y -> exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofintu: @@ -1003,7 +1003,7 @@ Theorem eval_singleofintu: Proof. intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** Selection *) diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v index d379bbe8..e6a60d4e 100644 --- a/aarch64/ValueAOp.v +++ b/aarch64/ValueAOp.v @@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Osingleoffloat, v1::nil => singleoffloat v1 | Ofloatofsingle, v1::nil => floatofsingle v1 - | Ointoffloat, v1::nil => intoffloat v1 - | Ointuoffloat, v1::nil => intuoffloat v1 + | Ointoffloat, v1::nil => intoffloat_total v1 + | Ointuoffloat, v1::nil => intuoffloat_total v1 | Ofloatofint, v1::nil => floatofint v1 | Ofloatofintu, v1::nil => floatofintu v1 - | Ointofsingle, v1::nil => intofsingle v1 - | Ointuofsingle, v1::nil => intuofsingle v1 + | Ointofsingle, v1::nil => intofsingle_total v1 + | Ointuofsingle, v1::nil => intuofsingle_total v1 | Osingleofint, v1::nil => singleofint v1 | Osingleofintu, v1::nil => singleofintu v1 - | Olongoffloat, v1::nil => longoffloat v1 - | Olonguoffloat, v1::nil => longuoffloat v1 + | Olongoffloat, v1::nil => longoffloat_total v1 + | Olonguoffloat, v1::nil => longuoffloat_total v1 | Ofloatoflong, v1::nil => floatoflong v1 | Ofloatoflongu, v1::nil => floatoflongu v1 - | Olongofsingle, v1::nil => longofsingle v1 - | Olonguofsingle, v1::nil => longuofsingle v1 + | Olongofsingle, v1::nil => longofsingle_total v1 + | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 -- cgit From 825b77fe8b4eb0919564e51cfaae69a6dfae24e3 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 2 Oct 2020 21:54:34 +0200 Subject: so that all architectures compile --- aarch64/PrepassSchedulingOracle.ml | 473 +++++++++++++++++++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100644 aarch64/PrepassSchedulingOracle.ml (limited to 'aarch64') diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..25083bcd --- /dev/null +++ b/aarch64/PrepassSchedulingOracle.ml @@ -0,0 +1,473 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open OpWeights +open Registers + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction = function + | Inop _ -> Array.map (fun _ -> 0) resource_bounds + | Iop(op, inputs, output, _) -> resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem seqa = + let simple_deps = get_simple_dependencies seqa in + { max_latency = -1; + resource_bounds = OpWeights.resource_bounds; + instruction_usages = Array.map resources_of_instruction (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) = + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + Printf.printf "prepass scheduling length = %d\n" (Array.length seqa); + let problem = define_problem seqa in + print_sequence stdout (Array.map fst seqa); + print_problem stdout problem; + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + -- cgit From 1deadbf2715e435f2599c415c629ad0f042d40a6 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Oct 2020 18:20:28 +0200 Subject: op_valid_pointer_eq for aarch64 --- aarch64/Op.v | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'aarch64') diff --git a/aarch64/Op.v b/aarch64/Op.v index 0a29ff3e..f2a8e6fb 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -1202,6 +1202,20 @@ Proof. rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto. Qed. +Lemma op_valid_pointer_eq: + forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_operation ge sp op args m1 = eval_operation ge sp op args m2. +Proof. + intros until m2. destruct op eqn:OP; simpl; try congruence. + - intros MEM; destruct cond; simpl; try congruence; + repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. + - intro MEM; destruct cond; simpl; try congruence; + repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + (** Global variables mentioned in an operation or addressing mode *) Definition globals_addressing (addr: addressing) : list ident := -- cgit From bc80528de5dfbc864c611e23691ddd96f15dfdc7 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 22 Oct 2020 12:28:16 +0200 Subject: prefix all calls to OpWeights as preparation to using a structure --- aarch64/PrepassSchedulingOracle.ml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'aarch64') diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 25083bcd..63fdec17 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -2,7 +2,6 @@ open AST open RTL open Maps open InstructionScheduler -open OpWeights open Registers let use_alias_analysis () = false @@ -26,7 +25,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = and last_mem_write : int option ref = ref None and last_branch : int option ref = ref None and last_non_pipelined_op : int array = Array.make - nr_non_pipelined_units ( -1 ) + OpWeights.nr_non_pipelined_units ( -1 ) and latency_constraints : latency_constraint list ref = ref [] in let add_constraint instr_from instr_to latency = assert (instr_from <= instr_to); @@ -142,15 +141,15 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = | Inop _ -> () | Iop(op, inputs, output, _) -> add_non_pipelined_resources i - (non_pipelined_resources_of_op op (List.length inputs)); + (OpWeights.non_pipelined_resources_of_op op (List.length inputs)); (if Op.is_trapping_op op then irreversible_action i); add_input_regs i inputs; - add_output_reg i (latency_of_op op (List.length inputs)) output + add_output_reg i (OpWeights.latency_of_op op (List.length inputs)) output | Iload(trap, chunk, addressing, addr_regs, output, _) -> (if trap=TRAP then irreversible_action i); add_input_mem i; add_input_regs i addr_regs; - add_output_reg i (latency_of_load trap chunk addressing (List.length addr_regs)) output + add_output_reg i (OpWeights.latency_of_load trap chunk addressing (List.length addr_regs)) output | Istore(chunk, addressing, addr_regs, input, _) -> irreversible_action i; add_input_regs i addr_regs; @@ -164,7 +163,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = ); add_input_mem i; add_input_regs i inputs; - add_output_reg i (latency_of_call signature ef) output; + add_output_reg i (OpWeights.latency_of_call signature ef) output; add_output_mem i; failwith "Icall" | Itailcall(signature, ef, inputs) -> @@ -202,19 +201,20 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = !latency_constraints;; let resources_of_instruction = function - | Inop _ -> Array.map (fun _ -> 0) resource_bounds - | Iop(op, inputs, output, _) -> resources_of_op op (List.length inputs) + | Inop _ -> Array.map (fun _ -> 0) OpWeights.resource_bounds + | Iop(op, inputs, output, _) -> + OpWeights.resources_of_op op (List.length inputs) | Iload(trap, chunk, addressing, addr_regs, output, _) -> - resources_of_load trap chunk addressing (List.length addr_regs) + OpWeights.resources_of_load trap chunk addressing (List.length addr_regs) | Istore(chunk, addressing, addr_regs, input, _) -> - resources_of_store chunk addressing (List.length addr_regs) + OpWeights.resources_of_store chunk addressing (List.length addr_regs) | Icall(signature, ef, inputs, output, _) -> - resources_of_call signature ef + OpWeights.resources_of_call signature ef | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> - resources_of_builtin ef + OpWeights.resources_of_builtin ef | Icond(cond, args, _, _ , _) -> - resources_of_cond cond (List.length args) - | Itailcall _ | Ijumptable _ | Ireturn _ -> resource_bounds + OpWeights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> OpWeights.resource_bounds let print_sequence pp (seqa : instruction array) = Array.iteri ( -- cgit From 1740571e67f4c9384aea706132b534411d24b36c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 22 Oct 2020 13:26:35 +0200 Subject: allow changing target cpu --- aarch64/PrepassSchedulingOracle.ml | 44 ++++++++++++++++++---------------- aarch64/PrepassSchedulingOracleDeps.ml | 17 +++++++++++++ 2 files changed, 40 insertions(+), 21 deletions(-) create mode 100644 aarch64/PrepassSchedulingOracleDeps.ml (limited to 'aarch64') diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 63fdec17..358ee97b 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -3,7 +3,8 @@ open RTL open Maps open InstructionScheduler open Registers - +open PrepassSchedulingOracleDeps + let use_alias_analysis () = false let length_of_chunk = function @@ -18,14 +19,14 @@ let length_of_chunk = function | Mfloat64 | Many64 -> 8;; -let get_simple_dependencies (seqa : (instruction*Regset.t) array) = +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = let last_reg_reads : int list PTree.t ref = ref PTree.empty and last_reg_write : (int*int) PTree.t ref = ref PTree.empty and last_mem_reads : int list ref = ref [] and last_mem_write : int option ref = ref None and last_branch : int option ref = ref None and last_non_pipelined_op : int array = Array.make - OpWeights.nr_non_pipelined_units ( -1 ) + opweights.nr_non_pipelined_units ( -1 ) and latency_constraints : latency_constraint list ref = ref [] in let add_constraint instr_from instr_to latency = assert (instr_from <= instr_to); @@ -141,15 +142,15 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = | Inop _ -> () | Iop(op, inputs, output, _) -> add_non_pipelined_resources i - (OpWeights.non_pipelined_resources_of_op op (List.length inputs)); + (opweights.non_pipelined_resources_of_op op (List.length inputs)); (if Op.is_trapping_op op then irreversible_action i); add_input_regs i inputs; - add_output_reg i (OpWeights.latency_of_op op (List.length inputs)) output + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output | Iload(trap, chunk, addressing, addr_regs, output, _) -> (if trap=TRAP then irreversible_action i); add_input_mem i; add_input_regs i addr_regs; - add_output_reg i (OpWeights.latency_of_load trap chunk addressing (List.length addr_regs)) output + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output | Istore(chunk, addressing, addr_regs, input, _) -> irreversible_action i; add_input_regs i addr_regs; @@ -163,7 +164,7 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = ); add_input_mem i; add_input_regs i inputs; - add_output_reg i (OpWeights.latency_of_call signature ef) output; + add_output_reg i (opweights.latency_of_call signature ef) output; add_output_mem i; failwith "Icall" | Itailcall(signature, ef, inputs) -> @@ -200,21 +201,21 @@ let get_simple_dependencies (seqa : (instruction*Regset.t) array) = end seqa; !latency_constraints;; -let resources_of_instruction = function - | Inop _ -> Array.map (fun _ -> 0) OpWeights.resource_bounds +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds | Iop(op, inputs, output, _) -> - OpWeights.resources_of_op op (List.length inputs) + opweights.resources_of_op op (List.length inputs) | Iload(trap, chunk, addressing, addr_regs, output, _) -> - OpWeights.resources_of_load trap chunk addressing (List.length addr_regs) + opweights.resources_of_load trap chunk addressing (List.length addr_regs) | Istore(chunk, addressing, addr_regs, input, _) -> - OpWeights.resources_of_store chunk addressing (List.length addr_regs) + opweights.resources_of_store chunk addressing (List.length addr_regs) | Icall(signature, ef, inputs, output, _) -> - OpWeights.resources_of_call signature ef + opweights.resources_of_call signature ef | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> - OpWeights.resources_of_builtin ef + opweights.resources_of_builtin ef | Icond(cond, args, _, _ , _) -> - OpWeights.resources_of_cond cond (List.length args) - | Itailcall _ | Ijumptable _ | Ireturn _ -> OpWeights.resource_bounds + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds let print_sequence pp (seqa : instruction array) = Array.iteri ( @@ -405,11 +406,11 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem seqa = - let simple_deps = get_simple_dependencies seqa in +let define_problem (opweights : opweights) seqa = + let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; - resource_bounds = OpWeights.resource_bounds; - instruction_usages = Array.map resources_of_instruction (Array.map fst seqa); + resource_bounds = opweights.pipelined_resource_bounds; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) then (get_alias_dependencies seqa) @ simple_deps @@ -439,6 +440,7 @@ let prepass_scheduler_by_name name problem early_ones = | _ -> scheduler_by_name name problem let schedule_sequence (seqa : (instruction*Regset.t) array) = + let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 then None @@ -446,7 +448,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) = begin let nr_instructions = Array.length seqa in Printf.printf "prepass scheduling length = %d\n" (Array.length seqa); - let problem = define_problem seqa in + let problem = define_problem opweights seqa in print_sequence stdout (Array.map fst seqa); print_problem stdout problem; match prepass_scheduler_by_name diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/aarch64/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; -- cgit From 3486eb50d289456b79de168e1ab410306305cf2c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 22 Oct 2020 13:47:17 +0200 Subject: new OpWeights for aarch64 --- aarch64/OpWeights.ml | 660 ++++++++++++++++++++++++++------------------------- 1 file changed, 342 insertions(+), 318 deletions(-) (limited to 'aarch64') diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml index 1b48bc0f..5cdd002c 100644 --- a/aarch64/OpWeights.ml +++ b/aarch64/OpWeights.ml @@ -1,329 +1,353 @@ open Op;; -let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) -let nr_non_pipelined_units = 1;; +open PrepassSchedulingOracleDeps;; -let latency_of_op (op : operation) (nargs : int) = - match op with - | Omove - | Ointconst _ - | Olongconst _ - | Ofloatconst _ - | Osingleconst _ - | Oaddrsymbol _ - | Oaddrstack _ -> 1 - | Oshift _ -> 2 - | Oadd -> 1 - | Oaddshift _ -> 2 - | Oaddimm _ - | Oneg -> 1 - | Onegshift _ -> 2 - | Osub -> 1 - | Osubshift _ -> 2 - | Omul - | Omuladd - | Omulsub -> 4 - | Odiv - | Odivu -> 29 - | Oand -> 1 - | Oandshift _ -> 2 - | Oandimm _ -> 1 - | Oor -> 1 - | Oorshift _ -> 2 - | Oorimm _ -> 1 - | Oxor -> 1 - | Oxorshift _ -> 2 - | Oxorimm _ -> 1 - | Onot -> 1 - | Onotshift _ -> 2 - | Obic -> 1 - | Obicshift _ -> 2 - | Oorn -> 1 - | Oornshift _ -> 2 - | Oeqv -> 1 - | Oeqvshift _ -> 2 - | Oshl - | Oshr - | Oshru -> 2 - | Oshrximm _ -> 6 - | Ozext _ - | Osext _ -> 1 - | Oshlzext _ - | Oshlsext _ - | Ozextshr _ - | Osextshr _ -> 2 - -(* 64-bit integer arithmetic *) - | Oshiftl _ -> 2 - | Oextend _ -> 1 - | Omakelong - | Olowlong - | Ohighlong - | Oaddl -> 1 - | Oaddlshift _ - | Oaddlext _ -> 2 - | Oaddlimm _ - | Onegl -> 1 - | Oneglshift _ -> 2 - | Osubl -> 1 - | Osublshift _ - | Osublext _ -> 2 - | Omull - | Omulladd - | Omullsub - | Omullhs - | Omullhu -> 4 - | Odivl -> 50 - | Odivlu -> 50 - | Oandl -> 1 - | Oandlshift _ -> 2 - | Oandlimm _ - | Oorl -> 1 - | Oorlshift _ -> 2 - | Oorlimm _ - | Oxorl -> 1 - | Oxorlshift _ -> 2 - | Oxorlimm _ - | Onotl -> 1 - | Onotlshift _ -> 2 - | Obicl -> 1 - | Obiclshift _ -> 2 - | Oornl -> 1 - | Oornlshift _ -> 2 - | Oeqvl -> 1 - | Oeqvlshift _ -> 2 - | Oshll - | Oshrl - | Oshrlu -> 2 - | Oshrlximm _ -> 6 - | Ozextl _ - | Osextl _ -> 1 - | Oshllzext _ - | Oshllsext _ - | Ozextshrl _ - | Osextshrl _ -> 2 - -(* 64-bit floating-point arithmetic *) - | Onegf (* r [rd = - r1] *) - | Oabsf (* r [rd = abs(r1)] *) - | Oaddf (* r [rd = r1 + r2] *) - | Osubf (* r [rd = r1 - r2] *) - | Omulf (* r [rd = r1 * r2] *) +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ -> 1 + | Oshift _ -> 2 + | Oadd -> 1 + | Oaddshift _ -> 2 + | Oaddimm _ + | Oneg -> 1 + | Onegshift _ -> 2 + | Osub -> 1 + | Osubshift _ -> 2 + | Omul + | Omuladd + | Omulsub -> 4 + | Odiv + | Odivu -> 29 + | Oand -> 1 + | Oandshift _ -> 2 + | Oandimm _ -> 1 + | Oor -> 1 + | Oorshift _ -> 2 + | Oorimm _ -> 1 + | Oxor -> 1 + | Oxorshift _ -> 2 + | Oxorimm _ -> 1 + | Onot -> 1 + | Onotshift _ -> 2 + | Obic -> 1 + | Obicshift _ -> 2 + | Oorn -> 1 + | Oornshift _ -> 2 + | Oeqv -> 1 + | Oeqvshift _ -> 2 + | Oshl + | Oshr + | Oshru -> 2 + | Oshrximm _ -> 6 + | Ozext _ + | Osext _ -> 1 + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ -> 2 + + (* 64-bit integer arithmetic *) + | Oshiftl _ -> 2 + | Oextend _ -> 1 + | Omakelong + | Olowlong + | Ohighlong + | Oaddl -> 1 + | Oaddlshift _ + | Oaddlext _ -> 2 + | Oaddlimm _ + | Onegl -> 1 + | Oneglshift _ -> 2 + | Osubl -> 1 + | Osublshift _ + | Osublext _ -> 2 + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> 4 + | Odivl -> 50 + | Odivlu -> 50 + | Oandl -> 1 + | Oandlshift _ -> 2 + | Oandlimm _ + | Oorl -> 1 + | Oorlshift _ -> 2 + | Oorlimm _ + | Oxorl -> 1 + | Oxorlshift _ -> 2 + | Oxorlimm _ + | Onotl -> 1 + | Onotlshift _ -> 2 + | Obicl -> 1 + | Obiclshift _ -> 2 + | Oornl -> 1 + | Oornlshift _ -> 2 + | Oeqvl -> 1 + | Oeqvlshift _ -> 2 + | Oshll + | Oshrl + | Oshrlu -> 2 + | Oshrlximm _ -> 6 + | Ozextl _ + | Osextl _ -> 1 + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> 2 + + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) (* 32-bit floating-point arithmetic *) - | Onegfs (* r [rd = - r1] *) - | Oabsfs (* r [rd = abs(r1)] *) - | Oaddfs (* r [rd = r1 + r2] *) - | Osubfs (* r [rd = r1 - r2] *) - | Omulfs (* r [rd = r1 * r2] *) - | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) - | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) (* Conversions between int and float *) - | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) - | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) - | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) - | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) - | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) - | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) - | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) - | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) - | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) - | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) - | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) - | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) - | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) - | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) - | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) - | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) - -> 6 - | Odivf -> 50 (* r [rd = r1 / r2] *) - | Odivfs -> 20 + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> 6 + | Odivf -> 50 (* r [rd = r1 / r2] *) + | Odivfs -> 20 (* Boolean tests *) - | Ocmp cmp | Osel (cmp, _) -> - (match cmp with - | Ccompf _ (* r FP comparison *) - | Cnotcompf _ (* r negation of an FP comparison *) - | Ccompfzero _ (* r comparison with 0.0 *) - | Cnotcompfzero _ (* r negation of comparison with 0.0 *) - | Ccompfs _ (* r FP comparison *) - | Cnotcompfs _ (* r negation of an FP comparison *) - | Ccompfszero _ (* r equal to 0.0 *) - | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 - | _ -> 1);; - -let resources_of_op (op : operation) (nargs : int) = - match op with - | Omove - | Ointconst _ - | Olongconst _ - | Ofloatconst _ - | Osingleconst _ - | Oaddrsymbol _ - | Oaddrstack _ -(* 32-bit integer arithmetic *) - | Oshift _ - | Oadd - | Oaddshift _ - | Oaddimm _ - | Oneg - | Onegshift _ - | Osub - | Osubshift _ -> [| 1 ; 1; 0; 0 |] - | Omul - | Omuladd - | Omulsub -> [| 1; 1; 1; 0 |] - | Odiv - | Odivu -> [| 1; 0; 0; 0 |] - | Oand - | Oandshift _ - | Oandimm _ - | Oor - | Oorshift _ - | Oorimm _ - | Oxor - | Oxorshift _ - | Oxorimm _ - | Onot - | Onotshift _ - | Obic - | Obicshift _ - | Oorn - | Oornshift _ - | Oeqv - | Oeqvshift _ - | Oshl - | Oshr - | Oshru - | Oshrximm _ - | Ozext _ - | Osext _ - | Oshlzext _ - | Oshlsext _ - | Ozextshr _ - | Osextshr _ + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 + | _ -> 1);; + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ (* 64-bit integer arithmetic *) - | Oshiftl _ - | Oextend _ - | Omakelong - | Olowlong - | Ohighlong - | Oaddl - | Oaddlshift _ - | Oaddlext _ - | Oaddlimm _ - | Onegl - | Oneglshift _ - | Osubl - | Osublshift _ - | Osublext _ -> [| 1 ; 1 ; 0; 0 |] - | Omull - | Omulladd - | Omullsub - | Omullhs - | Omullhu -> [| 1 ; 1 ; 1; 0 |] - | Odivl - | Odivlu -> [| 1; 0; 0; 0 |] - | Oandl - | Oandlshift _ - | Oandlimm _ - | Oorl - | Oorlshift _ - | Oorlimm _ - | Oxorl - | Oxorlshift _ - | Oxorlimm _ - | Onotl - | Onotlshift _ - | Obicl - | Obiclshift _ - | Oornl - | Oornlshift _ - | Oeqvl - | Oeqvlshift _ - | Oshll - | Oshrl - | Oshrlu - | Oshrlximm _ - | Ozextl _ - | Osextl _ - | Oshllzext _ - | Oshllsext _ - | Ozextshrl _ - | Osextshrl _ -> [| 1; 1; 0; 0 |] -(* 64-bit floating-point arithmetic *) - | Onegf (* r [rd = - r1] *) - | Oabsf (* r [rd = abs(r1)] *) - | Oaddf (* r [rd = r1 + r2] *) - | Osubf (* r [rd = r1 - r2] *) - | Omulf (* r [rd = r1 * r2] *) - | Odivf -(* 32-bit floating-point arithmetic *) - | Onegfs (* r [rd = - r1] *) - | Oabsfs (* r [rd = abs(r1)] *) - | Oaddfs (* r [rd = r1 + r2] *) - | Osubfs (* r [rd = r1 - r2] *) - | Omulfs (* r [rd = r1 * r2] *) - | Odivfs (* r [rd = r1 / r2] *) - | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) - | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) (* Conversions between int and float *) - | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) - | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) - | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) - | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) - | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) - | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) - | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) - | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) - | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) - | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) - | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) - | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) - | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) - | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) - | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) - | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) - -> [| 1 ; 1; 1; 0 |] - + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + (* Boolean tests *) - | Ocmp cmp | Osel (cmp, _) -> - (match cmp with - | Ccompf _ (* r FP comparison *) - | Cnotcompf _ (* r negation of an FP comparison *) - | Ccompfzero _ (* r comparison with 0.0 *) - | Cnotcompfzero _ (* r negation of comparison with 0.0 *) - | Ccompfs _ (* r FP comparison *) - | Cnotcompfs _ (* r negation of an FP comparison *) - | Ccompfszero _ (* r equal to 0.0 *) - | Cnotcompfszero _ (* r not equal to 0.0 *) -> - [| 1; 1; 1; 0 |] - | _ -> [| 1; 1; 0; 0 |] );; - -let non_pipelined_resources_of_op (op : operation) (nargs : int) = - match op with - | Odiv | Odivu -> [| 29 |] - | Odivfs -> [| 20 |] - | Odivl | Odivlu | Odivf -> [| 50 |] - | _ -> [| -1 |];; - -let resources_of_cond (cmp : condition) (nargs : int) = + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = (match cmp with - | Ccompf _ (* r FP comparison *) - | Cnotcompf _ (* r negation of an FP comparison *) - | Ccompfzero _ (* r comparison with 0.0 *) - | Cnotcompfzero _ (* r negation of comparison with 0.0 *) - | Ccompfs _ (* r FP comparison *) - | Cnotcompfs _ (* r negation of an FP comparison *) - | Ccompfszero _ (* r equal to 0.0 *) - | Cnotcompfszero _ (* r not equal to 0.0 *) -> - [| 1; 1; 1; 0 |] - | _ -> [| 1; 1; 0; 0 |] );; - -let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; -let latency_of_call _ _ = 6;; - -let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; - -let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; -let resources_of_call _ _ = resource_bounds;; -let resources_of_builtin _ = resource_bounds;; +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; -- cgit From 0fe569d24b99a34fb3b9ad6c0cb86876cc86a31d Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 4 Nov 2020 09:10:00 +0100 Subject: disable debug printing in scheduler --- aarch64/PrepassSchedulingOracle.ml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'aarch64') diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 358ee97b..2c3eb14f 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -447,10 +447,12 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) = else begin let nr_instructions = Array.length seqa in - Printf.printf "prepass scheduling length = %d\n" (Array.length seqa); + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); let problem = define_problem opweights seqa in - print_sequence stdout (Array.map fst seqa); - print_problem stdout problem; + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); match prepass_scheduler_by_name (!Clflags.option_fprepass_sched) problem -- cgit