From f21a6b181dded86ef0e5c7ab94f74e5b960fd510 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Sun, 2 Oct 2016 16:17:51 +0200 Subject: Improve code generation for 64-bit signed integer division Implement the 'shift right extended' trick, both in the generic implementation (backend/SplitLong) and in the IA32 port. Note that now SelectDiv depends on SelectLong, and that some work was moved from SelectLong to SelectDiv. --- ia32/Asmgen.v | 11 +++++++++++ ia32/Asmgenproof.v | 7 +++++++ ia32/Asmgenproof1.v | 41 +++++++++++++++++++++++++++++++++++++++ ia32/ConstpropOp.vp | 9 +++++++++ ia32/ConstpropOpproof.v | 17 +++++++++++++++++ ia32/Machregs.v | 3 +++ ia32/NeedOp.v | 1 + ia32/Op.v | 5 +++++ ia32/PrintOp.ml | 1 + ia32/SelectLong.vp | 51 ++++++++++++------------------------------------- ia32/SelectLongproof.v | 38 ++++++++++++++++++++++++------------ ia32/ValueAOp.v | 1 + 12 files changed, 134 insertions(+), 51 deletions(-) (limited to 'ia32') diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v index 4662f964..ccf2e6fd 100644 --- a/ia32/Asmgen.v +++ b/ia32/Asmgen.v @@ -53,6 +53,13 @@ Definition mk_shrximm (n: int) (k: code) : res code := Pcmov Cond_l RAX RCX :: Psarl_ri RAX n :: k). +Definition mk_shrxlimm (n: int) (k: code) : res code := + OK (if Int.eq n Int.zero then Pmov_rr RAX RAX :: k else + Pcqto :: + Pshrq_ri RDX (Int.sub (Int.repr 64) n) :: + Pleaq RAX (Addrmode (Some RAX) (Some(RDX, 1)) (inl _ 0)) :: + Psarq_ri RAX n :: k). + Definition low_ireg (r: ireg) : bool := match r with RAX | RBX | RCX | RDX => true | _ => false end. @@ -501,6 +508,10 @@ Definition transl_op | Oshrlimm n, a1 :: nil => assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Psarq_ri r n :: k) + | Oshrxlimm n, a1 :: nil => + assertion (mreg_eq a1 AX); + assertion (mreg_eq res AX); + mk_shrxlimm n k | Oshrlu, a1 :: a2 :: nil => assertion (mreg_eq a1 res); assertion (mreg_eq a2 CX); diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v index bf14f010..79602e52 100644 --- a/ia32/Asmgenproof.v +++ b/ia32/Asmgenproof.v @@ -132,6 +132,13 @@ Proof. Qed. Hint Resolve mk_shrximm_label: labels. +Remark mk_shrxlimm_label: + forall n k c, mk_shrxlimm n k = OK c -> tail_nolabel k c. +Proof. + intros. monadInv H. destruct (Int.eq n Int.zero); TailNoLabel. +Qed. +Hint Resolve mk_shrxlimm_label: labels. + Remark mk_intconv_label: forall f r1 r2 k c, mk_intconv f r1 r2 k = OK c -> (forall r r', nolabel (f r r')) -> diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v index fa75e7e7..99d0680d 100644 --- a/ia32/Asmgenproof1.v +++ b/ia32/Asmgenproof1.v @@ -230,6 +230,45 @@ Proof. unfold compare_ints. Simplifs. Qed. +(** Smart constructor for [shrxl] *) + +Lemma mk_shrxlimm_correct: + forall n k c (rs1: regset) v m, + mk_shrxlimm n k = OK c -> + Val.shrxl (rs1#RAX) (Vint n) = Some v -> + exists rs2, + exec_straight ge fn c rs1 m k rs2 m + /\ rs2#RAX = v + /\ forall r, data_preg r = true -> r <> RAX -> r <> RDX -> rs2#r = rs1#r. +Proof. + unfold mk_shrxlimm; intros. exploit Val.shrxl_shrl_2; eauto. intros EQ. + destruct (Int.eq n Int.zero); inv H. +- econstructor; split. apply exec_straight_one. simpl; reflexivity. auto. + split. Simplifs. intros; Simplifs. +- set (v1 := Val.shrl (rs1 RAX) (Vint (Int.repr 63))) in *. + set (v2 := Val.shrlu v1 (Vint (Int.sub (Int.repr 64) n))) in *. + set (v3 := Val.addl (rs1 RAX) v2) in *. + set (v4 := Val.shrl v3 (Vint n)) in *. + set (rs2 := nextinstr_nf (rs1#RDX <- v1)). + set (rs3 := nextinstr_nf (rs2#RDX <- v2)). + set (rs4 := nextinstr (rs3#RAX <- v3)). + set (rs5 := nextinstr_nf (rs4#RAX <- v4)). + assert (X: forall v1 v2, + Val.addl v1 (Val.addl v2 (Vlong Int64.zero)) = Val.addl v1 v2). + { intros. destruct v0; simpl; auto; destruct v5; simpl; auto. + rewrite Int64.add_zero; auto. + destruct Archi.ptr64; auto. rewrite Ptrofs.add_zero; auto. + destruct Archi.ptr64; auto. rewrite Int64.add_zero; auto. + destruct Archi.ptr64; auto. } + exists rs5; split. + eapply exec_straight_trans with (rs2 := rs3). + eapply exec_straight_two with (rs2 := rs2); reflexivity. + eapply exec_straight_two with (rs2 := rs4). + simpl. rewrite X. reflexivity. reflexivity. reflexivity. reflexivity. + split. unfold rs5; Simplifs. + intros. unfold rs5; Simplifs. unfold rs4; Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. +Qed. + (** Smart constructor for integer conversions *) Lemma mk_intconv_correct: @@ -1321,6 +1360,8 @@ Transparent destroyed_by_op. rewrite D. reflexivity. auto. auto. split. change (Vlong r = v). congruence. simpl; intros. destruct H2. unfold rs1; Simplifs. +(* shrxlimm *) + apply SAME. eapply mk_shrxlimm_correct; eauto. (* leal *) exploit transl_addressing_mode_64_correct; eauto. intros EA. generalize (normalize_addrmode_64_correct x rs). destruct (normalize_addrmode_64 x) as [am' [delta|]]; intros EV. diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp index c35d3def..0bf143d2 100644 --- a/ia32/ConstpropOp.vp +++ b/ia32/ConstpropOp.vp @@ -311,6 +311,14 @@ Definition make_xorlimm (n: int64) (r: reg) := else if Int64.eq n Int64.mone then (Onotl, r :: nil) else (Oxorlimm n, r :: nil). +Definition make_divlimm n (r1 r2: reg) := + match Int64.is_power2' n with + | Some l => if Int.ltu l (Int.repr 63) + then (Oshrxlimm l, r1 :: nil) + else (Odivl, r1 :: r2 :: nil) + | None => (Odivl, r1 :: r2 :: nil) + end. + Definition make_divluimm n (r1 r2: reg) := match Int64.is_power2' n with | Some l => (Oshrluimm l, r1 :: nil) @@ -371,6 +379,7 @@ Nondetfunction op_strength_reduction | Osubl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_addlimm (Int64.neg n2) r1 | Omull, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_mullimm n1 r2 | Omull, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_mullimm n2 r1 + | Odivl, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divlimm n2 r1 r2 | Odivlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_divluimm n2 r1 r2 | Omodlu, r1 :: r2 :: nil, v1 :: L n2 :: nil => make_modluimm n2 r1 r2 | Oandl, r1 :: r2 :: nil, L n1 :: v2 :: nil => make_andlimm n1 r2 v2 diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v index 4175d2f9..161b9579 100644 --- a/ia32/ConstpropOpproof.v +++ b/ia32/ConstpropOpproof.v @@ -578,6 +578,20 @@ Proof. econstructor; split; eauto. auto. Qed. +Lemma make_divlimm_correct: + forall n r1 r2 v, + Val.divls e#r1 e#r2 = Some v -> + e#r2 = Vlong n -> + let (op, args) := make_divlimm n r1 r2 in + exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. +Proof. + intros; unfold make_divlimm. + destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. + rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto. + exists v; auto. + exists v; auto. +Qed. + Lemma make_divluimm_correct: forall n r1 r2 v, Val.divlu e#r1 e#r2 = Some v -> @@ -821,6 +835,9 @@ Proof. (* mull *) rewrite Val.mull_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. +(* divl *) + assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. + apply make_divlimm_correct; auto. (* divlu *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. apply make_divluimm_correct; auto. diff --git a/ia32/Machregs.v b/ia32/Machregs.v index 34d88328..a9383d18 100644 --- a/ia32/Machregs.v +++ b/ia32/Machregs.v @@ -137,6 +137,7 @@ Definition destroyed_by_op (op: operation): list mreg := | Odivlu => AX :: DX :: nil | Omodl => AX :: DX :: nil | Omodlu => AX :: DX :: nil + | Oshrxlimm _ => DX :: nil | Ocmp _ => AX :: CX :: nil | _ => nil end. @@ -217,6 +218,7 @@ Definition mregs_for_operation (op: operation): list (option mreg) * option mreg | Oshll => (None :: Some CX :: nil, None) | Oshrl => (None :: Some CX :: nil, None) | Oshrlu => (None :: Some CX :: nil, None) + | Oshrxlimm _ => (Some AX :: nil, Some AX) | _ => (nil, None) end. @@ -313,6 +315,7 @@ Definition two_address_op (op: operation) : bool := | Oshllimm _ => true | Oshrl => true | Oshrlimm _ => true + | Oshrxlimm _ => false | Oshrlu => true | Oshrluimm _ => true | Ororlimm _ => true diff --git a/ia32/NeedOp.v b/ia32/NeedOp.v index 9a75cba8..575532b1 100644 --- a/ia32/NeedOp.v +++ b/ia32/NeedOp.v @@ -110,6 +110,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval := | Oshllimm _ => op1 (default nv) | Oshrl => op2 (default nv) | Oshrlimm _ => op1 (default nv) + | Oshrxlimm n => op1 (default nv) | Oshrlu => op2 (default nv) | Oshrluimm _ => op1 (default nv) | Ororlimm _ => op1 (default nv) diff --git a/ia32/Op.v b/ia32/Op.v index ed96c132..1d0e8472 100644 --- a/ia32/Op.v +++ b/ia32/Op.v @@ -135,6 +135,7 @@ Inductive operation : Type := | Oshllimm (n: int) (**r [rd = r1 << n] *) | Oshrl (**r [rd = r1 >> r2] (signed) *) | Oshrlimm (n: int) (**r [rd = r1 >> n] (signed) *) + | Oshrxlimm (n: int) (**r [rd = r1 / 2^n] (signed) *) | Oshrlu (**r [rd = r1 >> r2] (unsigned) *) | Oshrluimm (n: int) (**r [rd = r1 >> n] (unsigned) *) | Ororlimm (n: int) (**r rotate right immediate *) @@ -353,6 +354,7 @@ Definition eval_operation | Oshllimm n, v1::nil => Some (Val.shll v1 (Vint n)) | Oshrl, v1::v2::nil => Some (Val.shrl v1 v2) | Oshrlimm n, v1::nil => Some (Val.shrl v1 (Vint n)) + | Oshrxlimm n, v1::nil => Val.shrxl v1 (Vint n) | Oshrlu, v1::v2::nil => Some (Val.shrlu v1 v2) | Oshrluimm n, v1::nil => Some (Val.shrlu v1 (Vint n)) | Ororlimm n, v1::nil => Some (Val.rorl v1 (Vint n)) @@ -521,6 +523,7 @@ Definition type_of_operation (op: operation) : list typ * typ := | Oshllimm _ => (Tlong :: nil, Tlong) | Oshrl => (Tlong :: Tint :: nil, Tlong) | Oshrlimm _ => (Tlong :: nil, Tlong) + | Oshrxlimm _ => (Tlong :: nil, Tlong) | Oshrlu => (Tlong :: Tint :: nil, Tlong) | Oshrluimm _ => (Tlong :: nil, Tlong) | Ororlimm _ => (Tlong :: nil, Tlong) @@ -680,6 +683,7 @@ Proof with (try exact I). destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')... destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')... + destruct v0; inv H0. destruct (Int.ltu n (Int.repr 63)); inv H2... destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... destruct v0; simpl... destruct (Int.ltu n Int64.iwordsize')... destruct v0... @@ -1225,6 +1229,7 @@ Proof. inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto. inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto. + inv H4; simpl in H1; try discriminate. simpl. destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. inv H4; simpl; auto. destruct (Int.ltu n Int64.iwordsize'); auto. inv H4; simpl; auto. diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml index 42c8b3e5..b6147197 100644 --- a/ia32/PrintOp.ml +++ b/ia32/PrintOp.ml @@ -134,6 +134,7 @@ let print_operation reg pp = function | Oshllimm n, [r1] -> fprintf pp "%a < fprintf pp "%a >>ls %a" reg r1 reg r2 | Oshrlimm n, [r1] -> fprintf pp "%a >>ls %ld" reg r1 (camlint_of_coqint n) + | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n) | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2 | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n) | Ororlimm n, [r1] -> fprintf pp "%a rorl %ld" reg r1 (camlint_of_coqint n) diff --git a/ia32/SelectLong.vp b/ia32/SelectLong.vp index c28777e8..32fd9aca 100644 --- a/ia32/SelectLong.vp +++ b/ia32/SelectLong.vp @@ -286,45 +286,18 @@ Nondetfunction mull (e1: expr) (e2: expr) := | _, _ => Eop Omull (e1:::e2:::Enil) end. -Definition divl (e1: expr) (e2: expr) := - if Archi.splitlong then SplitLong.divl e1 e2 else - match is_longconst e1, is_longconst e2 with - | Some n1, Some n2 => longconst (Int64.divs n1 n2) - | _, _ => Eop Odivl (e1:::e2:::Enil) - end. - -Definition modl (e1: expr) (e2: expr) := - if Archi.splitlong then SplitLong.modl e1 e2 else - match is_longconst e1, is_longconst e2 with - | Some n1, Some n2 => longconst (Int64.mods n1 n2) - | _, _ => Eop Omodl (e1:::e2:::Enil) - end. - -Definition divlu (e1 e2: expr) := - if Archi.splitlong then SplitLong.divlu e1 e2 else - let default := Eop Odivlu (e1:::e2:::Enil) in - match is_longconst e1, is_longconst e2 with - | Some n1, Some n2 => longconst (Int64.divu n1 n2) - | _, Some n2 => - match Int64.is_power2 n2 with - | Some l => shrluimm e1 (Int.repr (Int64.unsigned l)) - | None => default - end - | _, _ => default - end. - -Definition modlu (e1 e2: expr) := - if Archi.splitlong then SplitLong.modlu e1 e2 else - let default := Eop Omodlu (e1:::e2:::Enil) in - match is_longconst e1, is_longconst e2 with - | Some n1, Some n2 => longconst (Int64.modu n1 n2) - | _, Some n2 => - match Int64.is_power2 n2 with - | Some l => andl e1 (longconst (Int64.sub n2 Int64.one)) - | None => default - end - | _, _ => default - end. +Definition shrxlimm (e: expr) (n: int) := + if Archi.splitlong then SplitLong.shrxlimm e n else + if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil). + +Definition divlu_base (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.divlu_base e1 e2 else Eop Odivlu (e1:::e2:::Enil). +Definition modlu_base (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.modlu_base e1 e2 else Eop Omodu (e1:::e2:::Enil). +Definition divls_base (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.divls_base e1 e2 else Eop Odivl (e1:::e2:::Enil). +Definition modls_base (e1: expr) (e2: expr) := + if Archi.splitlong then SplitLong.modls_base e1 e2 else Eop Omodl (e1:::e2:::Enil). Definition cmplu (c: comparison) (e1 e2: expr) := if Archi.splitlong then SplitLong.cmplu c e1 e2 else diff --git a/ia32/SelectLongproof.v b/ia32/SelectLongproof.v index 634da83a..db3dd835 100644 --- a/ia32/SelectLongproof.v +++ b/ia32/SelectLongproof.v @@ -66,24 +66,24 @@ Proof. EvalOp. Qed. -Lemma is_longconst_sound: +Lemma is_longconst_sound_1: forall a n, is_longconst a = Some n -> a = Eop (Olongconst n) Enil. Proof with (try discriminate). unfold is_longconst; intros. destruct a... destruct o... destruct e0... congruence. Qed. -Lemma is_longconst_inv: +Lemma is_longconst_sound: forall v a n le, is_longconst a = Some n -> eval_expr ge sp e m le a v -> v = Vlong n. Proof. - intros. rewrite (is_longconst_sound _ _ H) in H0. InvEval. auto. + intros. rewrite (is_longconst_sound_1 _ _ H) in H0. InvEval. auto. Qed. Theorem eval_intoflong: unary_constructor_sound intoflong Val.loword. Proof. unfold intoflong; destruct Archi.splitlong. apply SplitLongproof.eval_intoflong. red; intros. destruct (is_longconst a) as [n|] eqn:C. -- TrivialExists. simpl. erewrite (is_longconst_inv x) by eauto. auto. +- TrivialExists. simpl. erewrite (is_longconst_sound x) by eauto. auto. - TrivialExists. Qed. @@ -235,16 +235,30 @@ Admitted. Theorem eval_mull: binary_constructor_sound mull Val.mull. Admitted. -Theorem eval_divl: partial_binary_constructor_sound divl Val.divls. +Theorem eval_shrxlimm: + forall le a n x z, + eval_expr ge sp e m le a x -> + Val.shrxl x (Vint n) = Some z -> + exists v, eval_expr ge sp e m le (shrxlimm a n) v /\ Val.lessdef z v. +Proof. + unfold shrxlimm; intros. destruct Archi.splitlong eqn:SL. ++ eapply SplitLongproof.eval_shrxlimm; eauto. apply Archi.splitlong_ptr32; auto. ++ predSpec Int.eq Int.eq_spec n Int.zero. +- subst n. destruct x; simpl in H0; inv H0. econstructor; split; eauto. + change (Int.ltu Int.zero (Int.repr 63)) with true. simpl. rewrite Int64.shrx'_zero; auto. +- TrivialExists. +Qed. + +Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. Admitted. -Theorem eval_modl: partial_binary_constructor_sound modl Val.modls. +Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. Admitted. -Theorem eval_divlu: partial_binary_constructor_sound divlu Val.divlu. +Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. Admitted. -Theorem eval_modlu: partial_binary_constructor_sound modlu Val.modlu. +Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. Admitted. Theorem eval_cmplu: @@ -259,8 +273,8 @@ Proof. unfold Val.cmplu in H1. destruct (Val.cmplu_bool (Mem.valid_pointer m) c x y) as [vb|] eqn:C; simpl in H1; inv H1. destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2; - try (assert (x = Vlong n1) by (eapply is_longconst_inv; eauto)); - try (assert (y = Vlong n2) by (eapply is_longconst_inv; eauto)); + try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto)); + try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto)); subst. - simpl in C; inv C. EvalOp. destruct (Int64.cmpu c n1 n2); reflexivity. - EvalOp. simpl. rewrite Val.swap_cmplu_bool. rewrite C; auto. @@ -280,8 +294,8 @@ Proof. unfold Val.cmpl in H1. destruct (Val.cmpl_bool c x y) as [vb|] eqn:C; simpl in H1; inv H1. destruct (is_longconst a) as [n1|] eqn:LC1; destruct (is_longconst b) as [n2|] eqn:LC2; - try (assert (x = Vlong n1) by (eapply is_longconst_inv; eauto)); - try (assert (y = Vlong n2) by (eapply is_longconst_inv; eauto)); + try (assert (x = Vlong n1) by (eapply is_longconst_sound; eauto)); + try (assert (y = Vlong n2) by (eapply is_longconst_sound; eauto)); subst. - simpl in C; inv C. EvalOp. destruct (Int64.cmp c n1 n2); reflexivity. - EvalOp. simpl. rewrite Val.swap_cmpl_bool. rewrite C; auto. diff --git a/ia32/ValueAOp.v b/ia32/ValueAOp.v index ce33341e..c8b3278e 100644 --- a/ia32/ValueAOp.v +++ b/ia32/ValueAOp.v @@ -132,6 +132,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Oshllimm n, v1::nil => shll v1 (I n) | Oshrl, v1::v2::nil => shrl v1 v2 | Oshrlimm n, v1::nil => shrl v1 (I n) + | Oshrxlimm n, v1::nil => shrxl v1 (I n) | Oshrlu, v1::v2::nil => shrlu v1 v2 | Oshrluimm n, v1::nil => shrlu v1 (I n) | Ororlimm n, v1::nil => rorl v1 (I n) -- cgit