From d2af79a77ed2936ff0ed90cadf8e48637d774d4c Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Tue, 4 Oct 2016 15:52:16 +0200 Subject: Turn 64-bit integer division and modulus by constants into multiply-high This trick was already implemented for 32-bit integer division and modulus. Here we extend it to the 64-bit case. For 32-bit target processors, the runtime library must implement 64-bit multiply-high (signed and unsigned). Tentative implementations are provided for IA32 and PowerPC, but need testing. --- ia32/Asm.v | 10 ++++++++-- ia32/Asmgen.v | 8 ++++++++ ia32/Asmgenproof1.v | 4 ++++ ia32/Machregs.v | 6 ++++++ ia32/NeedOp.v | 5 +---- ia32/Op.v | 10 ++++++++++ ia32/PrintOp.ml | 6 ++++-- ia32/SelectLong.vp | 8 ++++++++ ia32/SelectLongproof.v | 14 ++++++++++++++ ia32/TargetPrinter.ml | 4 ++++ ia32/ValueAOp.v | 2 ++ 11 files changed, 69 insertions(+), 8 deletions(-) (limited to 'ia32') diff --git a/ia32/Asm.v b/ia32/Asm.v index 01ecb15a..9d4036ff 100644 --- a/ia32/Asm.v +++ b/ia32/Asm.v @@ -164,9 +164,9 @@ Inductive instruction: Type := | Pimull_ri (rd: ireg) (n: int) | Pimulq_ri (rd: ireg) (n: int64) | Pimull_r (r1: ireg) -(* | Pimulq_r (r1: ireg) *) + | Pimulq_r (r1: ireg) | Pmull_r (r1: ireg) -(* | Pmulq_r (r1: ireg) *) + | Pmulq_r (r1: ireg) | Pcltd | Pcqto | Pdivl (r1: ireg) @@ -718,9 +718,15 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pimull_r r1 => Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1) #RDX <- (Val.mulhs rs#RAX rs#r1))) m + | Pimulq_r r1 => + Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1) + #RDX <- (Val.mullhs rs#RAX rs#r1))) m | Pmull_r r1 => Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1) #RDX <- (Val.mulhu rs#RAX rs#r1))) m + | Pmulq_r r1 => + Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1) + #RDX <- (Val.mullhu rs#RAX rs#r1))) m | Pcltd => Next (nextinstr_nf (rs#RDX <- (Val.shr rs#RAX (Vint (Int.repr 31))))) m | Pcqto => diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v index ccf2e6fd..bb26d507 100644 --- a/ia32/Asmgen.v +++ b/ia32/Asmgen.v @@ -453,6 +453,14 @@ Definition transl_op | Omullimm n, a1 :: nil => assertion (mreg_eq a1 res); do r <- ireg_of res; OK (Pimulq_ri r n :: k) + | Omullhs, a1 :: a2 :: nil => + assertion (mreg_eq a1 AX); + assertion (mreg_eq res DX); + do r2 <- ireg_of a2; OK (Pimulq_r r2 :: k) + | Omullhu, a1 :: a2 :: nil => + assertion (mreg_eq a1 AX); + assertion (mreg_eq res DX); + do r2 <- ireg_of a2; OK (Pmulq_r r2 :: k) | Odivl, a1 :: a2 :: nil => assertion (mreg_eq a1 AX); assertion (mreg_eq a2 CX); diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v index 4effe7c9..05b3176a 100644 --- a/ia32/Asmgenproof1.v +++ b/ia32/Asmgenproof1.v @@ -1312,6 +1312,10 @@ Transparent destroyed_by_op. (* lea *) exploit transl_addressing_mode_32_correct; eauto. intros EA. TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss. rewrite normalize_addrmode_32_correct; auto. +(* mullhs *) + apply SAME. TranslOp. destruct H1. Simplifs. +(* mullhu *) + apply SAME. TranslOp. destruct H1. Simplifs. (* divl *) apply SAME. exploit (divls_modls_exists (rs RAX) (rs RCX)). left; congruence. diff --git a/ia32/Machregs.v b/ia32/Machregs.v index a9383d18..034fa4bb 100644 --- a/ia32/Machregs.v +++ b/ia32/Machregs.v @@ -133,6 +133,8 @@ Definition destroyed_by_op (op: operation): list mreg := | Omod => AX :: DX :: nil | Omodu => AX :: DX :: nil | Oshrximm _ => CX :: nil + | Omullhs => AX :: DX :: nil + | Omullhu => AX :: DX :: nil | Odivl => AX :: DX :: nil | Odivlu => AX :: DX :: nil | Omodl => AX :: DX :: nil @@ -211,6 +213,8 @@ Definition mregs_for_operation (op: operation): list (option mreg) * option mreg | Oshr => (None :: Some CX :: nil, None) | Oshru => (None :: Some CX :: nil, None) | Oshrximm _ => (Some AX :: nil, Some AX) + | Omullhs => (Some AX :: None :: nil, Some DX) + | Omullhu => (Some AX :: None :: nil, Some DX) | Odivl => (Some AX :: Some CX :: nil, Some AX) | Odivlu => (Some AX :: Some CX :: nil, Some AX) | Omodl => (Some AX :: Some CX :: nil, Some DX) @@ -300,6 +304,8 @@ Definition two_address_op (op: operation) : bool := | Osubl => true | Omull => true | Omullimm _ => true + | Omullhs => false + | Omullhu => false | Odivl => false | Odivlu => false | Omodl => false diff --git a/ia32/NeedOp.v b/ia32/NeedOp.v index 575532b1..09013cdd 100644 --- a/ia32/NeedOp.v +++ b/ia32/NeedOp.v @@ -95,10 +95,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval := | Osubl => op2 (default nv) | Omull => op2 (default nv) | Omullimm _ => op1 (default nv) - | Odivl => op2 (default nv) - | Odivlu => op2 (default nv) - | Omodl => op2 (default nv) - | Omodlu => op2 (default nv) + | Omullhs | Omullhu | Odivl | Odivlu | Omodl | Omodlu => op2 (default nv) | Oandl => op2 (default nv) | Oandlimm _ => op1 (default nv) | Oorl => op2 (default nv) diff --git a/ia32/Op.v b/ia32/Op.v index 1d0e8472..eb2fd110 100644 --- a/ia32/Op.v +++ b/ia32/Op.v @@ -120,6 +120,8 @@ Inductive operation : Type := | Osubl (**r [rd = r1 - r2] *) | Omull (**r [rd = r1 * r2] *) | Omullimm (n: int64) (**r [rd = r1 * n] *) + | Omullhs (**r [rd = high part of r1 * r2, signed] *) + | Omullhu (**r [rd = high part of r1 * r2, unsigned] *) | Odivl (**r [rd = r1 / r2] (signed) *) | Odivlu (**r [rd = r1 / r2] (unsigned) *) | Omodl (**r [rd = r1 % r2] (signed) *) @@ -339,6 +341,8 @@ Definition eval_operation | Osubl, v1::v2::nil => Some (Val.subl v1 v2) | Omull, v1::v2::nil => Some (Val.mull v1 v2) | Omullimm n, v1::nil => Some (Val.mull v1 (Vlong n)) + | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) + | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) | Odivl, v1::v2::nil => Val.divls v1 v2 | Odivlu, v1::v2::nil => Val.divlu v1 v2 | Omodl, v1::v2::nil => Val.modls v1 v2 @@ -508,6 +512,8 @@ Definition type_of_operation (op: operation) : list typ * typ := | Osubl => (Tlong :: Tlong :: nil, Tlong) | Omull => (Tlong :: Tlong :: nil, Tlong) | Omullimm _ => (Tlong :: nil, Tlong) + | Omullhs => (Tlong :: Tlong :: nil, Tlong) + | Omullhu => (Tlong :: Tlong :: nil, Tlong) | Odivl => (Tlong :: Tlong :: nil, Tlong) | Odivlu => (Tlong :: Tlong :: nil, Tlong) | Omodl => (Tlong :: Tlong :: nil, Tlong) @@ -666,6 +672,8 @@ Proof with (try exact I). unfold Val.has_type; destruct Archi.ptr64; simpl; auto. destruct (eq_block b b0); auto. destruct v0; destruct v1... destruct v0... + destruct v0; destruct v1... + destruct v0; destruct v1... destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2... destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero); inv H2... @@ -1210,6 +1218,8 @@ Proof. apply Val.subl_inject; auto. inv H4; inv H2; simpl; auto. inv H4; simpl; auto. + inv H4; inv H2; simpl; auto. + inv H4; inv H2; simpl; auto. inv H4; inv H3; simpl in H1; inv H1. simpl. destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. TrivialExists. inv H4; inv H3; simpl in H1; inv H1. simpl. diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml index b6147197..faa5bb5f 100644 --- a/ia32/PrintOp.ml +++ b/ia32/PrintOp.ml @@ -109,7 +109,7 @@ let print_operation reg pp = function | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n) | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n) | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n) - | Olea addr, args -> print_addressing reg pp (addr, args); fprintf pp " (lea)" + | Olea addr, args -> print_addressing reg pp (addr, args); fprintf pp " (int)" | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 @@ -119,6 +119,8 @@ let print_operation reg pp = function | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2 | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2 | Omullimm n, [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint n) + | Omullhs, [r1;r2] -> fprintf pp "mullhs(%a,%a)" reg r1 reg r2 + | Omullhu, [r1;r2] -> fprintf pp "mullhu(%a,%a)" reg r1 reg r2 | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2 | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2 | Omodl, [r1;r2] -> fprintf pp "%a %%ls %a" reg r1 reg r2 @@ -138,7 +140,7 @@ let print_operation reg pp = function | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2 | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n) | Ororlimm n, [r1] -> fprintf pp "%a rorl %ld" reg r1 (camlint_of_coqint n) - | Oleal addr, args -> print_addressing reg pp (addr, args); fprintf pp " (leal)" + | Oleal addr, args -> print_addressing reg pp (addr, args); fprintf pp " (long)" | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1 | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1 | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2 diff --git a/ia32/SelectLong.vp b/ia32/SelectLong.vp index 77fc4071..2869f823 100644 --- a/ia32/SelectLong.vp +++ b/ia32/SelectLong.vp @@ -285,6 +285,14 @@ Nondetfunction mull (e1: expr) (e2: expr) := | _, _ => Eop Omull (e1:::e2:::Enil) end. +Definition mullhu (e1: expr) (n2: int64) := + if Archi.splitlong then SplitLong.mullhu e1 n2 else + Eop Omullhu (e1 ::: longconst n2 ::: Enil). + +Definition mullhs (e1: expr) (n2: int64) := + if Archi.splitlong then SplitLong.mullhs e1 n2 else + Eop Omullhs (e1 ::: longconst n2 ::: Enil). + Definition shrxlimm (e: expr) (n: int) := if Archi.splitlong then SplitLong.shrxlimm e n else if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil). diff --git a/ia32/SelectLongproof.v b/ia32/SelectLongproof.v index 4cd15fd3..14b0bcce 100644 --- a/ia32/SelectLongproof.v +++ b/ia32/SelectLongproof.v @@ -428,6 +428,20 @@ Proof. - TrivialExists. Qed. +Theorem eval_mullhu: + forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)). +Proof. + unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto. + red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto. +Qed. + +Theorem eval_mullhs: + forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)). +Proof. + unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto. + red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto. +Qed. + Theorem eval_shrxlimm: forall le a n x z, eval_expr ge sp e m le a x -> diff --git a/ia32/TargetPrinter.ml b/ia32/TargetPrinter.ml index c3e70042..fbc219d1 100644 --- a/ia32/TargetPrinter.ml +++ b/ia32/TargetPrinter.ml @@ -438,8 +438,12 @@ module Target(System: SYSTEM):TARGET = fprintf oc " imulq %a, %a\n" intconst64 n ireg64 rd | Pimull_r(r1) -> fprintf oc " imull %a\n" ireg32 r1 + | Pimulq_r(r1) -> + fprintf oc " imulq %a\n" ireg64 r1 | Pmull_r(r1) -> fprintf oc " mull %a\n" ireg32 r1 + | Pmulq_r(r1) -> + fprintf oc " mulq %a\n" ireg64 r1 | Pcltd -> fprintf oc " cltd\n" | Pcqto -> diff --git a/ia32/ValueAOp.v b/ia32/ValueAOp.v index c8b3278e..98f0dbb1 100644 --- a/ia32/ValueAOp.v +++ b/ia32/ValueAOp.v @@ -117,6 +117,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Osubl, v1::v2::nil => subl v1 v2 | Omull, v1::v2::nil => mull v1 v2 | Omullimm n, v1::nil => mull v1 (L n) + | Omullhs, v1::v2::nil => mullhs v1 v2 + | Omullhu, v1::v2::nil => mullhu v1 v2 | Odivl, v1::v2::nil => divls v1 v2 | Odivlu, v1::v2::nil => divlu v1 v2 | Omodl, v1::v2::nil => modls v1 v2 -- cgit