Turn 64-bit integer division and modulus by constants into multiply-high

This trick was already implemented for 32-bit integer division and modulus. Here we extend it to the 64-bit case. For 32-bit target processors, the runtime library must implement 64-bit multiply-high (signed and unsigned). Tentative implementations are provided for IA32 and PowerPC, but need testing.
author: Xavier Leroy <xavier.leroy@inria.fr> 2016-10-04 15:52:16 +0200
committer: Xavier Leroy <xavier.leroy@inria.fr> 2016-10-04 15:52:16 +0200
commit: d2af79a77ed2936ff0ed90cadf8e48637d774d4c (patch)
tree: 09300943e12a98ae80598c79d455b31725271ead /ia32
parent: a44893028eb1dd434c68001234ad56d030205a8e (diff)
download: compcert-d2af79a77ed2936ff0ed90cadf8e48637d774d4c.tar.gz
compcert-d2af79a77ed2936ff0ed90cadf8e48637d774d4c.zip
11 files changed, 69 insertions, 8 deletions
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 01ecb15a..9d4036ff 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -164,9 +164,9 @@ Inductive instruction: Type :=
   | Pimull_ri (rd: ireg) (n: int)
   | Pimulq_ri (rd: ireg) (n: int64)
   | Pimull_r (r1: ireg)
-(*  | Pimulq_r (r1: ireg) *)
+  | Pimulq_r (r1: ireg)
   | Pmull_r (r1: ireg)
-(*  | Pmulq_r (r1: ireg) *)
+  | Pmulq_r (r1: ireg)
   | Pcltd
   | Pcqto
   | Pdivl (r1: ireg)
@@ -718,9 +718,15 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pimull_r r1 =>
       Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1)
                             #RDX <- (Val.mulhs rs#RAX rs#r1))) m
+  | Pimulq_r r1 =>
+      Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1)
+                            #RDX <- (Val.mullhs rs#RAX rs#r1))) m
   | Pmull_r r1 =>
       Next (nextinstr_nf (rs#RAX <- (Val.mul rs#RAX rs#r1)
                             #RDX <- (Val.mulhu rs#RAX rs#r1))) m
+  | Pmulq_r r1 =>
+      Next (nextinstr_nf (rs#RAX <- (Val.mull rs#RAX rs#r1)
+                            #RDX <- (Val.mullhu rs#RAX rs#r1))) m
   | Pcltd =>
       Next (nextinstr_nf (rs#RDX <- (Val.shr rs#RAX (Vint (Int.repr 31))))) m
   | Pcqto =>
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index ccf2e6fd..bb26d507 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -453,6 +453,14 @@ Definition transl_op
   | Omullimm n, a1 :: nil =>
       assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pimulq_ri r n :: k)
+  | Omullhs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq res DX);
+      do r2 <- ireg_of a2; OK (Pimulq_r r2 :: k)
+  | Omullhu, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq res DX);
+      do r2 <- ireg_of a2; OK (Pmulq_r r2 :: k)
   | Odivl, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 AX);
       assertion (mreg_eq a2 CX);
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index 4effe7c9..05b3176a 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -1312,6 +1312,10 @@ Transparent destroyed_by_op.
 (* lea *)
   exploit transl_addressing_mode_32_correct; eauto. intros EA.
   TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss. rewrite normalize_addrmode_32_correct; auto.
+(* mullhs *)
+  apply SAME. TranslOp. destruct H1. Simplifs.
+(* mullhu *)
+  apply SAME. TranslOp. destruct H1. Simplifs.
 (* divl *)
   apply SAME.
   exploit (divls_modls_exists (rs RAX) (rs RCX)). left; congruence.
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index a9383d18..034fa4bb 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -133,6 +133,8 @@ Definition destroyed_by_op (op: operation): list mreg :=
   | Omod => AX :: DX :: nil
   | Omodu => AX :: DX :: nil
   | Oshrximm _ => CX :: nil
+  | Omullhs => AX :: DX :: nil
+  | Omullhu => AX :: DX :: nil
   | Odivl => AX :: DX :: nil
   | Odivlu => AX :: DX :: nil
   | Omodl => AX :: DX :: nil
@@ -211,6 +213,8 @@ Definition mregs_for_operation (op: operation): list (option mreg) * option mreg
   | Oshr => (None :: Some CX :: nil, None)
   | Oshru => (None :: Some CX :: nil, None)
   | Oshrximm _ => (Some AX :: nil, Some AX)
+  | Omullhs => (Some AX :: None :: nil, Some DX)
+  | Omullhu => (Some AX :: None :: nil, Some DX)
   | Odivl => (Some AX :: Some CX :: nil, Some AX)
   | Odivlu => (Some AX :: Some CX :: nil, Some AX)
   | Omodl => (Some AX :: Some CX :: nil, Some DX)
@@ -300,6 +304,8 @@ Definition two_address_op (op: operation) : bool :=
   | Osubl => true
   | Omull => true
   | Omullimm _ => true
+  | Omullhs => false
+  | Omullhu => false
   | Odivl => false
   | Odivlu => false
   | Omodl => false
diff --git a/ia32/NeedOp.v b/ia32/NeedOp.v
index 575532b1..09013cdd 100644
--- a/ia32/NeedOp.v
+++ b/ia32/NeedOp.v
@@ -95,10 +95,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Osubl => op2 (default nv)
   | Omull => op2 (default nv)
   | Omullimm _ => op1 (default nv)
-  | Odivl => op2 (default nv)
-  | Odivlu => op2 (default nv)
-  | Omodl => op2 (default nv)
-  | Omodlu => op2 (default nv)
+  | Omullhs | Omullhu | Odivl | Odivlu | Omodl | Omodlu => op2 (default nv)
   | Oandl => op2 (default nv)
   | Oandlimm _ => op1 (default nv)
   | Oorl => op2 (default nv)
diff --git a/ia32/Op.v b/ia32/Op.v
index 1d0e8472..eb2fd110 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -120,6 +120,8 @@ Inductive operation : Type :=
   | Osubl                    (**r [rd = r1 - r2] *)
   | Omull                    (**r [rd = r1 * r2] *)
   | Omullimm (n: int64)      (**r [rd = r1 * n] *)
+  | Omullhs                  (**r [rd = high part of r1 * r2, signed] *)
+  | Omullhu                  (**r [rd = high part of r1 * r2, unsigned] *)
   | Odivl                    (**r [rd = r1 / r2] (signed) *)
   | Odivlu                   (**r [rd = r1 / r2] (unsigned) *)
   | Omodl                    (**r [rd = r1 % r2] (signed) *)
@@ -339,6 +341,8 @@ Definition eval_operation
   | Osubl, v1::v2::nil => Some (Val.subl v1 v2)
   | Omull, v1::v2::nil => Some (Val.mull v1 v2)
   | Omullimm n, v1::nil => Some (Val.mull v1 (Vlong n))
+  | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2)
+  | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2)
   | Odivl, v1::v2::nil => Val.divls v1 v2
   | Odivlu, v1::v2::nil => Val.divlu v1 v2
   | Omodl, v1::v2::nil => Val.modls v1 v2
@@ -508,6 +512,8 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubl => (Tlong :: Tlong :: nil, Tlong)
   | Omull => (Tlong :: Tlong :: nil, Tlong)
   | Omullimm _ => (Tlong :: nil, Tlong)
+  | Omullhs => (Tlong :: Tlong :: nil, Tlong)
+  | Omullhu => (Tlong :: Tlong :: nil, Tlong)
   | Odivl => (Tlong :: Tlong :: nil, Tlong)
   | Odivlu => (Tlong :: Tlong :: nil, Tlong)
   | Omodl => (Tlong :: Tlong :: nil, Tlong)
@@ -666,6 +672,8 @@ Proof with (try exact I).
     unfold Val.has_type; destruct Archi.ptr64; simpl; auto. destruct (eq_block b b0); auto.
   destruct v0; destruct v1...
   destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
   destruct v0; destruct v1; simpl in *; inv H0.
     destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2...
   destruct v0; destruct v1; simpl in *; inv H0. destruct (Int64.eq i0 Int64.zero); inv H2...
@@ -1210,6 +1218,8 @@ Proof.
   apply Val.subl_inject; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
   inv H4; inv H3; simpl in H1; inv H1. simpl.
     destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. TrivialExists.
   inv H4; inv H3; simpl in H1; inv H1. simpl.
diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml
index b6147197..faa5bb5f 100644
--- a/ia32/PrintOp.ml
+++ b/ia32/PrintOp.ml
@@ -109,7 +109,7 @@ let print_operation reg pp = function
   | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
   | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n)
   | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n)
-  | Olea addr, args -> print_addressing reg pp (addr, args); fprintf pp " (lea)"
+  | Olea addr, args -> print_addressing reg pp (addr, args); fprintf pp " (int)"
   | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
   | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
   | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
@@ -119,6 +119,8 @@ let print_operation reg pp = function
   | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
   | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
   | Omullimm n, [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint n)
+  | Omullhs, [r1;r2] -> fprintf pp "mullhs(%a,%a)" reg r1 reg r2
+  | Omullhu, [r1;r2] -> fprintf pp "mullhu(%a,%a)" reg r1 reg r2
   | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
   | Odivlu, [r1;r2] -> fprintf pp "%a /lu %a" reg r1 reg r2
   | Omodl, [r1;r2] -> fprintf pp "%a %%ls %a" reg r1 reg r2
@@ -138,7 +140,7 @@ let print_operation reg pp = function
   | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
   | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n)
   | Ororlimm n, [r1] -> fprintf pp "%a rorl %ld" reg r1 (camlint_of_coqint n)
-  | Oleal addr, args -> print_addressing reg pp (addr, args); fprintf pp " (leal)"
+  | Oleal addr, args -> print_addressing reg pp (addr, args); fprintf pp " (long)"
   | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
   | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
   | Oaddf, [r1;r2] -> fprintf pp "%a +f %a" reg r1 reg r2
diff --git a/ia32/SelectLong.vp b/ia32/SelectLong.vp
index 77fc4071..2869f823 100644
--- a/ia32/SelectLong.vp
+++ b/ia32/SelectLong.vp
@@ -285,6 +285,14 @@ Nondetfunction mull (e1: expr) (e2: expr) :=
   | _, _ => Eop Omull (e1:::e2:::Enil)
   end.
 
+Definition mullhu (e1: expr) (n2: int64) :=
+  if Archi.splitlong then SplitLong.mullhu e1 n2 else
+  Eop Omullhu (e1 ::: longconst n2 ::: Enil).
+
+Definition mullhs (e1: expr) (n2: int64) :=
+  if Archi.splitlong then SplitLong.mullhs e1 n2 else
+  Eop Omullhs (e1 ::: longconst n2 ::: Enil).
+
 Definition shrxlimm (e: expr) (n: int) :=
   if Archi.splitlong then SplitLong.shrxlimm e n else
   if Int.eq n Int.zero then e else Eop (Oshrxlimm n) (e ::: Enil).
diff --git a/ia32/SelectLongproof.v b/ia32/SelectLongproof.v
index 4cd15fd3..14b0bcce 100644
--- a/ia32/SelectLongproof.v
+++ b/ia32/SelectLongproof.v
@@ -428,6 +428,20 @@ Proof.
 - TrivialExists.
 Qed.
 
+Theorem eval_mullhu: 
+  forall n, unary_constructor_sound (fun a => mullhu a n) (fun v => Val.mullhu v (Vlong n)).
+Proof.
+  unfold mullhu; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhu; auto.
+  red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
+Theorem eval_mullhs: 
+  forall n, unary_constructor_sound (fun a => mullhs a n) (fun v => Val.mullhs v (Vlong n)).
+Proof.
+  unfold mullhs; intros. destruct Archi.splitlong eqn:SL. apply SplitLongproof.eval_mullhs; auto.
+  red; intros. TrivialExists. constructor. eauto. constructor. apply eval_longconst. constructor. auto.
+Qed.
+
 Theorem eval_shrxlimm:
   forall le a n x z,
   eval_expr ge sp e m le a x ->
diff --git a/ia32/TargetPrinter.ml b/ia32/TargetPrinter.ml
index c3e70042..fbc219d1 100644
--- a/ia32/TargetPrinter.ml
+++ b/ia32/TargetPrinter.ml
@@ -438,8 +438,12 @@ module Target(System: SYSTEM):TARGET =
           fprintf oc "	imulq	%a, %a\n" intconst64 n ireg64 rd
       | Pimull_r(r1) ->
           fprintf oc "	imull	%a\n" ireg32 r1
+      | Pimulq_r(r1) ->
+          fprintf oc "	imulq	%a\n" ireg64 r1
       | Pmull_r(r1) ->
           fprintf oc "	mull	%a\n" ireg32 r1
+      | Pmulq_r(r1) ->
+          fprintf oc "	mulq	%a\n" ireg64 r1
       | Pcltd ->
           fprintf oc "	cltd\n"
       | Pcqto ->
diff --git a/ia32/ValueAOp.v b/ia32/ValueAOp.v
index c8b3278e..98f0dbb1 100644
--- a/ia32/ValueAOp.v
+++ b/ia32/ValueAOp.v
@@ -117,6 +117,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubl, v1::v2::nil => subl v1 v2
   | Omull, v1::v2::nil => mull v1 v2
   | Omullimm n, v1::nil => mull v1 (L n)
+  | Omullhs, v1::v2::nil => mullhs v1 v2
+  | Omullhu, v1::v2::nil => mullhu v1 v2
   | Odivl, v1::v2::nil => divls v1 v2
   | Odivlu, v1::v2::nil => divlu v1 v2
   | Omodl, v1::v2::nil => modls v1 v2
author	Xavier Leroy <xavier.leroy@inria.fr>	2016-10-04 15:52:16 +0200
committer	Xavier Leroy <xavier.leroy@inria.fr>	2016-10-04 15:52:16 +0200
commit	d2af79a77ed2936ff0ed90cadf8e48637d774d4c (patch)
tree	09300943e12a98ae80598c79d455b31725271ead /ia32
parent	a44893028eb1dd434c68001234ad56d030205a8e (diff)
download	compcert-d2af79a77ed2936ff0ed90cadf8e48637d774d4c.tar.gz compcert-d2af79a77ed2936ff0ed90cadf8e48637d774d4c.zip