IA32: model integer division and modulus closer to the machine

lib/Integers.v: define division-remainder of a double word by a word ia32/Asm.v: use it to give Pdiv and Pidiv their "true" semantics like in the processor; add Pcltd as an instruction ia32/*: adapt accordingly Additional benefit: Pcltd could be used for an alternate implementation of shrximm.
author: Xavier Leroy <xavier.leroy@inria.fr> 2016-09-18 10:29:11 +0200
committer: Xavier Leroy <xavier.leroy@inria.fr> 2016-09-18 10:29:11 +0200
commit: 99554c986d023d00192eb3d1fbfe1c0cc138596e (patch)
tree: af3509d66ed70784e3b1bd0126bbddb17fd59fb3
parent: 322f3c865341fdfd5d22ab885b2934a5213ddbaa (diff)
download: compcert-kvx-99554c986d023d00192eb3d1fbfe1c0cc138596e.tar.gz
compcert-kvx-99554c986d023d00192eb3d1fbfe1c0cc138596e.zip
5 files changed, 187 insertions, 50 deletions
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 12d164a6..b4fc950b 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -151,6 +151,7 @@ Inductive instruction: Type :=
   | Pimul_ri (rd: ireg) (n: int)
   | Pimul_r (r1: ireg)
   | Pmul_r (r1: ireg)
+  | Pcltd
   | Pdiv (r1: ireg)
   | Pidiv (r1: ireg)
   | Pand_rr (rd: ireg) (r1: ireg)
@@ -618,17 +619,25 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pmul_r r1 =>
       Next (nextinstr_nf (rs#EAX <- (Val.mul rs#EAX rs#r1)
                             #EDX <- (Val.mulhu rs#EAX rs#r1))) m
+  | Pcltd =>
+      Next (nextinstr_nf (rs#EDX <- (Val.shr rs#EAX (Vint (Int.repr 31))))) m
   | Pdiv r1 =>
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r1 in
-      match Val.divu vn vd, Val.modu vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
+      match rs#EDX, rs#EAX, rs#r1 with
+      | Vint nh, Vint nl, Vint d =>
+          match Int.divmodu2 nh nl d with
+          | Some(q, r) => Next (nextinstr_nf (rs#EAX <- (Vint q) #EDX <- (Vint r))) m
+          | None => Stuck
+          end
+      | _, _, _ => Stuck
       end
   | Pidiv r1 =>
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r1 in
-      match Val.divs vn vd, Val.mods vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
+      match rs#EDX, rs#EAX, rs#r1 with
+      | Vint nh, Vint nl, Vint d =>
+          match Int.divmods2 nh nl d with
+          | Some(q, r) => Next (nextinstr_nf (rs#EAX <- (Vint q) #EDX <- (Vint r))) m
+          | None => Stuck
+          end
+      | _, _, _ => Stuck
       end
   | Pand_rr rd r1 =>
       Next (nextinstr_nf (rs#rd <- (Val.and rs#rd rs#r1))) m
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index fd0d5bc5..1d718c26 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -315,22 +315,22 @@ Definition transl_op
       assertion (mreg_eq a1 AX);
       assertion (mreg_eq a2 CX);
       assertion (mreg_eq res AX);
-      OK(Pidiv ECX :: k)
+      OK(Pcltd :: Pidiv ECX :: k)
   | Odivu, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 AX);
       assertion (mreg_eq a2 CX);
       assertion (mreg_eq res AX);
-      OK(Pdiv ECX :: k)
+      OK(Pxor_r EDX :: Pdiv ECX :: k)
   | Omod, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 AX);
       assertion (mreg_eq a2 CX);
       assertion (mreg_eq res DX);
-      OK(Pidiv ECX :: k)
+      OK(Pcltd :: Pidiv ECX :: k)
   | Omodu, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 AX);
       assertion (mreg_eq a2 CX);
       assertion (mreg_eq res DX);
-      OK(Pdiv ECX :: k)
+      OK(Pxor_r EDX :: Pdiv ECX :: k)
   | Oand, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pand_rr r r2 :: k)
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index 60cc266e..9703d419 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -121,28 +121,44 @@ Qed.
 
 (** Properties of division *)
 
-Remark divs_mods_exist:
+Lemma divu_modu_exists:
   forall v1 v2,
-  match Val.divs v1 v2, Val.mods v1 v2 with
-  | Some _, Some _ => True
-  | None, None => True
-  | _, _ => False
-  end.
+  Val.divu v1 v2 <> None \/ Val.modu v1 v2 <> None ->
+  exists n d q r,
+     v1 = Vint n /\ v2 = Vint d
+  /\ Int.divmodu2 Int.zero n d = Some(q, r)
+  /\ Val.divu v1 v2 = Some (Vint q) /\ Val.modu v1 v2 = Some (Vint r).
 Proof.
-  intros. unfold Val.divs, Val.mods. destruct v1; auto. destruct v2; auto.
-  destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); auto.
+  intros v1 v2; unfold Val.divu, Val.modu.
+  destruct v1; try (intuition discriminate).
+  destruct v2; try (intuition discriminate).
+  predSpec Int.eq Int.eq_spec i0 Int.zero ; try (intuition discriminate).
+  intros _. exists i, i0, (Int.divu i i0), (Int.modu i i0); intuition auto.
+  apply Int.divmodu2_divu_modu; auto.
 Qed.
 
-Remark divu_modu_exist:
+Lemma divs_mods_exists:
   forall v1 v2,
-  match Val.divu v1 v2, Val.modu v1 v2 with
-  | Some _, Some _ => True
-  | None, None => True
-  | _, _ => False
-  end.
+  Val.divs v1 v2 <> None \/ Val.mods v1 v2 <> None ->
+  exists nh nl d q r,
+     Val.shr v1 (Vint (Int.repr 31)) = Vint nh /\ v1 = Vint nl /\ v2 = Vint d
+  /\ Int.divmods2 nh nl d = Some(q, r)
+  /\ Val.divs v1 v2 = Some (Vint q) /\ Val.mods v1 v2 = Some (Vint r).
 Proof.
-  intros. unfold Val.divu, Val.modu. destruct v1; auto. destruct v2; auto.
-  destruct (Int.eq i0 Int.zero); auto.
+  intros v1 v2; unfold Val.divs, Val.mods.
+  destruct v1; try (intuition discriminate).
+  destruct v2; try (intuition discriminate).
+  destruct (Int.eq i0 Int.zero
+            || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone) eqn:OK;
+  try (intuition discriminate).
+  intros _.
+  InvBooleans. 
+  exists (Int.shr i (Int.repr 31)), i, i0, (Int.divs i i0), (Int.mods i i0); intuition auto.
+  rewrite Int.shr_lt_zero. apply Int.divmods2_divs_mods.
+  red; intros; subst i0; rewrite Int.eq_true in H; discriminate.
+  revert H0. predSpec Int.eq Int.eq_spec i (Int.repr Int.min_signed); auto.
+  predSpec Int.eq Int.eq_spec i0 Int.mone; auto.
+  discriminate.
 Qed.
 
 (** Smart constructor for [shrx] *)
@@ -1031,32 +1047,52 @@ Transparent destroyed_by_op.
   apply SAME. TranslOp. destruct H1. Simplifs.
 (* div *)
   apply SAME.
-  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0.
-  destruct (Val.mods (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
-  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
-  auto. auto.
-  simpl in H3. destruct H3; Simplifs.
+  exploit (divs_mods_exists (rs EAX) (rs ECX)). left; congruence.
+  intros (nh & nl & d & q & r & A & B & C & D & E & F).
+  set (rs1 := nextinstr_nf (rs#EDX <- (Vint nh))).
+  econstructor; split.
+  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
+  simpl. change (rs1 EAX) with (rs EAX); rewrite B.
+  change (rs1 ECX) with (rs ECX); rewrite C.
+  rewrite D. reflexivity. auto. auto.
+  split. change (Vint q = v). congruence.
+  simpl; intros. destruct H2. unfold rs1; Simplifs.
 (* divu *)
   apply SAME.
-  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0.
-  destruct (Val.modu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
-  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
-  auto. auto.
-  simpl in H3. destruct H3; Simplifs.
+  exploit (divu_modu_exists (rs EAX) (rs ECX)). left; congruence.
+  intros (n & d & q & r & B & C & D & E & F).
+  set (rs1 := nextinstr_nf (rs#EDX <- Vzero)).
+  econstructor; split.
+  eapply exec_straight_two with (rs2 := rs1). reflexivity.
+  simpl. change (rs1 EAX) with (rs EAX); rewrite B.
+  change (rs1 ECX) with (rs ECX); rewrite C.
+  rewrite D. reflexivity. auto. auto.
+  split. change (Vint q = v). congruence.
+  simpl; intros. destruct H2. unfold rs1; Simplifs.
 (* mod *)
   apply SAME.
-  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0.
-  destruct (Val.divs (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
-  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
-  auto. auto.
-  simpl in H3. destruct H3; Simplifs.
+  exploit (divs_mods_exists (rs EAX) (rs ECX)). right; congruence.
+  intros (nh & nl & d & q & r & A & B & C & D & E & F).
+  set (rs1 := nextinstr_nf (rs#EDX <- (Vint nh))).
+  econstructor; split.
+  eapply exec_straight_two with (rs2 := rs1). simpl. rewrite A. reflexivity.
+  simpl. change (rs1 EAX) with (rs EAX); rewrite B.
+  change (rs1 ECX) with (rs ECX); rewrite C.
+  rewrite D. reflexivity. auto. auto.
+  split. change (Vint r = v). congruence.
+  simpl; intros. destruct H2. unfold rs1; Simplifs.
 (* modu *)
   apply SAME.
-  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0.
-  destruct (Val.divu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
-  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
-  auto. auto.
-  simpl in H3. destruct H3; Simplifs.
+  exploit (divu_modu_exists (rs EAX) (rs ECX)). right; congruence.
+  intros (n & d & q & r & B & C & D & E & F).
+  set (rs1 := nextinstr_nf (rs#EDX <- Vzero)).
+  econstructor; split.
+  eapply exec_straight_two with (rs2 := rs1). reflexivity.
+  simpl. change (rs1 EAX) with (rs EAX); rewrite B.
+  change (rs1 ECX) with (rs ECX); rewrite C.
+  rewrite D. reflexivity. auto. auto.
+  split. change (Vint r = v). congruence.
+  simpl; intros. destruct H2. unfold rs1; Simplifs.
 (* shrximm *)
   apply SAME. eapply mk_shrximm_correct; eauto.
 (* lea *)
diff --git a/ia32/TargetPrinter.ml b/ia32/TargetPrinter.ml
index 979ac800..4ffb701b 100644
--- a/ia32/TargetPrinter.ml
+++ b/ia32/TargetPrinter.ml
@@ -421,11 +421,11 @@ module Target(System: SYSTEM):TARGET =
           fprintf oc "	imull	%a\n" ireg r1
       | Pmul_r(r1) ->
           fprintf oc "	mull	%a\n" ireg r1
+      | Pcltd ->
+          fprintf oc "	cltd\n"
       | Pdiv(r1) ->
-          fprintf oc "	xorl	%%edx, %%edx\n";
           fprintf oc "	divl	%a\n" ireg r1
       | Pidiv(r1) ->
-          fprintf oc "	cltd\n";
           fprintf oc "	idivl	%a\n" ireg r1
       | Pand_rr(rd, r1) ->
           fprintf oc "	andl	%a, %a\n" ireg r1 ireg rd
diff --git a/lib/Integers.v b/lib/Integers.v
index 316dfb52..16c95e01 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -403,6 +403,18 @@ Definition is_false (x: int) : Prop := x = zero.
 Definition is_true  (x: int) : Prop := x <> zero.
 Definition notbool  (x: int) : int  := if eq x zero then one else zero.
 
+(** x86-style extended division and modulus *)
+
+Definition divmodu2 (nhi nlo: int) (d: int) : option (int * int) :=
+  if eq_dec d zero then None else
+   (let (q, r) := Z.div_eucl (unsigned nhi * modulus + unsigned nlo) (unsigned d) in
+    if zle q max_unsigned then Some(repr q, repr r) else None).
+
+Definition divmods2 (nhi nlo: int) (d: int) : option (int * int) :=
+  if eq_dec d zero then None else
+   (let (q, r) := Z.quotrem (signed nhi * modulus + unsigned nlo) (signed d) in
+    if zle min_signed q && zle q max_signed then Some(repr q, repr r) else None).
+
 (** * Properties of integers and integer arithmetic *)
 
 (** ** Properties of [modulus], [max_unsigned], etc. *)
@@ -1193,6 +1205,86 @@ Proof.
   rewrite <- neg_mul_distr_l. rewrite mul_mone. rewrite neg_involutive. apply sub_idem.
 Qed.
 
+Theorem divmodu2_divu_modu:
+  forall n d,
+  d <> zero -> divmodu2 zero n d = Some (divu n d, modu n d).
+Proof.
+  unfold divmodu2, divu, modu; intros.
+  rewrite dec_eq_false by auto.
+  set (N := unsigned zero * modulus + unsigned n).
+  assert (E1: unsigned n = N) by (unfold N; rewrite unsigned_zero; ring). rewrite ! E1.
+  set (D := unsigned d).
+  set (Q := N / D); set (R := N mod D).
+  assert (E2: Z.div_eucl N D = (Q, R)).
+  { unfold Q, R, Z.div, Z.modulo. destruct (Z.div_eucl N D); auto. }
+  rewrite E2. rewrite zle_true. auto.
+  assert (unsigned d <> 0).
+  { red; intros. elim H. rewrite <- (repr_unsigned d). rewrite H0; auto. }   
+  assert (0 < D).
+  { unfold D. generalize (unsigned_range d); intros. omega. } 
+  assert (0 <= Q <= max_unsigned).
+  { unfold Q. apply Zdiv_interval_2.
+    rewrite <- E1; apply unsigned_range_2.
+    omega. unfold max_unsigned; generalize modulus_pos; omega. omega. }
+  omega.
+Qed.
+
+Lemma unsigned_signed:
+  forall n, unsigned n = if lt n zero then signed n + modulus else signed n.
+Proof.
+  intros. unfold lt. rewrite signed_zero. unfold signed.
+  generalize (unsigned_range n). rewrite half_modulus_modulus. intros.
+  destruct (zlt (unsigned n) half_modulus).
+- rewrite zlt_false by omega. auto.
+- rewrite zlt_true by omega. ring.
+Qed.
+
+Theorem divmods2_divs_mods:
+  forall n d,
+  d <> zero -> n <> repr min_signed \/ d <> mone ->
+  divmods2 (if lt n zero then mone else zero) n d = Some (divs n d, mods n d).
+Proof.
+  unfold divmods2, divs, mods; intros.
+  rewrite dec_eq_false by auto.
+  set (N := signed (if lt n zero then mone else zero) * modulus + unsigned n).
+  set (D := signed d).
+  assert (D <> 0).
+  { unfold D; red; intros. elim H. rewrite <- (repr_signed d). rewrite H1; auto. }
+  assert (N = signed n).
+  { unfold N. rewrite unsigned_signed. destruct (lt n zero).
+    rewrite signed_mone. ring.
+    rewrite signed_zero. ring. }
+  set (Q := Z.quot N D); set (R := Z.rem N D).
+  assert (E2: Z.quotrem N D = (Q, R)).
+  { unfold Q, R, Z.quot, Z.rem. destruct (Z.quotrem N D); auto. }
+  rewrite E2. 
+  assert (min_signed <= N <= max_signed) by (rewrite H2; apply signed_range).
+  assert (min_signed <= Q <= max_signed).
+  { unfold Q. destruct (zeq D 1); [ | destruct (zeq D (-1))].
+  - (* D = 1 *)
+    rewrite e. rewrite Z.quot_1_r; auto.
+  - (* D = -1 *)
+    rewrite e. change (-1) with (Z.opp 1). rewrite Z.quot_opp_r by omega.
+    rewrite Z.quot_1_r.
+    assert (N <> min_signed).
+    { red; intros; destruct H0.
+    + elim H0. rewrite <- (repr_signed n). rewrite <- H2. rewrite H4. auto.
+    + elim H0. rewrite <- (repr_signed d). unfold D in e; rewrite e; auto. }
+    unfold min_signed, max_signed in *. omega.
+  - (* |D| > 1 *)
+    assert (Z.abs (Z.quot N D) < half_modulus).
+    { rewrite <- Z.quot_abs by omega. apply Zquot_lt_upper_bound.
+      xomega. xomega.
+      apply Zle_lt_trans with (half_modulus * 1).
+      rewrite Z.mul_1_r. unfold min_signed, max_signed in H3; xomega.
+      apply Zmult_lt_compat_l. generalize half_modulus_pos; omega. xomega. }
+    rewrite Z.abs_lt in H4.
+    unfold min_signed, max_signed; omega. 
+  }
+  unfold proj_sumbool; rewrite ! zle_true by omega; simpl.
+  unfold Q, R; rewrite H2; auto.
+Qed.
+
 (** ** Bit-level properties *)
 
 (** ** Properties of bit-level operations over [Z] *)
author	Xavier Leroy <xavier.leroy@inria.fr>	2016-09-18 10:29:11 +0200
committer	Xavier Leroy <xavier.leroy@inria.fr>	2016-09-18 10:29:11 +0200
commit	99554c986d023d00192eb3d1fbfe1c0cc138596e (patch)
tree	af3509d66ed70784e3b1bd0126bbddb17fd59fb3
parent	322f3c865341fdfd5d22ab885b2934a5213ddbaa (diff)
download	compcert-kvx-99554c986d023d00192eb3d1fbfe1c0cc138596e.tar.gz compcert-kvx-99554c986d023d00192eb3d1fbfe1c0cc138596e.zip