x86 branchless implementation of unsigned int32 -> float conversion

Based on a suggestion by Rémi Hutin. The new implementation produces smaller code (3 FP instructions and 2 integer "and") and has no conditional branch. On average, it is a little slower than the old implementation. Hence both implementations are supported: - with -ffavor-branchless, use the new implementation - by default, use the old, branching implementation
author: Xavier Leroy <xavier.leroy@college-de-france.fr> 2019-05-17 16:17:51 +0200
committer: Xavier Leroy <xavier.leroy@college-de-france.fr> 2019-06-07 09:08:40 +0200
commit: b3bfdc3655479f7f8f1c5e3a7571473a72b421cb (patch)
tree: 8fdc27ef9b7c186ee3af9c04327a28b7e8424fd1
parent: 8e3a73448c5ddfa4be3871d7f4fd80281a7549f4 (diff)
download: compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.tar.gz
compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.zip
4 files changed, 70 insertions, 8 deletions
diff --git a/lib/Floats.v b/lib/Floats.v
index 9540303b..f8294d04 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -413,6 +413,7 @@ Qed.
   to emulate the former.)   *)
 
 Definition ox8000_0000 := Int.repr Int.half_modulus.  (**r [0x8000_0000] *)
+Definition ox7FFF_FFFF := Int.repr Int.max_signed.    (**r [0x7FFF_FFFF] *)
 
 Theorem of_intu_of_int_1:
   forall x,
@@ -443,6 +444,46 @@ Proof.
   compute_this (Int.unsigned ox8000_0000); smart_omega.
 Qed.
 
+Theorem of_intu_of_int_3:
+  forall x,
+  of_intu x = sub (of_int (Int.and x ox7FFF_FFFF)) (of_int (Int.and x ox8000_0000)).
+Proof.
+  intros.
+  set (hi := Int.and x ox8000_0000).
+  set (lo := Int.and x ox7FFF_FFFF).
+  assert (R: forall n, integer_representable 53 1024 (Int.signed n)).
+  { intros. pose proof (Int.signed_range n).
+    apply integer_representable_n; auto; smart_omega. }
+  unfold sub, of_int. rewrite BofZ_minus by auto. unfold of_intu. f_equal.
+  assert (E: Int.add hi lo = x).
+  { unfold hi, lo. rewrite Int.add_is_or. 
+  - rewrite <- Int.and_or_distrib. apply Int.and_mone.
+  - rewrite Int.and_assoc. rewrite (Int.and_commut ox8000_0000). rewrite Int.and_assoc.
+    change (Int.and ox7FFF_FFFF ox8000_0000) with Int.zero. rewrite ! Int.and_zero; auto.
+  }
+  assert (RNG: 0 <= Int.unsigned lo < two_p 31).
+  { unfold lo. change ox7FFF_FFFF with (Int.repr (two_p 31 - 1)). rewrite <- Int.zero_ext_and by omega.
+    apply Int.zero_ext_range. compute_this Int.zwordsize. omega. }
+  assert (B: forall i, 0 <= i < Int.zwordsize -> Int.testbit ox8000_0000 i = if zeq i 31 then true else false).
+  { intros; unfold Int.testbit. change (Int.unsigned ox8000_0000) with (2^31).
+    destruct (zeq i 31). subst i; auto. apply Z.pow2_bits_false; auto. } 
+  assert (EITHER: hi = Int.zero \/ hi = ox8000_0000).
+  { unfold hi; destruct (Int.testbit x 31) eqn:B31; [right|left];
+    Int.bit_solve; rewrite B by auto.
+  - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r.
+  - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r.
+  }
+  assert (SU: - Int.signed hi = Int.unsigned hi).
+  { destruct EITHER as [EQ|EQ]; rewrite EQ; reflexivity. }
+  unfold Z.sub; rewrite SU, <- E. 
+  unfold Int.add; rewrite Int.unsigned_repr, Int.signed_eq_unsigned. omega.
+  - assert (Int.max_signed = two_p 31 - 1) by reflexivity. omega.
+  - assert (Int.unsigned hi = 0 \/ Int.unsigned hi = two_p 31)
+    by (destruct EITHER as [EQ|EQ]; rewrite EQ; [left|right]; reflexivity).
+    assert (Int.max_unsigned = two_p 31 + two_p 31 - 1) by reflexivity.
+    omega.
+Qed.
+
 Theorem to_intu_to_int_1:
   forall x n,
   cmp Clt x (of_intu ox8000_0000) = true ->
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index cf0f37ec..df9c5394 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -497,6 +497,8 @@ Nondetfunction floatofint (e: expr) :=
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
+Parameter favor_branchless: unit -> bool.
+
 Definition intuoffloat (e: expr) :=
   Elet e
     (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
@@ -508,11 +510,16 @@ Nondetfunction floatofintu (e: expr) :=
   match e with
   | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
-    let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
-    Elet e
-      (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
-        (floatofint (Eletvar O))
-        (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
+    if favor_branchless tt then
+      Elet e (subf 
+        (Eop Ofloatofint (andimm Float.ox7FFF_FFFF (Eletvar O) ::: Enil))
+        (Eop Ofloatofint (andimm Float.ox8000_0000 (Eletvar O) ::: Enil)))
+    else
+      let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
+      Elet e
+        (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
+          (floatofint (Eletvar O))
+          (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
   end.
 
 Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index b412ccf7..908d6e4a 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -870,10 +870,22 @@ Theorem eval_floatofintu:
   exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
 Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
-  InvEval. TrivialExists.
-  destruct x; simpl in H0; try discriminate. inv H0.
+- InvEval. TrivialExists.
+- destruct x; simpl in H0; try discriminate. inv H0.
   exists (Vfloat (Float.of_intu i)); split; auto.
-  econstructor. eauto.
+  destruct (favor_branchless tt).
++ econstructor. eauto.
+  assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)) by (constructor; auto).
+  exploit (eval_andimm Float.ox7FFF_FFFF (Vint i :: le) (Eletvar 0)); eauto.
+  simpl. intros [v1 [A1 B1]]. inv B1.
+  exploit (eval_andimm Float.ox8000_0000 (Vint i :: le) (Eletvar 0)); eauto.
+  simpl. intros [v2 [A2 B2]]. inv B2.
+  unfold subf. econstructor.
+  constructor. econstructor. constructor. eexact A1. constructor. simpl; eauto.
+  constructor. econstructor. constructor. eexact A2. constructor. simpl; eauto.
+  constructor.
+  simpl. rewrite Float.of_intu_of_int_3. auto.
++ econstructor. eauto.
   set (fm := Float.of_intu Float.ox8000_0000).
   assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
     constructor. auto.
diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v
index a29553e8..1509878f 100644
--- a/x86/extractionMachdep.v
+++ b/x86/extractionMachdep.v
@@ -21,6 +21,8 @@ Require SelectOp ConstpropOp.
 
 Extract Constant SelectOp.symbol_is_external =>
   "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+Extract Constant SelectOp.favor_branchless =>
+  "fun () -> !Clflags.option_ffavor_branchless".
 
 (* ConstpropOp *)
author	Xavier Leroy <xavier.leroy@college-de-france.fr>	2019-05-17 16:17:51 +0200
committer	Xavier Leroy <xavier.leroy@college-de-france.fr>	2019-06-07 09:08:40 +0200
commit	b3bfdc3655479f7f8f1c5e3a7571473a72b421cb (patch)
tree	8fdc27ef9b7c186ee3af9c04327a28b7e8424fd1
parent	8e3a73448c5ddfa4be3871d7f4fd80281a7549f4 (diff)
download	compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.tar.gz compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.zip