x86 branchless implementation of float -> unsigned int32 conversionfloatofintu

Adapted from Rémi Hutin's pull request #295. On average it is slightly slower than the old, branch-full implementation, hence flag -ffavor-branchless selects between the two implementations.
author: Xavier Leroy <xavier.leroy@college-de-france.fr> 2019-06-07 09:06:29 +0200
committer: Xavier Leroy <xavier.leroy@college-de-france.fr> 2019-06-07 09:08:40 +0200
commit: 36ae0cba3a8cdec08b0f1f426b3ab27f8819afbe (patch)
tree: 6660cbf48eefbb30f37138f5cebaccfca5be41e4
parent: f92ca339e9d1851fa4d469cdc867bfe1719c42a1 (diff)
download: compcert-floatofintu.tar.gz
compcert-floatofintu.zip
2 files changed, 31 insertions, 30 deletions
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index df9c5394..701e8e0d 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -500,11 +500,17 @@ Nondetfunction floatofint (e: expr) :=
 Parameter favor_branchless: unit -> bool.
 
 Definition intuoffloat (e: expr) :=
+  let e1 := intoffloat (Eletvar 1%nat) in
+  let e2 := addimm Float.ox8000_0000
+                   (intoffloat (subf (Eletvar 1%nat) (Eletvar 0%nat))) in
   Elet e
     (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
-      (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
-        (intoffloat (Eletvar 1))
-        (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
+     (if favor_branchless tt then
+        Eop (Osel (Ccompf Clt) Tint)
+            (e1 ::: e2 ::: Eletvar 1 ::: Eletvar 0 ::: Enil)
+      else
+        Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
+                   e1 e2))%nat.
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index f7f043eb..beb95769 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -835,33 +835,28 @@ Proof.
   intros. destruct x; simpl in H0; try discriminate.
   destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
-  set (im := Int.repr Int.half_modulus).
-  set (fm := Float.of_intu im).
-  assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
-    constructor. auto.
-  assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)).
-    constructor. auto.
-  econstructor. eauto.
-  econstructor. instantiate (1 := Vfloat fm). EvalOp.
-  eapply eval_Econdition with (va := Float.cmp Clt f fm).
-  eauto with evalexpr.
-  destruct (Float.cmp Clt f fm) eqn:?.
-  exploit Float.to_intu_to_int_1; eauto. intro EQ.
-  EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.to_intu_to_int_2; eauto.
-  change Float.ox8000_0000 with im. fold fm. intro EQ.
-  set (t2 := subf (Eletvar (S O)) (Eletvar O)).
-  set (t3 := intoffloat t2).
-  exploit (eval_subf (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f) (Eletvar O)); eauto.
-  fold t2. intros [v2 [A2 B2]]. simpl in B2. inv B2.
-  exploit (eval_addimm Float.ox8000_0000 (Vfloat fm :: Vfloat f :: le) t3).
-    unfold t3. unfold intoffloat. EvalOp. simpl. rewrite EQ. simpl. eauto.
-  intros [v4 [A4 B4]]. simpl in B4. inv B4.
-  rewrite Int.sub_add_opp in A4. rewrite Int.add_assoc in A4.
-  rewrite (Int.add_commut (Int.neg im)) in A4.
-  rewrite Int.add_neg_zero in A4.
-  rewrite Int.add_zero in A4.
-  auto.
+  set (fm := Float.of_intu Float.ox8000_0000).
+  set (le' := Vfloat fm :: Vfloat f :: le).
+  set (a1 := intoffloat (Eletvar 1)). 
+  set (a2 := addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0)))).
+  assert (A1: exists v1, eval_expr ge sp e m le' a1 v1 /\
+                  (Float.cmp Clt f fm = true -> v1 = Vint n)).
+  { econstructor; split. repeat econstructor; eauto.
+    intros. simpl. erewrite Float.to_intu_to_int_1 by eauto. reflexivity. }
+  assert (A2: exists v2, eval_expr ge sp e m le' a2 v2 /\
+                  (Float.cmp Clt f fm = false -> v2 = Vint n)).
+  { econstructor; split. repeat econstructor; eauto.
+    intros. simpl. erewrite Float.to_intu_to_int_2 by eauto. simpl.
+    rewrite Int.repr_signed, Int.sub_add_opp, Int.add_assoc.
+    rewrite (Int.add_commut (Int.neg Float.ox8000_0000)), Int.add_neg_zero, Int.add_zero.
+    auto. }
+  destruct A1 as (v1 & EV1 & EQ1), A2 as (v2 & EV2 & EQ2).
+  repeat (econstructor; eauto). fold le'.
+  destruct (favor_branchless tt).
+- repeat (econstructor; eauto). simpl.
+  destruct (Float.cmp Clt f fm); [rewrite EQ1 by auto|rewrite EQ2 by auto]; reflexivity.
+- repeat (econstructor; eauto). 
+  destruct (Float.cmp Clt f fm); [rewrite <- EQ1 by auto|rewrite <- EQ2 by auto]; assumption.
 Qed.
 
 Theorem eval_floatofintu:
author	Xavier Leroy <xavier.leroy@college-de-france.fr>	2019-06-07 09:06:29 +0200
committer	Xavier Leroy <xavier.leroy@college-de-france.fr>	2019-06-07 09:08:40 +0200
commit	36ae0cba3a8cdec08b0f1f426b3ab27f8819afbe (patch)
tree	6660cbf48eefbb30f37138f5cebaccfca5be41e4
parent	f92ca339e9d1851fa4d469cdc867bfe1719c42a1 (diff)
download	compcert-floatofintu.tar.gz compcert-floatofintu.zip