From 36ae0cba3a8cdec08b0f1f426b3ab27f8819afbe Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Fri, 7 Jun 2019 09:06:29 +0200 Subject: x86 branchless implementation of float -> unsigned int32 conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted from RĂ©mi Hutin's pull request #295. On average it is slightly slower than the old, branch-full implementation, hence flag -ffavor-branchless selects between the two implementations. --- x86/SelectOp.vp | 12 +++++++++--- x86/SelectOpproof.v | 49 ++++++++++++++++++++++--------------------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp index df9c5394..701e8e0d 100644 --- a/x86/SelectOp.vp +++ b/x86/SelectOp.vp @@ -500,11 +500,17 @@ Nondetfunction floatofint (e: expr) := Parameter favor_branchless: unit -> bool. Definition intuoffloat (e: expr) := + let e1 := intoffloat (Eletvar 1%nat) in + let e2 := addimm Float.ox8000_0000 + (intoffloat (subf (Eletvar 1%nat) (Eletvar 0%nat))) in Elet e (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil) - (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil)) - (intoffloat (Eletvar 1)) - (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat. + (if favor_branchless tt then + Eop (Osel (Ccompf Clt) Tint) + (e1 ::: e2 ::: Eletvar 1 ::: Eletvar 0 ::: Enil) + else + Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil)) + e1 e2))%nat. Nondetfunction floatofintu (e: expr) := match e with diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v index f7f043eb..beb95769 100644 --- a/x86/SelectOpproof.v +++ b/x86/SelectOpproof.v @@ -835,33 +835,28 @@ Proof. intros. destruct x; simpl in H0; try discriminate. destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0. exists (Vint n); split; auto. unfold intuoffloat. - set (im := Int.repr Int.half_modulus). - set (fm := Float.of_intu im). - assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)). - constructor. auto. - assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)). - constructor. auto. - econstructor. eauto. - econstructor. instantiate (1 := Vfloat fm). EvalOp. - eapply eval_Econdition with (va := Float.cmp Clt f fm). - eauto with evalexpr. - destruct (Float.cmp Clt f fm) eqn:?. - exploit Float.to_intu_to_int_1; eauto. intro EQ. - EvalOp. simpl. rewrite EQ; auto. - exploit Float.to_intu_to_int_2; eauto. - change Float.ox8000_0000 with im. fold fm. intro EQ. - set (t2 := subf (Eletvar (S O)) (Eletvar O)). - set (t3 := intoffloat t2). - exploit (eval_subf (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f) (Eletvar O)); eauto. - fold t2. intros [v2 [A2 B2]]. simpl in B2. inv B2. - exploit (eval_addimm Float.ox8000_0000 (Vfloat fm :: Vfloat f :: le) t3). - unfold t3. unfold intoffloat. EvalOp. simpl. rewrite EQ. simpl. eauto. - intros [v4 [A4 B4]]. simpl in B4. inv B4. - rewrite Int.sub_add_opp in A4. rewrite Int.add_assoc in A4. - rewrite (Int.add_commut (Int.neg im)) in A4. - rewrite Int.add_neg_zero in A4. - rewrite Int.add_zero in A4. - auto. + set (fm := Float.of_intu Float.ox8000_0000). + set (le' := Vfloat fm :: Vfloat f :: le). + set (a1 := intoffloat (Eletvar 1)). + set (a2 := addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0)))). + assert (A1: exists v1, eval_expr ge sp e m le' a1 v1 /\ + (Float.cmp Clt f fm = true -> v1 = Vint n)). + { econstructor; split. repeat econstructor; eauto. + intros. simpl. erewrite Float.to_intu_to_int_1 by eauto. reflexivity. } + assert (A2: exists v2, eval_expr ge sp e m le' a2 v2 /\ + (Float.cmp Clt f fm = false -> v2 = Vint n)). + { econstructor; split. repeat econstructor; eauto. + intros. simpl. erewrite Float.to_intu_to_int_2 by eauto. simpl. + rewrite Int.repr_signed, Int.sub_add_opp, Int.add_assoc. + rewrite (Int.add_commut (Int.neg Float.ox8000_0000)), Int.add_neg_zero, Int.add_zero. + auto. } + destruct A1 as (v1 & EV1 & EQ1), A2 as (v2 & EV2 & EQ2). + repeat (econstructor; eauto). fold le'. + destruct (favor_branchless tt). +- repeat (econstructor; eauto). simpl. + destruct (Float.cmp Clt f fm); [rewrite EQ1 by auto|rewrite EQ2 by auto]; reflexivity. +- repeat (econstructor; eauto). + destruct (Float.cmp Clt f fm); [rewrite <- EQ1 by auto|rewrite <- EQ2 by auto]; assumption. Qed. Theorem eval_floatofintu: -- cgit