diff options
author | Xavier Leroy <xavier.leroy@college-de-france.fr> | 2019-05-17 16:17:51 +0200 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@college-de-france.fr> | 2019-06-07 09:08:40 +0200 |
commit | b3bfdc3655479f7f8f1c5e3a7571473a72b421cb (patch) | |
tree | 8fdc27ef9b7c186ee3af9c04327a28b7e8424fd1 | |
parent | 8e3a73448c5ddfa4be3871d7f4fd80281a7549f4 (diff) | |
download | compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.tar.gz compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.zip |
x86 branchless implementation of unsigned int32 -> float conversion
Based on a suggestion by RĂ©mi Hutin.
The new implementation produces smaller code (3 FP instructions and 2
integer "and") and has no conditional branch.
On average, it is a little slower than the old implementation.
Hence both implementations are supported:
- with -ffavor-branchless, use the new implementation
- by default, use the old, branching implementation
-rw-r--r-- | lib/Floats.v | 41 | ||||
-rw-r--r-- | x86/SelectOp.vp | 17 | ||||
-rw-r--r-- | x86/SelectOpproof.v | 18 | ||||
-rw-r--r-- | x86/extractionMachdep.v | 2 |
4 files changed, 70 insertions, 8 deletions
diff --git a/lib/Floats.v b/lib/Floats.v index 9540303b..f8294d04 100644 --- a/lib/Floats.v +++ b/lib/Floats.v @@ -413,6 +413,7 @@ Qed. to emulate the former.) *) Definition ox8000_0000 := Int.repr Int.half_modulus. (**r [0x8000_0000] *) +Definition ox7FFF_FFFF := Int.repr Int.max_signed. (**r [0x7FFF_FFFF] *) Theorem of_intu_of_int_1: forall x, @@ -443,6 +444,46 @@ Proof. compute_this (Int.unsigned ox8000_0000); smart_omega. Qed. +Theorem of_intu_of_int_3: + forall x, + of_intu x = sub (of_int (Int.and x ox7FFF_FFFF)) (of_int (Int.and x ox8000_0000)). +Proof. + intros. + set (hi := Int.and x ox8000_0000). + set (lo := Int.and x ox7FFF_FFFF). + assert (R: forall n, integer_representable 53 1024 (Int.signed n)). + { intros. pose proof (Int.signed_range n). + apply integer_representable_n; auto; smart_omega. } + unfold sub, of_int. rewrite BofZ_minus by auto. unfold of_intu. f_equal. + assert (E: Int.add hi lo = x). + { unfold hi, lo. rewrite Int.add_is_or. + - rewrite <- Int.and_or_distrib. apply Int.and_mone. + - rewrite Int.and_assoc. rewrite (Int.and_commut ox8000_0000). rewrite Int.and_assoc. + change (Int.and ox7FFF_FFFF ox8000_0000) with Int.zero. rewrite ! Int.and_zero; auto. + } + assert (RNG: 0 <= Int.unsigned lo < two_p 31). + { unfold lo. change ox7FFF_FFFF with (Int.repr (two_p 31 - 1)). rewrite <- Int.zero_ext_and by omega. + apply Int.zero_ext_range. compute_this Int.zwordsize. omega. } + assert (B: forall i, 0 <= i < Int.zwordsize -> Int.testbit ox8000_0000 i = if zeq i 31 then true else false). + { intros; unfold Int.testbit. change (Int.unsigned ox8000_0000) with (2^31). + destruct (zeq i 31). subst i; auto. apply Z.pow2_bits_false; auto. } + assert (EITHER: hi = Int.zero \/ hi = ox8000_0000). + { unfold hi; destruct (Int.testbit x 31) eqn:B31; [right|left]; + Int.bit_solve; rewrite B by auto. + - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r. + - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r. + } + assert (SU: - Int.signed hi = Int.unsigned hi). + { destruct EITHER as [EQ|EQ]; rewrite EQ; reflexivity. } + unfold Z.sub; rewrite SU, <- E. + unfold Int.add; rewrite Int.unsigned_repr, Int.signed_eq_unsigned. omega. + - assert (Int.max_signed = two_p 31 - 1) by reflexivity. omega. + - assert (Int.unsigned hi = 0 \/ Int.unsigned hi = two_p 31) + by (destruct EITHER as [EQ|EQ]; rewrite EQ; [left|right]; reflexivity). + assert (Int.max_unsigned = two_p 31 + two_p 31 - 1) by reflexivity. + omega. +Qed. + Theorem to_intu_to_int_1: forall x n, cmp Clt x (of_intu ox8000_0000) = true -> diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp index cf0f37ec..df9c5394 100644 --- a/x86/SelectOp.vp +++ b/x86/SelectOp.vp @@ -497,6 +497,8 @@ Nondetfunction floatofint (e: expr) := | _ => Eop Ofloatofint (e ::: Enil) end. +Parameter favor_branchless: unit -> bool. + Definition intuoffloat (e: expr) := Elet e (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil) @@ -508,11 +510,16 @@ Nondetfunction floatofintu (e: expr) := match e with | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil | _ => - let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in - Elet e - (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil)) - (floatofint (Eletvar O)) - (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f)) + if favor_branchless tt then + Elet e (subf + (Eop Ofloatofint (andimm Float.ox7FFF_FFFF (Eletvar O) ::: Enil)) + (Eop Ofloatofint (andimm Float.ox8000_0000 (Eletvar O) ::: Enil))) + else + let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in + Elet e + (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil)) + (floatofint (Eletvar O)) + (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f)) end. Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil). diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v index b412ccf7..908d6e4a 100644 --- a/x86/SelectOpproof.v +++ b/x86/SelectOpproof.v @@ -870,10 +870,22 @@ Theorem eval_floatofintu: exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v. Proof. intros until y; unfold floatofintu. case (floatofintu_match a); intros. - InvEval. TrivialExists. - destruct x; simpl in H0; try discriminate. inv H0. +- InvEval. TrivialExists. +- destruct x; simpl in H0; try discriminate. inv H0. exists (Vfloat (Float.of_intu i)); split; auto. - econstructor. eauto. + destruct (favor_branchless tt). ++ econstructor. eauto. + assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)) by (constructor; auto). + exploit (eval_andimm Float.ox7FFF_FFFF (Vint i :: le) (Eletvar 0)); eauto. + simpl. intros [v1 [A1 B1]]. inv B1. + exploit (eval_andimm Float.ox8000_0000 (Vint i :: le) (Eletvar 0)); eauto. + simpl. intros [v2 [A2 B2]]. inv B2. + unfold subf. econstructor. + constructor. econstructor. constructor. eexact A1. constructor. simpl; eauto. + constructor. econstructor. constructor. eexact A2. constructor. simpl; eauto. + constructor. + simpl. rewrite Float.of_intu_of_int_3. auto. ++ econstructor. eauto. set (fm := Float.of_intu Float.ox8000_0000). assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)). constructor. auto. diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v index a29553e8..1509878f 100644 --- a/x86/extractionMachdep.v +++ b/x86/extractionMachdep.v @@ -21,6 +21,8 @@ Require SelectOp ConstpropOp. Extract Constant SelectOp.symbol_is_external => "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id". +Extract Constant SelectOp.favor_branchless => + "fun () -> !Clflags.option_ffavor_branchless". (* ConstpropOp *) |