aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@college-de-france.fr>2019-05-17 16:17:51 +0200
committerXavier Leroy <xavier.leroy@college-de-france.fr>2019-06-07 09:08:40 +0200
commitb3bfdc3655479f7f8f1c5e3a7571473a72b421cb (patch)
tree8fdc27ef9b7c186ee3af9c04327a28b7e8424fd1
parent8e3a73448c5ddfa4be3871d7f4fd80281a7549f4 (diff)
downloadcompcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.tar.gz
compcert-b3bfdc3655479f7f8f1c5e3a7571473a72b421cb.zip
x86 branchless implementation of unsigned int32 -> float conversion
Based on a suggestion by RĂ©mi Hutin. The new implementation produces smaller code (3 FP instructions and 2 integer "and") and has no conditional branch. On average, it is a little slower than the old implementation. Hence both implementations are supported: - with -ffavor-branchless, use the new implementation - by default, use the old, branching implementation
-rw-r--r--lib/Floats.v41
-rw-r--r--x86/SelectOp.vp17
-rw-r--r--x86/SelectOpproof.v18
-rw-r--r--x86/extractionMachdep.v2
4 files changed, 70 insertions, 8 deletions
diff --git a/lib/Floats.v b/lib/Floats.v
index 9540303b..f8294d04 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -413,6 +413,7 @@ Qed.
to emulate the former.) *)
Definition ox8000_0000 := Int.repr Int.half_modulus. (**r [0x8000_0000] *)
+Definition ox7FFF_FFFF := Int.repr Int.max_signed. (**r [0x7FFF_FFFF] *)
Theorem of_intu_of_int_1:
forall x,
@@ -443,6 +444,46 @@ Proof.
compute_this (Int.unsigned ox8000_0000); smart_omega.
Qed.
+Theorem of_intu_of_int_3:
+ forall x,
+ of_intu x = sub (of_int (Int.and x ox7FFF_FFFF)) (of_int (Int.and x ox8000_0000)).
+Proof.
+ intros.
+ set (hi := Int.and x ox8000_0000).
+ set (lo := Int.and x ox7FFF_FFFF).
+ assert (R: forall n, integer_representable 53 1024 (Int.signed n)).
+ { intros. pose proof (Int.signed_range n).
+ apply integer_representable_n; auto; smart_omega. }
+ unfold sub, of_int. rewrite BofZ_minus by auto. unfold of_intu. f_equal.
+ assert (E: Int.add hi lo = x).
+ { unfold hi, lo. rewrite Int.add_is_or.
+ - rewrite <- Int.and_or_distrib. apply Int.and_mone.
+ - rewrite Int.and_assoc. rewrite (Int.and_commut ox8000_0000). rewrite Int.and_assoc.
+ change (Int.and ox7FFF_FFFF ox8000_0000) with Int.zero. rewrite ! Int.and_zero; auto.
+ }
+ assert (RNG: 0 <= Int.unsigned lo < two_p 31).
+ { unfold lo. change ox7FFF_FFFF with (Int.repr (two_p 31 - 1)). rewrite <- Int.zero_ext_and by omega.
+ apply Int.zero_ext_range. compute_this Int.zwordsize. omega. }
+ assert (B: forall i, 0 <= i < Int.zwordsize -> Int.testbit ox8000_0000 i = if zeq i 31 then true else false).
+ { intros; unfold Int.testbit. change (Int.unsigned ox8000_0000) with (2^31).
+ destruct (zeq i 31). subst i; auto. apply Z.pow2_bits_false; auto. }
+ assert (EITHER: hi = Int.zero \/ hi = ox8000_0000).
+ { unfold hi; destruct (Int.testbit x 31) eqn:B31; [right|left];
+ Int.bit_solve; rewrite B by auto.
+ - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r.
+ - destruct (zeq i 31). subst i; rewrite B31; auto. apply andb_false_r.
+ }
+ assert (SU: - Int.signed hi = Int.unsigned hi).
+ { destruct EITHER as [EQ|EQ]; rewrite EQ; reflexivity. }
+ unfold Z.sub; rewrite SU, <- E.
+ unfold Int.add; rewrite Int.unsigned_repr, Int.signed_eq_unsigned. omega.
+ - assert (Int.max_signed = two_p 31 - 1) by reflexivity. omega.
+ - assert (Int.unsigned hi = 0 \/ Int.unsigned hi = two_p 31)
+ by (destruct EITHER as [EQ|EQ]; rewrite EQ; [left|right]; reflexivity).
+ assert (Int.max_unsigned = two_p 31 + two_p 31 - 1) by reflexivity.
+ omega.
+Qed.
+
Theorem to_intu_to_int_1:
forall x n,
cmp Clt x (of_intu ox8000_0000) = true ->
diff --git a/x86/SelectOp.vp b/x86/SelectOp.vp
index cf0f37ec..df9c5394 100644
--- a/x86/SelectOp.vp
+++ b/x86/SelectOp.vp
@@ -497,6 +497,8 @@ Nondetfunction floatofint (e: expr) :=
| _ => Eop Ofloatofint (e ::: Enil)
end.
+Parameter favor_branchless: unit -> bool.
+
Definition intuoffloat (e: expr) :=
Elet e
(Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
@@ -508,11 +510,16 @@ Nondetfunction floatofintu (e: expr) :=
match e with
| Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
| _ =>
- let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
- Elet e
- (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
- (floatofint (Eletvar O))
- (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
+ if favor_branchless tt then
+ Elet e (subf
+ (Eop Ofloatofint (andimm Float.ox7FFF_FFFF (Eletvar O) ::: Enil))
+ (Eop Ofloatofint (andimm Float.ox8000_0000 (Eletvar O) ::: Enil)))
+ else
+ let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
+ Elet e
+ (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
+ (floatofint (Eletvar O))
+ (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
end.
Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
diff --git a/x86/SelectOpproof.v b/x86/SelectOpproof.v
index b412ccf7..908d6e4a 100644
--- a/x86/SelectOpproof.v
+++ b/x86/SelectOpproof.v
@@ -870,10 +870,22 @@ Theorem eval_floatofintu:
exists v, eval_expr ge sp e m le (floatofintu a) v /\ Val.lessdef y v.
Proof.
intros until y; unfold floatofintu. case (floatofintu_match a); intros.
- InvEval. TrivialExists.
- destruct x; simpl in H0; try discriminate. inv H0.
+- InvEval. TrivialExists.
+- destruct x; simpl in H0; try discriminate. inv H0.
exists (Vfloat (Float.of_intu i)); split; auto.
- econstructor. eauto.
+ destruct (favor_branchless tt).
++ econstructor. eauto.
+ assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)) by (constructor; auto).
+ exploit (eval_andimm Float.ox7FFF_FFFF (Vint i :: le) (Eletvar 0)); eauto.
+ simpl. intros [v1 [A1 B1]]. inv B1.
+ exploit (eval_andimm Float.ox8000_0000 (Vint i :: le) (Eletvar 0)); eauto.
+ simpl. intros [v2 [A2 B2]]. inv B2.
+ unfold subf. econstructor.
+ constructor. econstructor. constructor. eexact A1. constructor. simpl; eauto.
+ constructor. econstructor. constructor. eexact A2. constructor. simpl; eauto.
+ constructor.
+ simpl. rewrite Float.of_intu_of_int_3. auto.
++ econstructor. eauto.
set (fm := Float.of_intu Float.ox8000_0000).
assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
constructor. auto.
diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v
index a29553e8..1509878f 100644
--- a/x86/extractionMachdep.v
+++ b/x86/extractionMachdep.v
@@ -21,6 +21,8 @@ Require SelectOp ConstpropOp.
Extract Constant SelectOp.symbol_is_external =>
"fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+Extract Constant SelectOp.favor_branchless =>
+ "fun () -> !Clflags.option_ffavor_branchless".
(* ConstpropOp *)