aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@college-de-france.fr>2019-07-12 11:42:25 +0200
committerXavier Leroy <xavierleroy@users.noreply.github.com>2019-07-17 09:16:00 +0200
commit8b0de52ffa302298abe8d0d6e3b6ed339a2354ba (patch)
treefcfdb40451908865ba706e184ff434a847b7711b
parentdc5dc1da4cfa10647d13f393b6685f962b9b9faa (diff)
downloadcompcert-8b0de52ffa302298abe8d0d6e3b6ed339a2354ba.tar.gz
compcert-8b0de52ffa302298abe8d0d6e3b6ed339a2354ba.zip
Add floating-point square root and fused multiply-add
We just lift the corresponding functions from Flocq and add the computation of NaN payloads. NaN payloads for FMA are described in the ARM and RISC-V specifications, and were determined experimentally for x86 and for Power.
-rw-r--r--arm/Archi.v5
-rw-r--r--lib/Floats.v54
-rw-r--r--powerpc/Archi.v5
-rw-r--r--riscV/Archi.v5
-rw-r--r--x86_32/Archi.v5
-rw-r--r--x86_64/Archi.v5
6 files changed, 76 insertions, 3 deletions
diff --git a/arm/Archi.v b/arm/Archi.v
index e78ff6ce..16d6c71d 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -75,11 +75,16 @@ Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. intros; apply choose_nan_idem. Qed.
+Definition fma_order {A: Type} (x y z: A) := (z, x, y).
+
+Definition fma_invalid_mul_is_nan := true.
+
Definition float_of_single_preserves_sNaN := false.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.
(** Which ABI to use: either the standard ARM EABI with floats passed
diff --git a/lib/Floats.v b/lib/Floats.v
index 98ccc173..7f136283 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -166,7 +166,7 @@ Proof.
zify; omega.
Qed.
-Definition expand_nan s p H : {x | is_nan 53 1024 x = true} :=
+Definition expand_nan s p H : {x | is_nan _ _ x = true} :=
exist _ (B754_nan 53 1024 s (expand_nan_payload p) (expand_nan_proof p H)) (eq_refl true).
Definition of_single_nan (f : float32) : { x : float | is_nan _ _ x = true } :=
@@ -222,12 +222,38 @@ Additionally, signaling NaNs are converted to quiet NaNs, as required by the sta
Definition cons_pl (x: float) (l: list (bool * positive)) :=
match x with B754_nan s p _ => (s, p) :: l | _ => l end.
-Definition unop_nan (x: float) : {x : float | is_nan 53 1024 x = true} :=
+Definition unop_nan (x: float) : {x : float | is_nan _ _ x = true} :=
quiet_nan_64 (Archi.choose_nan_64 (cons_pl x [])).
-Definition binop_nan (x y: float) : {x : float | is_nan 53 1024 x = true} :=
+Definition binop_nan (x y: float) : {x : float | is_nan _ _ x = true} :=
quiet_nan_64 (Archi.choose_nan_64 (cons_pl x (cons_pl y []))).
+(** For fused multiply-add, the order in which arguments are examined
+ to select a NaN payload varies across platforms. E.g. in [fma x y z],
+ x86 considers [x] first, then [y], then [z], while ARM considers [z] first,
+ then [x], then [y]. The corresponding permutation is defined
+ for each target, as function [Archi.fma_order]. *)
+
+Definition fma_nan_1 (x y z: float) : {x : float | is_nan _ _ x = true} :=
+ let '(a, b, c) := Archi.fma_order x y z in
+ quiet_nan_64 (Archi.choose_nan_64 (cons_pl a (cons_pl b (cons_pl c [])))).
+
+(** One last wrinkle for fused multiply-add: [fma zero infinity nan]
+ can return either the quiesced [nan], or the default NaN arising out
+ of the invalid operation [zero * infinity]. Of our target platforms,
+ only ARM honors the latter case. The choice between the default NaN
+ and [nan] is done as in the case of two-argument arithmetic operations. *)
+
+Definition fma_nan (x y z: float) : {x : float | is_nan _ _ x = true} :=
+ match x, y with
+ | B754_infinity _, B754_zero _ | B754_zero _, B754_infinity _ =>
+ if Archi.fma_invalid_mul_is_nan
+ then quiet_nan_64 (Archi.choose_nan_64 (Archi.default_nan_64 :: cons_pl z []))
+ else fma_nan_1 x y z
+ | _, _ =>
+ fma_nan_1 x y z
+ end.
+
(** ** Operations over double-precision floats *)
Definition zero: float := B754_zero _ _ false. (**r the float [+0.0] *)
@@ -238,6 +264,8 @@ Definition eq_dec: forall (f1 f2: float), {f1 = f2} + {f1 <> f2} := Beq_dec _ _.
Definition neg: float -> float := Bopp _ _ neg_nan. (**r opposite (change sign) *)
Definition abs: float -> float := Babs _ _ abs_nan. (**r absolute value (set sign to [+]) *)
+Definition sqrt: float -> float :=
+ Bsqrt 53 1024 __ __ unop_nan mode_NE. (**r square root *)
Definition add: float -> float -> float :=
Bplus 53 1024 __ __ binop_nan mode_NE. (**r addition *)
Definition sub: float -> float -> float :=
@@ -246,6 +274,8 @@ Definition mul: float -> float -> float :=
Bmult 53 1024 __ __ binop_nan mode_NE. (**r multiplication *)
Definition div: float -> float -> float :=
Bdiv 53 1024 __ __ binop_nan mode_NE. (**r division *)
+Definition fma: float -> float -> float -> float :=
+ Bfma 53 1024 __ __ fma_nan mode_NE. (**r fused multiply-add [x * y + z] *)
Definition compare (f1 f2: float) : option Datatypes.comparison := (**r general comparison *)
Bcompare 53 1024 f1 f2.
Definition cmp (c:comparison) (f1 f2: float) : bool := (**r Boolean comparison *)
@@ -945,6 +975,20 @@ Definition unop_nan (x: float32) : {x : float32 | is_nan _ _ x = true} :=
Definition binop_nan (x y: float32) : {x : float32 | is_nan _ _ x = true} :=
quiet_nan_32 (Archi.choose_nan_32 (cons_pl x (cons_pl y []))).
+Definition fma_nan_1 (x y z: float32) : {x : float32 | is_nan _ _ x = true} :=
+ let '(a, b, c) := Archi.fma_order x y z in
+ quiet_nan_32 (Archi.choose_nan_32 (cons_pl a (cons_pl b (cons_pl c [])))).
+
+Definition fma_nan (x y z: float32) : {x : float32 | is_nan _ _ x = true} :=
+ match x, y with
+ | B754_infinity _, B754_zero _ | B754_zero _, B754_infinity _ =>
+ if Archi.fma_invalid_mul_is_nan
+ then quiet_nan_32 (Archi.choose_nan_32 (Archi.default_nan_32 :: cons_pl z []))
+ else fma_nan_1 x y z
+ | _, _ =>
+ fma_nan_1 x y z
+ end.
+
(** ** Operations over single-precision floats *)
Definition zero: float32 := B754_zero _ _ false. (**r the float [+0.0] *)
@@ -955,6 +999,8 @@ Definition eq_dec: forall (f1 f2: float32), {f1 = f2} + {f1 <> f2} := Beq_dec _
Definition neg: float32 -> float32 := Bopp _ _ neg_nan. (**r opposite (change sign) *)
Definition abs: float32 -> float32 := Babs _ _ abs_nan. (**r absolute value (set sign to [+]) *)
+Definition sqrt: float32 -> float32 :=
+ Bsqrt 24 128 __ __ unop_nan mode_NE. (**r square root *)
Definition add: float32 -> float32 -> float32 :=
Bplus 24 128 __ __ binop_nan mode_NE. (**r addition *)
Definition sub: float32 -> float32 -> float32 :=
@@ -963,6 +1009,8 @@ Definition mul: float32 -> float32 -> float32 :=
Bmult 24 128 __ __ binop_nan mode_NE. (**r multiplication *)
Definition div: float32 -> float32 -> float32 :=
Bdiv 24 128 __ __ binop_nan mode_NE. (**r division *)
+Definition fma: float32 -> float32 -> float32 -> float32 :=
+ Bfma 24 128 __ __ fma_nan mode_NE. (**r fused multiply-add [x * y + z] *)
Definition compare (f1 f2: float32) : option Datatypes.comparison := (**r general comparison *)
Bcompare 24 128 f1 f2.
Definition cmp (c:comparison) (f1 f2: float32) : bool := (**r comparison *)
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index b99e4812..ab348c14 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -56,9 +56,14 @@ Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. auto. Qed.
+Definition fma_order {A: Type} (x y z: A) := (x, z, y).
+
+Definition fma_invalid_mul_is_nan := false.
+
Definition float_of_single_preserves_sNaN := true.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.
diff --git a/riscV/Archi.v b/riscV/Archi.v
index e4078132..61d129d0 100644
--- a/riscV/Archi.v
+++ b/riscV/Archi.v
@@ -57,11 +57,16 @@ Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. auto. Qed.
+Definition fma_order {A: Type} (x y z: A) := (x, y, z).
+
+Definition fma_invalid_mul_is_nan := false.
+
Definition float_of_single_preserves_sNaN := false.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.
(** Whether to generate position-independent code or not *)
diff --git a/x86_32/Archi.v b/x86_32/Archi.v
index c6d66e09..e9d05c14 100644
--- a/x86_32/Archi.v
+++ b/x86_32/Archi.v
@@ -53,9 +53,14 @@ Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. auto. Qed.
+Definition fma_order {A: Type} (x y z: A) := (x, y, z).
+
+Definition fma_invalid_mul_is_nan := false.
+
Definition float_of_single_preserves_sNaN := false.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.
diff --git a/x86_64/Archi.v b/x86_64/Archi.v
index 8523fdce..959d8dc1 100644
--- a/x86_64/Archi.v
+++ b/x86_64/Archi.v
@@ -53,9 +53,14 @@ Lemma choose_nan_32_idem: forall n,
choose_nan_32 (n :: n :: nil) = choose_nan_32 (n :: nil).
Proof. auto. Qed.
+Definition fma_order {A: Type} (x y z: A) := (x, y, z).
+
+Definition fma_invalid_mul_is_nan := false.
+
Definition float_of_single_preserves_sNaN := false.
Global Opaque ptr64 big_endian splitlong
default_nan_64 choose_nan_64
default_nan_32 choose_nan_32
+ fma_order fma_invalid_mul_is_nan
float_of_single_preserves_sNaN.