fma

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-08-30 19:10:42 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-08-30 19:10:42 +0200
commit: caac487ae23a9785602cf235f5b4a2b6749f2c18 (patch)
tree: e81ae59a6e06a0373afdfbf8f1ee94f16a2770f2
parent: 1522f289301f37da0324570297c65256d8a32316 (diff)
download: compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.tar.gz
compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.zip
14 files changed, 166 insertions, 15 deletions
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index b7818aaf..ce376af9 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -200,6 +200,10 @@ Inductive instruction : Type :=
   | Psllw               (rd rs1 rs2: ireg)          (**r shift left logical word *)
   | Pmaddw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
   | Pmsubw              (rd rs1 rs2: ireg)          (**r multiply-add words *)
+  | Pfmaddfw            (rd rs1 rs2: ireg)          (**r float fused multiply-add words *)
+  | Pfmsubfw            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract words *)
+  | Pfmaddfl            (rd rs1 rs2: ireg)          (**r float fused multiply-add longs *)
+  | Pfmsubfl            (rd rs1 rs2: ireg)          (**r float fused multiply-subtract longs *)
 
   | Paddl               (rd rs1 rs2: ireg)          (**r add long *)
   | Paddxl (shift : shift1_4)  (rd rs1 rs2: ireg)          (**r add long shift *)
@@ -452,6 +456,10 @@ Definition basic_to_instruction (b: basic) :=
   | PArithARRR Asmvliw.Pmaddl rd rs1 rs2       => Pmaddl rd rs1 rs2
   | PArithARRR Asmvliw.Pmsubw rd rs1 rs2       => Pmsubw rd rs1 rs2
   | PArithARRR Asmvliw.Pmsubl rd rs1 rs2       => Pmsubl rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2     => Pfmaddfw rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2     => Pfmaddfl rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2     => Pfmsubfw rd rs1 rs2
+  | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2     => Pfmsubfl rd rs1 rs2
   | PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
   | PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
 
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 61caeaf1..6743ae4c 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -1483,6 +1483,10 @@ Definition string_of_name_arrr (n: arith_name_arrr): pstring :=
   | Pmsubl  => "Pmsubl"
   | Pcmove _ => "Pcmove"
   | Pcmoveu _ => "Pcmoveu"
+  | Pfmaddfw  => "Pfmaddfw"
+  | Pfmaddfl  => "Pfmaddfl"
+  | Pfmsubfw  => "Pfmsubfw"
+  | Pfmsubfl  => "Pfmsubfl"
   end.
 
 Definition string_of_name_arr (n: arith_name_arr): pstring :=
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index c2a36ff7..c717af95 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -797,6 +797,31 @@ Definition transl_op
       do rd <- freg_of res; do rs <- freg_of a1;
       OK (Pfinvw rd rs ::i k)
 
+  | Ofmaddf, a1 :: a2 :: a3 :: nil =>
+    assertion (mreg_eq a1 res);
+        do rs1 <- freg_of a1;
+        do rs2 <- freg_of a2;
+        do rs3 <- freg_of a3;
+      OK (Pfmaddfl rs1 rs2 rs3 ::i k)
+  | Ofmaddfs, a1 :: a2 :: a3 :: nil =>
+    assertion (mreg_eq a1 res);
+        do rs1 <- freg_of a1;
+        do rs2 <- freg_of a2;
+        do rs3 <- freg_of a3;
+      OK (Pfmaddfw rs1 rs2 rs3 ::i k)
+  | Ofmsubf, a1 :: a2 :: a3 :: nil =>
+    assertion (mreg_eq a1 res);
+        do rs1 <- freg_of a1;
+        do rs2 <- freg_of a2;
+        do rs3 <- freg_of a3;
+      OK (Pfmsubfl rs1 rs2 rs3 ::i k)
+  | Ofmsubfs, a1 :: a2 :: a3 :: nil =>
+    assertion (mreg_eq a1 res);
+        do rs1 <- freg_of a1;
+        do rs2 <- freg_of a2;
+        do rs3 <- freg_of a3;
+      OK (Pfmsubfw rs1 rs2 rs3 ::i k)
+
   | Osingleofint, a1 :: nil =>
       do rd <- freg_of res; do rs <- ireg_of a1;
       OK (Pfloatwrnsz rd rs ::i k)
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index cb9ce7ae..b0f8501d 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -528,6 +528,10 @@ Inductive arith_name_arrr : Type :=
   | Pmsubl                                           (**r multiply subtract long *)
   | Pcmove (bt: btest)                               (**r conditional move *)
   | Pcmoveu (bt: btest)                              (**r conditional move, test on unsigned semantics *)
+  | Pfmaddfw                                         (**r float fused multiply add word *)
+  | Pfmaddfl                                         (**r float fused multiply add long *)
+  | Pfmsubfw                                         (**r float fused multiply subtract word *)
+  | Pfmsubfl                                         (**r float fused multiply subtract long *)
 .
 
 Inductive arith_name_arri32 : Type :=
@@ -1177,6 +1181,10 @@ Definition arith_eval_arrr n v1 v2 v3 :=
   | Pmsubl => Val.subl v1 (Val.mull v2 v3)
   | Pcmove bt => cmove bt v1 v2 v3
   | Pcmoveu bt => cmoveu bt v1 v2 v3
+  | Pfmaddfw => ExtValues.fmaddfs v1 v2 v3
+  | Pfmaddfl => ExtValues.fmaddf v1 v2 v3
+  | Pfmsubfw => ExtValues.fmsubfs v1 v2 v3
+  | Pfmsubfl => ExtValues.fmsubf v1 v2 v3
   end.
 
 Definition arith_eval_arr n v1 v2 :=
diff --git a/mppa_k1c/Builtins1.v b/mppa_k1c/Builtins1.v
index 5187ea7d..6186961f 100644
--- a/mppa_k1c/Builtins1.v
+++ b/mppa_k1c/Builtins1.v
@@ -24,7 +24,9 @@ Inductive platform_builtin : Type :=
 | BI_fmax
 | BI_fminf
 | BI_fmaxf
-| BI_fabsf.
+| BI_fabsf
+| BI_fma
+| BI_fmaf.
 
 Local Open Scope string_scope.
 
@@ -34,6 +36,8 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
   :: ("__builtin_fminf", BI_fminf)
   :: ("__builtin_fmaxf", BI_fmaxf)
   :: ("__builtin_fabsf", BI_fabsf)
+  :: ("__builtin_fma", BI_fma)
+  :: ("__builtin_fmaf", BI_fmaf)
   :: nil.
 
 Definition platform_builtin_sig (b: platform_builtin) : signature :=
@@ -44,6 +48,10 @@ Definition platform_builtin_sig (b: platform_builtin) : signature :=
       mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
   | BI_fabsf =>
       mksignature (Tsingle :: nil) (Some Tsingle) cc_default
+  | BI_fma =>
+      mksignature (Tfloat :: Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+  | BI_fmaf =>
+      mksignature (Tsingle :: Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
   end.
 
 Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
@@ -53,4 +61,6 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_re
   | BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min
   | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max
   | BI_fabsf => mkbuiltin_n1t Tsingle Tsingle Float32.abs
+  | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma
+  | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
   end.
diff --git a/mppa_k1c/CBuiltins.ml b/mppa_k1c/CBuiltins.ml
index c0022cb1..3ae6baa7 100644
--- a/mppa_k1c/CBuiltins.ml
+++ b/mppa_k1c/CBuiltins.ml
@@ -130,6 +130,12 @@ let builtins = {
     "__builtin_fminf",
       (TFloat(FFloat, []),
        [TFloat(FFloat, []); TFloat(FFloat, [])], false);
+    "__builtin_fma",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_fmaf",
+      (TFloat(FFloat, []),
+       [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false);
 ]
 }
 
diff --git a/mppa_k1c/ExtValues.v b/mppa_k1c/ExtValues.v
index 3e4b70b5..a8e24c86 100644
--- a/mppa_k1c/ExtValues.v
+++ b/mppa_k1c/ExtValues.v
@@ -715,8 +715,8 @@ Definition triple_op_single f v1 v2 v3 :=
   | _, _, _ => Vundef
   end.
 
-Definition fmaddf := triple_op_float Float.fma.
-Definition fmaddfs := triple_op_single Float32.fma.
+Definition fmaddf := triple_op_float (fun f1 f2 f3 => Float.fma f2 f3 f1).
+Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1).
 
-Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma f1 (Float.neg f2) f3).
-Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma f1 (Float32.neg f2) f3).
+Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1).
+Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1).
diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v
index 5a7d42b4..8098b5d1 100644
--- a/mppa_k1c/Machregs.v
+++ b/mppa_k1c/Machregs.v
@@ -213,6 +213,8 @@ Global Opaque
 
 Definition two_address_op (op: operation) : bool :=
   match op with
+  | Ofmaddf | Ofmaddfs
+  | Ofmsubf | Ofmsubfs
   | Omadd | Omaddimm _
   | Omaddl | Omaddlimm _
   | Omsub | Omsubl
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index e2baa2c0..628ae609 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -32,6 +32,7 @@ type real_instruction =
   | Fabsd | Fabsw | Fnegw | Fnegd
   | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
   | Fmind | Fminw | Fmaxd | Fmaxw | Finvw
+  | Ffmaw | Ffmad | Ffmsw | Ffmsd
   | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
   | Fcompw | Fcompd
 
@@ -173,6 +174,10 @@ let arith_arr_real = function
   | Pinsfl (_, _) ->  Insf
 
 let arith_arrr_real = function
+  | Pfmaddfw -> Ffmaw
+  | Pfmaddfl -> Ffmad
+  | Pfmsubfw -> Ffmsw
+  | Pfmsubfl -> Ffmsd
   | Pmaddw ->     Maddw
   | Pmaddl ->     Maddd
   | Pmsubw ->     Msbfw
@@ -587,7 +592,8 @@ let rec_to_usage r =
   | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
   | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite
   | Fnarrowdw -> alu_full
-  | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> mau
+  | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+  | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau
 
 
 let real_inst_to_latency = function
@@ -608,7 +614,8 @@ let real_inst_to_latency = function
   | Set -> 4 (* According to the manual should be 3, but I measured 4 *)
   | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
   | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
-  | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> 4
+  | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+  | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4
 
 let rec_to_info r : inst_info =
   let usage = rec_to_usage r
diff --git a/mppa_k1c/SelectOp.vp b/mppa_k1c/SelectOp.vp
index 6539184c..71078046 100644
--- a/mppa_k1c/SelectOp.vp
+++ b/mppa_k1c/SelectOp.vp
@@ -683,7 +683,18 @@ Nondetfunction divfs_base (e1: expr) :=
      else divfs_baseX e1)
   | _ => divfs_baseX e1
   end.
-End SELECT.
+
+Nondetfunction gen_fma args :=
+  match args with
+  | e1:::e2:::e3:::Enil => Some (Eop Ofmaddf (e3:::e1:::e2:::Enil))
+  | _ => None
+  end.
+
+Nondetfunction gen_fmaf args :=
+  match args with
+  | e1:::e2:::e3:::Enil => Some (Eop Ofmaddfs (e3:::e1:::e2:::Enil))
+  | _ => None
+  end.
 
 Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
   match b with
@@ -692,7 +703,10 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr
   | BI_fminf => Some (Eop Ominfs args)
   | BI_fmaxf => Some (Eop Omaxfs args)
   | BI_fabsf => Some (Eop Oabsfs args)
+  | BI_fma => gen_fma args
+  | BI_fmaf => gen_fmaf args
   end.
+End SELECT.
 
 (* Local Variables: *)
 (* mode: coq *)
diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v
index 7805a1be..08bcff12 100644
--- a/mppa_k1c/SelectOpproof.v
+++ b/mppa_k1c/SelectOpproof.v
@@ -1663,6 +1663,50 @@ Qed.
 
 (** Platform-specific known builtins *)
 
+Lemma eval_fma:
+  forall al a vl v le,
+  gen_fma al = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  platform_builtin_sem BI_fma vl = Some v ->
+  exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+  unfold gen_fma.
+  intros until le.
+  intro Heval.
+  destruct (gen_fma_match _) in *; try discriminate.
+  inversion Heval; subst a; clear Heval.
+  intro; InvEval.
+  intro Heval.
+  simpl in Heval.
+  inv Heval.
+  TrivialExists.
+  destruct v0; simpl; trivial;
+    destruct v1; simpl; trivial;
+      destruct v2; simpl; trivial.
+Qed.
+
+Lemma eval_fmaf:
+  forall al a vl v le,
+  gen_fmaf al = Some a ->
+  eval_exprlist ge sp e m le al vl ->
+  platform_builtin_sem BI_fmaf vl = Some v ->
+  exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+  unfold gen_fmaf.
+  intros until le.
+  intro Heval.
+  destruct (gen_fmaf_match _) in *; try discriminate.
+  inversion Heval; subst a; clear Heval.
+  intro; InvEval.
+  intro Heval.
+  simpl in Heval.
+  inv Heval.
+  TrivialExists.
+  destruct v0; simpl; trivial;
+    destruct v1; simpl; trivial;
+      destruct v2; simpl; trivial.
+Qed.
+
 Theorem eval_platform_builtin:
   forall bf al a vl v le,
   platform_builtin bf al = Some a ->
@@ -1670,9 +1714,12 @@ Theorem eval_platform_builtin:
   platform_builtin_sem bf vl = Some v ->
   exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
 Proof.
-  destruct bf; intros until le; intro Heval; inversion Heval; subst a; clear Heval.
-  all: exists v; split; trivial;
-  try repeat (try econstructor; try eassumption).
+  destruct bf; intros until le; intro Heval.
+  all: try (inversion Heval; subst a; clear Heval;
+       exists v; split; trivial;
+       repeat (try econstructor; try eassumption)).
+  - apply eval_fma; assumption.
+  - apply eval_fmaf; assumption.
 Qed.
 
 End CMCONSTR.
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 3d3b56a2..e626d2b4 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -574,6 +574,10 @@ module Target (*: TARGET*) =
          fprintf oc "	maddw	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
       | Pmsubw (rd, rs1, rs2) ->
          fprintf oc "	msbfw	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+      | Pfmaddfw (rd, rs1, rs2) ->
+         fprintf oc "	ffmaw	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+      | Pfmsubfw (rd, rs1, rs2) ->
+         fprintf oc "	ffmsw	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
 
       | Paddl (rd, rs1, rs2) ->
          fprintf oc "	addd	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
@@ -615,6 +619,10 @@ module Target (*: TARGET*) =
          fprintf oc "	maddd	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
       | Pmsubl (rd, rs1, rs2) ->
          fprintf oc "	msbfd	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+      | Pfmaddfl (rd, rs1, rs2) ->
+         fprintf oc "	ffmad	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+      | Pfmsubfl (rd, rs1, rs2) ->
+         fprintf oc "	ffmsd	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
 
       | Pfaddd (rd, rs1, rs2) ->
          fprintf oc "	faddd	%a = %a, %a\n" ireg rd ireg rs1 ireg rs2
diff --git a/mppa_k1c/ValueAOp.v b/mppa_k1c/ValueAOp.v
index 4c5fcf71..2c9bdf3e 100644
--- a/mppa_k1c/ValueAOp.v
+++ b/mppa_k1c/ValueAOp.v
@@ -33,10 +33,10 @@ Definition triple_op_single (sem: float32 -> float32 -> float32 -> float32) (x y
   | _, _, _ => ntop3 x y z
   end.
 
-Definition fmaddf := triple_op_float Float.fma.
-Definition fmsubf := triple_op_float (fun x y z => Float.fma x (Float.neg y) z).
-Definition fmaddfs := triple_op_single Float32.fma.
-Definition fmsubfs := triple_op_single (fun x y z => Float32.fma x (Float32.neg y) z).
+Definition fmaddf := triple_op_float (fun x y z => Float.fma y z x).
+Definition fmsubf := triple_op_float (fun x y z => Float.fma (Float.neg y) z x).
+Definition fmaddfs := triple_op_single (fun x y z => Float32.fma y z x).
+Definition fmsubfs := triple_op_single (fun x y z => Float32.fma (Float32.neg y) z x).
 
 Definition invfs (y : aval) :=
   match y with
diff --git a/test/monniaux/builtins/fma.c b/test/monniaux/builtins/fma.c
new file mode 100644
index 00000000..0952e8f3
--- /dev/null
+++ b/test/monniaux/builtins/fma.c
@@ -0,0 +1,12 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+  if (argc < 4) return 1;
+  double x = strtod(argv[1], NULL);
+  double y = strtod(argv[2], NULL);
+  double z = strtod(argv[3], NULL);
+  printf("%g %g %g\n", __builtin_fma(x, y, z), fma(x, y, z), x*y + z);
+  return 0;
+}
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2019-08-30 19:10:42 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2019-08-30 19:10:42 +0200
commit	caac487ae23a9785602cf235f5b4a2b6749f2c18 (patch)
tree	e81ae59a6e06a0373afdfbf8f1ee94f16a2770f2
parent	1522f289301f37da0324570297c65256d8a32316 (diff)
download	compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.tar.gz compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.zip